author     Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-07 19:33:14 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-07 19:33:14 +0000
commit     36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree       105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/libwebrtc/modules/video_coding/codecs/vp9
parent     Initial commit. (diff)
Adding upstream version 115.7.0esr.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/modules/video_coding/codecs/vp9')
14 files changed, 6588 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/DEPS b/third_party/libwebrtc/modules/video_coding/codecs/vp9/DEPS new file mode 100644 index 0000000000..cc5cd70142 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/DEPS @@ -0,0 +1,3 @@ +include_rules = [ + "+media/base", +] diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9.h new file mode 100644 index 0000000000..79d403ded3 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_H_ +#define MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_H_ + +#include <memory> +#include <vector> + +#include "api/video_codecs/scalability_mode.h" +#include "api/video_codecs/sdp_video_format.h" +#include "media/base/codec.h" +#include "modules/video_coding/include/video_codec_interface.h" + +namespace webrtc { + +// Returns a vector with all supported internal VP9 profiles that we can +// negotiate in SDP, in order of preference. +std::vector<SdpVideoFormat> SupportedVP9Codecs( + bool add_scalability_modes = false); + +// Returns a vector with all supported internal VP9 decode profiles in order of +// preference. These will be availble for receive-only connections. +std::vector<SdpVideoFormat> SupportedVP9DecoderCodecs(); + +class VP9Encoder : public VideoEncoder { + public: + // Deprecated. Returns default implementation using VP9 Profile 0. + // TODO(emircan): Remove once this is no longer used. + static std::unique_ptr<VP9Encoder> Create(); + // Parses VP9 Profile from `codec` and returns the appropriate implementation. + static std::unique_ptr<VP9Encoder> Create(const cricket::VideoCodec& codec); + static bool SupportsScalabilityMode(ScalabilityMode scalability_mode); + + ~VP9Encoder() override {} +}; + +class VP9Decoder : public VideoDecoder { + public: + static std::unique_ptr<VP9Decoder> Create(); + + ~VP9Decoder() override {} +}; +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9_globals.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9_globals.h new file mode 100644 index 0000000000..f67215ec77 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9_globals.h @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This file contains codec dependent definitions that are needed in +// order to compile the WebRTC codebase, even if this codec is not used. 
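// For illustration only, a minimal sketch of how the factory API declared in
// vp9.h above is typically used; the calling context (codec negotiation,
// registered callbacks) is assumed and omitted:
//
//   // Enumerate VP9 profiles offerable in SDP, most preferred first.
//   std::vector<webrtc::SdpVideoFormat> formats =
//       webrtc::SupportedVP9Codecs(/*add_scalability_modes=*/true);
//
//   // Create a default Profile 0 encoder and a matching decoder.
//   std::unique_ptr<webrtc::VP9Encoder> encoder = webrtc::VP9Encoder::Create();
//   std::unique_ptr<webrtc::VP9Decoder> decoder = webrtc::VP9Decoder::Create();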
+ +#ifndef MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_GLOBALS_H_ +#define MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_GLOBALS_H_ + +#include <stdint.h> + +#include "modules/video_coding/codecs/interface/common_constants.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +const int16_t kMaxOneBytePictureId = 0x7F; // 7 bits +const int16_t kMaxTwoBytePictureId = 0x7FFF; // 15 bits +const uint8_t kNoSpatialIdx = 0xFF; +const uint8_t kNoGofIdx = 0xFF; +const uint8_t kNumVp9Buffers = 8; +const size_t kMaxVp9RefPics = 3; +const size_t kMaxVp9FramesInGof = 0xFF; // 8 bits +const size_t kMaxVp9NumberOfSpatialLayers = 8; + +const size_t kMinVp9SpatialLayerLongSideLength = 240; +const size_t kMinVp9SpatialLayerShortSideLength = 135; + +enum TemporalStructureMode { + kTemporalStructureMode1, // 1 temporal layer structure - i.e., IPPP... + kTemporalStructureMode2, // 2 temporal layers 01... + kTemporalStructureMode3, // 3 temporal layers 0212... +}; + +struct GofInfoVP9 { + void SetGofInfoVP9(TemporalStructureMode tm) { + switch (tm) { + case kTemporalStructureMode1: + num_frames_in_gof = 1; + temporal_idx[0] = 0; + temporal_up_switch[0] = true; + num_ref_pics[0] = 1; + pid_diff[0][0] = 1; + break; + case kTemporalStructureMode2: + num_frames_in_gof = 2; + temporal_idx[0] = 0; + temporal_up_switch[0] = true; + num_ref_pics[0] = 1; + pid_diff[0][0] = 2; + + temporal_idx[1] = 1; + temporal_up_switch[1] = true; + num_ref_pics[1] = 1; + pid_diff[1][0] = 1; + break; + case kTemporalStructureMode3: + num_frames_in_gof = 4; + temporal_idx[0] = 0; + temporal_up_switch[0] = true; + num_ref_pics[0] = 1; + pid_diff[0][0] = 4; + + temporal_idx[1] = 2; + temporal_up_switch[1] = true; + num_ref_pics[1] = 1; + pid_diff[1][0] = 1; + + temporal_idx[2] = 1; + temporal_up_switch[2] = true; + num_ref_pics[2] = 1; + pid_diff[2][0] = 2; + + temporal_idx[3] = 2; + temporal_up_switch[3] = true; + num_ref_pics[3] = 1; + pid_diff[3][0] = 1; + break; + default: + RTC_DCHECK_NOTREACHED(); + } + } + + void CopyGofInfoVP9(const GofInfoVP9& src) { + num_frames_in_gof = src.num_frames_in_gof; + for (size_t i = 0; i < num_frames_in_gof; ++i) { + temporal_idx[i] = src.temporal_idx[i]; + temporal_up_switch[i] = src.temporal_up_switch[i]; + num_ref_pics[i] = src.num_ref_pics[i]; + for (uint8_t r = 0; r < num_ref_pics[i]; ++r) { + pid_diff[i][r] = src.pid_diff[i][r]; + } + } + } + + size_t num_frames_in_gof; + uint8_t temporal_idx[kMaxVp9FramesInGof]; + bool temporal_up_switch[kMaxVp9FramesInGof]; + uint8_t num_ref_pics[kMaxVp9FramesInGof]; + uint8_t pid_diff[kMaxVp9FramesInGof][kMaxVp9RefPics]; + uint16_t pid_start; +}; + +struct RTPVideoHeaderVP9 { + void InitRTPVideoHeaderVP9() { + inter_pic_predicted = false; + flexible_mode = false; + beginning_of_frame = false; + end_of_frame = false; + ss_data_available = false; + non_ref_for_inter_layer_pred = false; + picture_id = kNoPictureId; + max_picture_id = kMaxTwoBytePictureId; + tl0_pic_idx = kNoTl0PicIdx; + temporal_idx = kNoTemporalIdx; + spatial_idx = kNoSpatialIdx; + temporal_up_switch = false; + inter_layer_predicted = false; + gof_idx = kNoGofIdx; + num_ref_pics = 0; + num_spatial_layers = 1; + first_active_layer = 0; + end_of_picture = true; + } + + bool inter_pic_predicted; // This layer frame is dependent on previously + // coded frame(s). + bool flexible_mode; // This frame is in flexible mode. + bool beginning_of_frame; // True if this packet is the first in a VP9 layer + // frame. + bool end_of_frame; // True if this packet is the last in a VP9 layer frame. 
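// For illustration (not part of the upstream header): the group of frames
// that SetGofInfoVP9(kTemporalStructureMode3) above produces is
//
//   frame 0: temporal_idx 0, pid_diff 4  (references the previous GOF's base)
//   frame 1: temporal_idx 2, pid_diff 1
//   frame 2: temporal_idx 1, pid_diff 2
//   frame 3: temporal_idx 2, pid_diff 1
//
// i.e. the 0212... pattern, with num_ref_pics[i] == 1 for every frame.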
+ bool ss_data_available; // True if SS data is available in this payload + // descriptor. + bool non_ref_for_inter_layer_pred; // True for frame which is not used as + // reference for inter-layer prediction. + int16_t picture_id; // PictureID index, 15 bits; + // kNoPictureId if PictureID does not exist. + int16_t max_picture_id; // Maximum picture ID index; either 0x7F or 0x7FFF; + int16_t tl0_pic_idx; // TL0PIC_IDX, 8 bits; + // kNoTl0PicIdx means no value provided. + uint8_t temporal_idx; // Temporal layer index, or kNoTemporalIdx. + uint8_t spatial_idx; // Spatial layer index, or kNoSpatialIdx. + bool temporal_up_switch; // True if upswitch to higher frame rate is possible + // meaning subsequent higher temporal layer pictures + // will not depend on any picture before the current + // picture (in coding order) with temporal layer ID + // greater than `temporal_idx` of this frame. + bool inter_layer_predicted; // Frame is dependent on directly lower spatial + // layer frame. + + uint8_t gof_idx; // Index to predefined temporal frame info in SS data. + + uint8_t num_ref_pics; // Number of reference pictures used by this layer + // frame. + uint8_t pid_diff[kMaxVp9RefPics]; // P_DIFF signaled to derive the PictureID + // of the reference pictures. + int16_t ref_picture_id[kMaxVp9RefPics]; // PictureID of reference pictures. + + // SS data. + size_t num_spatial_layers; // Always populated. + size_t first_active_layer; // Not sent on wire, used to adjust ss data. + bool spatial_layer_resolution_present; + uint16_t width[kMaxVp9NumberOfSpatialLayers]; + uint16_t height[kMaxVp9NumberOfSpatialLayers]; + GofInfoVP9 gof; + + bool end_of_picture; // This frame is the last frame in picture. +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_GLOBALS_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc new file mode 100644 index 0000000000..a981f259cf --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc @@ -0,0 +1,403 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#ifdef RTC_ENABLE_VP9 + +#include "modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h" + +#include <algorithm> + +#include "absl/strings/match.h" +#include "api/transport/field_trial_based_config.h" +#include "api/video/color_space.h" +#include "api/video/i010_buffer.h" +#include "common_video/include/video_frame_buffer.h" +#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "third_party/libyuv/include/libyuv/convert.h" +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" + +namespace webrtc { +namespace { + +// Helper class for extracting VP9 colorspace. 
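// For example (illustrative): ExtractVP9ColorSpace(VPX_CS_BT_709,
// VPX_CR_STUDIO_RANGE, 8) yields ColorSpace(PrimaryID::kBT709,
// TransferID::kBT709, MatrixID::kBT709, RangeID::kLimited), while an
// unrecognized vpx_color_space_t falls through to kUnspecified.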
+ColorSpace ExtractVP9ColorSpace(vpx_color_space_t space_t, + vpx_color_range_t range_t, + unsigned int bit_depth) { + ColorSpace::PrimaryID primaries = ColorSpace::PrimaryID::kUnspecified; + ColorSpace::TransferID transfer = ColorSpace::TransferID::kUnspecified; + ColorSpace::MatrixID matrix = ColorSpace::MatrixID::kUnspecified; + switch (space_t) { + case VPX_CS_BT_601: + case VPX_CS_SMPTE_170: + primaries = ColorSpace::PrimaryID::kSMPTE170M; + transfer = ColorSpace::TransferID::kSMPTE170M; + matrix = ColorSpace::MatrixID::kSMPTE170M; + break; + case VPX_CS_SMPTE_240: + primaries = ColorSpace::PrimaryID::kSMPTE240M; + transfer = ColorSpace::TransferID::kSMPTE240M; + matrix = ColorSpace::MatrixID::kSMPTE240M; + break; + case VPX_CS_BT_709: + primaries = ColorSpace::PrimaryID::kBT709; + transfer = ColorSpace::TransferID::kBT709; + matrix = ColorSpace::MatrixID::kBT709; + break; + case VPX_CS_BT_2020: + primaries = ColorSpace::PrimaryID::kBT2020; + switch (bit_depth) { + case 8: + transfer = ColorSpace::TransferID::kBT709; + break; + case 10: + transfer = ColorSpace::TransferID::kBT2020_10; + break; + default: + RTC_DCHECK_NOTREACHED(); + break; + } + matrix = ColorSpace::MatrixID::kBT2020_NCL; + break; + case VPX_CS_SRGB: + primaries = ColorSpace::PrimaryID::kBT709; + transfer = ColorSpace::TransferID::kIEC61966_2_1; + matrix = ColorSpace::MatrixID::kBT709; + break; + default: + break; + } + + ColorSpace::RangeID range = ColorSpace::RangeID::kInvalid; + switch (range_t) { + case VPX_CR_STUDIO_RANGE: + range = ColorSpace::RangeID::kLimited; + break; + case VPX_CR_FULL_RANGE: + range = ColorSpace::RangeID::kFull; + break; + default: + break; + } + return ColorSpace(primaries, transfer, matrix, range); +} + +} // namespace + +LibvpxVp9Decoder::LibvpxVp9Decoder() + : decode_complete_callback_(nullptr), + inited_(false), + decoder_(nullptr), + key_frame_required_(true) {} + +LibvpxVp9Decoder::~LibvpxVp9Decoder() { + inited_ = true; // in order to do the actual release + Release(); + int num_buffers_in_use = libvpx_buffer_pool_.GetNumBuffersInUse(); + if (num_buffers_in_use > 0) { + // The frame buffers are reference counted and frames are exposed after + // decoding. There may be valid usage cases where previous frames are still + // referenced after ~LibvpxVp9Decoder that is not a leak. + RTC_LOG(LS_INFO) << num_buffers_in_use + << " Vp9FrameBuffers are still " + "referenced during ~LibvpxVp9Decoder."; + } +} + +bool LibvpxVp9Decoder::Configure(const Settings& settings) { + if (Release() < 0) { + return false; + } + + if (decoder_ == nullptr) { + decoder_ = new vpx_codec_ctx_t; + memset(decoder_, 0, sizeof(*decoder_)); + } + vpx_codec_dec_cfg_t cfg; + memset(&cfg, 0, sizeof(cfg)); + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + // We focus on webrtc fuzzing here, not libvpx itself. Use single thread for + // fuzzing, because: + // - libvpx's VP9 single thread decoder is more fuzzer friendly. It detects + // errors earlier than the multi-threads version. + // - Make peak CPU usage under control (not depending on input) + cfg.threads = 1; +#else + const RenderResolution& resolution = settings.max_render_resolution(); + if (!resolution.Valid()) { + // Postpone configuring number of threads until resolution is known. + cfg.threads = 1; + } else { + // We want to use multithreading when decoding high resolution videos. But + // not too many in order to avoid overhead when many stream are decoded + // concurrently. 
+ // Set 2 thread as target for 1280x720 pixel count, and then scale up + // linearly from there - but cap at physical core count. + // For common resolutions this results in: + // 1 for 360p + // 2 for 720p + // 4 for 1080p + // 8 for 1440p + // 18 for 4K + int num_threads = std::max( + 1, 2 * resolution.Width() * resolution.Height() / (1280 * 720)); + cfg.threads = std::min(settings.number_of_cores(), num_threads); + } +#endif + + current_settings_ = settings; + + vpx_codec_flags_t flags = 0; + if (vpx_codec_dec_init(decoder_, vpx_codec_vp9_dx(), &cfg, flags)) { + return false; + } + + if (!libvpx_buffer_pool_.InitializeVpxUsePool(decoder_)) { + return false; + } + + inited_ = true; + // Always start with a complete key frame. + key_frame_required_ = true; + if (absl::optional<int> buffer_pool_size = settings.buffer_pool_size()) { + if (!libvpx_buffer_pool_.Resize(*buffer_pool_size)) { + return false; + } + } + + vpx_codec_err_t status = + vpx_codec_control(decoder_, VP9D_SET_LOOP_FILTER_OPT, 1); + if (status != VPX_CODEC_OK) { + RTC_LOG(LS_ERROR) << "Failed to enable VP9D_SET_LOOP_FILTER_OPT. " + << vpx_codec_error(decoder_); + return false; + } + + return true; +} + +int LibvpxVp9Decoder::Decode(const EncodedImage& input_image, + bool missing_frames, + int64_t /*render_time_ms*/) { + if (!inited_) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (decode_complete_callback_ == nullptr) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + if (input_image._frameType == VideoFrameType::kVideoFrameKey) { + absl::optional<Vp9UncompressedHeader> frame_info = + ParseUncompressedVp9Header( + rtc::MakeArrayView(input_image.data(), input_image.size())); + if (frame_info) { + RenderResolution frame_resolution(frame_info->frame_width, + frame_info->frame_height); + if (frame_resolution != current_settings_.max_render_resolution()) { + // Resolution has changed, tear down and re-init a new decoder in + // order to get correct sizing. + Release(); + current_settings_.set_max_render_resolution(frame_resolution); + if (!Configure(current_settings_)) { + RTC_LOG(LS_WARNING) << "Failed to re-init decoder."; + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + } + } else { + RTC_LOG(LS_WARNING) << "Failed to parse VP9 header from key-frame."; + } + } + + // Always start with a complete key frame. + if (key_frame_required_) { + if (input_image._frameType != VideoFrameType::kVideoFrameKey) + return WEBRTC_VIDEO_CODEC_ERROR; + key_frame_required_ = false; + } + vpx_codec_iter_t iter = nullptr; + vpx_image_t* img; + const uint8_t* buffer = input_image.data(); + if (input_image.size() == 0) { + buffer = nullptr; // Triggers full frame concealment. + } + // During decode libvpx may get and release buffers from + // `libvpx_buffer_pool_`. In practice libvpx keeps a few (~3-4) buffers alive + // at a time. + if (vpx_codec_decode(decoder_, buffer, + static_cast<unsigned int>(input_image.size()), 0, + VPX_DL_REALTIME)) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + // `img->fb_priv` contains the image data, a reference counted Vp9FrameBuffer. + // It may be released by libvpx during future vpx_codec_decode or + // vpx_codec_destroy calls. 
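// Illustrative sketch of the wrapping pattern used below (assumed reader
// context, no new behavior): capturing the scoped_refptr by value in the
// release callback keeps the underlying Vp9FrameBuffer alive while the
// wrapped buffer is in use, e.g.
//
//   rtc::scoped_refptr<Vp9FrameBufferPool::Vp9FrameBuffer> img_buffer(
//       static_cast<Vp9FrameBufferPool::Vp9FrameBuffer*>(img->fb_priv));
//   rtc::scoped_refptr<webrtc::VideoFrameBuffer> wrapped =
//       webrtc::WrapI420Buffer(
//           img->d_w, img->d_h,
//           img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
//           img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
//           img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
//           [img_buffer] {});  // by-value capture holds one reference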
+ img = vpx_codec_get_frame(decoder_, &iter); + int qp; + vpx_codec_err_t vpx_ret = + vpx_codec_control(decoder_, VPXD_GET_LAST_QUANTIZER, &qp); + RTC_DCHECK_EQ(vpx_ret, VPX_CODEC_OK); + int ret = + ReturnFrame(img, input_image.Timestamp(), qp, input_image.ColorSpace()); + if (ret != 0) { + return ret; + } + return WEBRTC_VIDEO_CODEC_OK; +} + +int LibvpxVp9Decoder::ReturnFrame( + const vpx_image_t* img, + uint32_t timestamp, + int qp, + const webrtc::ColorSpace* explicit_color_space) { + if (img == nullptr) { + // Decoder OK and nullptr image => No show frame. + return WEBRTC_VIDEO_CODEC_NO_OUTPUT; + } + + // This buffer contains all of `img`'s image data, a reference counted + // Vp9FrameBuffer. (libvpx is done with the buffers after a few + // vpx_codec_decode calls or vpx_codec_destroy). + rtc::scoped_refptr<Vp9FrameBufferPool::Vp9FrameBuffer> img_buffer( + static_cast<Vp9FrameBufferPool::Vp9FrameBuffer*>(img->fb_priv)); + + // The buffer can be used directly by the VideoFrame (without copy) by + // using a Wrapped*Buffer. + rtc::scoped_refptr<VideoFrameBuffer> img_wrapped_buffer; + switch (img->fmt) { + case VPX_IMG_FMT_I420: + img_wrapped_buffer = WrapI420Buffer( + img->d_w, img->d_h, img->planes[VPX_PLANE_Y], + img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U], + img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V], + img->stride[VPX_PLANE_V], + // WrappedI420Buffer's mechanism for allowing the release of its + // frame buffer is through a callback function. This is where we + // should release `img_buffer`. + [img_buffer] {}); + break; + case VPX_IMG_FMT_I422: + img_wrapped_buffer = WrapI422Buffer( + img->d_w, img->d_h, img->planes[VPX_PLANE_Y], + img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U], + img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V], + img->stride[VPX_PLANE_V], + // WrappedI444Buffer's mechanism for allowing the release of its + // frame buffer is through a callback function. This is where we + // should release `img_buffer`. + [img_buffer] {}); + break; + case VPX_IMG_FMT_I444: + img_wrapped_buffer = WrapI444Buffer( + img->d_w, img->d_h, img->planes[VPX_PLANE_Y], + img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U], + img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V], + img->stride[VPX_PLANE_V], + // WrappedI444Buffer's mechanism for allowing the release of its + // frame buffer is through a callback function. This is where we + // should release `img_buffer`. 
+ [img_buffer] {}); + break; + case VPX_IMG_FMT_I42016: + img_wrapped_buffer = WrapI010Buffer( + img->d_w, img->d_h, + reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_Y]), + img->stride[VPX_PLANE_Y] / 2, + reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_U]), + img->stride[VPX_PLANE_U] / 2, + reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_V]), + img->stride[VPX_PLANE_V] / 2, [img_buffer] {}); + break; + case VPX_IMG_FMT_I42216: + img_wrapped_buffer = WrapI210Buffer( + img->d_w, img->d_h, + reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_Y]), + img->stride[VPX_PLANE_Y] / 2, + reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_U]), + img->stride[VPX_PLANE_U] / 2, + reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_V]), + img->stride[VPX_PLANE_V] / 2, [img_buffer] {}); + break; + case VPX_IMG_FMT_I44416: + img_wrapped_buffer = WrapI410Buffer( + img->d_w, img->d_h, + reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_Y]), + img->stride[VPX_PLANE_Y] / 2, + reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_U]), + img->stride[VPX_PLANE_U] / 2, + reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_V]), + img->stride[VPX_PLANE_V] / 2, [img_buffer] {}); + break; + default: + RTC_LOG(LS_ERROR) << "Unsupported pixel format produced by the decoder: " + << static_cast<int>(img->fmt); + return WEBRTC_VIDEO_CODEC_NO_OUTPUT; + } + + auto builder = VideoFrame::Builder() + .set_video_frame_buffer(img_wrapped_buffer) + .set_timestamp_rtp(timestamp); + if (explicit_color_space) { + builder.set_color_space(*explicit_color_space); + } else { + builder.set_color_space( + ExtractVP9ColorSpace(img->cs, img->range, img->bit_depth)); + } + VideoFrame decoded_image = builder.build(); + + decode_complete_callback_->Decoded(decoded_image, absl::nullopt, qp); + return WEBRTC_VIDEO_CODEC_OK; +} + +int LibvpxVp9Decoder::RegisterDecodeCompleteCallback( + DecodedImageCallback* callback) { + decode_complete_callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +int LibvpxVp9Decoder::Release() { + int ret_val = WEBRTC_VIDEO_CODEC_OK; + + if (decoder_ != nullptr) { + if (inited_) { + // When a codec is destroyed libvpx will release any buffers of + // `libvpx_buffer_pool_` it is currently using. + if (vpx_codec_destroy(decoder_)) { + ret_val = WEBRTC_VIDEO_CODEC_MEMORY; + } + } + delete decoder_; + decoder_ = nullptr; + } + // Releases buffers from the pool. Any buffers not in use are deleted. Buffers + // still referenced externally are deleted once fully released, not returning + // to the pool. + libvpx_buffer_pool_.ClearPool(); + inited_ = false; + return ret_val; +} + +VideoDecoder::DecoderInfo LibvpxVp9Decoder::GetDecoderInfo() const { + DecoderInfo info; + info.implementation_name = "libvpx"; + info.is_hardware_accelerated = false; + return info; +} + +const char* LibvpxVp9Decoder::ImplementationName() const { + return "libvpx"; +} + +} // namespace webrtc + +#endif // RTC_ENABLE_VP9 diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h new file mode 100644 index 0000000000..65fc553b82 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_DECODER_H_ +#define MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_DECODER_H_ + +#ifdef RTC_ENABLE_VP9 + +#include "api/video_codecs/video_decoder.h" +#include "modules/video_coding/codecs/vp9/include/vp9.h" +#include "modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h" +#include "vpx/vp8cx.h" + +namespace webrtc { + +class LibvpxVp9Decoder : public VP9Decoder { + public: + LibvpxVp9Decoder(); + virtual ~LibvpxVp9Decoder(); + + bool Configure(const Settings& settings) override; + + int Decode(const EncodedImage& input_image, + bool missing_frames, + int64_t /*render_time_ms*/) override; + + int RegisterDecodeCompleteCallback(DecodedImageCallback* callback) override; + + int Release() override; + + DecoderInfo GetDecoderInfo() const override; + const char* ImplementationName() const override; + + private: + int ReturnFrame(const vpx_image_t* img, + uint32_t timestamp, + int qp, + const webrtc::ColorSpace* explicit_color_space); + + // Memory pool used to share buffers between libvpx and webrtc. + Vp9FrameBufferPool libvpx_buffer_pool_; + DecodedImageCallback* decode_complete_callback_; + bool inited_; + vpx_codec_ctx_t* decoder_; + bool key_frame_required_; + Settings current_settings_; +}; +} // namespace webrtc + +#endif // RTC_ENABLE_VP9 + +#endif // MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_DECODER_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc new file mode 100644 index 0000000000..5877373b76 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc @@ -0,0 +1,2194 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ * + */ + +#include <memory> +#ifdef RTC_ENABLE_VP9 + +#include <algorithm> +#include <limits> +#include <tuple> +#include <utility> +#include <vector> + +#include "absl/algorithm/container.h" +#include "absl/memory/memory.h" +#include "absl/strings/match.h" +#include "absl/types/optional.h" +#include "api/video/color_space.h" +#include "api/video/i010_buffer.h" +#include "api/video_codecs/scalability_mode.h" +#include "common_video/include/video_frame_buffer.h" +#include "common_video/libyuv/include/webrtc_libyuv.h" +#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" +#include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h" +#include "modules/video_coding/svc/create_scalability_structure.h" +#include "modules/video_coding/svc/scalability_mode_util.h" +#include "modules/video_coding/svc/scalable_video_controller.h" +#include "modules/video_coding/svc/scalable_video_controller_no_layering.h" +#include "modules/video_coding/svc/svc_rate_allocator.h" +#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h" +#include "rtc_base/checks.h" +#include "rtc_base/experiments/field_trial_list.h" +#include "rtc_base/experiments/field_trial_parser.h" +#include "rtc_base/experiments/rate_control_settings.h" +#include "rtc_base/logging.h" +#include "rtc_base/strings/string_builder.h" +#include "rtc_base/time_utils.h" +#include "rtc_base/trace_event.h" +#include "third_party/libyuv/include/libyuv/convert.h" +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" + +namespace webrtc { + +namespace { +// Maps from gof_idx to encoder internal reference frame buffer index. These +// maps work for 1,2 and 3 temporal layers with GOF length of 1,2 and 4 frames. +uint8_t kRefBufIdx[4] = {0, 0, 0, 1}; +uint8_t kUpdBufIdx[4] = {0, 0, 1, 0}; + +// Maximum allowed PID difference for differnet per-layer frame-rate case. +const int kMaxAllowedPidDiff = 30; + +// TODO(ilink): Tune these thresholds further. +// Selected using ConverenceMotion_1280_720_50.yuv clip. +// No toggling observed on any link capacity from 100-2000kbps. +// HD was reached consistently when link capacity was 1500kbps. +// Set resolutions are a bit more conservative than svc_config.cc sets, e.g. +// for 300kbps resolution converged to 270p instead of 360p. +constexpr int kLowVp9QpThreshold = 149; +constexpr int kHighVp9QpThreshold = 205; + +std::pair<size_t, size_t> GetActiveLayers( + const VideoBitrateAllocation& allocation) { + for (size_t sl_idx = 0; sl_idx < kMaxSpatialLayers; ++sl_idx) { + if (allocation.GetSpatialLayerSum(sl_idx) > 0) { + size_t last_layer = sl_idx + 1; + while (last_layer < kMaxSpatialLayers && + allocation.GetSpatialLayerSum(last_layer) > 0) { + ++last_layer; + } + return std::make_pair(sl_idx, last_layer); + } + } + return {0, 0}; +} + +using Vp9ScalabilityStructure = + std::tuple<std::unique_ptr<ScalableVideoController>, ScalabilityMode>; +absl::optional<Vp9ScalabilityStructure> CreateVp9ScalabilityStructure( + const VideoCodec& codec) { + int num_spatial_layers = codec.VP9().numberOfSpatialLayers; + int num_temporal_layers = + std::max(1, int{codec.VP9().numberOfTemporalLayers}); + if (num_spatial_layers == 1 && num_temporal_layers == 1) { + return absl::make_optional<Vp9ScalabilityStructure>( + std::make_unique<ScalableVideoControllerNoLayering>(), + ScalabilityMode::kL1T1); + } + + char name[20]; + rtc::SimpleStringBuilder ss(name); + if (codec.mode == VideoCodecMode::kScreensharing) { + // TODO(bugs.webrtc.org/11999): Compose names of the structures when they + // are implemented. 
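// For reference (illustrative, mirroring the branches below): 3 spatial and
// 3 temporal layers yield "L3T3" for interLayerPred == kOn, "L3T3_KEY" for
// kOnKeyPic, and "S3T3" for kOff; a 2:3 spatial ratio appends an "h" suffix,
// e.g. "L2T3" becomes "L2T3h".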
+ return absl::nullopt; + } else if (codec.VP9().interLayerPred == InterLayerPredMode::kOn || + num_spatial_layers == 1) { + ss << "L" << num_spatial_layers << "T" << num_temporal_layers; + } else if (codec.VP9().interLayerPred == InterLayerPredMode::kOnKeyPic) { + ss << "L" << num_spatial_layers << "T" << num_temporal_layers << "_KEY"; + } else { + RTC_DCHECK_EQ(codec.VP9().interLayerPred, InterLayerPredMode::kOff); + ss << "S" << num_spatial_layers << "T" << num_temporal_layers; + } + + // Check spatial ratio. + if (num_spatial_layers > 1 && codec.spatialLayers[0].targetBitrate > 0) { + if (codec.width != codec.spatialLayers[num_spatial_layers - 1].width || + codec.height != codec.spatialLayers[num_spatial_layers - 1].height) { + RTC_LOG(LS_WARNING) + << "Top layer resolution expected to match overall resolution"; + return absl::nullopt; + } + // Check if the ratio is one of the supported. + int numerator; + int denominator; + if (codec.spatialLayers[1].width == 2 * codec.spatialLayers[0].width) { + numerator = 1; + denominator = 2; + // no suffix for 1:2 ratio. + } else if (2 * codec.spatialLayers[1].width == + 3 * codec.spatialLayers[0].width) { + numerator = 2; + denominator = 3; + ss << "h"; + } else { + RTC_LOG(LS_WARNING) << "Unsupported scalability ratio " + << codec.spatialLayers[0].width << ":" + << codec.spatialLayers[1].width; + return absl::nullopt; + } + // Validate ratio is consistent for all spatial layer transitions. + for (int sid = 1; sid < num_spatial_layers; ++sid) { + if (codec.spatialLayers[sid].width * numerator != + codec.spatialLayers[sid - 1].width * denominator || + codec.spatialLayers[sid].height * numerator != + codec.spatialLayers[sid - 1].height * denominator) { + RTC_LOG(LS_WARNING) << "Inconsistent scalability ratio " << numerator + << ":" << denominator; + return absl::nullopt; + } + } + } + + absl::optional<ScalabilityMode> scalability_mode = + ScalabilityModeFromString(name); + if (!scalability_mode.has_value()) { + RTC_LOG(LS_WARNING) << "Invalid scalability mode " << name; + return absl::nullopt; + } + auto scalability_structure_controller = + CreateScalabilityStructure(*scalability_mode); + if (scalability_structure_controller == nullptr) { + RTC_LOG(LS_WARNING) << "Unsupported scalability structure " << name; + } else { + RTC_LOG(LS_INFO) << "Created scalability structure " << name; + } + return absl::make_optional<Vp9ScalabilityStructure>( + std::move(scalability_structure_controller), *scalability_mode); +} + +vpx_svc_ref_frame_config_t Vp9References( + rtc::ArrayView<const ScalableVideoController::LayerFrameConfig> layers) { + vpx_svc_ref_frame_config_t ref_config = {}; + for (const ScalableVideoController::LayerFrameConfig& layer_frame : layers) { + const auto& buffers = layer_frame.Buffers(); + RTC_DCHECK_LE(buffers.size(), 3); + int sid = layer_frame.SpatialId(); + if (!buffers.empty()) { + ref_config.lst_fb_idx[sid] = buffers[0].id; + ref_config.reference_last[sid] = buffers[0].referenced; + if (buffers[0].updated) { + ref_config.update_buffer_slot[sid] |= (1 << buffers[0].id); + } + } + if (buffers.size() > 1) { + ref_config.gld_fb_idx[sid] = buffers[1].id; + ref_config.reference_golden[sid] = buffers[1].referenced; + if (buffers[1].updated) { + ref_config.update_buffer_slot[sid] |= (1 << buffers[1].id); + } + } + if (buffers.size() > 2) { + ref_config.alt_fb_idx[sid] = buffers[2].id; + ref_config.reference_alt_ref[sid] = buffers[2].referenced; + if (buffers[2].updated) { + ref_config.update_buffer_slot[sid] |= (1 << buffers[2].id); + } + } 
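// Illustrative example of the bitmask built above (no new logic): if the
// layer frame for spatial id 0 updates buffers 1 and 5,
// update_buffer_slot[0] ends up as (1 << 1) | (1 << 5) == 0b100010.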
+ } + // TODO(bugs.webrtc.org/11999): Fill ref_config.duration + return ref_config; +} + +bool AllowDenoising() { + // Do not enable the denoiser on ARM since optimization is pending. + // Denoiser is on by default on other platforms. +#if !defined(WEBRTC_ARCH_ARM) && !defined(WEBRTC_ARCH_ARM64) && \ + !defined(ANDROID) + return true; +#else + return false; +#endif +} + +} // namespace + +void LibvpxVp9Encoder::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt, + void* user_data) { + LibvpxVp9Encoder* enc = static_cast<LibvpxVp9Encoder*>(user_data); + enc->GetEncodedLayerFrame(pkt); +} + +LibvpxVp9Encoder::LibvpxVp9Encoder(const cricket::VideoCodec& codec, + std::unique_ptr<LibvpxInterface> interface, + const FieldTrialsView& trials) + : libvpx_(std::move(interface)), + encoded_image_(), + encoded_complete_callback_(nullptr), + profile_( + ParseSdpForVP9Profile(codec.params).value_or(VP9Profile::kProfile0)), + inited_(false), + timestamp_(0), + rc_max_intra_target_(0), + encoder_(nullptr), + config_(nullptr), + raw_(nullptr), + input_image_(nullptr), + force_key_frame_(true), + pics_since_key_(0), + num_temporal_layers_(0), + num_spatial_layers_(0), + num_active_spatial_layers_(0), + first_active_layer_(0), + layer_deactivation_requires_key_frame_(absl::StartsWith( + trials.Lookup("WebRTC-Vp9IssueKeyFrameOnLayerDeactivation"), + "Enabled")), + is_svc_(false), + inter_layer_pred_(InterLayerPredMode::kOn), + external_ref_control_(false), // Set in InitEncode because of tests. + trusted_rate_controller_( + RateControlSettings::ParseFromKeyValueConfig(&trials) + .LibvpxVp9TrustedRateController()), + layer_buffering_(false), + full_superframe_drop_(true), + first_frame_in_picture_(true), + ss_info_needed_(false), + force_all_active_layers_(false), + num_cores_(0), + is_flexible_mode_(false), + variable_framerate_experiment_(ParseVariableFramerateConfig(trials)), + variable_framerate_controller_( + variable_framerate_experiment_.framerate_limit), + quality_scaler_experiment_(ParseQualityScalerConfig(trials)), + external_ref_ctrl_( + !absl::StartsWith(trials.Lookup("WebRTC-Vp9ExternalRefCtrl"), + "Disabled")), + performance_flags_(ParsePerformanceFlagsFromTrials(trials)), + num_steady_state_frames_(0), + config_changed_(true) { + codec_ = {}; + memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t)); +} + +LibvpxVp9Encoder::~LibvpxVp9Encoder() { + Release(); +} + +void LibvpxVp9Encoder::SetFecControllerOverride(FecControllerOverride*) { + // Ignored. +} + +int LibvpxVp9Encoder::Release() { + int ret_val = WEBRTC_VIDEO_CODEC_OK; + + if (encoder_ != nullptr) { + if (inited_) { + if (libvpx_->codec_destroy(encoder_)) { + ret_val = WEBRTC_VIDEO_CODEC_MEMORY; + } + } + delete encoder_; + encoder_ = nullptr; + } + if (config_ != nullptr) { + delete config_; + config_ = nullptr; + } + if (raw_ != nullptr) { + libvpx_->img_free(raw_); + raw_ = nullptr; + } + inited_ = false; + return ret_val; +} + +bool LibvpxVp9Encoder::ExplicitlyConfiguredSpatialLayers() const { + // We check target_bitrate_bps of the 0th layer to see if the spatial layers + // (i.e. bitrates) were explicitly configured. 
+ return codec_.spatialLayers[0].targetBitrate > 0; +} + +bool LibvpxVp9Encoder::SetSvcRates( + const VideoBitrateAllocation& bitrate_allocation) { + std::pair<size_t, size_t> current_layers = + GetActiveLayers(current_bitrate_allocation_); + std::pair<size_t, size_t> new_layers = GetActiveLayers(bitrate_allocation); + + const bool layer_activation_requires_key_frame = + inter_layer_pred_ == InterLayerPredMode::kOff || + inter_layer_pred_ == InterLayerPredMode::kOnKeyPic; + const bool lower_layers_enabled = new_layers.first < current_layers.first; + const bool higher_layers_enabled = new_layers.second > current_layers.second; + const bool disabled_layers = new_layers.first > current_layers.first || + new_layers.second < current_layers.second; + + if (lower_layers_enabled || + (higher_layers_enabled && layer_activation_requires_key_frame) || + (disabled_layers && layer_deactivation_requires_key_frame_)) { + force_key_frame_ = true; + } + + if (current_layers != new_layers) { + ss_info_needed_ = true; + } + + config_->rc_target_bitrate = bitrate_allocation.get_sum_kbps(); + + if (ExplicitlyConfiguredSpatialLayers()) { + for (size_t sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) { + const bool was_layer_active = (config_->ss_target_bitrate[sl_idx] > 0); + config_->ss_target_bitrate[sl_idx] = + bitrate_allocation.GetSpatialLayerSum(sl_idx) / 1000; + + for (size_t tl_idx = 0; tl_idx < num_temporal_layers_; ++tl_idx) { + config_->layer_target_bitrate[sl_idx * num_temporal_layers_ + tl_idx] = + bitrate_allocation.GetTemporalLayerSum(sl_idx, tl_idx) / 1000; + } + + if (!was_layer_active) { + // Reset frame rate controller if layer is resumed after pause. + framerate_controller_[sl_idx].Reset(); + } + + framerate_controller_[sl_idx].SetTargetRate( + codec_.spatialLayers[sl_idx].maxFramerate); + } + } else { + float rate_ratio[VPX_MAX_LAYERS] = {0}; + float total = 0; + for (int i = 0; i < num_spatial_layers_; ++i) { + if (svc_params_.scaling_factor_num[i] <= 0 || + svc_params_.scaling_factor_den[i] <= 0) { + RTC_LOG(LS_ERROR) << "Scaling factors not specified!"; + return false; + } + rate_ratio[i] = static_cast<float>(svc_params_.scaling_factor_num[i]) / + svc_params_.scaling_factor_den[i]; + total += rate_ratio[i]; + } + + for (int i = 0; i < num_spatial_layers_; ++i) { + RTC_CHECK_GT(total, 0); + config_->ss_target_bitrate[i] = static_cast<unsigned int>( + config_->rc_target_bitrate * rate_ratio[i] / total); + if (num_temporal_layers_ == 1) { + config_->layer_target_bitrate[i] = config_->ss_target_bitrate[i]; + } else if (num_temporal_layers_ == 2) { + config_->layer_target_bitrate[i * num_temporal_layers_] = + config_->ss_target_bitrate[i] * 2 / 3; + config_->layer_target_bitrate[i * num_temporal_layers_ + 1] = + config_->ss_target_bitrate[i]; + } else if (num_temporal_layers_ == 3) { + config_->layer_target_bitrate[i * num_temporal_layers_] = + config_->ss_target_bitrate[i] / 2; + config_->layer_target_bitrate[i * num_temporal_layers_ + 1] = + config_->layer_target_bitrate[i * num_temporal_layers_] + + (config_->ss_target_bitrate[i] / 4); + config_->layer_target_bitrate[i * num_temporal_layers_ + 2] = + config_->ss_target_bitrate[i]; + } else { + RTC_LOG(LS_ERROR) << "Unsupported number of temporal layers: " + << num_temporal_layers_; + return false; + } + + framerate_controller_[i].SetTargetRate(codec_.maxFramerate); + } + } + + num_active_spatial_layers_ = 0; + first_active_layer_ = 0; + bool seen_active_layer = false; + bool expect_no_more_active_layers = false; + for (int i = 0; i < 
num_spatial_layers_; ++i) { + if (config_->ss_target_bitrate[i] > 0) { + RTC_DCHECK(!expect_no_more_active_layers) << "Only middle layer is " + "deactivated."; + if (!seen_active_layer) { + first_active_layer_ = i; + } + num_active_spatial_layers_ = i + 1; + seen_active_layer = true; + } else { + expect_no_more_active_layers = seen_active_layer; + } + } + + if (seen_active_layer && performance_flags_.use_per_layer_speed) { + bool denoiser_on = + AllowDenoising() && codec_.VP9()->denoisingOn && + performance_flags_by_spatial_index_[num_active_spatial_layers_ - 1] + .allow_denoising; + libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY, + denoiser_on ? 1 : 0); + } + + if (higher_layers_enabled && !force_key_frame_) { + // Prohibit drop of all layers for the next frame, so newly enabled + // layer would have a valid spatial reference. + for (size_t i = 0; i < num_spatial_layers_; ++i) { + svc_drop_frame_.framedrop_thresh[i] = 0; + } + force_all_active_layers_ = true; + } + + if (svc_controller_) { + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + // Bitrates in `layer_target_bitrate` are accumulated for each temporal + // layer but in `VideoBitrateAllocation` they should be separated. + int previous_bitrate_kbps = 0; + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + int accumulated_bitrate_kbps = + config_->layer_target_bitrate[sid * num_temporal_layers_ + tid]; + int single_layer_bitrate_kbps = + accumulated_bitrate_kbps - previous_bitrate_kbps; + RTC_DCHECK_GE(single_layer_bitrate_kbps, 0); + current_bitrate_allocation_.SetBitrate( + sid, tid, single_layer_bitrate_kbps * 1'000); + previous_bitrate_kbps = accumulated_bitrate_kbps; + } + } + svc_controller_->OnRatesUpdated(current_bitrate_allocation_); + } else { + current_bitrate_allocation_ = bitrate_allocation; + } + config_changed_ = true; + return true; +} + +void LibvpxVp9Encoder::DisableSpatialLayer(int sid) { + RTC_DCHECK_LT(sid, num_spatial_layers_); + if (config_->ss_target_bitrate[sid] == 0) { + return; + } + config_->ss_target_bitrate[sid] = 0; + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + config_->layer_target_bitrate[sid * num_temporal_layers_ + tid] = 0; + } + config_changed_ = true; +} + +void LibvpxVp9Encoder::EnableSpatialLayer(int sid) { + RTC_DCHECK_LT(sid, num_spatial_layers_); + if (config_->ss_target_bitrate[sid] > 0) { + return; + } + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + config_->layer_target_bitrate[sid * num_temporal_layers_ + tid] = + current_bitrate_allocation_.GetTemporalLayerSum(sid, tid) / 1000; + } + config_->ss_target_bitrate[sid] = + current_bitrate_allocation_.GetSpatialLayerSum(sid) / 1000; + RTC_DCHECK_GT(config_->ss_target_bitrate[sid], 0); + config_changed_ = true; +} + +void LibvpxVp9Encoder::SetActiveSpatialLayers() { + // Svc controller may decide to skip a frame at certain spatial layer even + // when bitrate for it is non-zero, however libvpx uses configured bitrate as + // a signal which layers should be produced. 
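// Illustrative example (assumed scenario): with three configured spatial
// layers, if `layer_frames_` for the current frame only contains configs for
// spatial ids 0 and 2, the loop below calls EnableSpatialLayer(0),
// DisableSpatialLayer(1), and EnableSpatialLayer(2), zeroing layer 1's target
// bitrate so libvpx does not produce it.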
+ RTC_DCHECK(svc_controller_); + RTC_DCHECK(!layer_frames_.empty()); + RTC_DCHECK(absl::c_is_sorted( + layer_frames_, [](const ScalableVideoController::LayerFrameConfig& lhs, + const ScalableVideoController::LayerFrameConfig& rhs) { + return lhs.SpatialId() < rhs.SpatialId(); + })); + + auto frame_it = layer_frames_.begin(); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (frame_it != layer_frames_.end() && frame_it->SpatialId() == sid) { + EnableSpatialLayer(sid); + ++frame_it; + } else { + DisableSpatialLayer(sid); + } + } +} + +void LibvpxVp9Encoder::SetRates(const RateControlParameters& parameters) { + if (!inited_) { + RTC_LOG(LS_WARNING) << "SetRates() called while uninitialized."; + return; + } + if (encoder_->err) { + RTC_LOG(LS_WARNING) << "Encoder in error state: " << encoder_->err; + return; + } + if (parameters.framerate_fps < 1.0) { + RTC_LOG(LS_WARNING) << "Unsupported framerate: " + << parameters.framerate_fps; + return; + } + + codec_.maxFramerate = static_cast<uint32_t>(parameters.framerate_fps + 0.5); + + bool res = SetSvcRates(parameters.bitrate); + RTC_DCHECK(res) << "Failed to set new bitrate allocation"; + config_changed_ = true; +} + +// TODO(eladalon): s/inst/codec_settings/g. +int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst, + const Settings& settings) { + if (inst == nullptr) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->maxFramerate < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + // Allow zero to represent an unspecified maxBitRate + if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->width < 1 || inst->height < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (settings.number_of_cores < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->VP9().numberOfTemporalLayers > 3) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + // libvpx probably does not support more than 3 spatial layers. + if (inst->VP9().numberOfSpatialLayers > 3) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + absl::optional<vpx_img_fmt_t> previous_img_fmt = + raw_ ? absl::make_optional<vpx_img_fmt_t>(raw_->fmt) : absl::nullopt; + + int ret_val = Release(); + if (ret_val < 0) { + return ret_val; + } + if (encoder_ == nullptr) { + encoder_ = new vpx_codec_ctx_t; + memset(encoder_, 0, sizeof(*encoder_)); + } + if (config_ == nullptr) { + config_ = new vpx_codec_enc_cfg_t; + memset(config_, 0, sizeof(*config_)); + } + timestamp_ = 0; + if (&codec_ != inst) { + codec_ = *inst; + } + memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t)); + + force_key_frame_ = true; + pics_since_key_ = 0; + num_cores_ = settings.number_of_cores; + + scalability_mode_ = inst->GetScalabilityMode(); + if (scalability_mode_.has_value()) { + // Use settings from `ScalabilityMode` identifier. 
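// For example (illustrative): ScalabilityMode::kL3T3_KEY resolves to
// 3 spatial and 3 temporal layers with inter-layer prediction limited to key
// pictures, i.e. InterLayerPredMode::kOnKeyPic.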
+ RTC_LOG(LS_INFO) << "Create scalability structure " + << ScalabilityModeToString(*scalability_mode_); + svc_controller_ = CreateScalabilityStructure(*scalability_mode_); + if (!svc_controller_) { + RTC_LOG(LS_WARNING) << "Failed to create scalability structure."; + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + ScalableVideoController::StreamLayersConfig info = + svc_controller_->StreamConfig(); + num_spatial_layers_ = info.num_spatial_layers; + num_temporal_layers_ = info.num_temporal_layers; + inter_layer_pred_ = ScalabilityModeToInterLayerPredMode(*scalability_mode_); + } else { + num_spatial_layers_ = inst->VP9().numberOfSpatialLayers; + RTC_DCHECK_GT(num_spatial_layers_, 0); + num_temporal_layers_ = inst->VP9().numberOfTemporalLayers; + if (num_temporal_layers_ == 0) { + num_temporal_layers_ = 1; + } + inter_layer_pred_ = inst->VP9().interLayerPred; + auto vp9_scalability = CreateVp9ScalabilityStructure(*inst); + if (vp9_scalability.has_value()) { + std::tie(svc_controller_, scalability_mode_) = + std::move(vp9_scalability.value()); + } else { + svc_controller_ = nullptr; + scalability_mode_ = absl::nullopt; + } + } + + framerate_controller_ = std::vector<FramerateControllerDeprecated>( + num_spatial_layers_, FramerateControllerDeprecated(codec_.maxFramerate)); + + is_svc_ = (num_spatial_layers_ > 1 || num_temporal_layers_ > 1); + + // Populate encoder configuration with default values. + if (libvpx_->codec_enc_config_default(vpx_codec_vp9_cx(), config_, 0)) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + + vpx_img_fmt img_fmt = VPX_IMG_FMT_NONE; + unsigned int bits_for_storage = 8; + switch (profile_) { + case VP9Profile::kProfile0: + img_fmt = previous_img_fmt.value_or(VPX_IMG_FMT_I420); + bits_for_storage = 8; + config_->g_bit_depth = VPX_BITS_8; + config_->g_profile = 0; + config_->g_input_bit_depth = 8; + break; + case VP9Profile::kProfile1: + // Encoding of profile 1 is not implemented. It would require extended + // support for I444, I422, and I440 buffers. + RTC_DCHECK_NOTREACHED(); + break; + case VP9Profile::kProfile2: + img_fmt = VPX_IMG_FMT_I42016; + bits_for_storage = 16; + config_->g_bit_depth = VPX_BITS_10; + config_->g_profile = 2; + config_->g_input_bit_depth = 10; + break; + case VP9Profile::kProfile3: + // Encoding of profile 3 is not implemented. + RTC_DCHECK_NOTREACHED(); + break; + } + + // Creating a wrapper to the image - setting image data to nullptr. Actual + // pointer will be set in encode. Setting align to 1, as it is meaningless + // (actual memory is not allocated). + raw_ = libvpx_->img_wrap(nullptr, img_fmt, codec_.width, codec_.height, 1, + nullptr); + raw_->bit_depth = bits_for_storage; + + config_->g_w = codec_.width; + config_->g_h = codec_.height; + config_->rc_target_bitrate = inst->startBitrate; // in kbit/s + config_->g_error_resilient = is_svc_ ? VPX_ERROR_RESILIENT_DEFAULT : 0; + // Setting the time base of the codec. + config_->g_timebase.num = 1; + config_->g_timebase.den = 90000; + config_->g_lag_in_frames = 0; // 0- no frame lagging + config_->g_threads = 1; + // Rate control settings. + config_->rc_dropframe_thresh = inst->GetFrameDropEnabled() ? 30 : 0; + config_->rc_end_usage = VPX_CBR; + config_->g_pass = VPX_RC_ONE_PASS; + config_->rc_min_quantizer = + codec_.mode == VideoCodecMode::kScreensharing ? 
8 : 2; + config_->rc_max_quantizer = 52; + config_->rc_undershoot_pct = 50; + config_->rc_overshoot_pct = 50; + config_->rc_buf_initial_sz = 500; + config_->rc_buf_optimal_sz = 600; + config_->rc_buf_sz = 1000; + // Set the maximum target size of any key-frame. + rc_max_intra_target_ = MaxIntraTarget(config_->rc_buf_optimal_sz); + // Key-frame interval is enforced manually by this wrapper. + config_->kf_mode = VPX_KF_DISABLED; + // TODO(webm:1592): work-around for libvpx issue, as it can still + // put some key-frames at will even in VPX_KF_DISABLED kf_mode. + config_->kf_max_dist = inst->VP9().keyFrameInterval; + config_->kf_min_dist = config_->kf_max_dist; + if (quality_scaler_experiment_.enabled) { + // In that experiment webrtc wide quality scaler is used instead of libvpx + // internal scaler. + config_->rc_resize_allowed = 0; + } else { + config_->rc_resize_allowed = inst->VP9().automaticResizeOn ? 1 : 0; + } + // Determine number of threads based on the image size and #cores. + config_->g_threads = + NumberOfThreads(config_->g_w, config_->g_h, settings.number_of_cores); + + is_flexible_mode_ = inst->VP9().flexibleMode; + + if (num_spatial_layers_ > 1 && + codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) { + RTC_LOG(LS_ERROR) << "Flexible mode is required for screenshare with " + "several spatial layers"; + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + // External reference control is required for different frame rate on spatial + // layers because libvpx generates rtp incompatible references in this case. + external_ref_control_ = external_ref_ctrl_ || + (num_spatial_layers_ > 1 && + codec_.mode == VideoCodecMode::kScreensharing) || + inter_layer_pred_ == InterLayerPredMode::kOn; + + if (num_temporal_layers_ == 1) { + gof_.SetGofInfoVP9(kTemporalStructureMode1); + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING; + config_->ts_number_layers = 1; + config_->ts_rate_decimator[0] = 1; + config_->ts_periodicity = 1; + config_->ts_layer_id[0] = 0; + } else if (num_temporal_layers_ == 2) { + gof_.SetGofInfoVP9(kTemporalStructureMode2); + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101; + config_->ts_number_layers = 2; + config_->ts_rate_decimator[0] = 2; + config_->ts_rate_decimator[1] = 1; + config_->ts_periodicity = 2; + config_->ts_layer_id[0] = 0; + config_->ts_layer_id[1] = 1; + } else if (num_temporal_layers_ == 3) { + gof_.SetGofInfoVP9(kTemporalStructureMode3); + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212; + config_->ts_number_layers = 3; + config_->ts_rate_decimator[0] = 4; + config_->ts_rate_decimator[1] = 2; + config_->ts_rate_decimator[2] = 1; + config_->ts_periodicity = 4; + config_->ts_layer_id[0] = 0; + config_->ts_layer_id[1] = 2; + config_->ts_layer_id[2] = 1; + config_->ts_layer_id[3] = 2; + } else { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + if (external_ref_control_) { + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; + if (num_temporal_layers_ > 1 && num_spatial_layers_ > 1 && + codec_.mode == VideoCodecMode::kScreensharing) { + // External reference control for several temporal layers with different + // frame rates on spatial layers is not implemented yet. 
+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + } + ref_buf_ = {}; + + return InitAndSetControlSettings(inst); +} + +int LibvpxVp9Encoder::NumberOfThreads(int width, + int height, + int number_of_cores) { + // Keep the number of encoder threads equal to the possible number of column + // tiles, which is (1, 2, 4, 8). See comments below for VP9E_SET_TILE_COLUMNS. + if (width * height >= 1280 * 720 && number_of_cores > 4) { + return 4; + } else if (width * height >= 640 * 360 && number_of_cores > 2) { + return 2; + } else { +// Use 2 threads for low res on ARM. +#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || \ + defined(WEBRTC_ANDROID) + if (width * height >= 320 * 180 && number_of_cores > 2) { + return 2; + } +#endif + // 1 thread less than VGA. + return 1; + } +} + +int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) { + // Set QP-min/max per spatial and temporal layer. + int tot_num_layers = num_spatial_layers_ * num_temporal_layers_; + for (int i = 0; i < tot_num_layers; ++i) { + svc_params_.max_quantizers[i] = config_->rc_max_quantizer; + svc_params_.min_quantizers[i] = config_->rc_min_quantizer; + } + config_->ss_number_layers = num_spatial_layers_; + if (svc_controller_) { + auto stream_config = svc_controller_->StreamConfig(); + for (int i = 0; i < stream_config.num_spatial_layers; ++i) { + svc_params_.scaling_factor_num[i] = stream_config.scaling_factor_num[i]; + svc_params_.scaling_factor_den[i] = stream_config.scaling_factor_den[i]; + } + } else if (ExplicitlyConfiguredSpatialLayers()) { + for (int i = 0; i < num_spatial_layers_; ++i) { + const auto& layer = codec_.spatialLayers[i]; + RTC_CHECK_GT(layer.width, 0); + const int scale_factor = codec_.width / layer.width; + RTC_DCHECK_GT(scale_factor, 0); + + // Ensure scaler factor is integer. + if (scale_factor * layer.width != codec_.width) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + // Ensure scale factor is the same in both dimensions. + if (scale_factor * layer.height != codec_.height) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + // Ensure scale factor is power of two. + const bool is_pow_of_two = (scale_factor & (scale_factor - 1)) == 0; + if (!is_pow_of_two) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + svc_params_.scaling_factor_num[i] = 1; + svc_params_.scaling_factor_den[i] = scale_factor; + + RTC_DCHECK_GT(codec_.spatialLayers[i].maxFramerate, 0); + RTC_DCHECK_LE(codec_.spatialLayers[i].maxFramerate, codec_.maxFramerate); + if (i > 0) { + // Frame rate of high spatial layer is supposed to be equal or higher + // than frame rate of low spatial layer. + RTC_DCHECK_GE(codec_.spatialLayers[i].maxFramerate, + codec_.spatialLayers[i - 1].maxFramerate); + } + } + } else { + int scaling_factor_num = 256; + for (int i = num_spatial_layers_ - 1; i >= 0; --i) { + // 1:2 scaling in each dimension. + svc_params_.scaling_factor_num[i] = scaling_factor_num; + svc_params_.scaling_factor_den[i] = 256; + } + } + + UpdatePerformanceFlags(); + RTC_DCHECK_EQ(performance_flags_by_spatial_index_.size(), + static_cast<size_t>(num_spatial_layers_)); + + SvcRateAllocator init_allocator(codec_); + current_bitrate_allocation_ = + init_allocator.Allocate(VideoBitrateAllocationParameters( + inst->startBitrate * 1000, inst->maxFramerate)); + if (!SetSvcRates(current_bitrate_allocation_)) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + const vpx_codec_err_t rv = libvpx_->codec_enc_init( + encoder_, vpx_codec_vp9_cx(), config_, + config_->g_bit_depth == VPX_BITS_8 ? 
0 : VPX_CODEC_USE_HIGHBITDEPTH); + if (rv != VPX_CODEC_OK) { + RTC_LOG(LS_ERROR) << "Init error: " << libvpx_->codec_err_to_string(rv); + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + if (performance_flags_.use_per_layer_speed) { + for (int si = 0; si < num_spatial_layers_; ++si) { + svc_params_.speed_per_layer[si] = + performance_flags_by_spatial_index_[si].base_layer_speed; + svc_params_.loopfilter_ctrl[si] = + performance_flags_by_spatial_index_[si].deblock_mode; + } + bool denoiser_on = + AllowDenoising() && inst->VP9().denoisingOn && + performance_flags_by_spatial_index_[num_spatial_layers_ - 1] + .allow_denoising; + libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY, + denoiser_on ? 1 : 0); + } + + libvpx_->codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT, + rc_max_intra_target_); + libvpx_->codec_control(encoder_, VP9E_SET_AQ_MODE, + inst->VP9().adaptiveQpMode ? 3 : 0); + + libvpx_->codec_control(encoder_, VP9E_SET_FRAME_PARALLEL_DECODING, 0); + libvpx_->codec_control(encoder_, VP9E_SET_SVC_GF_TEMPORAL_REF, 0); + + if (is_svc_) { + libvpx_->codec_control(encoder_, VP9E_SET_SVC, 1); + libvpx_->codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_); + } + if (!is_svc_ || !performance_flags_.use_per_layer_speed) { + libvpx_->codec_control( + encoder_, VP8E_SET_CPUUSED, + performance_flags_by_spatial_index_.rbegin()->base_layer_speed); + } + + if (num_spatial_layers_ > 1) { + switch (inter_layer_pred_) { + case InterLayerPredMode::kOn: + libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 0); + break; + case InterLayerPredMode::kOff: + libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 1); + break; + case InterLayerPredMode::kOnKeyPic: + libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 2); + break; + default: + RTC_DCHECK_NOTREACHED(); + } + + memset(&svc_drop_frame_, 0, sizeof(svc_drop_frame_)); + const bool reverse_constrained_drop_mode = + inter_layer_pred_ == InterLayerPredMode::kOn && + codec_.mode == VideoCodecMode::kScreensharing && + num_spatial_layers_ > 1; + if (reverse_constrained_drop_mode) { + // Screenshare dropping mode: drop a layer only together with all lower + // layers. This ensures that drops on lower layers won't reduce frame-rate + // for higher layers and reference structure is RTP-compatible. + svc_drop_frame_.framedrop_mode = CONSTRAINED_FROM_ABOVE_DROP; + svc_drop_frame_.max_consec_drop = 5; + for (size_t i = 0; i < num_spatial_layers_; ++i) { + svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh; + } + // No buffering is needed because the highest layer is always present in + // all frames in CONSTRAINED_FROM_ABOVE drop mode. + layer_buffering_ = false; + } else { + // Configure encoder to drop entire superframe whenever it needs to drop + // a layer. This mode is preferred over per-layer dropping which causes + // quality flickering and is not compatible with RTP non-flexible mode. + svc_drop_frame_.framedrop_mode = + full_superframe_drop_ ? FULL_SUPERFRAME_DROP : CONSTRAINED_LAYER_DROP; + // Buffering is needed only for constrained layer drop, as it's not clear + // which frame is the last. + layer_buffering_ = !full_superframe_drop_; + svc_drop_frame_.max_consec_drop = std::numeric_limits<int>::max(); + for (size_t i = 0; i < num_spatial_layers_; ++i) { + svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh; + } + } + libvpx_->codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER, + &svc_drop_frame_); + } + + // Register callback for getting each spatial layer. 
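// Flow summary (illustrative): libvpx invokes the registered callback once
// per encoded spatial layer, and EncoderOutputCodedPacketCallback() forwards
// each packet to GetEncodedLayerFrame(), so a superframe with three active
// spatial layers triggers three callbacks.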
+ vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = { + LibvpxVp9Encoder::EncoderOutputCodedPacketCallback, + reinterpret_cast<void*>(this)}; + libvpx_->codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK, + reinterpret_cast<void*>(&cbp)); + + // Control function to set the number of column tiles in encoding a frame, in + // log2 unit: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile columns. + // The number tile columns will be capped by the encoder based on image size + // (minimum width of tile column is 256 pixels, maximum is 4096). + libvpx_->codec_control(encoder_, VP9E_SET_TILE_COLUMNS, + static_cast<int>((config_->g_threads >> 1))); + + // Turn on row-based multithreading. + libvpx_->codec_control(encoder_, VP9E_SET_ROW_MT, 1); + + if (AllowDenoising() && !performance_flags_.use_per_layer_speed) { + libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY, + inst->VP9().denoisingOn ? 1 : 0); + } + + if (codec_.mode == VideoCodecMode::kScreensharing) { + // Adjust internal parameters to screen content. + libvpx_->codec_control(encoder_, VP9E_SET_TUNE_CONTENT, 1); + } + // Enable encoder skip of static/low content blocks. + libvpx_->codec_control(encoder_, VP8E_SET_STATIC_THRESHOLD, 1); + inited_ = true; + config_changed_ = true; + return WEBRTC_VIDEO_CODEC_OK; +} + +uint32_t LibvpxVp9Encoder::MaxIntraTarget(uint32_t optimal_buffer_size) { + // Set max to the optimal buffer level (normalized by target BR), + // and scaled by a scale_par. + // Max target size = scale_par * optimal_buffer_size * targetBR[Kbps]. + // This value is presented in percentage of perFrameBw: + // perFrameBw = targetBR[Kbps] * 1000 / framerate. + // The target in % is as follows: + float scale_par = 0.5; + uint32_t target_pct = + optimal_buffer_size * scale_par * codec_.maxFramerate / 10; + // Don't go below 3 times the per frame bandwidth. + const uint32_t min_intra_size = 300; + return (target_pct < min_intra_size) ? min_intra_size : target_pct; +} + +int LibvpxVp9Encoder::Encode(const VideoFrame& input_image, + const std::vector<VideoFrameType>* frame_types) { + if (!inited_) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (encoded_complete_callback_ == nullptr) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (num_active_spatial_layers_ == 0) { + // All spatial layers are disabled, return without encoding anything. + return WEBRTC_VIDEO_CODEC_OK; + } + + // We only support one stream at the moment. + if (frame_types && !frame_types->empty()) { + if ((*frame_types)[0] == VideoFrameType::kVideoFrameKey) { + force_key_frame_ = true; + } + } + + if (pics_since_key_ + 1 == + static_cast<size_t>(codec_.VP9()->keyFrameInterval)) { + force_key_frame_ = true; + } + + if (svc_controller_) { + layer_frames_ = svc_controller_->NextFrameConfig(force_key_frame_); + if (layer_frames_.empty()) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + if (layer_frames_.front().IsKeyframe()) { + force_key_frame_ = true; + } + } + + vpx_svc_layer_id_t layer_id = {0}; + if (!force_key_frame_) { + const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof; + layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx]; + + if (codec_.mode == VideoCodecMode::kScreensharing) { + const uint32_t frame_timestamp_ms = + 1000 * input_image.timestamp() / kVideoPayloadTypeFrequency; + + // To ensure that several rate-limiters with different limits don't + // interfere, they must be queried in order of increasing limit. 
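+      // For example, with the screenshare layer limits of {5, 10, 30} fps and
+      // a steady-state limit of 5 fps, the steady-state limiter is queried
+      // first; once any limiter admits the frame, every limiter with a higher
+      // limit would admit it as well, which is why the loop below can break
+      // early.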
+
+      bool use_steady_state_limiter =
+          variable_framerate_experiment_.enabled &&
+          input_image.update_rect().IsEmpty() &&
+          num_steady_state_frames_ >=
+              variable_framerate_experiment_.frames_before_steady_state;
+
+      // Need to check all frame limiters, even if lower layers are disabled,
+      // because the variable frame-rate limiter should be checked after the
+      // first layer. It's easier to check every limiter and overwrite the
+      // active layer afterwards than to special-case every combination.
+      for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
+        const float layer_fps =
+            framerate_controller_[layer_id.spatial_layer_id].GetTargetRate();
+        // Use the steady-state rate-limiter at the correct place.
+        if (use_steady_state_limiter &&
+            layer_fps > variable_framerate_experiment_.framerate_limit - 1e-9) {
+          if (variable_framerate_controller_.DropFrame(frame_timestamp_ms)) {
+            layer_id.spatial_layer_id = num_active_spatial_layers_;
+          }
+          // Break always: if the rate limiter triggered a frame drop, there is
+          // no need to continue; otherwise, the rate is less than the next
+          // limiters.
+          break;
+        }
+        if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) {
+          ++layer_id.spatial_layer_id;
+        } else {
+          break;
+        }
+      }
+
+      if (use_steady_state_limiter &&
+          layer_id.spatial_layer_id < num_active_spatial_layers_) {
+        variable_framerate_controller_.AddFrame(frame_timestamp_ms);
+      }
+    }
+
+    if (force_all_active_layers_) {
+      layer_id.spatial_layer_id = first_active_layer_;
+      force_all_active_layers_ = false;
+    }
+
+    RTC_DCHECK_LE(layer_id.spatial_layer_id, num_active_spatial_layers_);
+    if (layer_id.spatial_layer_id >= num_active_spatial_layers_) {
+      // Drop entire picture.
+      return WEBRTC_VIDEO_CODEC_OK;
+    }
+  }
+
+  // Need to set temporal layer id on ALL layers, even disabled ones.
+  // Otherwise libvpx might produce frames on a disabled layer:
+  // http://crbug.com/1051476
+  for (int sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
+    layer_id.temporal_layer_id_per_spatial[sl_idx] = layer_id.temporal_layer_id;
+  }
+
+  if (layer_id.spatial_layer_id < first_active_layer_) {
+    layer_id.spatial_layer_id = first_active_layer_;
+  }
+
+  if (svc_controller_) {
+    layer_id.spatial_layer_id = layer_frames_.front().SpatialId();
+    layer_id.temporal_layer_id = layer_frames_.front().TemporalId();
+    for (const auto& layer : layer_frames_) {
+      layer_id.temporal_layer_id_per_spatial[layer.SpatialId()] =
+          layer.TemporalId();
+    }
+    SetActiveSpatialLayers();
+  }
+
+  if (is_svc_ && performance_flags_.use_per_layer_speed) {
+    // Update speed settings that might depend on temporal index.
+    bool speed_updated = false;
+    for (int sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
+      const int target_speed =
+          layer_id.temporal_layer_id_per_spatial[sl_idx] == 0
+              ? performance_flags_by_spatial_index_[sl_idx].base_layer_speed
+              : performance_flags_by_spatial_index_[sl_idx].high_layer_speed;
+      if (svc_params_.speed_per_layer[sl_idx] != target_speed) {
+        svc_params_.speed_per_layer[sl_idx] = target_speed;
+        speed_updated = true;
+      }
+    }
+    if (speed_updated) {
+      libvpx_->codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_);
+    }
+  }
+
+  libvpx_->codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);
+
+  if (num_spatial_layers_ > 1) {
+    // Update frame dropping settings as they may change on a per-frame basis.
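+    // (The thresholds may have been temporarily cleared, e.g. while a layer
+    // was being enabled dynamically; DeliverBufferedFrame restores them, so
+    // the encoder is re-synced here before each encode call.)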
+ libvpx_->codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER, + &svc_drop_frame_); + } + + if (config_changed_) { + if (libvpx_->codec_enc_config_set(encoder_, config_)) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + + if (!performance_flags_.use_per_layer_speed) { + // Not setting individual speeds per layer, find the highest active + // resolution instead and base the speed on that. + for (int i = num_spatial_layers_ - 1; i >= 0; --i) { + if (config_->ss_target_bitrate[i] > 0) { + int width = (svc_params_.scaling_factor_num[i] * config_->g_w) / + svc_params_.scaling_factor_den[i]; + int height = (svc_params_.scaling_factor_num[i] * config_->g_h) / + svc_params_.scaling_factor_den[i]; + int speed = + std::prev(performance_flags_.settings_by_resolution.lower_bound( + width * height)) + ->second.base_layer_speed; + libvpx_->codec_control(encoder_, VP8E_SET_CPUUSED, speed); + break; + } + } + } + config_changed_ = false; + } + + if (input_image.width() != codec_.width || + input_image.height() != codec_.height) { + int ret = UpdateCodecFrameSize(input_image); + if (ret < 0) { + return ret; + } + } + + RTC_DCHECK_EQ(input_image.width(), raw_->d_w); + RTC_DCHECK_EQ(input_image.height(), raw_->d_h); + + // Set input image for use in the callback. + // This was necessary since you need some information from input_image. + // You can save only the necessary information (such as timestamp) instead of + // doing this. + input_image_ = &input_image; + + // In case we need to map the buffer, `mapped_buffer` is used to keep it alive + // through reference counting until after encoding has finished. + rtc::scoped_refptr<const VideoFrameBuffer> mapped_buffer; + const I010BufferInterface* i010_buffer; + rtc::scoped_refptr<const I010BufferInterface> i010_copy; + switch (profile_) { + case VP9Profile::kProfile0: { + mapped_buffer = + PrepareBufferForProfile0(input_image.video_frame_buffer()); + if (!mapped_buffer) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + break; + } + case VP9Profile::kProfile1: { + RTC_DCHECK_NOTREACHED(); + break; + } + case VP9Profile::kProfile2: { + // We can inject kI010 frames directly for encode. All other formats + // should be converted to it. + switch (input_image.video_frame_buffer()->type()) { + case VideoFrameBuffer::Type::kI010: { + i010_buffer = input_image.video_frame_buffer()->GetI010(); + break; + } + default: { + auto i420_buffer = input_image.video_frame_buffer()->ToI420(); + if (!i420_buffer) { + RTC_LOG(LS_ERROR) << "Failed to convert " + << VideoFrameBufferTypeToString( + input_image.video_frame_buffer()->type()) + << " image to I420. 
Can't encode frame.";
+            return WEBRTC_VIDEO_CODEC_ERROR;
+          }
+          i010_copy = I010Buffer::Copy(*i420_buffer);
+          i010_buffer = i010_copy.get();
+        }
+      }
+      raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(
+          reinterpret_cast<const uint8_t*>(i010_buffer->DataY()));
+      raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(
+          reinterpret_cast<const uint8_t*>(i010_buffer->DataU()));
+      raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(
+          reinterpret_cast<const uint8_t*>(i010_buffer->DataV()));
+      raw_->stride[VPX_PLANE_Y] = i010_buffer->StrideY() * 2;
+      raw_->stride[VPX_PLANE_U] = i010_buffer->StrideU() * 2;
+      raw_->stride[VPX_PLANE_V] = i010_buffer->StrideV() * 2;
+      break;
+    }
+    case VP9Profile::kProfile3: {
+      RTC_DCHECK_NOTREACHED();
+      break;
+    }
+  }
+
+  vpx_enc_frame_flags_t flags = 0;
+  if (force_key_frame_) {
+    flags = VPX_EFLAG_FORCE_KF;
+  }
+
+  if (svc_controller_) {
+    vpx_svc_ref_frame_config_t ref_config = Vp9References(layer_frames_);
+    libvpx_->codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG,
+                           &ref_config);
+  } else if (external_ref_control_) {
+    vpx_svc_ref_frame_config_t ref_config =
+        SetReferences(force_key_frame_, layer_id.spatial_layer_id);
+
+    if (VideoCodecMode::kScreensharing == codec_.mode) {
+      for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
+        ref_config.duration[sl_idx] = static_cast<int64_t>(
+            90000 / (std::min(static_cast<float>(codec_.maxFramerate),
+                              framerate_controller_[sl_idx].GetTargetRate())));
+      }
+    }
+
+    libvpx_->codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG,
+                           &ref_config);
+  }
+
+  first_frame_in_picture_ = true;
+
+  // TODO(ssilkin): Frame duration should be specified per spatial layer
+  // since their frame rates can be different. For now calculate frame duration
+  // based on the target frame rate of the highest spatial layer, whose frame
+  // rate is supposed to be equal to or higher than the frame rate of the low
+  // spatial layers. Also, the timestamp should represent the actual time
+  // passed since the previous frame (not the 'expected' time). Then the rate
+  // controller can drain the buffer more accurately.
+  RTC_DCHECK_GE(framerate_controller_.size(), num_active_spatial_layers_);
+  float target_framerate_fps =
+      (codec_.mode == VideoCodecMode::kScreensharing)
+          ? std::min(static_cast<float>(codec_.maxFramerate),
+                     framerate_controller_[num_active_spatial_layers_ - 1]
+                         .GetTargetRate())
+          : codec_.maxFramerate;
+  uint32_t duration = static_cast<uint32_t>(90000 / target_framerate_fps);
+  const vpx_codec_err_t rv = libvpx_->codec_encode(
+      encoder_, raw_, timestamp_, duration, flags, VPX_DL_REALTIME);
+  if (rv != VPX_CODEC_OK) {
+    RTC_LOG(LS_ERROR) << "Encoding error: " << libvpx_->codec_err_to_string(rv)
+                      << "\n"
+                         "Details: "
+                      << libvpx_->codec_error(encoder_) << "\n"
+                      << libvpx_->codec_error_detail(encoder_);
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+  timestamp_ += duration;
+
+  if (layer_buffering_) {
+    const bool end_of_picture = true;
+    DeliverBufferedFrame(end_of_picture);
+  }
+
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+int LibvpxVp9Encoder::UpdateCodecFrameSize(
+    const VideoFrame& input_image) {
+  RTC_LOG(LS_INFO) << "Reconfiguring VP9 from " <<
+      codec_.width << "x" << codec_.height << " to " <<
+      input_image.width() << "x" << input_image.height();
+  // Preserve the latest bitrate/framerate settings.
+  // TODO: Mozilla - see below, we need to save more state here.
+ //uint32_t old_bitrate_kbit = config_->rc_target_bitrate; + //uint32_t old_framerate = codec_.maxFramerate; + + codec_.width = input_image.width(); + codec_.height = input_image.height(); + + vpx_img_free(raw_); + raw_ = vpx_img_wrap(NULL, VPX_IMG_FMT_I420, codec_.width, codec_.height, + 1, NULL); + // Update encoder context for new frame size. + config_->g_w = codec_.width; + config_->g_h = codec_.height; + + // Determine number of threads based on the image size and #cores. + config_->g_threads = NumberOfThreads(codec_.width, codec_.height, + num_cores_); + + // NOTE: We would like to do this the same way vp8 does it + // (with vpx_codec_enc_config_set()), but that causes asserts + // in AQ 3 (cyclic); and in AQ 0 it works, but on a resize to smaller + // than 1/2 x 1/2 original it asserts in convolve(). Given these + // bugs in trying to do it the "right" way, we basically re-do + // the initialization. + vpx_codec_destroy(encoder_); // clean up old state + int result = InitAndSetControlSettings(&codec_); + if (result == WEBRTC_VIDEO_CODEC_OK) { + // TODO: Mozilla rates have become much more complicated, we need to store + // more state or find another way of doing this. + //return SetRates(old_bitrate_kbit, old_framerate); + RTC_CHECK(false); + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + return result; +} + +bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, + absl::optional<int>* spatial_idx, + absl::optional<int>* temporal_idx, + const vpx_codec_cx_pkt& pkt) { + RTC_CHECK(codec_specific != nullptr); + codec_specific->codecType = kVideoCodecVP9; + CodecSpecificInfoVP9* vp9_info = &(codec_specific->codecSpecific.VP9); + + vp9_info->first_frame_in_picture = first_frame_in_picture_; + vp9_info->flexible_mode = is_flexible_mode_; + + if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) { + pics_since_key_ = 0; + } else if (first_frame_in_picture_) { + ++pics_since_key_; + } + + vpx_svc_layer_id_t layer_id = {0}; + libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); + + // Can't have keyframe with non-zero temporal layer. + RTC_DCHECK(pics_since_key_ != 0 || layer_id.temporal_layer_id == 0); + + RTC_CHECK_GT(num_temporal_layers_, 0); + RTC_CHECK_GT(num_active_spatial_layers_, 0); + if (num_temporal_layers_ == 1) { + RTC_CHECK_EQ(layer_id.temporal_layer_id, 0); + vp9_info->temporal_idx = kNoTemporalIdx; + *temporal_idx = absl::nullopt; + } else { + vp9_info->temporal_idx = layer_id.temporal_layer_id; + *temporal_idx = layer_id.temporal_layer_id; + } + if (num_active_spatial_layers_ == 1) { + RTC_CHECK_EQ(layer_id.spatial_layer_id, 0); + *spatial_idx = absl::nullopt; + } else { + *spatial_idx = layer_id.spatial_layer_id; + } + + const bool is_key_pic = (pics_since_key_ == 0); + const bool is_inter_layer_pred_allowed = + (inter_layer_pred_ == InterLayerPredMode::kOn || + (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic)); + + // Always set inter_layer_predicted to true on high layer frame if inter-layer + // prediction (ILP) is allowed even if encoder didn't actually use it. + // Setting inter_layer_predicted to false would allow receiver to decode high + // layer frame without decoding low layer frame. If that would happen (e.g. + // if low layer frame is lost) then receiver won't be able to decode next high + // layer frame which uses ILP. + vp9_info->inter_layer_predicted = + first_frame_in_picture_ ? 
false : is_inter_layer_pred_allowed;
+
+  // Mark all low spatial layer frames as references (not just frames of
+  // active low spatial layers) if inter-layer prediction is enabled, since
+  // these frames are indirect references of the high spatial layer, which can
+  // later be enabled without a key frame.
+  vp9_info->non_ref_for_inter_layer_pred =
+      !is_inter_layer_pred_allowed ||
+      layer_id.spatial_layer_id + 1 == num_spatial_layers_;
+
+  // Always populate this, so that the packetizer can properly set the marker
+  // bit.
+  vp9_info->num_spatial_layers = num_active_spatial_layers_;
+  vp9_info->first_active_layer = first_active_layer_;
+
+  vp9_info->num_ref_pics = 0;
+  FillReferenceIndices(pkt, pics_since_key_, vp9_info->inter_layer_predicted,
+                       vp9_info);
+  if (vp9_info->flexible_mode) {
+    vp9_info->gof_idx = kNoGofIdx;
+    if (!svc_controller_) {
+      if (num_temporal_layers_ == 1) {
+        vp9_info->temporal_up_switch = true;
+      } else {
+        // In flexible mode with > 1 temporal layer but no SVC controller we
+        // can't technically determine if a frame is an upswitch point, so use
+        // gof-based data as a proxy for now.
+        // TODO(sprang): Remove once SVC controller is the only choice.
+        vp9_info->gof_idx =
+            static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
+        vp9_info->temporal_up_switch =
+            gof_.temporal_up_switch[vp9_info->gof_idx];
+      }
+    }
+  } else {
+    vp9_info->gof_idx =
+        static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
+    vp9_info->temporal_up_switch = gof_.temporal_up_switch[vp9_info->gof_idx];
+    RTC_DCHECK(vp9_info->num_ref_pics == gof_.num_ref_pics[vp9_info->gof_idx] ||
+               vp9_info->num_ref_pics == 0);
+  }
+
+  vp9_info->inter_pic_predicted = (!is_key_pic && vp9_info->num_ref_pics > 0);
+
+  // Write SS on the key frame of independently coded spatial layers, and on
+  // the base temporal/spatial layer frame if the number of layers changed
+  // without a key picture being issued (inter-layer prediction is enabled).
+  const bool is_key_frame = is_key_pic && !vp9_info->inter_layer_predicted;
+  if (is_key_frame || (ss_info_needed_ && layer_id.temporal_layer_id == 0 &&
+                       layer_id.spatial_layer_id == first_active_layer_)) {
+    vp9_info->ss_data_available = true;
+    vp9_info->spatial_layer_resolution_present = true;
+    // Signal disabled layers.
+    for (size_t i = 0; i < first_active_layer_; ++i) {
+      vp9_info->width[i] = 0;
+      vp9_info->height[i] = 0;
+    }
+    for (size_t i = first_active_layer_; i < num_active_spatial_layers_; ++i) {
+      vp9_info->width[i] = codec_.width * svc_params_.scaling_factor_num[i] /
+                           svc_params_.scaling_factor_den[i];
+      vp9_info->height[i] = codec_.height * svc_params_.scaling_factor_num[i] /
+                            svc_params_.scaling_factor_den[i];
+    }
+    if (vp9_info->flexible_mode) {
+      vp9_info->gof.num_frames_in_gof = 0;
+    } else {
+      vp9_info->gof.CopyGofInfoVP9(gof_);
+    }
+
+    ss_info_needed_ = false;
+  } else {
+    vp9_info->ss_data_available = false;
+  }
+
+  first_frame_in_picture_ = false;
+
+  // Populate codec-agnostic section in the codec specific structure.
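+  // (When the SVC controller is used, generic_frame_info and, on key frames,
+  // template_structure are what the packetization layer uses to describe
+  // frame dependencies, e.g. for the dependency descriptor extension.)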
+ if (svc_controller_) { + auto it = absl::c_find_if( + layer_frames_, + [&](const ScalableVideoController::LayerFrameConfig& config) { + return config.SpatialId() == layer_id.spatial_layer_id; + }); + if (it == layer_frames_.end()) { + RTC_LOG(LS_ERROR) << "Encoder produced a frame for layer S" + << layer_id.spatial_layer_id << "T" + << layer_id.temporal_layer_id + << " that wasn't requested."; + return false; + } + codec_specific->generic_frame_info = svc_controller_->OnEncodeDone(*it); + if (is_key_frame) { + codec_specific->template_structure = + svc_controller_->DependencyStructure(); + auto& resolutions = codec_specific->template_structure->resolutions; + resolutions.resize(num_spatial_layers_); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + resolutions[sid] = RenderResolution( + /*width=*/codec_.width * svc_params_.scaling_factor_num[sid] / + svc_params_.scaling_factor_den[sid], + /*height=*/codec_.height * svc_params_.scaling_factor_num[sid] / + svc_params_.scaling_factor_den[sid]); + } + } + if (is_flexible_mode_) { + // Populate data for legacy temporal-upswitch state. + // We can switch up to a higher temporal layer only if all temporal layers + // higher than this (within the current spatial layer) are switch points. + vp9_info->temporal_up_switch = true; + for (int i = layer_id.temporal_layer_id + 1; i < num_temporal_layers_; + ++i) { + // Assumes decode targets are always ordered first by spatial then by + // temporal id. + size_t dti_index = + (layer_id.spatial_layer_id * num_temporal_layers_) + i; + vp9_info->temporal_up_switch &= + (codec_specific->generic_frame_info + ->decode_target_indications[dti_index] == + DecodeTargetIndication::kSwitch); + } + } + } + codec_specific->scalability_mode = scalability_mode_; + return true; +} + +void LibvpxVp9Encoder::FillReferenceIndices(const vpx_codec_cx_pkt& pkt, + const size_t pic_num, + const bool inter_layer_predicted, + CodecSpecificInfoVP9* vp9_info) { + vpx_svc_layer_id_t layer_id = {0}; + libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); + + const bool is_key_frame = + (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false; + + std::vector<RefFrameBuffer> ref_buf_list; + + if (is_svc_) { + vpx_svc_ref_frame_config_t enc_layer_conf = {{0}}; + libvpx_->codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, + &enc_layer_conf); + char ref_buf_flags[] = "00000000"; + // There should be one character per buffer + 1 termination '\0'. 
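+  // The string is a human-readable bitmap for the log statement below, e.g.
+  // "01100000" means frame buffers 1 and 2 were referenced.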
+  static_assert(sizeof(ref_buf_flags) == kNumVp9Buffers + 1);
+
+  if (enc_layer_conf.reference_last[layer_id.spatial_layer_id]) {
+    const size_t fb_idx =
+        enc_layer_conf.lst_fb_idx[layer_id.spatial_layer_id];
+    RTC_DCHECK_LT(fb_idx, ref_buf_.size());
+    if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
+                  ref_buf_[fb_idx]) == ref_buf_list.end()) {
+      ref_buf_list.push_back(ref_buf_[fb_idx]);
+      ref_buf_flags[fb_idx] = '1';
+    }
+  }
+
+  if (enc_layer_conf.reference_alt_ref[layer_id.spatial_layer_id]) {
+    const size_t fb_idx =
+        enc_layer_conf.alt_fb_idx[layer_id.spatial_layer_id];
+    RTC_DCHECK_LT(fb_idx, ref_buf_.size());
+    if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
+                  ref_buf_[fb_idx]) == ref_buf_list.end()) {
+      ref_buf_list.push_back(ref_buf_[fb_idx]);
+      ref_buf_flags[fb_idx] = '1';
+    }
+  }
+
+  if (enc_layer_conf.reference_golden[layer_id.spatial_layer_id]) {
+    const size_t fb_idx =
+        enc_layer_conf.gld_fb_idx[layer_id.spatial_layer_id];
+    RTC_DCHECK_LT(fb_idx, ref_buf_.size());
+    if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
+                  ref_buf_[fb_idx]) == ref_buf_list.end()) {
+      ref_buf_list.push_back(ref_buf_[fb_idx]);
+      ref_buf_flags[fb_idx] = '1';
+    }
+  }
+
+  RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl "
+                      << layer_id.spatial_layer_id << " tl "
+                      << layer_id.temporal_layer_id << " referred buffers "
+                      << ref_buf_flags;
+
+  } else if (!is_key_frame) {
+    RTC_DCHECK_EQ(num_spatial_layers_, 1);
+    RTC_DCHECK_EQ(num_temporal_layers_, 1);
+    // In non-SVC mode the encoder doesn't provide a reference list. Assume
+    // each frame refers to the previous one, which is stored in buffer 0.
+    ref_buf_list.push_back(ref_buf_[0]);
+  }
+
+  std::vector<size_t> ref_pid_list;
+
+  vp9_info->num_ref_pics = 0;
+  for (const RefFrameBuffer& ref_buf : ref_buf_list) {
+    RTC_DCHECK_LE(ref_buf.pic_num, pic_num);
+    if (ref_buf.pic_num < pic_num) {
+      if (inter_layer_pred_ != InterLayerPredMode::kOn) {
+        // RTP spec limits temporal prediction to the same spatial layer.
+        // It is safe to ignore this requirement if inter-layer prediction is
+        // enabled for all frames when all base frames are relayed to receiver.
+        RTC_DCHECK_EQ(ref_buf.spatial_layer_id, layer_id.spatial_layer_id);
+      } else {
+        RTC_DCHECK_LE(ref_buf.spatial_layer_id, layer_id.spatial_layer_id);
+      }
+      RTC_DCHECK_LE(ref_buf.temporal_layer_id, layer_id.temporal_layer_id);
+
+      // The encoder may reference several spatial layers on the same previous
+      // frame in case some spatial layers are skipped on the current frame.
+      // We shouldn't put duplicate references as it may break some old
+      // clients and isn't RTP compatible.
+      if (std::find(ref_pid_list.begin(), ref_pid_list.end(),
+                    ref_buf.pic_num) != ref_pid_list.end()) {
+        continue;
+      }
+      ref_pid_list.push_back(ref_buf.pic_num);
+
+      const size_t p_diff = pic_num - ref_buf.pic_num;
+      RTC_DCHECK_LE(p_diff, 127UL);
+
+      vp9_info->p_diff[vp9_info->num_ref_pics] = static_cast<uint8_t>(p_diff);
+      ++vp9_info->num_ref_pics;
+    } else {
+      RTC_DCHECK(inter_layer_predicted);
+      // The RTP spec only allows using the previous spatial layer for
+      // inter-layer prediction.
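+      // (E.g. a frame on spatial layer 2 may use layer 1 of the same picture
+      // as its inter-layer reference, but not layer 0, as the DCHECK below
+      // verifies.)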
+      RTC_DCHECK_EQ(ref_buf.spatial_layer_id + 1, layer_id.spatial_layer_id);
+    }
+  }
+}
+
+void LibvpxVp9Encoder::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
+                                              const size_t pic_num) {
+  vpx_svc_layer_id_t layer_id = {0};
+  libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
+
+  RefFrameBuffer frame_buf = {.pic_num = pic_num,
+                              .spatial_layer_id = layer_id.spatial_layer_id,
+                              .temporal_layer_id = layer_id.temporal_layer_id};
+
+  if (is_svc_) {
+    vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
+    libvpx_->codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG,
+                           &enc_layer_conf);
+    const int update_buffer_slot =
+        enc_layer_conf.update_buffer_slot[layer_id.spatial_layer_id];
+
+    for (size_t i = 0; i < ref_buf_.size(); ++i) {
+      if (update_buffer_slot & (1 << i)) {
+        ref_buf_[i] = frame_buf;
+      }
+    }
+
+    RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl "
+                        << layer_id.spatial_layer_id << " tl "
+                        << layer_id.temporal_layer_id << " updated buffers "
+                        << (update_buffer_slot & (1 << 0) ? 1 : 0)
+                        << (update_buffer_slot & (1 << 1) ? 1 : 0)
+                        << (update_buffer_slot & (1 << 2) ? 1 : 0)
+                        << (update_buffer_slot & (1 << 3) ? 1 : 0)
+                        << (update_buffer_slot & (1 << 4) ? 1 : 0)
+                        << (update_buffer_slot & (1 << 5) ? 1 : 0)
+                        << (update_buffer_slot & (1 << 6) ? 1 : 0)
+                        << (update_buffer_slot & (1 << 7) ? 1 : 0);
+  } else {
+    RTC_DCHECK_EQ(num_spatial_layers_, 1);
+    RTC_DCHECK_EQ(num_temporal_layers_, 1);
+    // In non-SVC mode the encoder doesn't provide a reference list. Assume
+    // each frame is a reference and is stored in buffer 0.
+    ref_buf_[0] = frame_buf;
+  }
+}
+
+vpx_svc_ref_frame_config_t LibvpxVp9Encoder::SetReferences(
+    bool is_key_pic,
+    int first_active_spatial_layer_id) {
+  // kRefBufIdx, kUpdBufIdx need to be updated to support longer GOFs.
+  RTC_DCHECK_LE(gof_.num_frames_in_gof, 4);
+
+  vpx_svc_ref_frame_config_t ref_config;
+  memset(&ref_config, 0, sizeof(ref_config));
+
+  const size_t num_temporal_refs = std::max(1, num_temporal_layers_ - 1);
+  const bool is_inter_layer_pred_allowed =
+      inter_layer_pred_ == InterLayerPredMode::kOn ||
+      (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic);
+  absl::optional<int> last_updated_buf_idx;
+
+  // Put the temporal reference in LAST and the spatial reference in GOLDEN.
+  // Update the frame buffer (i.e. store the encoded frame) if the current
+  // frame is a temporal reference (i.e. it belongs to a low temporal layer)
+  // or it is a spatial reference. In the latter case, always store the
+  // spatial reference in the last reference frame buffer.
+  // For the case of 3 temporal and 3 spatial layers we need 6 frame buffers
+  // for temporal references plus 1 buffer for the spatial reference. 7
+  // buffers in total.
+
+  for (int sl_idx = first_active_spatial_layer_id;
+       sl_idx < num_active_spatial_layers_; ++sl_idx) {
+    const size_t curr_pic_num = is_key_pic ? 0 : pics_since_key_ + 1;
+    const size_t gof_idx = curr_pic_num % gof_.num_frames_in_gof;
+
+    if (!is_key_pic) {
+      // Set up temporal reference.
+      const int buf_idx = sl_idx * num_temporal_refs + kRefBufIdx[gof_idx];
+
+      // Last reference frame buffer is reserved for spatial reference. It is
+      // not supposed to be used for temporal prediction.
+      RTC_DCHECK_LT(buf_idx, kNumVp9Buffers - 1);
+
+      const int pid_diff = curr_pic_num - ref_buf_[buf_idx].pic_num;
+      // An incorrect spatial layer may be in the buffer due to a key-frame.
+      const bool same_spatial_layer =
+          ref_buf_[buf_idx].spatial_layer_id == sl_idx;
+      bool correct_pid = false;
+      if (is_flexible_mode_) {
+        correct_pid = pid_diff > 0 && pid_diff < kMaxAllowedPidDiff;
+      } else {
+        // The code below assumes a single temporal reference.
+        RTC_DCHECK_EQ(gof_.num_ref_pics[gof_idx], 1);
+        correct_pid = pid_diff == gof_.pid_diff[gof_idx][0];
+      }
+
+      if (same_spatial_layer && correct_pid) {
+        ref_config.lst_fb_idx[sl_idx] = buf_idx;
+        ref_config.reference_last[sl_idx] = 1;
+      } else {
+        // This reference doesn't match the one specified by the GOF. This can
+        // only happen if a spatial layer is enabled dynamically without a key
+        // frame. Spatial prediction is supposed to be enabled in this case.
+        RTC_DCHECK(is_inter_layer_pred_allowed &&
+                   sl_idx > first_active_spatial_layer_id);
+      }
+    }
+
+    if (is_inter_layer_pred_allowed && sl_idx > first_active_spatial_layer_id) {
+      // Set up spatial reference.
+      RTC_DCHECK(last_updated_buf_idx);
+      ref_config.gld_fb_idx[sl_idx] = *last_updated_buf_idx;
+      ref_config.reference_golden[sl_idx] = 1;
+    } else {
+      RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 ||
+                 sl_idx == first_active_spatial_layer_id ||
+                 inter_layer_pred_ == InterLayerPredMode::kOff);
+    }
+
+    last_updated_buf_idx.reset();
+
+    if (gof_.temporal_idx[gof_idx] < num_temporal_layers_ - 1 ||
+        num_temporal_layers_ == 1) {
+      last_updated_buf_idx = sl_idx * num_temporal_refs + kUpdBufIdx[gof_idx];
+
+      // Ensure the last frame buffer is not used for temporal prediction (it
+      // is reserved for the spatial reference).
+      RTC_DCHECK_LT(*last_updated_buf_idx, kNumVp9Buffers - 1);
+    } else if (is_inter_layer_pred_allowed) {
+      last_updated_buf_idx = kNumVp9Buffers - 1;
+    }
+
+    if (last_updated_buf_idx) {
+      ref_config.update_buffer_slot[sl_idx] = 1 << *last_updated_buf_idx;
+    }
+  }
+
+  return ref_config;
+}
+
+void LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
+  RTC_DCHECK_EQ(pkt->kind, VPX_CODEC_CX_FRAME_PKT);
+
+  if (pkt->data.frame.sz == 0) {
+    // Ignore dropped frame.
+    return;
+  }
+
+  vpx_svc_layer_id_t layer_id = {0};
+  libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
+
+  if (layer_buffering_) {
+    // Deliver buffered low spatial layer frame.
+    const bool end_of_picture = false;
+    DeliverBufferedFrame(end_of_picture);
+  }
+
+  encoded_image_.SetEncodedData(EncodedImageBuffer::Create(
+      static_cast<const uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz));
+
+  codec_specific_ = {};
+  absl::optional<int> spatial_index;
+  absl::optional<int> temporal_index;
+  if (!PopulateCodecSpecific(&codec_specific_, &spatial_index, &temporal_index,
+                             *pkt)) {
+    // Drop the frame.
+    encoded_image_.set_size(0);
+    return;
+  }
+  encoded_image_.SetSpatialIndex(spatial_index);
+  encoded_image_.SetTemporalIndex(temporal_index);
+
+  const bool is_key_frame =
+      ((pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false) &&
+      !codec_specific_.codecSpecific.VP9.inter_layer_predicted;
+
+  // Ensure the encoder issued a key frame on request.
+  RTC_DCHECK(is_key_frame || !force_key_frame_);
+
+  // Check if the encoded frame is a key frame.
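+  // (An upper spatial layer frame that uses inter-layer prediction still
+  // counts as a delta frame here; that is why is_key_frame above also checks
+  // inter_layer_predicted.)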
+  encoded_image_._frameType = VideoFrameType::kVideoFrameDelta;
+  if (is_key_frame) {
+    encoded_image_._frameType = VideoFrameType::kVideoFrameKey;
+    force_key_frame_ = false;
+  }
+
+  UpdateReferenceBuffers(*pkt, pics_since_key_);
+
+  TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_.size());
+  encoded_image_.SetTimestamp(input_image_->timestamp());
+  encoded_image_.SetColorSpace(input_image_->color_space());
+  encoded_image_._encodedHeight =
+      pkt->data.frame.height[layer_id.spatial_layer_id];
+  encoded_image_._encodedWidth =
+      pkt->data.frame.width[layer_id.spatial_layer_id];
+  int qp = -1;
+  libvpx_->codec_control(encoder_, VP8E_GET_LAST_QUANTIZER, &qp);
+  encoded_image_.qp_ = qp;
+
+  if (!layer_buffering_) {
+    const bool end_of_picture = encoded_image_.SpatialIndex().value_or(0) + 1 ==
+                                num_active_spatial_layers_;
+    DeliverBufferedFrame(end_of_picture);
+  }
+}
+
+void LibvpxVp9Encoder::DeliverBufferedFrame(bool end_of_picture) {
+  if (encoded_image_.size() > 0) {
+    if (num_spatial_layers_ > 1) {
+      // Restore frame dropping settings, as dropping may be temporarily
+      // forbidden due to dynamically enabled layers.
+      for (size_t i = 0; i < num_spatial_layers_; ++i) {
+        svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
+      }
+    }
+
+    codec_specific_.end_of_picture = end_of_picture;
+
+    encoded_complete_callback_->OnEncodedImage(encoded_image_,
+                                               &codec_specific_);
+
+    if (codec_.mode == VideoCodecMode::kScreensharing) {
+      const uint8_t spatial_idx = encoded_image_.SpatialIndex().value_or(0);
+      const uint32_t frame_timestamp_ms =
+          1000 * encoded_image_.Timestamp() / kVideoPayloadTypeFrequency;
+      framerate_controller_[spatial_idx].AddFrame(frame_timestamp_ms);
+
+      const size_t steady_state_size = SteadyStateSize(
+          spatial_idx, codec_specific_.codecSpecific.VP9.temporal_idx);
+
+      // Only frames on spatial layers that may be limited in a steady state
+      // are considered for steady-state detection.
+      if (framerate_controller_[spatial_idx].GetTargetRate() >
+          variable_framerate_experiment_.framerate_limit + 1e-9) {
+        if (encoded_image_.qp_ <=
+                variable_framerate_experiment_.steady_state_qp &&
+            encoded_image_.size() <= steady_state_size) {
+          ++num_steady_state_frames_;
+        } else {
+          num_steady_state_frames_ = 0;
+        }
+      }
+    }
+    encoded_image_.set_size(0);
+  }
+}
+
+int LibvpxVp9Encoder::RegisterEncodeCompleteCallback(
+    EncodedImageCallback* callback) {
+  encoded_complete_callback_ = callback;
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+VideoEncoder::EncoderInfo LibvpxVp9Encoder::GetEncoderInfo() const {
+  EncoderInfo info;
+  info.supports_native_handle = false;
+  info.implementation_name = "libvpx";
+  if (quality_scaler_experiment_.enabled && inited_ &&
+      codec_.VP9().automaticResizeOn) {
+    info.scaling_settings = VideoEncoder::ScalingSettings(
+        quality_scaler_experiment_.low_qp, quality_scaler_experiment_.high_qp);
+  } else {
+    info.scaling_settings = VideoEncoder::ScalingSettings::kOff;
+  }
+  info.has_trusted_rate_controller = trusted_rate_controller_;
+  info.is_hardware_accelerated = false;
+  if (inited_) {
+    // Find the max configured fps of any active spatial layer.
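+    // The fps_allocation fractions filled in below are relative to this
+    // maximum; e.g. with three temporal layers and rate decimators of
+    // {4, 2, 1}, a layer running at max_fps reports roughly 1/4, 1/2 and all
+    // of kMaxFramerateFraction.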
+ float max_fps = 0.0; + for (size_t si = 0; si < num_spatial_layers_; ++si) { + if (codec_.spatialLayers[si].active && + codec_.spatialLayers[si].maxFramerate > max_fps) { + max_fps = codec_.spatialLayers[si].maxFramerate; + } + } + + for (size_t si = 0; si < num_spatial_layers_; ++si) { + info.fps_allocation[si].clear(); + if (!codec_.spatialLayers[si].active) { + continue; + } + + // This spatial layer may already use a fraction of the total frame rate. + const float sl_fps_fraction = + codec_.spatialLayers[si].maxFramerate / max_fps; + for (size_t ti = 0; ti < num_temporal_layers_; ++ti) { + const uint32_t decimator = + num_temporal_layers_ <= 1 ? 1 : config_->ts_rate_decimator[ti]; + RTC_DCHECK_GT(decimator, 0); + info.fps_allocation[si].push_back( + rtc::saturated_cast<uint8_t>(EncoderInfo::kMaxFramerateFraction * + (sl_fps_fraction / decimator))); + } + } + if (profile_ == VP9Profile::kProfile0) { + info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420, + VideoFrameBuffer::Type::kNV12}; + } + } + if (!encoder_info_override_.resolution_bitrate_limits().empty()) { + info.resolution_bitrate_limits = + encoder_info_override_.resolution_bitrate_limits(); + } + return info; +} + +size_t LibvpxVp9Encoder::SteadyStateSize(int sid, int tid) { + const size_t bitrate_bps = current_bitrate_allocation_.GetBitrate( + sid, tid == kNoTemporalIdx ? 0 : tid); + const float fps = (codec_.mode == VideoCodecMode::kScreensharing) + ? std::min(static_cast<float>(codec_.maxFramerate), + framerate_controller_[sid].GetTargetRate()) + : codec_.maxFramerate; + return static_cast<size_t>( + bitrate_bps / (8 * fps) * + (100 - + variable_framerate_experiment_.steady_state_undershoot_percentage) / + 100 + + 0.5); +} + +// static +LibvpxVp9Encoder::VariableFramerateExperiment +LibvpxVp9Encoder::ParseVariableFramerateConfig(const FieldTrialsView& trials) { + FieldTrialFlag enabled = FieldTrialFlag("Enabled"); + FieldTrialParameter<double> framerate_limit("min_fps", 5.0); + FieldTrialParameter<int> qp("min_qp", 32); + FieldTrialParameter<int> undershoot_percentage("undershoot", 30); + FieldTrialParameter<int> frames_before_steady_state( + "frames_before_steady_state", 5); + ParseFieldTrial({&enabled, &framerate_limit, &qp, &undershoot_percentage, + &frames_before_steady_state}, + trials.Lookup("WebRTC-VP9VariableFramerateScreenshare")); + VariableFramerateExperiment config; + config.enabled = enabled.Get(); + config.framerate_limit = framerate_limit.Get(); + config.steady_state_qp = qp.Get(); + config.steady_state_undershoot_percentage = undershoot_percentage.Get(); + config.frames_before_steady_state = frames_before_steady_state.Get(); + + return config; +} + +// static +LibvpxVp9Encoder::QualityScalerExperiment +LibvpxVp9Encoder::ParseQualityScalerConfig(const FieldTrialsView& trials) { + FieldTrialFlag disabled = FieldTrialFlag("Disabled"); + FieldTrialParameter<int> low_qp("low_qp", kLowVp9QpThreshold); + FieldTrialParameter<int> high_qp("hihg_qp", kHighVp9QpThreshold); + ParseFieldTrial({&disabled, &low_qp, &high_qp}, + trials.Lookup("WebRTC-VP9QualityScaler")); + QualityScalerExperiment config; + config.enabled = !disabled.Get(); + RTC_LOG(LS_INFO) << "Webrtc quality scaler for vp9 is " + << (config.enabled ? "enabled." 
: "disabled"); + config.low_qp = low_qp.Get(); + config.high_qp = high_qp.Get(); + + return config; +} + +void LibvpxVp9Encoder::UpdatePerformanceFlags() { + flat_map<int, PerformanceFlags::ParameterSet> params_by_resolution; + if (codec_.GetVideoEncoderComplexity() == + VideoCodecComplexity::kComplexityLow) { + // For low tier devices, always use speed 9. Only disable upper + // layer deblocking below QCIF. + params_by_resolution[0] = {.base_layer_speed = 9, + .high_layer_speed = 9, + .deblock_mode = 1, + .allow_denoising = true}; + params_by_resolution[352 * 288] = {.base_layer_speed = 9, + .high_layer_speed = 9, + .deblock_mode = 0, + .allow_denoising = true}; + } else { + params_by_resolution = performance_flags_.settings_by_resolution; + } + + const auto find_speed = [&](int min_pixel_count) { + RTC_DCHECK(!params_by_resolution.empty()); + auto it = params_by_resolution.upper_bound(min_pixel_count); + return std::prev(it)->second; + }; + performance_flags_by_spatial_index_.clear(); + + if (is_svc_) { + for (int si = 0; si < num_spatial_layers_; ++si) { + performance_flags_by_spatial_index_.push_back(find_speed( + codec_.spatialLayers[si].width * codec_.spatialLayers[si].height)); + } + } else { + performance_flags_by_spatial_index_.push_back( + find_speed(codec_.width * codec_.height)); + } +} + +// static +LibvpxVp9Encoder::PerformanceFlags +LibvpxVp9Encoder::ParsePerformanceFlagsFromTrials( + const FieldTrialsView& trials) { + struct Params : public PerformanceFlags::ParameterSet { + int min_pixel_count = 0; + }; + + FieldTrialStructList<Params> trials_list( + {FieldTrialStructMember("min_pixel_count", + [](Params* p) { return &p->min_pixel_count; }), + FieldTrialStructMember("high_layer_speed", + [](Params* p) { return &p->high_layer_speed; }), + FieldTrialStructMember("base_layer_speed", + [](Params* p) { return &p->base_layer_speed; }), + FieldTrialStructMember("deblock_mode", + [](Params* p) { return &p->deblock_mode; }), + FieldTrialStructMember("denoiser", + [](Params* p) { return &p->allow_denoising; })}, + {}); + + FieldTrialFlag per_layer_speed("use_per_layer_speed"); + + ParseFieldTrial({&trials_list, &per_layer_speed}, + trials.Lookup("WebRTC-VP9-PerformanceFlags")); + + PerformanceFlags flags; + flags.use_per_layer_speed = per_layer_speed.Get(); + + constexpr int kMinSpeed = 1; + constexpr int kMaxSpeed = 9; + for (auto& f : trials_list.Get()) { + if (f.base_layer_speed < kMinSpeed || f.base_layer_speed > kMaxSpeed || + f.high_layer_speed < kMinSpeed || f.high_layer_speed > kMaxSpeed || + f.deblock_mode < 0 || f.deblock_mode > 2) { + RTC_LOG(LS_WARNING) << "Ignoring invalid performance flags: " + << "min_pixel_count = " << f.min_pixel_count + << ", high_layer_speed = " << f.high_layer_speed + << ", base_layer_speed = " << f.base_layer_speed + << ", deblock_mode = " << f.deblock_mode; + continue; + } + flags.settings_by_resolution[f.min_pixel_count] = f; + } + + if (flags.settings_by_resolution.empty()) { + return GetDefaultPerformanceFlags(); + } + + return flags; +} + +// static +LibvpxVp9Encoder::PerformanceFlags +LibvpxVp9Encoder::GetDefaultPerformanceFlags() { + PerformanceFlags flags; + flags.use_per_layer_speed = true; +#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || defined(ANDROID) + // Speed 8 on all layers for all resolutions. 
+ flags.settings_by_resolution[0] = {.base_layer_speed = 8, + .high_layer_speed = 8, + .deblock_mode = 0, + .allow_denoising = true}; +#else + + // For smaller resolutions, use lower speed setting for the temporal base + // layer (get some coding gain at the cost of increased encoding complexity). + // Set encoder Speed 5 for TL0, encoder Speed 8 for upper temporal layers, and + // disable deblocking for upper-most temporal layers. + flags.settings_by_resolution[0] = {.base_layer_speed = 5, + .high_layer_speed = 8, + .deblock_mode = 1, + .allow_denoising = true}; + + // Use speed 7 for QCIF and above. + // Set encoder Speed 7 for TL0, encoder Speed 8 for upper temporal layers, and + // enable deblocking for all temporal layers. + flags.settings_by_resolution[352 * 288] = {.base_layer_speed = 7, + .high_layer_speed = 8, + .deblock_mode = 0, + .allow_denoising = true}; + + // For very high resolution (1080p and up), turn the speed all the way up + // since this is very CPU intensive. Also disable denoising to save CPU, at + // these resolutions denoising appear less effective and hopefully you also + // have a less noisy video source at this point. + flags.settings_by_resolution[1920 * 1080] = {.base_layer_speed = 9, + .high_layer_speed = 9, + .deblock_mode = 0, + .allow_denoising = false}; + +#endif + return flags; +} + +void LibvpxVp9Encoder::MaybeRewrapRawWithFormat(const vpx_img_fmt fmt) { + if (!raw_) { + raw_ = libvpx_->img_wrap(nullptr, fmt, codec_.width, codec_.height, 1, + nullptr); + } else if (raw_->fmt != fmt) { + RTC_LOG(LS_INFO) << "Switching VP9 encoder pixel format to " + << (fmt == VPX_IMG_FMT_NV12 ? "NV12" : "I420"); + libvpx_->img_free(raw_); + raw_ = libvpx_->img_wrap(nullptr, fmt, codec_.width, codec_.height, 1, + nullptr); + } + // else no-op since the image is already in the right format. +} + +rtc::scoped_refptr<VideoFrameBuffer> LibvpxVp9Encoder::PrepareBufferForProfile0( + rtc::scoped_refptr<VideoFrameBuffer> buffer) { + absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats> + supported_formats = {VideoFrameBuffer::Type::kI420, + VideoFrameBuffer::Type::kNV12}; + + rtc::scoped_refptr<VideoFrameBuffer> mapped_buffer; + if (buffer->type() != VideoFrameBuffer::Type::kNative) { + // `buffer` is already mapped. + mapped_buffer = buffer; + } else { + // Attempt to map to one of the supported formats. + mapped_buffer = buffer->GetMappedFrameBuffer(supported_formats); + } + if (!mapped_buffer || + (absl::c_find(supported_formats, mapped_buffer->type()) == + supported_formats.end() && + mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) { + // Unknown pixel format or unable to map, convert to I420 and prepare that + // buffer instead to ensure Scale() is safe to use. + auto converted_buffer = buffer->ToI420(); + if (!converted_buffer) { + RTC_LOG(LS_ERROR) << "Failed to convert " + << VideoFrameBufferTypeToString(buffer->type()) + << " image to I420. Can't encode frame."; + return {}; + } + RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 || + converted_buffer->type() == VideoFrameBuffer::Type::kI420A); + + // Because `buffer` had to be converted, use `converted_buffer` instead. + buffer = mapped_buffer = converted_buffer; + } + + // Prepare `raw_` from `mapped_buffer`. 
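+  // For kNV12 below, chroma is stored as a single interleaved UV plane, so
+  // the V plane pointer is simply the U pointer offset by one byte, and both
+  // share the same stride.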
+ switch (mapped_buffer->type()) { + case VideoFrameBuffer::Type::kI420: + case VideoFrameBuffer::Type::kI420A: { + MaybeRewrapRawWithFormat(VPX_IMG_FMT_I420); + const I420BufferInterface* i420_buffer = mapped_buffer->GetI420(); + RTC_DCHECK(i420_buffer); + raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(i420_buffer->DataY()); + raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(i420_buffer->DataU()); + raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(i420_buffer->DataV()); + raw_->stride[VPX_PLANE_Y] = i420_buffer->StrideY(); + raw_->stride[VPX_PLANE_U] = i420_buffer->StrideU(); + raw_->stride[VPX_PLANE_V] = i420_buffer->StrideV(); + break; + } + case VideoFrameBuffer::Type::kNV12: { + MaybeRewrapRawWithFormat(VPX_IMG_FMT_NV12); + const NV12BufferInterface* nv12_buffer = mapped_buffer->GetNV12(); + RTC_DCHECK(nv12_buffer); + raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(nv12_buffer->DataY()); + raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(nv12_buffer->DataUV()); + raw_->planes[VPX_PLANE_V] = raw_->planes[VPX_PLANE_U] + 1; + raw_->stride[VPX_PLANE_Y] = nv12_buffer->StrideY(); + raw_->stride[VPX_PLANE_U] = nv12_buffer->StrideUV(); + raw_->stride[VPX_PLANE_V] = nv12_buffer->StrideUV(); + break; + } + default: + RTC_DCHECK_NOTREACHED(); + } + return mapped_buffer; +} + +} // namespace webrtc + +#endif // RTC_ENABLE_VP9 diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h new file mode 100644 index 0000000000..bb871f8498 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ * + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_ENCODER_H_ +#define MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_ENCODER_H_ + +#ifdef RTC_ENABLE_VP9 + +#include <array> +#include <memory> +#include <vector> + +#include "api/fec_controller_override.h" +#include "api/field_trials_view.h" +#include "api/video_codecs/scalability_mode.h" +#include "api/video_codecs/video_encoder.h" +#include "api/video_codecs/vp9_profile.h" +#include "common_video/include/video_frame_buffer_pool.h" +#include "modules/video_coding/codecs/interface/libvpx_interface.h" +#include "modules/video_coding/codecs/vp9/include/vp9.h" +#include "modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h" +#include "modules/video_coding/svc/scalable_video_controller.h" +#include "modules/video_coding/utility/framerate_controller_deprecated.h" +#include "rtc_base/containers/flat_map.h" +#include "rtc_base/experiments/encoder_info_settings.h" +#include "vpx/vp8cx.h" + +namespace webrtc { + +class LibvpxVp9Encoder : public VP9Encoder { + public: + LibvpxVp9Encoder(const cricket::VideoCodec& codec, + std::unique_ptr<LibvpxInterface> interface, + const FieldTrialsView& trials); + + ~LibvpxVp9Encoder() override; + + void SetFecControllerOverride( + FecControllerOverride* fec_controller_override) override; + + int Release() override; + + int InitEncode(const VideoCodec* codec_settings, + const Settings& settings) override; + + int Encode(const VideoFrame& input_image, + const std::vector<VideoFrameType>* frame_types) override; + + int RegisterEncodeCompleteCallback(EncodedImageCallback* callback) override; + + void SetRates(const RateControlParameters& parameters) override; + + EncoderInfo GetEncoderInfo() const override; + + private: + // Determine number of encoder threads to use. + int NumberOfThreads(int width, int height, int number_of_cores); + + // Call encoder initialize function and set control settings. + int InitAndSetControlSettings(const VideoCodec* inst); + + // Update frame size for codec. + int UpdateCodecFrameSize(const VideoFrame& input_image); + + bool PopulateCodecSpecific(CodecSpecificInfo* codec_specific, + absl::optional<int>* spatial_idx, + absl::optional<int>* temporal_idx, + const vpx_codec_cx_pkt& pkt); + void FillReferenceIndices(const vpx_codec_cx_pkt& pkt, + size_t pic_num, + bool inter_layer_predicted, + CodecSpecificInfoVP9* vp9_info); + void UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt, size_t pic_num); + vpx_svc_ref_frame_config_t SetReferences(bool is_key_pic, + int first_active_spatial_layer_id); + + bool ExplicitlyConfiguredSpatialLayers() const; + bool SetSvcRates(const VideoBitrateAllocation& bitrate_allocation); + + // Configures which spatial layers libvpx should encode according to + // configuration provided by svc_controller_. + void EnableSpatialLayer(int sid); + void DisableSpatialLayer(int sid); + void SetActiveSpatialLayers(); + + void GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt); + + // Callback function for outputting packets per spatial layer. 
+ static void EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt, + void* user_data); + + void DeliverBufferedFrame(bool end_of_picture); + + bool DropFrame(uint8_t spatial_idx, uint32_t rtp_timestamp); + + // Determine maximum target for Intra frames + // + // Input: + // - optimal_buffer_size : Optimal buffer size + // Return Value : Max target size for Intra frames represented as + // percentage of the per frame bandwidth + uint32_t MaxIntraTarget(uint32_t optimal_buffer_size); + + size_t SteadyStateSize(int sid, int tid); + + void MaybeRewrapRawWithFormat(vpx_img_fmt fmt); + // Prepares `raw_` to reference image data of `buffer`, or of mapped or scaled + // versions of `buffer`. Returns the buffer that got referenced as a result, + // allowing the caller to keep a reference to it until after encoding has + // finished. On failure to convert the buffer, null is returned. + rtc::scoped_refptr<VideoFrameBuffer> PrepareBufferForProfile0( + rtc::scoped_refptr<VideoFrameBuffer> buffer); + + const std::unique_ptr<LibvpxInterface> libvpx_; + EncodedImage encoded_image_; + CodecSpecificInfo codec_specific_; + EncodedImageCallback* encoded_complete_callback_; + VideoCodec codec_; + const VP9Profile profile_; + bool inited_; + int64_t timestamp_; + uint32_t rc_max_intra_target_; + vpx_codec_ctx_t* encoder_; + vpx_codec_enc_cfg_t* config_; + vpx_image_t* raw_; + vpx_svc_extra_cfg_t svc_params_; + const VideoFrame* input_image_; + GofInfoVP9 gof_; // Contains each frame's temporal information for + // non-flexible mode. + bool force_key_frame_; + size_t pics_since_key_; + uint8_t num_temporal_layers_; + uint8_t num_spatial_layers_; // Number of configured SLs + uint8_t num_active_spatial_layers_; // Number of actively encoded SLs + uint8_t first_active_layer_; + bool layer_deactivation_requires_key_frame_; + bool is_svc_; + InterLayerPredMode inter_layer_pred_; + bool external_ref_control_; + const bool trusted_rate_controller_; + bool layer_buffering_; + const bool full_superframe_drop_; + vpx_svc_frame_drop_t svc_drop_frame_; + bool first_frame_in_picture_; + VideoBitrateAllocation current_bitrate_allocation_; + bool ss_info_needed_; + bool force_all_active_layers_; + uint8_t num_cores_; + + std::unique_ptr<ScalableVideoController> svc_controller_; + absl::optional<ScalabilityMode> scalability_mode_; + std::vector<FramerateControllerDeprecated> framerate_controller_; + + // Used for flexible mode. + bool is_flexible_mode_; + struct RefFrameBuffer { + bool operator==(const RefFrameBuffer& o) { + return pic_num == o.pic_num && spatial_layer_id == o.spatial_layer_id && + temporal_layer_id == o.temporal_layer_id; + } + + size_t pic_num = 0; + int spatial_layer_id = 0; + int temporal_layer_id = 0; + }; + std::array<RefFrameBuffer, kNumVp9Buffers> ref_buf_; + std::vector<ScalableVideoController::LayerFrameConfig> layer_frames_; + + // Variable frame-rate related fields and methods. + const struct VariableFramerateExperiment { + bool enabled; + // Framerate is limited to this value in steady state. + float framerate_limit; + // This qp or below is considered a steady state. + int steady_state_qp; + // Frames of at least this percentage below ideal for configured bitrate are + // considered in a steady state. + int steady_state_undershoot_percentage; + // Number of consecutive frames with good QP and size required to detect + // the steady state. 
+    int frames_before_steady_state;
+  } variable_framerate_experiment_;
+  static VariableFramerateExperiment ParseVariableFramerateConfig(
+      const FieldTrialsView& trials);
+  FramerateControllerDeprecated variable_framerate_controller_;
+
+  const struct QualityScalerExperiment {
+    int low_qp;
+    int high_qp;
+    bool enabled;
+  } quality_scaler_experiment_;
+  static QualityScalerExperiment ParseQualityScalerConfig(
+      const FieldTrialsView& trials);
+  const bool external_ref_ctrl_;
+
+  // Flags that can affect the speed vs quality tradeoff, and are configurable
+  // per resolution range.
+  struct PerformanceFlags {
+    // If false, a lookup will be made in `settings_by_resolution` based on the
+    // highest currently active resolution, and the overall speed then set to
+    // the `base_layer_speed` matching that entry.
+    // If true, each active resolution will have its speed and deblock_mode set
+    // based on its resolution, and the high layer speed configured for
+    // non-base temporal layer frames.
+    bool use_per_layer_speed = false;
+
+    struct ParameterSet {
+      int base_layer_speed = -1;  // Speed setting for TL0.
+      int high_layer_speed = -1;  // Speed setting for TL1-TL3.
+      // 0 = deblock all temporal layers (TL)
+      // 1 = disable deblock for top-most TL
+      // 2 = disable deblock for all TLs
+      int deblock_mode = 0;
+      bool allow_denoising = true;
+    };
+    // Map from min pixel count to settings for that resolution and above.
+    // E.g. if you want some settings A below wvga (640x360) and some other
+    // settings B at wvga and above, you'd use the map {{0, A}, {230400, B}}.
+    flat_map<int, ParameterSet> settings_by_resolution;
+  };
+  // Performance flags, ordered by `min_pixel_count`.
+  const PerformanceFlags performance_flags_;
+  // Cached per-spatial-layer lookup into `performance_flags_`, where index i
+  // maps to the resolution specified in `codec_.spatialLayers[i]`.
+  std::vector<PerformanceFlags::ParameterSet>
+      performance_flags_by_spatial_index_;
+  void UpdatePerformanceFlags();
+  static PerformanceFlags ParsePerformanceFlagsFromTrials(
+      const FieldTrialsView& trials);
+  static PerformanceFlags GetDefaultPerformanceFlags();
+
+  int num_steady_state_frames_;
+  // Only set config when this flag is set.
+  bool config_changed_;
+
+  const LibvpxVp9EncoderInfoSettings encoder_info_override_;
+};
+
+}  // namespace webrtc
+
+#endif  // RTC_ENABLE_VP9
+
+#endif  // MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_ENCODER_H_
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.cc
new file mode 100644
index 0000000000..3a32a43622
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.cc
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/video_coding/codecs/vp9/svc_config.h" + +#include <algorithm> +#include <cmath> +#include <memory> +#include <vector> + +#include "media/base/video_common.h" +#include "modules/video_coding/codecs/vp9/include/vp9_globals.h" +#include "modules/video_coding/svc/create_scalability_structure.h" +#include "modules/video_coding/svc/scalability_mode_util.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +namespace { +const size_t kMinVp9SvcBitrateKbps = 30; + +const size_t kMaxNumLayersForScreenSharing = 3; +const float kMaxScreenSharingLayerFramerateFps[] = {5.0, 10.0, 30.0}; +const size_t kMinScreenSharingLayerBitrateKbps[] = {30, 200, 500}; +const size_t kTargetScreenSharingLayerBitrateKbps[] = {150, 350, 950}; +const size_t kMaxScreenSharingLayerBitrateKbps[] = {250, 500, 950}; + +// Gets limited number of layers for given resolution. +size_t GetLimitedNumSpatialLayers(size_t width, size_t height) { + const bool is_landscape = width >= height; + const size_t min_width = is_landscape ? kMinVp9SpatialLayerLongSideLength + : kMinVp9SpatialLayerShortSideLength; + const size_t min_height = is_landscape ? kMinVp9SpatialLayerShortSideLength + : kMinVp9SpatialLayerLongSideLength; + const size_t num_layers_fit_horz = static_cast<size_t>( + std::floor(1 + std::max(0.0f, std::log2(1.0f * width / min_width)))); + const size_t num_layers_fit_vert = static_cast<size_t>( + std::floor(1 + std::max(0.0f, std::log2(1.0f * height / min_height)))); + return std::min(num_layers_fit_horz, num_layers_fit_vert); +} +} // namespace + +std::vector<SpatialLayer> ConfigureSvcScreenSharing(size_t input_width, + size_t input_height, + float max_framerate_fps, + size_t num_spatial_layers) { + num_spatial_layers = + std::min(num_spatial_layers, kMaxNumLayersForScreenSharing); + std::vector<SpatialLayer> spatial_layers; + + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + SpatialLayer spatial_layer = {0}; + spatial_layer.width = input_width; + spatial_layer.height = input_height; + spatial_layer.maxFramerate = + std::min(kMaxScreenSharingLayerFramerateFps[sl_idx], max_framerate_fps); + spatial_layer.numberOfTemporalLayers = 1; + spatial_layer.minBitrate = + static_cast<int>(kMinScreenSharingLayerBitrateKbps[sl_idx]); + spatial_layer.maxBitrate = + static_cast<int>(kMaxScreenSharingLayerBitrateKbps[sl_idx]); + spatial_layer.targetBitrate = + static_cast<int>(kTargetScreenSharingLayerBitrateKbps[sl_idx]); + spatial_layer.active = true; + spatial_layers.push_back(spatial_layer); + } + + return spatial_layers; +} + +std::vector<SpatialLayer> ConfigureSvcNormalVideo( + size_t input_width, + size_t input_height, + float max_framerate_fps, + size_t first_active_layer, + size_t num_spatial_layers, + size_t num_temporal_layers, + absl::optional<ScalableVideoController::StreamLayersConfig> config) { + RTC_DCHECK_LT(first_active_layer, num_spatial_layers); + + // Limit number of layers for given resolution. + size_t limited_num_spatial_layers = + GetLimitedNumSpatialLayers(input_width, input_height); + if (limited_num_spatial_layers < num_spatial_layers) { + RTC_LOG(LS_WARNING) << "Reducing number of spatial layers from " + << num_spatial_layers << " to " + << limited_num_spatial_layers + << " due to low input resolution."; + num_spatial_layers = limited_num_spatial_layers; + } + + // First active layer must be configured. + num_spatial_layers = std::max(num_spatial_layers, first_active_layer + 1); + + // Ensure top layer is even enough. 
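+  // That is, the top-layer dimensions must be divisible by the largest
+  // scaling denominator (by default 2^(num_layers - 1) with 1:2 downscaling
+  // per layer) so that every lower layer gets integer dimensions; e.g. three
+  // layers require width and height divisible by 4.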
+ int required_divisibility = 1 << (num_spatial_layers - first_active_layer - 1);
+ if (config) {
+ required_divisibility = 1;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ required_divisibility = cricket::LeastCommonMultiple(
+ required_divisibility, config->scaling_factor_den[sl_idx]);
+ }
+ }
+ input_width = input_width - input_width % required_divisibility;
+ input_height = input_height - input_height % required_divisibility;
+
+ std::vector<SpatialLayer> spatial_layers;
+ for (size_t sl_idx = first_active_layer; sl_idx < num_spatial_layers;
+ ++sl_idx) {
+ SpatialLayer spatial_layer = {0};
+ spatial_layer.width = input_width >> (num_spatial_layers - sl_idx - 1);
+ spatial_layer.height = input_height >> (num_spatial_layers - sl_idx - 1);
+ spatial_layer.maxFramerate = max_framerate_fps;
+ spatial_layer.numberOfTemporalLayers = num_temporal_layers;
+ spatial_layer.active = true;
+
+ if (config) {
+ spatial_layer.width = input_width * config->scaling_factor_num[sl_idx] /
+ config->scaling_factor_den[sl_idx];
+ spatial_layer.height = input_height * config->scaling_factor_num[sl_idx] /
+ config->scaling_factor_den[sl_idx];
+ }
+
+ // minBitrate and maxBitrate formulas were derived from
+ // subjective-quality data to determine bit rates below which video
+ // quality is unacceptable and above which additional bits do not provide
+ // benefit. The formulas express rate in units of kbps. E.g. for a 1280x720
+ // layer (921600 pixels) they yield min ~481 kbps and max ~1524 kbps.
+
+ // TODO(ssilkin): Add to the comment PSNR/SSIM we get at encoding certain
+ // video to min/max bitrate specified by those formulas.
+ const size_t num_pixels = spatial_layer.width * spatial_layer.height;
+ int min_bitrate =
+ static_cast<int>((600. * std::sqrt(num_pixels) - 95000.) / 1000.);
+ min_bitrate = std::max(min_bitrate, 0);
+ spatial_layer.minBitrate =
+ std::max(static_cast<size_t>(min_bitrate), kMinVp9SvcBitrateKbps);
+ spatial_layer.maxBitrate =
+ static_cast<int>((1.6 * num_pixels + 50 * 1000) / 1000);
+ spatial_layer.targetBitrate =
+ (spatial_layer.minBitrate + spatial_layer.maxBitrate) / 2;
+ spatial_layers.push_back(spatial_layer);
+ }
+
+ // A workaround for the situation when a single HD layer is left with a
+ // minBitrate of about 500kbps. This would mean that there will always be at
+ // least 500kbps allocated to video regardless of how low the actual BWE is.
+ // Also, boost maxBitrate for the first layer to account for lost ability to
+ // predict from previous layers.
+ if (first_active_layer > 0) {
+ spatial_layers[0].minBitrate = kMinVp9SvcBitrateKbps;
+ // TODO(ilnik): tune this value or come up with a different formula to
+ // ensure that all singlecast configurations look good and not too much
+ // bitrate is added.
+ spatial_layers[0].maxBitrate *= 1.1;
+ }
+
+ return spatial_layers;
+}
+
+// Uses scalability mode to configure spatial layers.
+std::vector<SpatialLayer> GetVp9SvcConfig(VideoCodec& codec) {
+ RTC_DCHECK_EQ(codec.codecType, kVideoCodecVP9);
+
+ absl::optional<ScalabilityMode> scalability_mode = codec.GetScalabilityMode();
+ RTC_DCHECK(scalability_mode.has_value());
+
+ // Limit number of spatial layers for given resolution.
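+ // E.g. a 960x540 input fits all three layers of kL3T3_KEY (heights 540,
+ // 270, 135), while 480x270 fits only two, so kL3T3_KEY is reduced to
+ // kL2T3_KEY (cf. NumSpatialLayersLimitedWithScalabilityMode in
+ // svc_config_unittest.cc).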
+ int limited_num_spatial_layers = + GetLimitedNumSpatialLayers(codec.width, codec.height); + if (limited_num_spatial_layers < + ScalabilityModeToNumSpatialLayers(*scalability_mode)) { + ScalabilityMode limited_scalability_mode = + LimitNumSpatialLayers(*scalability_mode, limited_num_spatial_layers); + RTC_LOG(LS_WARNING) + << "Reducing number of spatial layers due to low input resolution: " + << ScalabilityModeToString(*scalability_mode) << " to " + << ScalabilityModeToString(limited_scalability_mode); + scalability_mode = limited_scalability_mode; + codec.SetScalabilityMode(limited_scalability_mode); + } + + absl::optional<ScalableVideoController::StreamLayersConfig> info = + ScalabilityStructureConfig(*scalability_mode); + if (!info.has_value()) { + RTC_LOG(LS_WARNING) << "Failed to create structure " + << ScalabilityModeToString(*scalability_mode); + return {}; + } + + // TODO(bugs.webrtc.org/11607): Add support for screensharing. + std::vector<SpatialLayer> spatial_layers = + GetSvcConfig(codec.width, codec.height, codec.maxFramerate, + /*first_active_layer=*/0, info->num_spatial_layers, + info->num_temporal_layers, /*is_screen_sharing=*/false, + codec.GetScalabilityMode() ? info : absl::nullopt); + RTC_DCHECK(!spatial_layers.empty()); + + // Use codec bitrate limits if spatial layering is not requested. + if (info->num_spatial_layers == 1) { + spatial_layers.back().minBitrate = codec.minBitrate; + spatial_layers.back().targetBitrate = codec.maxBitrate; + spatial_layers.back().maxBitrate = codec.maxBitrate; + } + + return spatial_layers; +} + +std::vector<SpatialLayer> GetSvcConfig( + size_t input_width, + size_t input_height, + float max_framerate_fps, + size_t first_active_layer, + size_t num_spatial_layers, + size_t num_temporal_layers, + bool is_screen_sharing, + absl::optional<ScalableVideoController::StreamLayersConfig> config) { + RTC_DCHECK_GT(input_width, 0); + RTC_DCHECK_GT(input_height, 0); + RTC_DCHECK_GT(num_spatial_layers, 0); + RTC_DCHECK_GT(num_temporal_layers, 0); + + if (is_screen_sharing) { + return ConfigureSvcScreenSharing(input_width, input_height, + max_framerate_fps, num_spatial_layers); + } else { + return ConfigureSvcNormalVideo(input_width, input_height, max_framerate_fps, + first_active_layer, num_spatial_layers, + num_temporal_layers, config); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.h new file mode 100644 index 0000000000..adeaf0f161 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_VP9_SVC_CONFIG_H_ +#define MODULES_VIDEO_CODING_CODECS_VP9_SVC_CONFIG_H_ + +#include <stddef.h> + +#include <vector> + +#include "api/video_codecs/spatial_layer.h" +#include "api/video_codecs/video_codec.h" +#include "modules/video_coding/svc/scalable_video_controller.h" + +namespace webrtc { + +// Uses scalability mode to configure spatial layers. 
+std::vector<SpatialLayer> GetVp9SvcConfig(VideoCodec& video_codec); + +std::vector<SpatialLayer> GetSvcConfig( + size_t input_width, + size_t input_height, + float max_framerate_fps, + size_t first_active_layer, + size_t num_spatial_layers, + size_t num_temporal_layers, + bool is_screen_sharing, + absl::optional<ScalableVideoController::StreamLayersConfig> config = + absl::nullopt); + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_VP9_SVC_CONFIG_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config_unittest.cc new file mode 100644 index 0000000000..762fd39287 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config_unittest.cc @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/codecs/vp9/svc_config.h" + +#include <cstddef> +#include <vector> + +#include "modules/video_coding/codecs/vp9/include/vp9_globals.h" +#include "test/gmock.h" +#include "test/gtest.h" + +using ::testing::ElementsAre; +using ::testing::Field; + +namespace webrtc { +TEST(SvcConfig, NumSpatialLayers) { + const size_t max_num_spatial_layers = 6; + const size_t first_active_layer = 0; + const size_t num_spatial_layers = 2; + + std::vector<SpatialLayer> spatial_layers = GetSvcConfig( + kMinVp9SpatialLayerLongSideLength << (num_spatial_layers - 1), + kMinVp9SpatialLayerShortSideLength << (num_spatial_layers - 1), 30, + first_active_layer, max_num_spatial_layers, 1, false); + + EXPECT_EQ(spatial_layers.size(), num_spatial_layers); +} + +TEST(SvcConfig, NumSpatialLayersPortrait) { + const size_t max_num_spatial_layers = 6; + const size_t first_active_layer = 0; + const size_t num_spatial_layers = 2; + + std::vector<SpatialLayer> spatial_layers = GetSvcConfig( + kMinVp9SpatialLayerShortSideLength << (num_spatial_layers - 1), + kMinVp9SpatialLayerLongSideLength << (num_spatial_layers - 1), 30, + first_active_layer, max_num_spatial_layers, 1, false); + + EXPECT_EQ(spatial_layers.size(), num_spatial_layers); +} + +TEST(SvcConfig, NumSpatialLayersWithScalabilityMode) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 960; + codec.height = 540; + codec.SetScalabilityMode(ScalabilityMode::kL3T3_KEY); + + std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135), + Field(&SpatialLayer::height, 270), + Field(&SpatialLayer::height, 540))); + EXPECT_THAT(spatial_layers, + ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 3), + Field(&SpatialLayer::numberOfTemporalLayers, 3), + Field(&SpatialLayer::numberOfTemporalLayers, 3))); + EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL3T3_KEY); +} + +TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityMode) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 480; + codec.height = 270; + codec.SetScalabilityMode(ScalabilityMode::kL3T3_KEY); + + // Scalability mode updated. 
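+ // 480x270 only fits two spatial layers, so GetVp9SvcConfig is expected to
+ // rewrite the mode stored on the codec from kL3T3_KEY to kL2T3_KEY.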
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135),
+ Field(&SpatialLayer::height, 270)));
+ EXPECT_THAT(spatial_layers,
+ ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 3),
+ Field(&SpatialLayer::numberOfTemporalLayers, 3)));
+ EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL2T3_KEY);
+}
+
+TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityModePortrait) {
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 270;
+ codec.height = 480;
+ codec.SetScalabilityMode(ScalabilityMode::kL3T1);
+
+ // Scalability mode updated.
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 135),
+ Field(&SpatialLayer::width, 270)));
+ EXPECT_THAT(spatial_layers,
+ ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1),
+ Field(&SpatialLayer::numberOfTemporalLayers, 1)));
+ EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL2T1);
+}
+
+TEST(SvcConfig, NumSpatialLayersWithScalabilityModeResolutionRatio1_5) {
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 270;
+ codec.height = 480;
+ codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1
+
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 180),
+ Field(&SpatialLayer::width, 270)));
+ EXPECT_THAT(spatial_layers,
+ ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1),
+ Field(&SpatialLayer::numberOfTemporalLayers, 1)));
+ EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL2T1h);
+}
+
+TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityModeResolutionRatio1_5) {
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 320;
+ codec.height = 180;
+ codec.SetScalabilityMode(ScalabilityMode::kL3T1h); // 1.5:1
+
+ // Scalability mode updated.
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 320)));
+ EXPECT_THAT(spatial_layers,
+ ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1)));
+ EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL1T1);
+}
+
+TEST(SvcConfig, AlwaysSendsAtLeastOneLayer) {
+ const size_t max_num_spatial_layers = 6;
+ const size_t first_active_layer = 5;
+
+ std::vector<SpatialLayer> spatial_layers = GetSvcConfig(
+ kMinVp9SpatialLayerLongSideLength, kMinVp9SpatialLayerShortSideLength, 30,
+ first_active_layer, max_num_spatial_layers, 1, false);
+ EXPECT_EQ(spatial_layers.size(), 1u);
+ EXPECT_EQ(spatial_layers.back().width, kMinVp9SpatialLayerLongSideLength);
+}
+
+TEST(SvcConfig, AlwaysSendsAtLeastOneLayerPortrait) {
+ const size_t max_num_spatial_layers = 6;
+ const size_t first_active_layer = 5;
+
+ std::vector<SpatialLayer> spatial_layers = GetSvcConfig(
+ kMinVp9SpatialLayerShortSideLength, kMinVp9SpatialLayerLongSideLength, 30,
+ first_active_layer, max_num_spatial_layers, 1, false);
+ EXPECT_EQ(spatial_layers.size(), 1u);
+ EXPECT_EQ(spatial_layers.back().width, kMinVp9SpatialLayerShortSideLength);
+}
+
+TEST(SvcConfig, EnforcesMinimalRequiredParity) {
+ const size_t max_num_spatial_layers = 3;
+ const size_t kOddSize = 1023;
+
+ std::vector<SpatialLayer> spatial_layers =
+ GetSvcConfig(kOddSize, kOddSize, 30,
+ /*first_active_layer=*/1, max_num_spatial_layers, 1, false);
+ // Since there are 2 layers total (1, 2), divisibility by 2 is required.
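+ // (1023 is odd, so both dimensions are cropped to 1022.)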
+ EXPECT_EQ(spatial_layers.back().width, kOddSize - 1);
+ EXPECT_EQ(spatial_layers.back().height, kOddSize - 1);
+
+ spatial_layers =
+ GetSvcConfig(kOddSize, kOddSize, 30,
+ /*first_active_layer=*/0, max_num_spatial_layers, 1, false);
+ // Since there are 3 layers total (0, 1, 2), divisibility by 4 is required.
+ EXPECT_EQ(spatial_layers.back().width, kOddSize - 3);
+ EXPECT_EQ(spatial_layers.back().height, kOddSize - 3);
+
+ spatial_layers =
+ GetSvcConfig(kOddSize, kOddSize, 30,
+ /*first_active_layer=*/2, max_num_spatial_layers, 1, false);
+ // Since there is only 1 layer active (2), divisibility by 1 is required.
+ EXPECT_EQ(spatial_layers.back().width, kOddSize);
+ EXPECT_EQ(spatial_layers.back().height, kOddSize);
+}
+
+TEST(SvcConfig, EnforcesMinimalRequiredParityWithScalabilityMode) {
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 1023;
+ codec.height = 1023;
+ codec.SetScalabilityMode(ScalabilityMode::kL3T1);
+
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, // Divisibility by 4 required.
+ ElementsAre(Field(&SpatialLayer::width, 255),
+ Field(&SpatialLayer::width, 510),
+ Field(&SpatialLayer::width, 1020)));
+
+ codec.SetScalabilityMode(ScalabilityMode::kL2T1);
+ spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, // Divisibility by 2 required.
+ ElementsAre(Field(&SpatialLayer::width, 511),
+ Field(&SpatialLayer::width, 1022)));
+
+ codec.SetScalabilityMode(ScalabilityMode::kL1T1);
+ spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, // Divisibility by 1 required.
+ ElementsAre(Field(&SpatialLayer::width, 1023)));
+}
+
+TEST(SvcConfig, EnforcesMinimalRequiredParityWithScalabilityModeResRatio1_5) {
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 1280;
+ codec.height = 1280;
+ codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1
+
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, // Divisibility by 3 required.
+ ElementsAre(Field(&SpatialLayer::width, 852), + Field(&SpatialLayer::width, 1278))); +} + +TEST(SvcConfig, SkipsInactiveLayers) { + const size_t num_spatial_layers = 4; + const size_t first_active_layer = 2; + + std::vector<SpatialLayer> spatial_layers = GetSvcConfig( + kMinVp9SpatialLayerLongSideLength << (num_spatial_layers - 1), + kMinVp9SpatialLayerShortSideLength << (num_spatial_layers - 1), 30, + first_active_layer, num_spatial_layers, 1, false); + EXPECT_EQ(spatial_layers.size(), 2u); + EXPECT_EQ(spatial_layers.back().width, + kMinVp9SpatialLayerLongSideLength << (num_spatial_layers - 1)); +} + +TEST(SvcConfig, BitrateThresholds) { + const size_t first_active_layer = 0; + const size_t num_spatial_layers = 3; + std::vector<SpatialLayer> spatial_layers = GetSvcConfig( + kMinVp9SpatialLayerLongSideLength << (num_spatial_layers - 1), + kMinVp9SpatialLayerShortSideLength << (num_spatial_layers - 1), 30, + first_active_layer, num_spatial_layers, 1, false); + + EXPECT_EQ(spatial_layers.size(), num_spatial_layers); + + for (const SpatialLayer& layer : spatial_layers) { + EXPECT_LE(layer.minBitrate, layer.maxBitrate); + EXPECT_LE(layer.minBitrate, layer.targetBitrate); + EXPECT_LE(layer.targetBitrate, layer.maxBitrate); + } +} + +TEST(SvcConfig, BitrateThresholdsWithScalabilityMode) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 960; + codec.height = 540; + codec.SetScalabilityMode(ScalabilityMode::kS3T3); + + std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135), + Field(&SpatialLayer::height, 270), + Field(&SpatialLayer::height, 540))); + + for (const SpatialLayer& layer : spatial_layers) { + EXPECT_LE(layer.minBitrate, layer.maxBitrate); + EXPECT_LE(layer.minBitrate, layer.targetBitrate); + EXPECT_LE(layer.targetBitrate, layer.maxBitrate); + } +} + +TEST(SvcConfig, ScreenSharing) { + std::vector<SpatialLayer> spatial_layers = + GetSvcConfig(1920, 1080, 30, 1, 3, 3, true); + + EXPECT_EQ(spatial_layers.size(), 3UL); + + for (size_t i = 0; i < 3; ++i) { + const SpatialLayer& layer = spatial_layers[i]; + EXPECT_EQ(layer.width, 1920); + EXPECT_EQ(layer.height, 1080); + EXPECT_EQ(layer.maxFramerate, (i < 1) ? 5 : (i < 2 ? 10 : 30)); + EXPECT_EQ(layer.numberOfTemporalLayers, 1); + EXPECT_LE(layer.minBitrate, layer.maxBitrate); + EXPECT_LE(layer.minBitrate, layer.targetBitrate); + EXPECT_LE(layer.targetBitrate, layer.maxBitrate); + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc new file mode 100644 index 0000000000..b6293a342e --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc @@ -0,0 +1,2446 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "absl/memory/memory.h" +#include "api/test/create_frame_generator.h" +#include "api/test/frame_generator_interface.h" +#include "api/test/mock_video_encoder.h" +#include "api/video/color_space.h" +#include "api/video/i420_buffer.h" +#include "api/video_codecs/video_encoder.h" +#include "api/video_codecs/vp9_profile.h" +#include "common_video/libyuv/include/webrtc_libyuv.h" +#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" +#include "modules/video_coding/codecs/interface/libvpx_interface.h" +#include "modules/video_coding/codecs/interface/mock_libvpx_interface.h" +#include "modules/video_coding/codecs/test/encoded_video_frame_producer.h" +#include "modules/video_coding/codecs/test/video_codec_unittest.h" +#include "modules/video_coding/codecs/vp9/include/vp9.h" +#include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h" +#include "modules/video_coding/codecs/vp9/svc_config.h" +#include "rtc_base/strings/string_builder.h" +#include "test/explicit_key_value_config.h" +#include "test/field_trial.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/mappable_native_buffer.h" +#include "test/video_codec_settings.h" + +namespace webrtc { +namespace { + +using ::testing::_; +using ::testing::A; +using ::testing::AllOf; +using ::testing::An; +using ::testing::AnyNumber; +using ::testing::ByRef; +using ::testing::DoAll; +using ::testing::Each; +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; +using ::testing::Field; +using ::testing::IsEmpty; +using ::testing::Mock; +using ::testing::NiceMock; +using ::testing::Return; +using ::testing::SafeMatcherCast; +using ::testing::SaveArgPointee; +using ::testing::SetArgPointee; +using ::testing::SizeIs; +using ::testing::TypedEq; +using ::testing::UnorderedElementsAreArray; +using ::testing::WithArg; +using EncoderInfo = webrtc::VideoEncoder::EncoderInfo; +using FramerateFractions = + absl::InlinedVector<uint8_t, webrtc::kMaxTemporalStreams>; + +constexpr size_t kWidth = 1280; +constexpr size_t kHeight = 720; + +const VideoEncoder::Capabilities kCapabilities(false); +const VideoEncoder::Settings kSettings(kCapabilities, + /*number_of_cores=*/1, + /*max_payload_size=*/0); + +VideoCodec DefaultCodecSettings() { + VideoCodec codec_settings; + webrtc::test::CodecSettings(kVideoCodecVP9, &codec_settings); + codec_settings.width = kWidth; + codec_settings.height = kHeight; + codec_settings.VP9()->numberOfTemporalLayers = 1; + codec_settings.VP9()->numberOfSpatialLayers = 1; + return codec_settings; +} + +void ConfigureSvc(VideoCodec& codec_settings, + int num_spatial_layers, + int num_temporal_layers = 1) { + codec_settings.VP9()->numberOfSpatialLayers = num_spatial_layers; + codec_settings.VP9()->numberOfTemporalLayers = num_temporal_layers; + codec_settings.SetFrameDropEnabled(false); + + std::vector<SpatialLayer> layers = GetSvcConfig( + codec_settings.width, codec_settings.height, codec_settings.maxFramerate, + /*first_active_layer=*/0, num_spatial_layers, num_temporal_layers, false); + for (size_t i = 0; i < layers.size(); ++i) { + codec_settings.spatialLayers[i] = layers[i]; + } +} + +} // namespace + +class TestVp9Impl : public VideoCodecUnitTest { + protected: + std::unique_ptr<VideoEncoder> CreateEncoder() override { + return VP9Encoder::Create(); + } + + std::unique_ptr<VideoDecoder> CreateDecoder() override { + return VP9Decoder::Create(); + } + + void ModifyCodecSettings(VideoCodec* codec_settings) override { + webrtc::test::CodecSettings(kVideoCodecVP9, codec_settings); + 
codec_settings->width = kWidth; + codec_settings->height = kHeight; + codec_settings->VP9()->numberOfTemporalLayers = 1; + codec_settings->VP9()->numberOfSpatialLayers = 1; + } +}; + +class TestVp9ImplForPixelFormat + : public TestVp9Impl, + public ::testing::WithParamInterface< + test::FrameGeneratorInterface::OutputType> { + protected: + void SetUp() override { + input_frame_generator_ = test::CreateSquareFrameGenerator( + kWidth, kHeight, GetParam(), absl::optional<int>()); + TestVp9Impl::SetUp(); + } +}; + +// Disabled on ios as flake, see https://crbug.com/webrtc/7057 +#if defined(WEBRTC_IOS) +TEST_P(TestVp9ImplForPixelFormat, DISABLED_EncodeDecode) { +#else +TEST_P(TestVp9ImplForPixelFormat, EncodeDecode) { +#endif + VideoFrame input_frame = NextInputFrame(); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + // First frame should be a key frame. + encoded_frame._frameType = VideoFrameType::kVideoFrameKey; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0)); + std::unique_ptr<VideoFrame> decoded_frame; + absl::optional<uint8_t> decoded_qp; + ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp)); + ASSERT_TRUE(decoded_frame); + EXPECT_GT(I420PSNR(&input_frame, decoded_frame.get()), 36); + + const ColorSpace color_space = *decoded_frame->color_space(); + EXPECT_EQ(ColorSpace::PrimaryID::kUnspecified, color_space.primaries()); + EXPECT_EQ(ColorSpace::TransferID::kUnspecified, color_space.transfer()); + EXPECT_EQ(ColorSpace::MatrixID::kUnspecified, color_space.matrix()); + EXPECT_EQ(ColorSpace::RangeID::kLimited, color_space.range()); + EXPECT_EQ(ColorSpace::ChromaSiting::kUnspecified, + color_space.chroma_siting_horizontal()); + EXPECT_EQ(ColorSpace::ChromaSiting::kUnspecified, + color_space.chroma_siting_vertical()); +} + +TEST_P(TestVp9ImplForPixelFormat, EncodeNativeBuffer) { + VideoFrame input_frame = NextInputFrame(); + // Replace the input frame with a fake native buffer of the same size and + // underlying pixel format. Do not allow ToI420() for non-I420 buffers, + // ensuring zero-conversion. + input_frame = test::CreateMappableNativeFrame( + input_frame.ntp_time_ms(), input_frame.video_frame_buffer()->type(), + input_frame.width(), input_frame.height()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + + // After encoding, we would expect a single mapping to have happened. 
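+ // (GetMappableNativeBufferFromVideoFrame and GetMappedFramedBuffers below
+ // expose the buffers the encoder actually mapped, letting the test verify
+ // that no ToI420() conversion occurred.)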
+ rtc::scoped_refptr<test::MappableNativeBuffer> mappable_buffer = + test::GetMappableNativeBufferFromVideoFrame(input_frame); + std::vector<rtc::scoped_refptr<VideoFrameBuffer>> mapped_buffers = + mappable_buffer->GetMappedFramedBuffers(); + ASSERT_EQ(mapped_buffers.size(), 1u); + EXPECT_EQ(mapped_buffers[0]->type(), mappable_buffer->mappable_type()); + EXPECT_EQ(mapped_buffers[0]->width(), input_frame.width()); + EXPECT_EQ(mapped_buffers[0]->height(), input_frame.height()); + EXPECT_FALSE(mappable_buffer->DidConvertToI420()); +} + +TEST_P(TestVp9ImplForPixelFormat, DecodedColorSpaceFromBitstream) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + + // Encoded frame without explicit color space information. + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0)); + std::unique_ptr<VideoFrame> decoded_frame; + absl::optional<uint8_t> decoded_qp; + ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp)); + ASSERT_TRUE(decoded_frame); + // Color space present from encoded bitstream. + ASSERT_TRUE(decoded_frame->color_space()); + // No HDR metadata present. + EXPECT_FALSE(decoded_frame->color_space()->hdr_metadata()); +} + +TEST_P(TestVp9ImplForPixelFormat, DecodedQpEqualsEncodedQp) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + // First frame should be a key frame. + encoded_frame._frameType = VideoFrameType::kVideoFrameKey; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0)); + std::unique_ptr<VideoFrame> decoded_frame; + absl::optional<uint8_t> decoded_qp; + ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp)); + ASSERT_TRUE(decoded_frame); + ASSERT_TRUE(decoded_qp); + EXPECT_EQ(encoded_frame.qp_, *decoded_qp); +} + +TEST_F(TestVp9Impl, SwitchInputPixelFormatsWithoutReconfigure) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + + // Change the input frame type from I420 to NV12, encoding should still work. + input_frame_generator_ = test::CreateSquareFrameGenerator( + kWidth, kHeight, test::FrameGeneratorInterface::OutputType::kNV12, + absl::optional<int>()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + + // Flipping back to I420, encoding should still work. 
+ input_frame_generator_ = test::CreateSquareFrameGenerator( + kWidth, kHeight, test::FrameGeneratorInterface::OutputType::kI420, + absl::optional<int>()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); +} + +TEST(Vp9ImplTest, ParserQpEqualsEncodedQp) { + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + encoder->InitEncode(&codec_settings, kSettings); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(1) + .SetResolution({kWidth, kHeight}) + .Encode(); + ASSERT_THAT(frames, SizeIs(1)); + const auto& encoded_frame = frames.front().encoded_image; + int qp = 0; + ASSERT_TRUE(vp9::GetQp(encoded_frame.data(), encoded_frame.size(), &qp)); + EXPECT_EQ(encoded_frame.qp_, qp); +} + +TEST(Vp9ImplTest, EncodeAttachesTemplateStructureWithSvcController) { + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(2) + .SetResolution({kWidth, kHeight}) + .Encode(); + + ASSERT_THAT(frames, SizeIs(2)); + EXPECT_TRUE(frames[0].codec_specific_info.template_structure); + EXPECT_TRUE(frames[0].codec_specific_info.generic_frame_info); + + EXPECT_FALSE(frames[1].codec_specific_info.template_structure); + EXPECT_TRUE(frames[1].codec_specific_info.generic_frame_info); +} + +TEST(Vp9ImplTest, EncoderWith2TemporalLayers) { + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.VP9()->numberOfTemporalLayers = 2; + // Tl0PidIdx is only used in non-flexible mode. 
+ codec_settings.VP9()->flexibleMode = false; + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(4) + .SetResolution({kWidth, kHeight}) + .Encode(); + + ASSERT_THAT(frames, SizeIs(4)); + EXPECT_EQ(frames[0].codec_specific_info.codecSpecific.VP9.temporal_idx, 0); + EXPECT_EQ(frames[1].codec_specific_info.codecSpecific.VP9.temporal_idx, 1); + EXPECT_EQ(frames[2].codec_specific_info.codecSpecific.VP9.temporal_idx, 0); + EXPECT_EQ(frames[3].codec_specific_info.codecSpecific.VP9.temporal_idx, 1); +} + +TEST(Vp9ImplTest, EncodeTemporalLayersWithSvcController) { + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.VP9()->numberOfTemporalLayers = 2; + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(4) + .SetResolution({kWidth, kHeight}) + .Encode(); + + ASSERT_THAT(frames, SizeIs(4)); + EXPECT_EQ(frames[0].codec_specific_info.codecSpecific.VP9.temporal_idx, 0); + EXPECT_EQ(frames[1].codec_specific_info.codecSpecific.VP9.temporal_idx, 1); + EXPECT_EQ(frames[2].codec_specific_info.codecSpecific.VP9.temporal_idx, 0); + EXPECT_EQ(frames[3].codec_specific_info.codecSpecific.VP9.temporal_idx, 1); + // Verify codec agnostic part + ASSERT_TRUE(frames[0].codec_specific_info.generic_frame_info); + ASSERT_TRUE(frames[1].codec_specific_info.generic_frame_info); + ASSERT_TRUE(frames[2].codec_specific_info.generic_frame_info); + ASSERT_TRUE(frames[3].codec_specific_info.generic_frame_info); + EXPECT_EQ(frames[0].codec_specific_info.generic_frame_info->temporal_id, 0); + EXPECT_EQ(frames[1].codec_specific_info.generic_frame_info->temporal_id, 1); + EXPECT_EQ(frames[2].codec_specific_info.generic_frame_info->temporal_id, 0); + EXPECT_EQ(frames[3].codec_specific_info.generic_frame_info->temporal_id, 1); +} + +TEST(Vp9ImplTest, EncoderWith2SpatialLayers) { + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.VP9()->numberOfSpatialLayers = 2; + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(1) + .SetResolution({kWidth, kHeight}) + .Encode(); + + ASSERT_THAT(frames, SizeIs(2)); + EXPECT_EQ(frames[0].encoded_image.SpatialIndex(), 0); + EXPECT_EQ(frames[1].encoded_image.SpatialIndex(), 1); +} + +TEST(Vp9ImplTest, EncodeSpatialLayersWithSvcController) { + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.VP9()->numberOfSpatialLayers = 2; + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(2) + .SetResolution({kWidth, kHeight}) + .Encode(); + + ASSERT_THAT(frames, SizeIs(4)); + EXPECT_EQ(frames[0].encoded_image.SpatialIndex(), 0); + EXPECT_EQ(frames[1].encoded_image.SpatialIndex(), 1); + EXPECT_EQ(frames[2].encoded_image.SpatialIndex(), 0); + EXPECT_EQ(frames[3].encoded_image.SpatialIndex(), 1); + // Verify codec agnostic part + 
ASSERT_TRUE(frames[0].codec_specific_info.generic_frame_info);
+ ASSERT_TRUE(frames[1].codec_specific_info.generic_frame_info);
+ ASSERT_TRUE(frames[2].codec_specific_info.generic_frame_info);
+ ASSERT_TRUE(frames[3].codec_specific_info.generic_frame_info);
+ EXPECT_EQ(frames[0].codec_specific_info.generic_frame_info->spatial_id, 0);
+ EXPECT_EQ(frames[1].codec_specific_info.generic_frame_info->spatial_id, 1);
+ EXPECT_EQ(frames[2].codec_specific_info.generic_frame_info->spatial_id, 0);
+ EXPECT_EQ(frames[3].codec_specific_info.generic_frame_info->spatial_id, 1);
+}
+
+TEST_F(TestVp9Impl, EncoderExplicitLayering) {
+ // Override default settings.
+ codec_settings_.VP9()->numberOfTemporalLayers = 1;
+ codec_settings_.VP9()->numberOfSpatialLayers = 2;
+
+ codec_settings_.width = 960;
+ codec_settings_.height = 540;
+ codec_settings_.spatialLayers[0].minBitrate = 200;
+ codec_settings_.spatialLayers[0].maxBitrate = 500;
+ codec_settings_.spatialLayers[0].targetBitrate =
+ (codec_settings_.spatialLayers[0].minBitrate +
+ codec_settings_.spatialLayers[0].maxBitrate) /
+ 2;
+ codec_settings_.spatialLayers[0].active = true;
+
+ codec_settings_.spatialLayers[1].minBitrate = 400;
+ codec_settings_.spatialLayers[1].maxBitrate = 1500;
+ codec_settings_.spatialLayers[1].targetBitrate =
+ (codec_settings_.spatialLayers[1].minBitrate +
+ codec_settings_.spatialLayers[1].maxBitrate) /
+ 2;
+ codec_settings_.spatialLayers[1].active = true;
+
+ codec_settings_.spatialLayers[0].width = codec_settings_.width / 2;
+ codec_settings_.spatialLayers[0].height = codec_settings_.height / 2;
+ codec_settings_.spatialLayers[0].maxFramerate = codec_settings_.maxFramerate;
+ codec_settings_.spatialLayers[1].width = codec_settings_.width;
+ codec_settings_.spatialLayers[1].height = codec_settings_.height;
+ codec_settings_.spatialLayers[1].maxFramerate = codec_settings_.maxFramerate;
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // Ensure it fails if scaling factors in horz/vert dimensions are different.
+ codec_settings_.spatialLayers[0].width = codec_settings_.width;
+ codec_settings_.spatialLayers[0].height = codec_settings_.height / 2;
+ codec_settings_.spatialLayers[1].width = codec_settings_.width;
+ codec_settings_.spatialLayers[1].height = codec_settings_.height;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_PARAMETER,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // Ensure it fails if scaling factor is not a power of two.
+ codec_settings_.spatialLayers[0].width = codec_settings_.width / 3;
+ codec_settings_.spatialLayers[0].height = codec_settings_.height / 3;
+ codec_settings_.spatialLayers[1].width = codec_settings_.width;
+ codec_settings_.spatialLayers[1].height = codec_settings_.height;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_PARAMETER,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+}
+
+TEST_F(TestVp9Impl, EnableDisableSpatialLayers) {
+ // Configure encoder to produce N spatial layers. Encode frames of layer 0
+ // then enable layer 1 and encode more frames and so on until layer N-1.
+ // Then disable layers one by one in the same way.
+ // Note: bit rate allocation is high to avoid frame dropping due to rate
+ // control; the encoder should always produce a frame. A dropped
+ // frame indicates a problem and the test will fail.
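+ // With k active layers the encoder is expected to emit k encoded images
+ // per input frame, and VP9 ss_data_available only on the first frame after
+ // a layer change (this is what the loops below assert).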
+ const size_t num_spatial_layers = 3;
+ const size_t num_frames_to_encode = 5;
+
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+ codec_settings_.SetFrameDropEnabled(true);
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0,
+ codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 * 2);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(sl_idx + 1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ }
+ }
+
+ for (size_t i = 0; i < num_spatial_layers - 1; ++i) {
+ const size_t sl_idx = num_spatial_layers - i - 1;
+ bitrate_allocation.SetBitrate(sl_idx, 0, 0);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(sl_idx);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ }
+ }
+}
+
+TEST(Vp9ImplTest, EnableDisableSpatialLayersWithSvcController) {
+ const int num_spatial_layers = 3;
+ // Configure encoder to produce 3 spatial layers. Encode frames of layer 0
+ // then enable layer 1 and encode more frames and so on.
+ // Then disable layers one by one in the same way.
+ // Note: bit rate allocation is high to avoid frame dropping due to rate
+ // control; the encoder should always produce a frame. A dropped
+ // frame indicates a problem and the test will fail.
+ std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create();
+ VideoCodec codec_settings = DefaultCodecSettings();
+ ConfigureSvc(codec_settings, num_spatial_layers);
+ codec_settings.SetFrameDropEnabled(true);
+ EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings),
+ WEBRTC_VIDEO_CODEC_OK);
+
+ EncodedVideoFrameProducer producer(*encoder);
+ producer.SetResolution({kWidth, kHeight});
+
+ // Encode a key frame to validate all other frames are delta frames.
+ std::vector<EncodedVideoFrameProducer::EncodedFrame> frames =
+ producer.SetNumInputFrames(1).Encode();
+ ASSERT_THAT(frames, Not(IsEmpty()));
+ EXPECT_TRUE(frames[0].codec_specific_info.template_structure);
+
+ const size_t num_frames_to_encode = 5;
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ // Allocate high bit rate to avoid frame dropping due to rate control.
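+ // (targetBitrate is expressed in kbps, so * 1000 converts it to the bps
+ // that VideoBitrateAllocation expects; the extra * 2 provides headroom.)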
+ bitrate_allocation.SetBitrate( + sl_idx, 0, + codec_settings.spatialLayers[sl_idx].targetBitrate * 1000 * 2); + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + + frames = producer.SetNumInputFrames(num_frames_to_encode).Encode(); + // With (sl_idx+1) spatial layers expect (sl_idx+1) frames per input frame. + ASSERT_THAT(frames, SizeIs(num_frames_to_encode * (sl_idx + 1))); + for (size_t i = 0; i < frames.size(); ++i) { + EXPECT_TRUE(frames[i].codec_specific_info.generic_frame_info); + EXPECT_FALSE(frames[i].codec_specific_info.template_structure); + } + } + + for (int sl_idx = num_spatial_layers - 1; sl_idx > 0; --sl_idx) { + bitrate_allocation.SetBitrate(sl_idx, 0, 0); + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + + frames = producer.SetNumInputFrames(num_frames_to_encode).Encode(); + // With `sl_idx` spatial layer disabled, there are `sl_idx` spatial layers + // left. + ASSERT_THAT(frames, SizeIs(num_frames_to_encode * sl_idx)); + for (size_t i = 0; i < frames.size(); ++i) { + EXPECT_TRUE(frames[i].codec_specific_info.generic_frame_info); + EXPECT_FALSE(frames[i].codec_specific_info.template_structure); + } + } +} + +MATCHER_P2(GenericLayerIs, spatial_id, temporal_id, "") { + if (arg.codec_specific_info.generic_frame_info == absl::nullopt) { + *result_listener << " miss generic_frame_info"; + return false; + } + const auto& layer = *arg.codec_specific_info.generic_frame_info; + if (layer.spatial_id != spatial_id || layer.temporal_id != temporal_id) { + *result_listener << " frame from layer (" << layer.spatial_id << ", " + << layer.temporal_id << ")"; + return false; + } + return true; +} + +TEST(Vp9ImplTest, SpatialUpswitchNotAtGOFBoundary) { + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + ConfigureSvc(codec_settings, /*num_spatial_layers=*/3, + /*num_temporal_layers=*/3); + codec_settings.SetFrameDropEnabled(true); + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + EncodedVideoFrameProducer producer(*encoder); + producer.SetResolution({kWidth, kHeight}); + + // Disable all but spatial_layer = 0; + VideoBitrateAllocation bitrate_allocation; + int layer_bitrate_bps = codec_settings.spatialLayers[0].targetBitrate * 1000; + bitrate_allocation.SetBitrate(0, 0, layer_bitrate_bps); + bitrate_allocation.SetBitrate(0, 1, layer_bitrate_bps); + bitrate_allocation.SetBitrate(0, 2, layer_bitrate_bps); + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + EXPECT_THAT(producer.SetNumInputFrames(3).Encode(), + ElementsAre(GenericLayerIs(0, 0), GenericLayerIs(0, 2), + GenericLayerIs(0, 1))); + + // Upswitch to spatial_layer = 1 + layer_bitrate_bps = codec_settings.spatialLayers[1].targetBitrate * 1000; + bitrate_allocation.SetBitrate(1, 0, layer_bitrate_bps); + bitrate_allocation.SetBitrate(1, 1, layer_bitrate_bps); + bitrate_allocation.SetBitrate(1, 2, layer_bitrate_bps); + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + // Expect upswitch doesn't happen immediately since there is no S1 frame that + // S1T2 frame can reference. + EXPECT_THAT(producer.SetNumInputFrames(1).Encode(), + ElementsAre(GenericLayerIs(0, 2))); + // Expect spatial upswitch happens now, at T0 frame. 
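+ // (A temporal-layer-0 frame starts a new group of frames, giving S1 a
+ // valid reference; both an S0T0 and an S1T0 frame are expected.)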
+ EXPECT_THAT(producer.SetNumInputFrames(1).Encode(),
+ ElementsAre(GenericLayerIs(0, 0), GenericLayerIs(1, 0)));
+}
+// TODO(bugs.webrtc.org/13442) Enable once a forward fix has landed in WebRTC.
+TEST_F(TestVp9Impl, DISABLED_DisableEnableBaseLayerTriggersKeyFrame) {
+ // Configure encoder to produce N spatial layers. Encode frames for all
+ // layers. Then disable all but the last layer. Then reenable all back again.
+ test::ScopedFieldTrials override_field_trials(
+ "WebRTC-Vp9ExternalRefCtrl/Enabled/");
+ const size_t num_spatial_layers = 3;
+ const size_t num_temporal_layers = 3;
+ // Must not be a multiple of the temporal period to exercise all code paths.
+ const size_t num_frames_to_encode = 5;
+
+ ConfigureSvc(codec_settings_, num_spatial_layers, num_temporal_layers);
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.VP9()->flexibleMode = false;
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOnKeyPic;
+ codec_settings_.mode = VideoCodecMode::kRealtimeVideo;
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ bitrate_allocation.SetBitrate(
+ sl_idx, tl_idx,
+ codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 * 2);
+ }
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ }
+
+ // Disable all but top layer.
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) {
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ bitrate_allocation.SetBitrate(sl_idx, tl_idx, 0);
+ }
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ bool seen_ss_data = false;
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ // SS available immediately after switching on base temporal layer.
+ if (seen_ss_data) {
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ false);
+ } else {
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ codec_specific_info[0].codecSpecific.VP9.temporal_idx == 0);
+ seen_ss_data |=
+ codec_specific_info[0].codecSpecific.VP9.ss_data_available;
+ }
+ // No key-frames generated for disabling layers.
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+ EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2);
+ }
+ EXPECT_TRUE(seen_ss_data);
+
+ // Force key-frame.
+ std::vector<VideoFrameType> frame_types = {VideoFrameType::kVideoFrameKey};
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), &frame_types));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ // Key-frame should be produced.
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameKey);
+ EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2);
+
+ // Encode some more frames.
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+ EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2);
+ }
+
+ // Enable the second layer back.
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ bitrate_allocation.SetBitrate(
+ 1, tl_idx, codec_settings_.spatialLayers[0].targetBitrate * 1000 * 2);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(encoded_frame.size(), 2u);
+ // SS available immediately after switching on.
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ // Keyframe should be generated when enabling lower layers.
+ const VideoFrameType expected_type = frame_num == 0
+ ? VideoFrameType::kVideoFrameKey
+ : VideoFrameType::kVideoFrameDelta;
+ EXPECT_EQ(encoded_frame[0]._frameType, expected_type);
+ EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 1);
+ EXPECT_EQ(encoded_frame[1].SpatialIndex().value_or(-1), 2);
+ }
+
+ // Enable the first layer back.
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ bitrate_allocation.SetBitrate(
+ 0, tl_idx, codec_settings_.spatialLayers[1].targetBitrate * 1000 * 2);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(encoded_frame.size(), 3u);
+ // SS available immediately after switching on.
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ // Keyframe should be generated when enabling lower layers.
+ const VideoFrameType expected_type = frame_num == 0
+ ?
VideoFrameType::kVideoFrameKey
+ : VideoFrameType::kVideoFrameDelta;
+ EXPECT_EQ(encoded_frame[0]._frameType, expected_type);
+ }
+}
+// TODO(bugs.webrtc.org/13442) Enable once a forward fix has landed in WebRTC.
+TEST(Vp9ImplTest,
+ DISABLED_DisableEnableBaseLayerWithSvcControllerTriggersKeyFrame) {
+ // Configure encoder to produce N spatial layers. Encode frames for all
+ // layers. Then disable all but the last layer. Then reenable all back again.
+ const size_t num_spatial_layers = 3;
+ const size_t num_temporal_layers = 3;
+ // Must not be a multiple of the temporal period to exercise all code paths.
+ const size_t num_frames_to_encode = 5;
+
+ std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create();
+ VideoCodec codec_settings = DefaultCodecSettings();
+ ConfigureSvc(codec_settings, num_spatial_layers, num_temporal_layers);
+ codec_settings.SetFrameDropEnabled(false);
+ codec_settings.VP9()->flexibleMode = false;
+ codec_settings.VP9()->interLayerPred = InterLayerPredMode::kOnKeyPic;
+ codec_settings.mode = VideoCodecMode::kRealtimeVideo;
+
+ EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings),
+ WEBRTC_VIDEO_CODEC_OK);
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ bitrate_allocation.SetBitrate(
+ sl_idx, tl_idx,
+ codec_settings.spatialLayers[sl_idx].targetBitrate * 1000 * 2);
+ }
+ }
+ encoder->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings.maxFramerate));
+
+ EncodedVideoFrameProducer producer(*encoder);
+ producer.SetResolution({kWidth, kHeight});
+
+ std::vector<EncodedVideoFrameProducer::EncodedFrame> frames =
+ producer.SetNumInputFrames(num_frames_to_encode).Encode();
+ ASSERT_THAT(frames, SizeIs(num_frames_to_encode * num_spatial_layers));
+
+ // Disable all but top spatial layer.
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) {
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ bitrate_allocation.SetBitrate(sl_idx, tl_idx, 0);
+ }
+ }
+ encoder->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings.maxFramerate));
+
+ frames = producer.SetNumInputFrames(num_frames_to_encode).Encode();
+ EXPECT_THAT(frames, SizeIs(num_frames_to_encode));
+ for (const auto& frame : frames) {
+ // Expect no key-frames generated.
+ EXPECT_FALSE(frame.codec_specific_info.template_structure);
+ ASSERT_TRUE(frame.codec_specific_info.generic_frame_info);
+ EXPECT_EQ(frame.codec_specific_info.generic_frame_info->spatial_id, 2);
+ }
+
+ frames = producer.ForceKeyFrame().SetNumInputFrames(1).Encode();
+ ASSERT_THAT(frames, SizeIs(1));
+ // Key-frame should be produced.
+ EXPECT_EQ(frames[0].encoded_image._frameType, VideoFrameType::kVideoFrameKey); + ASSERT_TRUE(frames[0].codec_specific_info.template_structure); + ASSERT_TRUE(frames[0].codec_specific_info.generic_frame_info); + EXPECT_EQ(frames[0].codec_specific_info.generic_frame_info->spatial_id, 2); + + frames = producer.SetNumInputFrames(num_frames_to_encode).Encode(); + ASSERT_THAT(frames, SizeIs(num_frames_to_encode)); + for (const auto& frame : frames) { + EXPECT_EQ(frame.encoded_image._frameType, VideoFrameType::kVideoFrameDelta); + EXPECT_FALSE(frame.codec_specific_info.template_structure); + ASSERT_TRUE(frame.codec_specific_info.generic_frame_info); + EXPECT_EQ(frame.codec_specific_info.generic_frame_info->spatial_id, 2); + } + + // Enable the second layer back. + // Allocate high bit rate to avoid frame dropping due to rate control. + for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) { + bitrate_allocation.SetBitrate( + 1, tl_idx, codec_settings.spatialLayers[0].targetBitrate * 1000 * 2); + } + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + + frames = producer.SetNumInputFrames(num_frames_to_encode).Encode(); + ASSERT_THAT(frames, SizeIs(num_frames_to_encode * 2)); + EXPECT_EQ(frames[0].encoded_image._frameType, VideoFrameType::kVideoFrameKey); + EXPECT_TRUE(frames[0].codec_specific_info.template_structure); + ASSERT_TRUE(frames[0].codec_specific_info.generic_frame_info); + EXPECT_EQ(frames[0].codec_specific_info.generic_frame_info->spatial_id, 1); + for (size_t i = 1; i < frames.size(); ++i) { + EXPECT_EQ(frames[i].encoded_image._frameType, + VideoFrameType::kVideoFrameDelta); + EXPECT_FALSE(frames[i].codec_specific_info.template_structure); + ASSERT_TRUE(frames[i].codec_specific_info.generic_frame_info); + EXPECT_EQ(frames[i].codec_specific_info.generic_frame_info->spatial_id, + 1 + static_cast<int>(i % 2)); + } + + // Enable the first layer back. + // Allocate high bit rate to avoid frame dropping due to rate control. + for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) { + bitrate_allocation.SetBitrate( + 0, tl_idx, codec_settings.spatialLayers[1].targetBitrate * 1000 * 2); + } + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + + frames = producer.SetNumInputFrames(num_frames_to_encode).Encode(); + ASSERT_THAT(frames, SizeIs(num_frames_to_encode * 3)); + EXPECT_TRUE(frames[0].codec_specific_info.template_structure); + ASSERT_TRUE(frames[0].codec_specific_info.generic_frame_info); + EXPECT_EQ(frames[0].codec_specific_info.generic_frame_info->spatial_id, 0); + for (size_t i = 1; i < frames.size(); ++i) { + EXPECT_FALSE(frames[i].codec_specific_info.template_structure); + ASSERT_TRUE(frames[i].codec_specific_info.generic_frame_info); + EXPECT_EQ(frames[i].codec_specific_info.generic_frame_info->spatial_id, + static_cast<int>(i % 3)); + } +} + +TEST_F(TestVp9Impl, DisableEnableBaseLayerTriggersKeyFrameForScreenshare) { + // Configure encoder to produce N spatial layers. Encode frames for all + // layers. Then disable all but the last layer. Then reenable all back again. 
+ const size_t num_spatial_layers = 3;
+ const size_t num_frames_to_encode = 5;
+
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.mode = VideoCodecMode::kScreensharing;
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ codec_settings_.VP9()->flexibleMode = true;
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0,
+ codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 * 2);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ }
+
+ // Disable all but top layer.
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) {
+ bitrate_allocation.SetBitrate(sl_idx, 0, 0);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ // SS available immediately after switching off.
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ // No key-frames generated for disabling layers.
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+ EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2);
+ }
+
+ // Force key-frame.
+ std::vector<VideoFrameType> frame_types = {VideoFrameType::kVideoFrameKey};
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), &frame_types));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ // Key-frame should be produced.
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameKey);
+
+ // Enable the second layer back.
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000 * 2);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(encoded_frame.size(), 2u);
+ // SS is available immediately after switching the layer on.
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ // A key frame should be generated when enabling lower layers.
+ const VideoFrameType expected_type = frame_num == 0
+ ? VideoFrameType::kVideoFrameKey
+ : VideoFrameType::kVideoFrameDelta;
+ EXPECT_EQ(encoded_frame[0]._frameType, expected_type);
+ EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 1);
+ EXPECT_EQ(encoded_frame[1].SpatialIndex().value_or(-1), 2);
+ }
+
+ // Re-enable the first layer.
+ // Allocate a high bit rate to avoid frame dropping due to rate control.
+ bitrate_allocation.SetBitrate(
+ 0, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 * 2);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(encoded_frame.size(), 3u);
+ // SS is available immediately after switching the layer on.
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ // A key frame should be generated when enabling lower layers.
+ const VideoFrameType expected_type = frame_num == 0
+ ? VideoFrameType::kVideoFrameKey
+ : VideoFrameType::kVideoFrameDelta;
+ EXPECT_EQ(encoded_frame[0]._frameType, expected_type);
+ }
+}
+
+TEST_F(TestVp9Impl, EndOfPicture) {
+ const size_t num_spatial_layers = 2;
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // Encode both base and upper layers. Check that the end-of-superframe flag
+ // is set on the upper layer frame but not on the base layer frame.
+ VideoBitrateAllocation bitrate_allocation;
+ bitrate_allocation.SetBitrate(
+ 0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000);
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+
+ std::vector<EncodedImage> frames;
+ std::vector<CodecSpecificInfo> codec_specific;
+ ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
+ EXPECT_FALSE(codec_specific[0].end_of_picture);
+ EXPECT_TRUE(codec_specific[1].end_of_picture);
+
+ // Encode only the base layer. Check that the end-of-superframe flag is
+ // set on the base layer frame.
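+ // With the upper layer allocated zero bitrate, the base layer frame is the
+ // only (and therefore the last) frame of the superframe, so end_of_picture
+ // is expected to be true on it.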
+ bitrate_allocation.SetBitrate(1, 0, 0); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + SetWaitForEncodedFramesThreshold(1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + + ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific)); + EXPECT_FALSE(frames[0].SpatialIndex()); + EXPECT_TRUE(codec_specific[0].end_of_picture); +} + +TEST_F(TestVp9Impl, InterLayerPred) { + const size_t num_spatial_layers = 2; + ConfigureSvc(codec_settings_, num_spatial_layers); + codec_settings_.SetFrameDropEnabled(false); + + VideoBitrateAllocation bitrate_allocation; + for (size_t i = 0; i < num_spatial_layers; ++i) { + bitrate_allocation.SetBitrate( + i, 0, codec_settings_.spatialLayers[i].targetBitrate * 1000); + } + + const std::vector<InterLayerPredMode> inter_layer_pred_modes = { + InterLayerPredMode::kOff, InterLayerPredMode::kOn, + InterLayerPredMode::kOnKeyPic}; + + for (const InterLayerPredMode inter_layer_pred : inter_layer_pred_modes) { + codec_settings_.VP9()->interLayerPred = inter_layer_pred; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + + std::vector<EncodedImage> frames; + std::vector<CodecSpecificInfo> codec_specific; + ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific)); + + // Key frame. + ASSERT_EQ(frames[0].SpatialIndex(), 0); + ASSERT_FALSE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted); + EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_layer_predicted); + EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred, + inter_layer_pred == InterLayerPredMode::kOff); + EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.ss_data_available); + + ASSERT_EQ(frames[1].SpatialIndex(), 1); + ASSERT_FALSE(codec_specific[1].codecSpecific.VP9.inter_pic_predicted); + EXPECT_EQ(codec_specific[1].codecSpecific.VP9.inter_layer_predicted, + inter_layer_pred == InterLayerPredMode::kOn || + inter_layer_pred == InterLayerPredMode::kOnKeyPic); + EXPECT_EQ(codec_specific[1].codecSpecific.VP9.ss_data_available, + inter_layer_pred == InterLayerPredMode::kOff); + EXPECT_TRUE( + codec_specific[1].codecSpecific.VP9.non_ref_for_inter_layer_pred); + + // Delta frame. 
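+ // For the delta frame below, only kOn keeps inter-layer prediction in use;
+ // kOnKeyPic restricts it to key pictures, so delta frames are expected to
+ // behave like kOff in the assertions that follow.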
+ SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific)); + + ASSERT_EQ(frames[0].SpatialIndex(), 0); + ASSERT_TRUE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted); + EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_layer_predicted); + EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred, + inter_layer_pred != InterLayerPredMode::kOn); + EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.ss_data_available); + + ASSERT_EQ(frames[1].SpatialIndex(), 1); + ASSERT_TRUE(codec_specific[1].codecSpecific.VP9.inter_pic_predicted); + EXPECT_EQ(codec_specific[1].codecSpecific.VP9.inter_layer_predicted, + inter_layer_pred == InterLayerPredMode::kOn); + EXPECT_TRUE( + codec_specific[1].codecSpecific.VP9.non_ref_for_inter_layer_pred); + EXPECT_FALSE(codec_specific[1].codecSpecific.VP9.ss_data_available); + } +} + +TEST_F(TestVp9Impl, + EnablingUpperLayerTriggersKeyFrameIfInterLayerPredIsDisabled) { + const size_t num_spatial_layers = 3; + const size_t num_frames_to_encode = 2; + + ConfigureSvc(codec_settings_, num_spatial_layers); + codec_settings_.SetFrameDropEnabled(false); + + const std::vector<InterLayerPredMode> inter_layer_pred_modes = { + InterLayerPredMode::kOff, InterLayerPredMode::kOn, + InterLayerPredMode::kOnKeyPic}; + + for (const InterLayerPredMode inter_layer_pred : inter_layer_pred_modes) { + codec_settings_.VP9()->interLayerPred = inter_layer_pred; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + bitrate_allocation.SetBitrate( + sl_idx, 0, + codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + for (size_t frame_num = 0; frame_num < num_frames_to_encode; + ++frame_num) { + SetWaitForEncodedFramesThreshold(sl_idx + 1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + + const bool is_first_upper_layer_frame = (sl_idx > 0 && frame_num == 0); + if (is_first_upper_layer_frame) { + if (inter_layer_pred == InterLayerPredMode::kOn) { + EXPECT_EQ(encoded_frame[0]._frameType, + VideoFrameType::kVideoFrameDelta); + } else { + EXPECT_EQ(encoded_frame[0]._frameType, + VideoFrameType::kVideoFrameKey); + } + } else if (sl_idx == 0 && frame_num == 0) { + EXPECT_EQ(encoded_frame[0]._frameType, + VideoFrameType::kVideoFrameKey); + } else { + for (size_t i = 0; i <= sl_idx; ++i) { + EXPECT_EQ(encoded_frame[i]._frameType, + VideoFrameType::kVideoFrameDelta); + } + } + } + } + } +} + +TEST_F(TestVp9Impl, + EnablingUpperLayerUnsetsInterPicPredictedInInterlayerPredModeOn) { + const size_t num_spatial_layers = 3; + const size_t num_frames_to_encode = 2; + + ConfigureSvc(codec_settings_, num_spatial_layers); + codec_settings_.SetFrameDropEnabled(false); + codec_settings_.VP9()->flexibleMode = false; + + const std::vector<InterLayerPredMode> inter_layer_pred_modes = { + InterLayerPredMode::kOff, InterLayerPredMode::kOn, + InterLayerPredMode::kOnKeyPic}; + + for (const InterLayerPredMode inter_layer_pred : inter_layer_pred_modes) { + codec_settings_.VP9()->interLayerPred = 
inter_layer_pred;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0,
+ codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode;
+ ++frame_num) {
+ SetWaitForEncodedFramesThreshold(sl_idx + 1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+
+ ASSERT_EQ(codec_specific_info.size(), sl_idx + 1);
+
+ for (size_t i = 0; i <= sl_idx; ++i) {
+ const bool is_keyframe =
+ encoded_frame[0]._frameType == VideoFrameType::kVideoFrameKey;
+ const bool is_first_upper_layer_frame =
+ (i == sl_idx && frame_num == 0);
+ // Inter-frame references are present unless this is a key frame or the
+ // first frame after activating an upper layer.
+ const bool expect_no_references =
+ is_keyframe || (is_first_upper_layer_frame &&
+ inter_layer_pred == InterLayerPredMode::kOn);
+ EXPECT_EQ(
+ codec_specific_info[i].codecSpecific.VP9.inter_pic_predicted,
+ !expect_no_references);
+ }
+ }
+ }
+ }
+}
+
+TEST_F(TestVp9Impl, EnablingDisablingUpperLayerInTheSameGof) {
+ const size_t num_spatial_layers = 2;
+ const size_t num_temporal_layers = 2;
+
+ ConfigureSvc(codec_settings_, num_spatial_layers, num_temporal_layers);
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.VP9()->flexibleMode = false;
+
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoBitrateAllocation bitrate_allocation;
+
+ // Enable both spatial and both temporal layers.
+ bitrate_allocation.SetBitrate(
+ 0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 0, 1, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+
+ // Encode 3 frames.
+ for (int i = 0; i < 3; ++i) {
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 2u);
+ }
+
+ // Disable SL1.
+ bitrate_allocation.SetBitrate(1, 0, 0);
+ bitrate_allocation.SetBitrate(1, 1, 0);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Encode 1 frame.
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 1u);
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 1);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted, true);
+
+ // Re-enable SL1.
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Encode 1 frame.
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 2u);
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted, true);
+ EXPECT_EQ(codec_specific_info[1].codecSpecific.VP9.inter_pic_predicted, true);
+}
+
+TEST_F(TestVp9Impl, EnablingDisablingUpperLayerAcrossGof) {
+ const size_t num_spatial_layers = 2;
+ const size_t num_temporal_layers = 2;
+
+ ConfigureSvc(codec_settings_, num_spatial_layers, num_temporal_layers);
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.VP9()->flexibleMode = false;
+
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoBitrateAllocation bitrate_allocation;
+
+ // Enable both spatial and both temporal layers.
+ bitrate_allocation.SetBitrate(
+ 0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 0, 1, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+
+ // Encode 3 frames.
+ for (int i = 0; i < 3; ++i) {
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 2u);
+ }
+
+ // Disable SL1.
+ bitrate_allocation.SetBitrate(1, 0, 0);
+ bitrate_allocation.SetBitrate(1, 1, 0);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Encode 11 frames: more than the GOF length of 2, and an odd count so the
+ // sequence ends on a TL1 frame.
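+ // With two temporal layers, the non-flexible GOF alternates TL0/TL1. The
+ // three frames above ended on TL0, so the single-layer frames below should
+ // continue the pattern starting at TL1 (temporal_idx == 1 - i % 2), and an
+ // odd frame count leaves the last encoded frame on TL1.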
+ for (int i = 0; i < 11; ++i) {
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 1u);
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 1 - i % 2);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted,
+ true);
+ }
+
+ // Re-enable SL1.
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Encode 1 frame.
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 2u);
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted, true);
+ EXPECT_EQ(codec_specific_info[1].codecSpecific.VP9.inter_pic_predicted,
+ false);
+}
+
+TEST_F(TestVp9Impl, EnablingNewLayerInScreenshareForcesAllLayersWithSS) {
+ const size_t num_spatial_layers = 3;
+ // Chosen by hand; the 2nd frame is dropped with the configured per-layer
+ // max framerate.
+ const size_t num_frames_to_encode_before_drop = 1;
+
+ codec_settings_.maxFramerate = 30;
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+ codec_settings_.spatialLayers[0].maxFramerate = 5.0;
+ // Use 30 fps for SL1 instead of 10 so that, even if an SL0 frame is dropped
+ // due to framerate capping, we still get back at least the middle layer.
+ // This simplifies the test.
+ codec_settings_.spatialLayers[1].maxFramerate = 30.0;
+ codec_settings_.spatialLayers[2].maxFramerate = 30.0;
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.mode = VideoCodecMode::kScreensharing;
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ codec_settings_.VP9()->flexibleMode = true;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // Enable all but the last layer.
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) {
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Encode enough frames to force a drop due to framerate capping.
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode_before_drop;
+ ++frame_num) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ }
+
+ // Enable the last layer.
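+ // Even though the SL0 frame would normally be dropped by its 5 fps cap,
+ // enabling a new layer is expected to force a full superframe so that the
+ // updated scalability structure can be delivered, as asserted below.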
+ bitrate_allocation.SetBitrate(
+ num_spatial_layers - 1, 0,
+ codec_settings_.spatialLayers[num_spatial_layers - 1].targetBitrate *
+ 1000);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // All layers are encoded, even though frame dropping should happen.
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ // Now all 3 layers should be encoded.
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ EXPECT_EQ(encoded_frames.size(), 3u);
+ // The scalability structure has to be triggered.
+ EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
+}
+
+TEST_F(TestVp9Impl, ScreenshareFrameDropping) {
+ const int num_spatial_layers = 3;
+ const int num_frames_to_detect_drops = 2;
+
+ codec_settings_.maxFramerate = 30;
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+ // Use 30 fps for SL0 and SL1 because it simplifies the test.
+ codec_settings_.spatialLayers[0].maxFramerate = 30.0;
+ codec_settings_.spatialLayers[1].maxFramerate = 30.0;
+ codec_settings_.spatialLayers[2].maxFramerate = 30.0;
+ codec_settings_.SetFrameDropEnabled(true);
+ codec_settings_.mode = VideoCodecMode::kScreensharing;
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ codec_settings_.VP9()->flexibleMode = true;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // Enable all but the last layer.
+ VideoBitrateAllocation bitrate_allocation;
+ // Very low bitrate for the lowest spatial layer to ensure rate-control drops.
+ bitrate_allocation.SetBitrate(0, 0, 1000);
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000);
+ // Disable the highest layer.
+ bitrate_allocation.SetBitrate(2, 0, 0);
+
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ bool frame_dropped = false;
+ // Encode enough frames to force a drop due to rate control.
+ for (size_t frame_num = 0; frame_num < num_frames_to_detect_drops;
+ ++frame_num) {
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ EXPECT_LE(encoded_frames.size(), 2u);
+ EXPECT_GE(encoded_frames.size(), 1u);
+ if (encoded_frames.size() == 1) {
+ frame_dropped = true;
+ // The dropped frame is SL0, so the remaining frame is SL1.
+ EXPECT_EQ(encoded_frames[0].SpatialIndex(), 1);
+ }
+ }
+ EXPECT_TRUE(frame_dropped);
+
+ // Enable the last layer.
+ bitrate_allocation.SetBitrate(
+ 2, 0, codec_settings_.spatialLayers[2].targetBitrate * 1000);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ // No drop allowed.
+ EXPECT_EQ(encoded_frames.size(), 3u);
+
+ // Verify that frame-dropping is re-enabled afterwards.
+ frame_dropped = false;
+ // Encode enough frames to force a drop due to rate control.
+ for (size_t frame_num = 0; frame_num < num_frames_to_detect_drops;
+ ++frame_num) {
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ EXPECT_LE(encoded_frames.size(), 3u);
+ EXPECT_GE(encoded_frames.size(), 2u);
+ if (encoded_frames.size() == 2) {
+ frame_dropped = true;
+ // The dropped frame is SL0, so the remaining frames are SL1 and SL2.
+ EXPECT_EQ(encoded_frames[0].SpatialIndex(), 1);
+ EXPECT_EQ(encoded_frames[1].SpatialIndex(), 2);
+ }
+ }
+ EXPECT_TRUE(frame_dropped);
+}
+
+TEST_F(TestVp9Impl, RemovingLayerIsNotDelayedInScreenshareAndAddsSsInfo) {
+ const size_t num_spatial_layers = 3;
+ // Chosen by hand; the 2nd frame is dropped with the configured per-layer
+ // max framerate.
+ const size_t num_frames_to_encode_before_drop = 1;
+ // Chosen by hand; exactly 5 frames are dropped for input fps=30 and max
+ // framerate = 5.
+ const size_t num_dropped_frames = 5;
+
+ codec_settings_.maxFramerate = 30;
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+ codec_settings_.spatialLayers[0].maxFramerate = 5.0;
+ // Use 30 fps for SL1 instead of 5 so that, even if an SL0 frame is dropped
+ // due to framerate capping, we still get back at least the middle layer.
+ // This simplifies the test.
+ codec_settings_.spatialLayers[1].maxFramerate = 30.0;
+ codec_settings_.spatialLayers[2].maxFramerate = 30.0;
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.mode = VideoCodecMode::kScreensharing;
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ codec_settings_.VP9()->flexibleMode = true;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // All layers are enabled from the start.
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Encode enough frames to force a drop due to framerate capping.
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode_before_drop;
+ ++frame_num) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ }
+
+ // Now the first layer should not have frames in it.
+ for (size_t frame_num = 0; frame_num < num_dropped_frames - 2; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ // The first layer is dropped due to the frame rate cap. The last layer is
+ // not disabled yet.
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ // The first layer is skipped.
+ EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 1);
+ }
+
+ // Disable the last layer.
+ bitrate_allocation.SetBitrate(num_spatial_layers - 1, 0, 0);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // The first layer is still expected to be dropped. The last layer has to be
+ // disabled as well.
+ for (size_t frame_num = num_dropped_frames - 2;
+ frame_num < num_dropped_frames; ++frame_num) {
+ // Expect back one frame.
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ // The first layer is dropped due to the frame rate cap, and the last
+ // layer is now disabled.
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ // The first layer is skipped.
+ EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 1);
+ // No SS data on a non-base spatial layer.
+ EXPECT_FALSE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
+ }
+
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ // The first layer is not skipped now.
+ EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 0);
+ // SS data should be present.
+ EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
+}
+
+TEST_F(TestVp9Impl, DisableNewLayerInVideoDelaysSsInfoTillTL0) {
+ const size_t num_spatial_layers = 3;
+ const size_t num_temporal_layers = 2;
+ ConfigureSvc(codec_settings_, num_spatial_layers, num_temporal_layers);
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.mode = VideoCodecMode::kRealtimeVideo;
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOnKeyPic;
+ codec_settings_.VP9()->flexibleMode = false;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // Enable all the layers.
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ bitrate_allocation.SetBitrate(
+ sl_idx, tl_idx,
+ codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 /
+ num_temporal_layers);
+ }
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+
+ // Encode one TL0 frame.
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0u);
+
+ // Disable the last layer.
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ bitrate_allocation.SetBitrate(num_spatial_layers - 1, tl_idx, 0);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Next is a TL1 frame. The last layer is disabled immediately, but the SS
+ // structure is not provided here.
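+ // In non-flexible mode the updated scalability structure is expected to be
+ // attached only to a TL0 frame, which is why the TL1 frame below carries no
+ // SS data.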
+ SetWaitForEncodedFramesThreshold(num_spatial_layers - 1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 1u); + EXPECT_FALSE(codec_specific_info[0].codecSpecific.VP9.ss_data_available); + + // Next is TL0 frame, which should have delayed SS structure. + SetWaitForEncodedFramesThreshold(num_spatial_layers - 1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0u); + EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available); + EXPECT_TRUE(codec_specific_info[0] + .codecSpecific.VP9.spatial_layer_resolution_present); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.num_spatial_layers, + num_spatial_layers - 1); +} + +TEST_F(TestVp9Impl, + LowLayerMarkedAsRefIfHighLayerNotEncodedAndInterLayerPredIsEnabled) { + ConfigureSvc(codec_settings_, 3); + codec_settings_.SetFrameDropEnabled(false); + codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn; + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + VideoBitrateAllocation bitrate_allocation; + bitrate_allocation.SetBitrate( + 0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_info)); + EXPECT_TRUE(codec_info.codecSpecific.VP9.ss_data_available); + EXPECT_FALSE(codec_info.codecSpecific.VP9.non_ref_for_inter_layer_pred); +} + +TEST_F(TestVp9Impl, ScalabilityStructureIsAvailableInFlexibleMode) { + codec_settings_.VP9()->flexibleMode = true; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + EXPECT_TRUE(codec_specific_info.codecSpecific.VP9.ss_data_available); +} + +TEST_F(TestVp9Impl, Profile0PreferredPixelFormats) { + EXPECT_THAT(encoder_->GetEncoderInfo().preferred_pixel_formats, + testing::UnorderedElementsAre(VideoFrameBuffer::Type::kNV12, + VideoFrameBuffer::Type::kI420)); +} + +TEST_F(TestVp9Impl, EncoderInfoWithoutResolutionBitrateLimits) { + EXPECT_TRUE(encoder_->GetEncoderInfo().resolution_bitrate_limits.empty()); +} + +TEST_F(TestVp9Impl, EncoderInfoWithBitrateLimitsFromFieldTrial) { + test::ScopedFieldTrials field_trials( + "WebRTC-VP9-GetEncoderInfoOverride/" + "frame_size_pixels:123|456|789," + "min_start_bitrate_bps:11000|22000|33000," + "min_bitrate_bps:44000|55000|66000," + "max_bitrate_bps:77000|88000|99000/"); + SetUp(); + + EXPECT_THAT( + encoder_->GetEncoderInfo().resolution_bitrate_limits, + ::testing::ElementsAre( + VideoEncoder::ResolutionBitrateLimits{123, 11000, 44000, 77000}, + VideoEncoder::ResolutionBitrateLimits{456, 22000, 55000, 88000}, + VideoEncoder::ResolutionBitrateLimits{789, 33000, 66000, 99000})); +} + +TEST_F(TestVp9Impl, EncoderInfoFpsAllocation) { + const uint8_t kNumSpatialLayers = 3; + const uint8_t 
kNumTemporalLayers = 3; + + codec_settings_.maxFramerate = 30; + codec_settings_.VP9()->numberOfSpatialLayers = kNumSpatialLayers; + codec_settings_.VP9()->numberOfTemporalLayers = kNumTemporalLayers; + + for (uint8_t sl_idx = 0; sl_idx < kNumSpatialLayers; ++sl_idx) { + codec_settings_.spatialLayers[sl_idx].width = codec_settings_.width; + codec_settings_.spatialLayers[sl_idx].height = codec_settings_.height; + codec_settings_.spatialLayers[sl_idx].minBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].maxBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].targetBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].active = true; + codec_settings_.spatialLayers[sl_idx].maxFramerate = + codec_settings_.maxFramerate; + } + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + FramerateFractions expected_fps_allocation[kMaxSpatialLayers]; + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction / 4); + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction / 2); + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction); + expected_fps_allocation[1] = expected_fps_allocation[0]; + expected_fps_allocation[2] = expected_fps_allocation[0]; + EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation, + ElementsAreArray(expected_fps_allocation)); +} + +TEST_F(TestVp9Impl, EncoderInfoFpsAllocationFlexibleMode) { + const uint8_t kNumSpatialLayers = 3; + + codec_settings_.maxFramerate = 30; + codec_settings_.VP9()->numberOfSpatialLayers = kNumSpatialLayers; + codec_settings_.VP9()->numberOfTemporalLayers = 1; + codec_settings_.VP9()->flexibleMode = true; + + VideoEncoder::RateControlParameters rate_params; + for (uint8_t sl_idx = 0; sl_idx < kNumSpatialLayers; ++sl_idx) { + codec_settings_.spatialLayers[sl_idx].width = codec_settings_.width; + codec_settings_.spatialLayers[sl_idx].height = codec_settings_.height; + codec_settings_.spatialLayers[sl_idx].minBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].maxBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].targetBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].active = true; + // Force different frame rates for different layers, to verify that total + // fraction is correct. + codec_settings_.spatialLayers[sl_idx].maxFramerate = + codec_settings_.maxFramerate / (kNumSpatialLayers - sl_idx); + rate_params.bitrate.SetBitrate(sl_idx, 0, + codec_settings_.startBitrate * 1000); + } + rate_params.bandwidth_allocation = + DataRate::BitsPerSec(rate_params.bitrate.get_sum_bps()); + rate_params.framerate_fps = codec_settings_.maxFramerate; + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + // No temporal layers allowed when spatial layers have different fps targets. + FramerateFractions expected_fps_allocation[kMaxSpatialLayers]; + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction / 3); + expected_fps_allocation[1].push_back(EncoderInfo::kMaxFramerateFraction / 2); + expected_fps_allocation[2].push_back(EncoderInfo::kMaxFramerateFraction); + EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation, + ::testing::ElementsAreArray(expected_fps_allocation)); + + // SetRates with current fps does not alter outcome. 
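+ // fps_allocation is derived from the per-layer max framerates configured
+ // above, so a rate update that repeats the same framerate should leave it
+ // unchanged, as checked below.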
+ encoder_->SetRates(rate_params);
+ EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation,
+ ::testing::ElementsAreArray(expected_fps_allocation));
+
+ // A higher fps than the codec wants should still not affect the outcome.
+ rate_params.framerate_fps *= 2;
+ encoder_->SetRates(rate_params);
+ EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation,
+ ::testing::ElementsAreArray(expected_fps_allocation));
+}
+
+class Vp9ImplWithLayeringTest
+ : public ::testing::TestWithParam<std::tuple<int, int, bool>> {
+ protected:
+ Vp9ImplWithLayeringTest()
+ : num_spatial_layers_(std::get<0>(GetParam())),
+ num_temporal_layers_(std::get<1>(GetParam())),
+ override_field_trials_(std::get<2>(GetParam())
+ ? "WebRTC-Vp9ExternalRefCtrl/Enabled/"
+ : "") {}
+
+ const uint8_t num_spatial_layers_;
+ const uint8_t num_temporal_layers_;
+ const test::ScopedFieldTrials override_field_trials_;
+};
+
+TEST_P(Vp9ImplWithLayeringTest, FlexibleMode) {
+ // In flexible mode the encoder wrapper obtains the actual list of references
+ // from the encoder and writes it into the RTP payload descriptor. Check that
+ // the reference list in the payload descriptor matches the predefined one,
+ // which is used in non-flexible mode.
+ std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create();
+ VideoCodec codec_settings = DefaultCodecSettings();
+ codec_settings.VP9()->flexibleMode = true;
+ codec_settings.SetFrameDropEnabled(false);
+ codec_settings.VP9()->numberOfSpatialLayers = num_spatial_layers_;
+ codec_settings.VP9()->numberOfTemporalLayers = num_temporal_layers_;
+ EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings),
+ WEBRTC_VIDEO_CODEC_OK);
+
+ GofInfoVP9 gof;
+ if (num_temporal_layers_ == 1) {
+ gof.SetGofInfoVP9(kTemporalStructureMode1);
+ } else if (num_temporal_layers_ == 2) {
+ gof.SetGofInfoVP9(kTemporalStructureMode2);
+ } else if (num_temporal_layers_ == 3) {
+ gof.SetGofInfoVP9(kTemporalStructureMode3);
+ }
+
+ // Encode at least (num_frames_in_gof + 1) frames to verify the references
+ // of a non-key frame with gof_idx = 0.
+ int num_input_frames = gof.num_frames_in_gof + 1;
+ std::vector<EncodedVideoFrameProducer::EncodedFrame> frames =
+ EncodedVideoFrameProducer(*encoder)
+ .SetNumInputFrames(num_input_frames)
+ .SetResolution({kWidth, kHeight})
+ .Encode();
+ ASSERT_THAT(frames, SizeIs(num_input_frames * num_spatial_layers_));
+
+ for (size_t i = 0; i < frames.size(); ++i) {
+ const EncodedVideoFrameProducer::EncodedFrame& frame = frames[i];
+ const size_t picture_idx = i / num_spatial_layers_;
+ const size_t gof_idx = picture_idx % gof.num_frames_in_gof;
+
+ const CodecSpecificInfoVP9& vp9 =
+ frame.codec_specific_info.codecSpecific.VP9;
+ EXPECT_EQ(frame.encoded_image.SpatialIndex(),
+ num_spatial_layers_ == 1
+ ? absl::nullopt
+ : absl::optional<int>(i % num_spatial_layers_))
+ << "Frame " << i;
+ EXPECT_EQ(vp9.temporal_idx, num_temporal_layers_ == 1
+ ? kNoTemporalIdx
+ : gof.temporal_idx[gof_idx])
+ << "Frame " << i;
+ EXPECT_EQ(vp9.temporal_up_switch, gof.temporal_up_switch[gof_idx])
+ << "Frame " << i;
+ if (picture_idx == 0) {
+ EXPECT_EQ(vp9.num_ref_pics, 0) << "Frame " << i;
+ } else {
+ EXPECT_THAT(rtc::MakeArrayView(vp9.p_diff, vp9.num_ref_pics),
+ UnorderedElementsAreArray(gof.pid_diff[gof_idx],
+ gof.num_ref_pics[gof_idx]))
+ << "Frame " << i;
+ }
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(All,
+ Vp9ImplWithLayeringTest,
+ ::testing::Combine(::testing::Values(1, 2, 3),
+ ::testing::Values(1, 2, 3),
+ ::testing::Bool()));
+
+class TestVp9ImplFrameDropping : public TestVp9Impl {
+ protected:
+ void ModifyCodecSettings(VideoCodec* codec_settings) override {
+ webrtc::test::CodecSettings(kVideoCodecVP9, codec_settings);
+ // We need to encode quite a lot of frames in this test. Use a low
+ // resolution to reduce execution time.
+ codec_settings->width = 64;
+ codec_settings->height = 64;
+ codec_settings->mode = VideoCodecMode::kScreensharing;
+ }
+};
+
+TEST_F(TestVp9ImplFrameDropping, PreEncodeFrameDropping) {
+ const size_t num_frames_to_encode = 100;
+ const float input_framerate_fps = 30.0;
+ const float video_duration_secs = num_frames_to_encode / input_framerate_fps;
+ const float expected_framerate_fps = 5.0f;
+ const float max_abs_framerate_error_fps = expected_framerate_fps * 0.1f;
+
+ codec_settings_.maxFramerate = static_cast<uint32_t>(expected_framerate_fps);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoFrame input_frame = NextInputFrame();
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr));
+ const size_t timestamp = input_frame.timestamp() +
+ kVideoPayloadTypeFrequency / input_framerate_fps;
+ input_frame.set_timestamp(static_cast<uint32_t>(timestamp));
+ }
+
+ const size_t num_encoded_frames = GetNumEncodedFrames();
+ const float encoded_framerate_fps = num_encoded_frames / video_duration_secs;
+ EXPECT_NEAR(encoded_framerate_fps, expected_framerate_fps,
+ max_abs_framerate_error_fps);
+}
+
+TEST_F(TestVp9ImplFrameDropping, DifferentFrameratePerSpatialLayer) {
+ // Assign a different frame rate to each spatial layer and check that the
+ // resulting frame rates are close to the assigned ones.
+ const uint8_t num_spatial_layers = 3;
+ const float input_framerate_fps = 30.0;
+ const size_t video_duration_secs = 3;
+ const size_t num_input_frames = video_duration_secs * input_framerate_fps;
+
+ codec_settings_.VP9()->numberOfSpatialLayers = num_spatial_layers;
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.VP9()->flexibleMode = true;
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (uint8_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ // Frame rate increases from low to high layer.
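+ // With three layers this gives SL0 = 10 fps, SL1 = 20 fps and SL2 = 30 fps
+ // for the 30 fps input.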
+ const uint32_t framerate_fps = 10 * (sl_idx + 1); + + codec_settings_.spatialLayers[sl_idx].width = codec_settings_.width; + codec_settings_.spatialLayers[sl_idx].height = codec_settings_.height; + codec_settings_.spatialLayers[sl_idx].maxFramerate = framerate_fps; + codec_settings_.spatialLayers[sl_idx].minBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].maxBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].targetBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].active = true; + + bitrate_allocation.SetBitrate( + sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000); + } + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + VideoFrame input_frame = NextInputFrame(); + for (size_t frame_num = 0; frame_num < num_input_frames; ++frame_num) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr)); + const size_t timestamp = input_frame.timestamp() + + kVideoPayloadTypeFrequency / input_framerate_fps; + input_frame.set_timestamp(static_cast<uint32_t>(timestamp)); + } + + std::vector<EncodedImage> encoded_frames; + std::vector<CodecSpecificInfo> codec_infos; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_infos)); + + std::vector<size_t> num_encoded_frames(num_spatial_layers, 0); + for (EncodedImage& encoded_frame : encoded_frames) { + ++num_encoded_frames[encoded_frame.SpatialIndex().value_or(0)]; + } + + for (uint8_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + const float layer_target_framerate_fps = + codec_settings_.spatialLayers[sl_idx].maxFramerate; + const float layer_output_framerate_fps = + static_cast<float>(num_encoded_frames[sl_idx]) / video_duration_secs; + const float max_framerate_error_fps = layer_target_framerate_fps * 0.1f; + EXPECT_NEAR(layer_output_framerate_fps, layer_target_framerate_fps, + max_framerate_error_fps); + } +} + +class TestVp9ImplProfile2 : public TestVp9Impl { + protected: + void SetUp() override { + // Profile 2 might not be available on some platforms until + // https://bugs.chromium.org/p/webm/issues/detail?id=1544 is solved. + bool profile_2_is_supported = false; + for (const auto& codec : SupportedVP9Codecs()) { + if (ParseSdpForVP9Profile(codec.parameters) + .value_or(VP9Profile::kProfile0) == VP9Profile::kProfile2) { + profile_2_is_supported = true; + } + } + if (!profile_2_is_supported) + return; + + TestVp9Impl::SetUp(); + input_frame_generator_ = test::CreateSquareFrameGenerator( + codec_settings_.width, codec_settings_.height, + test::FrameGeneratorInterface::OutputType::kI010, + absl::optional<int>()); + } + + std::unique_ptr<VideoEncoder> CreateEncoder() override { + cricket::VideoCodec profile2_codec; + profile2_codec.SetParam(kVP9FmtpProfileId, + VP9ProfileToString(VP9Profile::kProfile2)); + return VP9Encoder::Create(profile2_codec); + } + + std::unique_ptr<VideoDecoder> CreateDecoder() override { + return VP9Decoder::Create(); + } +}; + +TEST_F(TestVp9ImplProfile2, EncodeDecode) { + if (!encoder_) + return; + + VideoFrame input_frame = NextInputFrame(); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + // First frame should be a key frame. 
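+ // The frame type is set explicitly here rather than asserted, so that the
+ // decoder below treats the buffer as a key frame.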
+ encoded_frame._frameType = VideoFrameType::kVideoFrameKey;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0));
+ std::unique_ptr<VideoFrame> decoded_frame;
+ absl::optional<uint8_t> decoded_qp;
+ ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
+ ASSERT_TRUE(decoded_frame);
+
+ // TODO(emircan): Add PSNR for different color depths.
+ EXPECT_GT(I420PSNR(*input_frame.video_frame_buffer()->ToI420(),
+ *decoded_frame->video_frame_buffer()->ToI420()),
+ 31);
+}
+
+TEST_F(TestVp9Impl, EncodeWithDynamicRate) {
+ // Configure the dynamic rate field trial and re-create the encoder.
+ test::ScopedFieldTrials field_trials(
+ "WebRTC-VideoRateControl/vp9_dynamic_rate:true/");
+ SetUp();
+
+ // Set a 300kbps target with 100% headroom.
+ VideoEncoder::RateControlParameters params;
+ params.bandwidth_allocation = DataRate::BitsPerSec(300000);
+ params.bitrate.SetBitrate(0, 0, params.bandwidth_allocation.bps());
+ params.framerate_fps = 30.0;
+
+ encoder_->SetRates(params);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ EncodedImage encoded_frame;
+ CodecSpecificInfo codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+
+ // Set no headroom and encode again.
+ params.bandwidth_allocation = DataRate::Zero();
+ encoder_->SetRates(params);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+}
+
+TEST_F(TestVp9Impl, ReenablingUpperLayerAfterKFWithInterlayerPredIsEnabled) {
+ const size_t num_spatial_layers = 2;
+ const int num_frames_to_encode = 10;
+ codec_settings_.VP9()->flexibleMode = true;
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.VP9()->numberOfSpatialLayers = num_spatial_layers;
+ codec_settings_.VP9()->numberOfTemporalLayers = 1;
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ // Force a low frame rate, so all layers are present for all frames.
+ codec_settings_.maxFramerate = 5;
+
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific;
+
+ for (int i = 0; i < num_frames_to_encode; ++i) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific));
+ EXPECT_EQ(encoded_frames.size(), num_spatial_layers);
+ }
+
+ // Disable the last layer.
+ bitrate_allocation.SetBitrate(num_spatial_layers - 1, 0, 0);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (int i = 0; i < num_frames_to_encode; ++i) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific));
+ EXPECT_EQ(encoded_frames.size(), num_spatial_layers - 1);
+ }
+
+ std::vector<VideoFrameType> frame_types = {VideoFrameType::kVideoFrameKey};
+
+ // Force a key frame with the last layer still disabled.
+ SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), &frame_types));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific));
+ EXPECT_EQ(encoded_frames.size(), num_spatial_layers - 1);
+ ASSERT_EQ(encoded_frames[0]._frameType, VideoFrameType::kVideoFrameKey);
+
+ // Re-enable the last layer.
+ bitrate_allocation.SetBitrate(
+ num_spatial_layers - 1, 0,
+ codec_settings_.spatialLayers[num_spatial_layers - 1].targetBitrate *
+ 1000);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific));
+ EXPECT_EQ(encoded_frames.size(), num_spatial_layers);
+ EXPECT_EQ(encoded_frames[0]._frameType, VideoFrameType::kVideoFrameDelta);
+}
+
+TEST_F(TestVp9Impl, HandlesEmptyDecoderConfigure) {
+ std::unique_ptr<VideoDecoder> decoder = CreateDecoder();
+ // Check that the default settings are ok for the decoder.
+ EXPECT_TRUE(decoder->Configure({}));
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder->Release());
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ TestVp9ImplForPixelFormat,
+ TestVp9ImplForPixelFormat,
+ ::testing::Values(test::FrameGeneratorInterface::OutputType::kI420,
+ test::FrameGeneratorInterface::OutputType::kNV12),
+ [](const auto& info) {
+ return test::FrameGeneratorInterface::OutputTypeToString(info.param);
+ });
+
+// Helper function to populate a vpx_image_t instance with dimensions and
+// potential image data.
+std::function<vpx_image_t*(vpx_image_t*,
+ vpx_img_fmt_t,
+ unsigned int,
+ unsigned int,
+ unsigned int,
+ unsigned char* img_data)>
+GetWrapImageFunction(vpx_image_t* img) {
+ return [img](vpx_image_t* /*img*/, vpx_img_fmt_t fmt, unsigned int d_w,
+ unsigned int d_h, unsigned int /*stride_align*/,
+ unsigned char* img_data) {
+ img->fmt = fmt;
+ img->d_w = d_w;
+ img->d_h = d_h;
+ img->img_data = img_data;
+ return img;
+ };
+}
+
+TEST(Vp9SpeedSettingsTrialsTest, NoSvcUsesGlobalSpeedFromTl0InLayerConfig) {
+ // TL0 speed 8 at >= 480x270 (129600 pixels), 4 below that.
+ test::ExplicitKeyValueConfig trials(
+ "WebRTC-VP9-PerformanceFlags/"
+ "use_per_layer_speed,"
+ "min_pixel_count:0|129600,"
+ "base_layer_speed:4|8,"
+ "high_layer_speed:5|9,"
+ "deblock_mode:1|0/");
+
+ // Keep a raw pointer for EXPECT calls and the like. Ownership is otherwise
+ // passed on to LibvpxVp9Encoder.
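+ // absl::WrapUnique below hands the allocation to the encoder, which frees
+ // it on destruction; the raw pointer must therefore not be used after
+ // `encoder` goes out of scope.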
+ auto* const vpx = new NiceMock<MockLibvpxInterface>();
+ LibvpxVp9Encoder encoder(cricket::VideoCodec(),
+ absl::WrapUnique<LibvpxInterface>(vpx), trials);
+
+ VideoCodec settings = DefaultCodecSettings();
+ settings.width = 480;
+ settings.height = 270;
+ vpx_image_t img;
+
+ ON_CALL(*vpx, img_wrap).WillByDefault(GetWrapImageFunction(&img));
+ ON_CALL(*vpx, codec_enc_config_default)
+ .WillByDefault(DoAll(WithArg<1>([](vpx_codec_enc_cfg_t* cfg) {
+ memset(cfg, 0, sizeof(vpx_codec_enc_cfg_t));
+ }),
+ Return(VPX_CODEC_OK)));
+ EXPECT_CALL(*vpx, codec_control(_, _, An<int>())).Times(AnyNumber());
+
+ EXPECT_CALL(*vpx, codec_control(_, VP9E_SET_SVC_PARAMETERS,
+ A<vpx_svc_extra_cfg_t*>()))
+ .Times(0);
+
+ EXPECT_CALL(*vpx, codec_control(_, VP8E_SET_CPUUSED, TypedEq<int>(8)));
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder.InitEncode(&settings, kSettings));
+
+ encoder.Release();
+ settings.width = 352;
+ settings.height = 216;
+
+ EXPECT_CALL(*vpx, codec_control(_, VP8E_SET_CPUUSED, TypedEq<int>(4)));
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder.InitEncode(&settings, kSettings));
+}
+
+TEST(Vp9SpeedSettingsTrialsTest,
+ NoPerLayerFlagUsesGlobalSpeedFromTopLayerInConfig) {
+ // Speed 8 at >= 480x270 (129600 pixels), 4 below that.
+ test::ExplicitKeyValueConfig trials(
+ "WebRTC-VP9-PerformanceFlags/"
+ "min_pixel_count:0|129600,"
+ "base_layer_speed:4|8,"
+ "high_layer_speed:5|9,"
+ "deblock_mode:1|0/");
+
+ // Keep a raw pointer for EXPECT calls and the like. Ownership is otherwise
+ // passed on to LibvpxVp9Encoder.
+ auto* const vpx = new NiceMock<MockLibvpxInterface>();
+ LibvpxVp9Encoder encoder(cricket::VideoCodec(),
+ absl::WrapUnique<LibvpxInterface>(vpx), trials);
+
+ VideoCodec settings = DefaultCodecSettings();
+ settings.width = 480;
+ settings.height = 270;
+ ConfigureSvc(settings, 2, 3);
+ vpx_image_t img;
+
+ ON_CALL(*vpx, img_wrap).WillByDefault(GetWrapImageFunction(&img));
+ ON_CALL(*vpx, codec_enc_config_default)
+ .WillByDefault(DoAll(WithArg<1>([](vpx_codec_enc_cfg_t* cfg) {
+ memset(cfg, 0, sizeof(vpx_codec_enc_cfg_t));
+ }),
+ Return(VPX_CODEC_OK)));
+ EXPECT_CALL(*vpx, codec_control(_, _, An<int>())).Times(AnyNumber());
+
+ // Speed settings are not populated when the 'use_per_layer_speed' flag is
+ // absent.
+ EXPECT_CALL(*vpx,
+ codec_control(
+ _, VP9E_SET_SVC_PARAMETERS,
+ SafeMatcherCast<vpx_svc_extra_cfg_t*>(AllOf(
+ Field(&vpx_svc_extra_cfg_t::speed_per_layer, Each(0)),
+ Field(&vpx_svc_extra_cfg_t::loopfilter_ctrl, Each(0))))))
+ .Times(2);
+
+ EXPECT_CALL(*vpx, codec_control(_, VP8E_SET_CPUUSED, TypedEq<int>(8)));
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder.InitEncode(&settings, kSettings));
+
+ encoder.Release();
+ settings.width = 476;
+ settings.height = 268;
+ settings.spatialLayers[0].width = settings.width / 2;
+ settings.spatialLayers[0].height = settings.height / 2;
+ settings.spatialLayers[1].width = settings.width;
+ settings.spatialLayers[1].height = settings.height;
+
+ EXPECT_CALL(*vpx, codec_control(_, VP8E_SET_CPUUSED, TypedEq<int>(4)));
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder.InitEncode(&settings, kSettings));
+}
+
+TEST(Vp9SpeedSettingsTrialsTest, DefaultPerLayerFlagsWithSvc) {
+ // Per-temporal and spatial layer speed settings:
+ // SL0: TL0 = speed 5, TL1/TL2 = speed 8.
+ // SL1/2: TL0 = speed 7, TL1/TL2 = speed 8.
+ // Deblocking-mode per spatial layer:
+ // SL0: mode 1, SL1/2: mode 0.
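+ // The two min_pixel_count buckets below split at 129600 pixels (480x270):
+ // the downscaled SL0 falls into the first bucket (base speed 5, deblock
+ // mode 1), while SL1/SL2 land in the second (base speed 7, deblock mode 0),
+ // matching the kBaseTlSpeed and kLoopFilter arrays further down.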
+ test::ExplicitKeyValueConfig trials( + "WebRTC-VP9-PerformanceFlags/" + "use_per_layer_speed," + "min_pixel_count:0|129600," + "base_layer_speed:5|7," + "high_layer_speed:8|8," + "deblock_mode:1|0/"); + + // Keep a raw pointer for EXPECT calls and the like. Ownership is otherwise + // passed on to LibvpxVp9Encoder. + auto* const vpx = new NiceMock<MockLibvpxInterface>(); + LibvpxVp9Encoder encoder(cricket::VideoCodec(), + absl::WrapUnique<LibvpxInterface>(vpx), trials); + + VideoCodec settings = DefaultCodecSettings(); + constexpr int kNumSpatialLayers = 3; + constexpr int kNumTemporalLayers = 3; + ConfigureSvc(settings, kNumSpatialLayers, kNumTemporalLayers); + VideoBitrateAllocation bitrate_allocation; + for (int si = 0; si < kNumSpatialLayers; ++si) { + for (int ti = 0; ti < kNumTemporalLayers; ++ti) { + uint32_t bitrate_bps = + settings.spatialLayers[si].targetBitrate * 1'000 / kNumTemporalLayers; + bitrate_allocation.SetBitrate(si, ti, bitrate_bps); + } + } + vpx_image_t img; + + // Speed settings per spatial layer, for TL0. + const int kBaseTlSpeed[VPX_MAX_LAYERS] = {5, 7, 7}; + // Speed settings per spatial layer, for TL1, TL2. + const int kHighTlSpeed[VPX_MAX_LAYERS] = {8, 8, 8}; + // Loopfilter settings are handled within libvpx, so this array is valid for + // both TL0 and higher. + const int kLoopFilter[VPX_MAX_LAYERS] = {1, 0, 0}; + + ON_CALL(*vpx, img_wrap).WillByDefault(GetWrapImageFunction(&img)); + ON_CALL(*vpx, codec_enc_init) + .WillByDefault(WithArg<0>([](vpx_codec_ctx_t* ctx) { + memset(ctx, 0, sizeof(*ctx)); + return VPX_CODEC_OK; + })); + ON_CALL(*vpx, codec_enc_config_default) + .WillByDefault(DoAll(WithArg<1>([](vpx_codec_enc_cfg_t* cfg) { + memset(cfg, 0, sizeof(vpx_codec_enc_cfg_t)); + }), + Return(VPX_CODEC_OK))); + EXPECT_CALL( + *vpx, codec_control(_, VP9E_SET_SVC_PARAMETERS, + SafeMatcherCast<vpx_svc_extra_cfg_t*>( + AllOf(Field(&vpx_svc_extra_cfg_t::speed_per_layer, + ElementsAreArray(kBaseTlSpeed)), + Field(&vpx_svc_extra_cfg_t::loopfilter_ctrl, + ElementsAreArray(kLoopFilter)))))); + + // Capture the callback into the vp9 wrapper. 
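+ // libvpx delivers encoded packets through the callback registered via
+ // VP9E_REGISTER_CX_CALLBACK; the test stores the callback/user_priv pair so
+ // it can feed hand-crafted packets back, one per spatial layer, further
+ // down.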
+  vpx_codec_priv_output_cx_pkt_cb_pair_t callback_pointer = {};
+  EXPECT_CALL(*vpx, codec_control(_, VP9E_REGISTER_CX_CALLBACK, A<void*>()))
+      .WillOnce(WithArg<2>([&](void* cbp) {
+        callback_pointer =
+            *reinterpret_cast<vpx_codec_priv_output_cx_pkt_cb_pair_t*>(cbp);
+        return VPX_CODEC_OK;
+      }));
+
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder.InitEncode(&settings, kSettings));
+
+  encoder.SetRates(VideoEncoder::RateControlParameters(bitrate_allocation,
+                                                       settings.maxFramerate));
+
+  MockEncodedImageCallback callback;
+  encoder.RegisterEncodeCompleteCallback(&callback);
+  auto frame_generator = test::CreateSquareFrameGenerator(
+      kWidth, kHeight, test::FrameGeneratorInterface::OutputType::kI420, 10);
+  Mock::VerifyAndClearExpectations(vpx);
+
+  uint8_t data[1] = {0};
+  vpx_codec_cx_pkt encoded_data = {};
+  encoded_data.data.frame.buf = &data;
+  encoded_data.data.frame.sz = 1;
+
+  const auto kImageOk =
+      EncodedImageCallback::Result(EncodedImageCallback::Result::OK);
+
+  int spatial_id = 0;
+  int temporal_id = 0;
+  EXPECT_CALL(*vpx,
+              codec_control(_, VP9E_SET_SVC_LAYER_ID, A<vpx_svc_layer_id_t*>()))
+      .Times(AnyNumber());
+  EXPECT_CALL(*vpx,
+              codec_control(_, VP9E_GET_SVC_LAYER_ID, A<vpx_svc_layer_id_t*>()))
+      .WillRepeatedly(WithArg<2>([&](vpx_svc_layer_id_t* layer_id) {
+        layer_id->spatial_layer_id = spatial_id;
+        layer_id->temporal_layer_id = temporal_id;
+        return VPX_CODEC_OK;
+      }));
+  vpx_svc_ref_frame_config_t stored_refs = {};
+  ON_CALL(*vpx, codec_control(_, VP9E_SET_SVC_REF_FRAME_CONFIG,
+                              A<vpx_svc_ref_frame_config_t*>()))
+      .WillByDefault(
+          DoAll(SaveArgPointee<2>(&stored_refs), Return(VPX_CODEC_OK)));
+  ON_CALL(*vpx, codec_control(_, VP9E_GET_SVC_REF_FRAME_CONFIG,
+                              A<vpx_svc_ref_frame_config_t*>()))
+      .WillByDefault(
+          DoAll(SetArgPointee<2>(ByRef(stored_refs)), Return(VPX_CODEC_OK)));
+
+  // First frame is keyframe.
+  encoded_data.data.frame.flags = VPX_FRAME_IS_KEY;
+
+  // Default 3-layer temporal pattern: 0-2-1-2, followed by the first two
+  // frames of the next cycle.
+  for (int ti : {0, 2, 1, 2, 0, 2}) {
+    EXPECT_CALL(*vpx, codec_encode).WillOnce(Return(VPX_CODEC_OK));
+    // No update expected if the flags haven't changed, and they only change
+    // when we move between the base temporal layer and a non-base temporal
+    // layer.
+    if ((ti > 0) != (temporal_id > 0)) {
+      EXPECT_CALL(*vpx, codec_control(
+                            _, VP9E_SET_SVC_PARAMETERS,
+                            SafeMatcherCast<vpx_svc_extra_cfg_t*>(AllOf(
+                                Field(&vpx_svc_extra_cfg_t::speed_per_layer,
+                                      ElementsAreArray(ti == 0 ? kBaseTlSpeed
+                                                               : kHighTlSpeed)),
+                                Field(&vpx_svc_extra_cfg_t::loopfilter_ctrl,
+                                      ElementsAreArray(kLoopFilter))))));
+    } else {
+      EXPECT_CALL(*vpx, codec_control(_, VP9E_SET_SVC_PARAMETERS,
+                                      A<vpx_svc_extra_cfg_t*>()))
+          .Times(0);
+    }
+
+    VideoFrame frame =
+        VideoFrame::Builder()
+            .set_video_frame_buffer(frame_generator->NextFrame().buffer)
+            .build();
+    encoder.Encode(frame, nullptr);
+
+    temporal_id = ti;
+    for (int si = 0; si < kNumSpatialLayers; ++si) {
+      spatial_id = si;
+
+      EXPECT_CALL(callback, OnEncodedImage).WillOnce(Return(kImageOk));
+      callback_pointer.output_cx_pkt(&encoded_data,
+                                     callback_pointer.user_priv);
+    }
+
+    encoded_data.data.frame.flags = 0;  // Following frames are delta frames.
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9.cc
new file mode 100644
index 0000000000..222e57b6ba
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9.cc
@@ -0,0 +1,118 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/codecs/vp9/include/vp9.h" + +#include <memory> + +#include "absl/container/inlined_vector.h" +#include "api/transport/field_trial_based_config.h" +#include "api/video_codecs/scalability_mode.h" +#include "api/video_codecs/sdp_video_format.h" +#include "api/video_codecs/vp9_profile.h" +#include "modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h" +#include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h" +#include "modules/video_coding/svc/create_scalability_structure.h" +#include "rtc_base/checks.h" +#include "vpx/vp8cx.h" +#include "vpx/vp8dx.h" +#include "vpx/vpx_codec.h" + +namespace webrtc { + +std::vector<SdpVideoFormat> SupportedVP9Codecs(bool add_scalability_modes) { +#ifdef RTC_ENABLE_VP9 + // Profile 2 might not be available on some platforms until + // https://bugs.chromium.org/p/webm/issues/detail?id=1544 is solved. + static bool vpx_supports_high_bit_depth = + (vpx_codec_get_caps(vpx_codec_vp9_cx()) & VPX_CODEC_CAP_HIGHBITDEPTH) != + 0 && + (vpx_codec_get_caps(vpx_codec_vp9_dx()) & VPX_CODEC_CAP_HIGHBITDEPTH) != + 0; + + absl::InlinedVector<ScalabilityMode, kScalabilityModeCount> scalability_modes; + if (add_scalability_modes) { + for (const auto scalability_mode : kAllScalabilityModes) { + if (ScalabilityStructureConfig(scalability_mode).has_value()) { + scalability_modes.push_back(scalability_mode); + } + } + } + std::vector<SdpVideoFormat> supported_formats{SdpVideoFormat( + cricket::kVp9CodecName, + {{kVP9FmtpProfileId, VP9ProfileToString(VP9Profile::kProfile0)}}, + scalability_modes)}; + if (vpx_supports_high_bit_depth) { + supported_formats.push_back(SdpVideoFormat( + cricket::kVp9CodecName, + {{kVP9FmtpProfileId, VP9ProfileToString(VP9Profile::kProfile2)}}, + scalability_modes)); + } + + return supported_formats; +#else + return std::vector<SdpVideoFormat>(); +#endif +} + +std::vector<SdpVideoFormat> SupportedVP9DecoderCodecs() { +#ifdef RTC_ENABLE_VP9 + std::vector<SdpVideoFormat> supported_formats = SupportedVP9Codecs(); + // The WebRTC internal decoder supports VP9 profile 1 and 3. However, there's + // currently no way of sending VP9 profile 1 or 3 using the internal encoder. + // It would require extended support for I444, I422, and I440 buffers. 
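+  // Hence profiles 1 and 3 are advertised here for decoding only.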
+ supported_formats.push_back(SdpVideoFormat( + cricket::kVp9CodecName, + {{kVP9FmtpProfileId, VP9ProfileToString(VP9Profile::kProfile1)}})); + supported_formats.push_back(SdpVideoFormat( + cricket::kVp9CodecName, + {{kVP9FmtpProfileId, VP9ProfileToString(VP9Profile::kProfile3)}})); + return supported_formats; +#else + return std::vector<SdpVideoFormat>(); +#endif +} + +std::unique_ptr<VP9Encoder> VP9Encoder::Create() { +#ifdef RTC_ENABLE_VP9 + return std::make_unique<LibvpxVp9Encoder>(cricket::VideoCodec(), + LibvpxInterface::Create(), + FieldTrialBasedConfig()); +#else + RTC_DCHECK_NOTREACHED(); + return nullptr; +#endif +} + +std::unique_ptr<VP9Encoder> VP9Encoder::Create( + const cricket::VideoCodec& codec) { +#ifdef RTC_ENABLE_VP9 + return std::make_unique<LibvpxVp9Encoder>(codec, LibvpxInterface::Create(), + FieldTrialBasedConfig()); +#else + RTC_DCHECK_NOTREACHED(); + return nullptr; +#endif +} + +bool VP9Encoder::SupportsScalabilityMode(ScalabilityMode scalability_mode) { + return ScalabilityStructureConfig(scalability_mode).has_value(); +} + +std::unique_ptr<VP9Decoder> VP9Decoder::Create() { +#ifdef RTC_ENABLE_VP9 + return std::make_unique<LibvpxVp9Decoder>(); +#else + RTC_DCHECK_NOTREACHED(); + return nullptr; +#endif +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.cc new file mode 100644 index 0000000000..181550ce91 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.cc @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#ifdef RTC_ENABLE_VP9 + +#include "modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h" + +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "vpx/vpx_codec.h" +#include "vpx/vpx_decoder.h" +#include "vpx/vpx_frame_buffer.h" + +namespace webrtc { + +uint8_t* Vp9FrameBufferPool::Vp9FrameBuffer::GetData() { + return data_.data<uint8_t>(); +} + +size_t Vp9FrameBufferPool::Vp9FrameBuffer::GetDataSize() const { + return data_.size(); +} + +void Vp9FrameBufferPool::Vp9FrameBuffer::SetSize(size_t size) { + data_.SetSize(size); +} + +bool Vp9FrameBufferPool::InitializeVpxUsePool( + vpx_codec_ctx* vpx_codec_context) { + RTC_DCHECK(vpx_codec_context); + // Tell libvpx to use this pool. + if (vpx_codec_set_frame_buffer_functions( + // In which context to use these callback functions. + vpx_codec_context, + // Called by libvpx when it needs another frame buffer. + &Vp9FrameBufferPool::VpxGetFrameBuffer, + // Called by libvpx when it no longer uses a frame buffer. + &Vp9FrameBufferPool::VpxReleaseFrameBuffer, + // `this` will be passed as `user_priv` to VpxGetFrameBuffer. + this)) { + // Failed to configure libvpx to use Vp9FrameBufferPool. + return false; + } + return true; +} + +rtc::scoped_refptr<Vp9FrameBufferPool::Vp9FrameBuffer> +Vp9FrameBufferPool::GetFrameBuffer(size_t min_size) { + RTC_DCHECK_GT(min_size, 0); + rtc::scoped_refptr<Vp9FrameBuffer> available_buffer = nullptr; + { + MutexLock lock(&buffers_lock_); + // Do we have a buffer we can recycle? 
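+    // A buffer is free to recycle iff the pool's list holds its only
+    // reference (HasOneRef()); any external scoped_refptr (e.g. a decoded
+    // image still in flight) keeps it counted as in use.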
+ for (const auto& buffer : allocated_buffers_) { + if (buffer->HasOneRef()) { + available_buffer = buffer; + break; + } + } + // Otherwise create one. + if (available_buffer == nullptr) { + available_buffer = new Vp9FrameBuffer(); + allocated_buffers_.push_back(available_buffer); + if (allocated_buffers_.size() > max_num_buffers_) { + RTC_LOG(LS_WARNING) + << allocated_buffers_.size() + << " Vp9FrameBuffers have been " + "allocated by a Vp9FrameBufferPool (exceeding what is " + "considered reasonable, " + << max_num_buffers_ << ")."; + + // TODO(phoglund): this limit is being hit in tests since Oct 5 2016. + // See https://bugs.chromium.org/p/webrtc/issues/detail?id=6484. + // RTC_DCHECK_NOTREACHED(); + } + } + } + + available_buffer->SetSize(min_size); + return available_buffer; +} + +int Vp9FrameBufferPool::GetNumBuffersInUse() const { + int num_buffers_in_use = 0; + MutexLock lock(&buffers_lock_); + for (const auto& buffer : allocated_buffers_) { + if (!buffer->HasOneRef()) + ++num_buffers_in_use; + } + return num_buffers_in_use; +} + +bool Vp9FrameBufferPool::Resize(size_t max_number_of_buffers) { + MutexLock lock(&buffers_lock_); + size_t used_buffers_count = 0; + for (const auto& buffer : allocated_buffers_) { + // If the buffer is in use, the ref count will be >= 2, one from the list we + // are looping over and one from the application. If the ref count is 1, + // then the list we are looping over holds the only reference and it's safe + // to reuse. + if (!buffer->HasOneRef()) { + used_buffers_count++; + } + } + if (used_buffers_count > max_number_of_buffers) { + return false; + } + max_num_buffers_ = max_number_of_buffers; + + size_t buffers_to_purge = allocated_buffers_.size() - max_num_buffers_; + auto iter = allocated_buffers_.begin(); + while (iter != allocated_buffers_.end() && buffers_to_purge > 0) { + if ((*iter)->HasOneRef()) { + iter = allocated_buffers_.erase(iter); + buffers_to_purge--; + } else { + ++iter; + } + } + return true; +} + +void Vp9FrameBufferPool::ClearPool() { + MutexLock lock(&buffers_lock_); + allocated_buffers_.clear(); +} + +// static +int32_t Vp9FrameBufferPool::VpxGetFrameBuffer(void* user_priv, + size_t min_size, + vpx_codec_frame_buffer* fb) { + RTC_DCHECK(user_priv); + RTC_DCHECK(fb); + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + // Limit size of 8k YUV highdef frame + size_t size_limit = 7680 * 4320 * 3 / 2 * 2; + if (min_size > size_limit) + return -1; +#endif + + Vp9FrameBufferPool* pool = static_cast<Vp9FrameBufferPool*>(user_priv); + + rtc::scoped_refptr<Vp9FrameBuffer> buffer = pool->GetFrameBuffer(min_size); + fb->data = buffer->GetData(); + fb->size = buffer->GetDataSize(); + // Store Vp9FrameBuffer* in `priv` for use in VpxReleaseFrameBuffer. + // This also makes vpx_codec_get_frame return images with their `fb_priv` set + // to `buffer` which is important for external reference counting. + // Release from refptr so that the buffer's `ref_count_` remains 1 when + // `buffer` goes out of scope. + fb->priv = static_cast<void*>(buffer.release()); + return 0; +} + +// static +int32_t Vp9FrameBufferPool::VpxReleaseFrameBuffer(void* user_priv, + vpx_codec_frame_buffer* fb) { + RTC_DCHECK(user_priv); + RTC_DCHECK(fb); + Vp9FrameBuffer* buffer = static_cast<Vp9FrameBuffer*>(fb->priv); + if (buffer != nullptr) { + buffer->Release(); + // When libvpx fails to decode and you continue to try to decode (and fail) + // libvpx can for some reason try to release the same buffer multiple times. 
+    // Setting `priv` to null protects against trying to Release multiple
+    // times.
+    fb->priv = nullptr;
+  }
+  return 0;
+}
+
+}  // namespace webrtc
+
+#endif  // RTC_ENABLE_VP9
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h
new file mode 100644
index 0000000000..f46f1b7ea2
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h
@@ -0,0 +1,134 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#ifndef MODULES_VIDEO_CODING_CODECS_VP9_VP9_FRAME_BUFFER_POOL_H_
+#define MODULES_VIDEO_CODING_CODECS_VP9_VP9_FRAME_BUFFER_POOL_H_
+
+#ifdef RTC_ENABLE_VP9
+
+#include <vector>
+
+#include "api/ref_counted_base.h"
+#include "api/scoped_refptr.h"
+#include "rtc_base/buffer.h"
+#include "rtc_base/synchronization/mutex.h"
+
+struct vpx_codec_ctx;
+struct vpx_codec_frame_buffer;
+
+namespace webrtc {
+
+// If more buffers than this are allocated we print warnings and crash if in
+// debug mode. VP9 is defined to have 8 reference buffers, of which 3 can be
+// referenced by any frame, see
+// https://tools.ietf.org/html/draft-grange-vp9-bitstream-00#section-2.2.2.
+// Assuming VP9 holds on to at most 8 buffers, any more buffers than that
+// would have to be held by application code. Decoded frames should not be
+// referenced for longer than necessary. If we allow ~60 additional buffers
+// then the application has ~1 second to e.g. render each frame of a 60 fps
+// video.
+constexpr size_t kDefaultMaxNumBuffers = 68;
+
+// This memory pool is used to serve buffers to libvpx for decoding purposes
+// in VP9, which is set up in InitializeVpxUsePool. After the initialization,
+// any time libvpx wants to decode a frame it will use buffers provided and
+// released through VpxGetFrameBuffer and VpxReleaseFrameBuffer.
+// The benefit of owning the pool that libvpx relies on for decoding is that
+// the decoded frames returned by libvpx (from vpx_codec_get_frame) use parts
+// of our buffers for the decoded image data. By retaining ownership of this
+// buffer using scoped_refptr, the image buffer can be reused by VideoFrames
+// and no frame copy has to occur during decoding and frame delivery.
+//
+// Pseudo example usage case:
+//    Vp9FrameBufferPool pool;
+//    pool.InitializeVpxUsePool(decoder_ctx);
+//    ...
+//
+//    // During decoding, libvpx will get and release buffers from the pool.
+//    vpx_codec_decode(decoder_ctx, ...);
+//
+//    vpx_image_t* img = vpx_codec_get_frame(decoder_ctx, &iter);
+//    // Important to use scoped_refptr to protect it against being recycled
+//    // by the pool.
+//    scoped_refptr<Vp9FrameBuffer> img_buffer = (Vp9FrameBuffer*)img->fb_priv;
+//    ...
+//
+//    // Destroying the codec will make libvpx release any buffers it was
+//    // using.
+//    vpx_codec_destroy(decoder_ctx);
+class Vp9FrameBufferPool {
+ public:
+  class Vp9FrameBuffer final
+      : public rtc::RefCountedNonVirtual<Vp9FrameBuffer> {
+   public:
+    uint8_t* GetData();
+    size_t GetDataSize() const;
+    void SetSize(size_t size);
+
+    using rtc::RefCountedNonVirtual<Vp9FrameBuffer>::HasOneRef;
+
+   private:
+    // Data as an easily resizable buffer.
+    rtc::Buffer data_;
+  };
+
+  // Configures libvpx to, in the specified context, use this memory pool for
+  // buffers used to decompress frames. This is only supported for VP9.
+  bool InitializeVpxUsePool(vpx_codec_ctx* vpx_codec_context);
+
+  // Gets a frame buffer of at least `min_size`, recycling an available one
+  // or creating a new one. When no longer referenced from the outside, the
+  // buffer becomes recyclable.
+  rtc::scoped_refptr<Vp9FrameBuffer> GetFrameBuffer(size_t min_size);
+  // Gets the number of buffers currently in use (not ready to be recycled).
+  int GetNumBuffersInUse() const;
+  // Changes the maximum number of buffers in the pool. Returns true on
+  // success, or false if more buffers than the new maximum are currently in
+  // use; free buffers in excess of the new maximum are deleted.
+  bool Resize(size_t max_number_of_buffers);
+  // Releases allocated buffers, deleting available buffers. Buffers in use
+  // are not deleted until they are no longer referenced.
+  void ClearPool();
+
+  // InitializeVpxUsePool configures libvpx to call this function when it
+  // needs a new frame buffer. Parameters:
+  // `user_priv`  Private data passed to libvpx, InitializeVpxUsePool sets it
+  //              up to be a pointer to the pool.
+  // `min_size`   Minimum size needed by libvpx (to decompress a frame).
+  // `fb`         Pointer to the libvpx frame buffer object, this is updated
+  //              to use the pool's buffer.
+  // Returns 0 on success. Returns < 0 on failure.
+  static int32_t VpxGetFrameBuffer(void* user_priv,
+                                   size_t min_size,
+                                   vpx_codec_frame_buffer* fb);
+
+  // InitializeVpxUsePool configures libvpx to call this function when it has
+  // finished using one of the pool's frame buffers. Parameters:
+  // `user_priv`  Private data passed to libvpx, InitializeVpxUsePool sets it
+  //              up to be a pointer to the pool.
+  // `fb`         Pointer to the libvpx frame buffer object, its `priv` will
+  //              be a pointer to one of the pool's Vp9FrameBuffer.
+  static int32_t VpxReleaseFrameBuffer(void* user_priv,
+                                       vpx_codec_frame_buffer* fb);
+
+ private:
+  // Protects `allocated_buffers_`.
+  mutable Mutex buffers_lock_;
+  // All buffers, in use or ready to be recycled.
+  std::vector<rtc::scoped_refptr<Vp9FrameBuffer>> allocated_buffers_
+      RTC_GUARDED_BY(buffers_lock_);
+  size_t max_num_buffers_ = kDefaultMaxNumBuffers;
+};
+
+}  // namespace webrtc
+
+#endif  // RTC_ENABLE_VP9
+
+#endif  // MODULES_VIDEO_CODING_CODECS_VP9_VP9_FRAME_BUFFER_POOL_H_
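Putting the header's pseudo example together, a minimal decode loop over the pool looks roughly like the sketch below. This is illustrative only: `decoder` is assumed to be a vpx_codec_ctx_t already opened with vpx_codec_dec_init(), `encoded_data`/`encoded_size` are assumed inputs, and all error handling is elided.

    Vp9FrameBufferPool pool;
    pool.InitializeVpxUsePool(&decoder);

    // libvpx pulls a pool buffer via VpxGetFrameBuffer for each decoded frame.
    vpx_codec_decode(&decoder, encoded_data, encoded_size,
                     /*user_priv=*/nullptr, /*deadline=*/0);

    vpx_codec_iter_t iter = nullptr;
    while (const vpx_image_t* img = vpx_codec_get_frame(&decoder, &iter)) {
      // `fb_priv` points at the pool's Vp9FrameBuffer backing this image;
      // holding a scoped_refptr keeps the buffer out of the recycle rotation
      // while the frame is in use.
      rtc::scoped_refptr<Vp9FrameBufferPool::Vp9FrameBuffer> buffer(
          static_cast<Vp9FrameBufferPool::Vp9FrameBuffer*>(img->fb_priv));
      // ... wrap `img` planes plus `buffer` into a VideoFrame ...
    }

    vpx_codec_destroy(&decoder);  // libvpx releases its remaining buffers.
    pool.ClearPool();  // Free buffers are deleted; in-use ones linger until
                       // their last reference drops.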