summaryrefslogtreecommitdiffstats
path: root/third_party/libwebrtc/modules/video_coding/codecs/vp9
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/libwebrtc/modules/video_coding/codecs/vp9
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr. (tag: upstream/115.7.0esr, branch: upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/modules/video_coding/codecs/vp9')
-rw-r--r--third_party/libwebrtc/modules/video_coding/codecs/vp9/DEPS3
-rw-r--r--third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9.h54
-rw-r--r--third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9_globals.h179
-rw-r--r--third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc403
-rw-r--r--third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h60
-rw-r--r--third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc2194
-rw-r--r--third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h251
-rw-r--r--third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.cc240
-rw-r--r--third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.h39
-rw-r--r--third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config_unittest.cc285
-rw-r--r--third_party/libwebrtc/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc2446
-rw-r--r--third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9.cc118
-rw-r--r--third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.cc182
-rw-r--r--third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h134
14 files changed, 6588 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/DEPS b/third_party/libwebrtc/modules/video_coding/codecs/vp9/DEPS
new file mode 100644
index 0000000000..cc5cd70142
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/DEPS
@@ -0,0 +1,3 @@
+include_rules = [
+ "+media/base",
+]
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9.h
new file mode 100644
index 0000000000..79d403ded3
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#ifndef MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_H_
+#define MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_H_
+
+#include <memory>
+#include <vector>
+
+#include "api/video_codecs/scalability_mode.h"
+#include "api/video_codecs/sdp_video_format.h"
+#include "media/base/codec.h"
+#include "modules/video_coding/include/video_codec_interface.h"
+
+namespace webrtc {
+
+// Returns a vector with all supported internal VP9 profiles that we can
+// negotiate in SDP, in order of preference.
+std::vector<SdpVideoFormat> SupportedVP9Codecs(
+ bool add_scalability_modes = false);
+
+// Returns a vector with all supported internal VP9 decode profiles in order of
+// preference. These will be available for receive-only connections.
+std::vector<SdpVideoFormat> SupportedVP9DecoderCodecs();
+
+class VP9Encoder : public VideoEncoder {
+ public:
+ // Deprecated. Returns default implementation using VP9 Profile 0.
+ // TODO(emircan): Remove once this is no longer used.
+ static std::unique_ptr<VP9Encoder> Create();
+ // Parses VP9 Profile from `codec` and returns the appropriate implementation.
+ static std::unique_ptr<VP9Encoder> Create(const cricket::VideoCodec& codec);
+ static bool SupportsScalabilityMode(ScalabilityMode scalability_mode);
+
+ ~VP9Encoder() override {}
+};
+
+class VP9Decoder : public VideoDecoder {
+ public:
+ static std::unique_ptr<VP9Decoder> Create();
+
+ ~VP9Decoder() override {}
+};
+} // namespace webrtc
+
+#endif // MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_H_
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9_globals.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9_globals.h
new file mode 100644
index 0000000000..f67215ec77
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9_globals.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This file contains codec dependent definitions that are needed in
+// order to compile the WebRTC codebase, even if this codec is not used.
+
+#ifndef MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_GLOBALS_H_
+#define MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_GLOBALS_H_
+
+#include <stdint.h>
+
+#include "modules/video_coding/codecs/interface/common_constants.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+const int16_t kMaxOneBytePictureId = 0x7F; // 7 bits
+const int16_t kMaxTwoBytePictureId = 0x7FFF; // 15 bits
+const uint8_t kNoSpatialIdx = 0xFF;
+const uint8_t kNoGofIdx = 0xFF;
+const uint8_t kNumVp9Buffers = 8;
+const size_t kMaxVp9RefPics = 3;
+const size_t kMaxVp9FramesInGof = 0xFF; // 8 bits
+const size_t kMaxVp9NumberOfSpatialLayers = 8;
+
+const size_t kMinVp9SpatialLayerLongSideLength = 240;
+const size_t kMinVp9SpatialLayerShortSideLength = 135;
+
+enum TemporalStructureMode {
+ kTemporalStructureMode1, // 1 temporal layer structure - i.e., IPPP...
+ kTemporalStructureMode2, // 2 temporal layers 01...
+ kTemporalStructureMode3, // 3 temporal layers 0212...
+};
+
+struct GofInfoVP9 {
+ void SetGofInfoVP9(TemporalStructureMode tm) {
+ switch (tm) {
+ case kTemporalStructureMode1:
+ num_frames_in_gof = 1;
+ temporal_idx[0] = 0;
+ temporal_up_switch[0] = true;
+ num_ref_pics[0] = 1;
+ pid_diff[0][0] = 1;
+ break;
+ case kTemporalStructureMode2:
+ num_frames_in_gof = 2;
+ temporal_idx[0] = 0;
+ temporal_up_switch[0] = true;
+ num_ref_pics[0] = 1;
+ pid_diff[0][0] = 2;
+
+ temporal_idx[1] = 1;
+ temporal_up_switch[1] = true;
+ num_ref_pics[1] = 1;
+ pid_diff[1][0] = 1;
+ break;
+ case kTemporalStructureMode3:
+ num_frames_in_gof = 4;
+ temporal_idx[0] = 0;
+ temporal_up_switch[0] = true;
+ num_ref_pics[0] = 1;
+ pid_diff[0][0] = 4;
+
+ temporal_idx[1] = 2;
+ temporal_up_switch[1] = true;
+ num_ref_pics[1] = 1;
+ pid_diff[1][0] = 1;
+
+ temporal_idx[2] = 1;
+ temporal_up_switch[2] = true;
+ num_ref_pics[2] = 1;
+ pid_diff[2][0] = 2;
+
+ temporal_idx[3] = 2;
+ temporal_up_switch[3] = true;
+ num_ref_pics[3] = 1;
+ pid_diff[3][0] = 1;
+ break;
+ default:
+ RTC_DCHECK_NOTREACHED();
+ }
+ }
+
+ void CopyGofInfoVP9(const GofInfoVP9& src) {
+ num_frames_in_gof = src.num_frames_in_gof;
+ for (size_t i = 0; i < num_frames_in_gof; ++i) {
+ temporal_idx[i] = src.temporal_idx[i];
+ temporal_up_switch[i] = src.temporal_up_switch[i];
+ num_ref_pics[i] = src.num_ref_pics[i];
+ for (uint8_t r = 0; r < num_ref_pics[i]; ++r) {
+ pid_diff[i][r] = src.pid_diff[i][r];
+ }
+ }
+ }
+
+ size_t num_frames_in_gof;
+ uint8_t temporal_idx[kMaxVp9FramesInGof];
+ bool temporal_up_switch[kMaxVp9FramesInGof];
+ uint8_t num_ref_pics[kMaxVp9FramesInGof];
+ uint8_t pid_diff[kMaxVp9FramesInGof][kMaxVp9RefPics];
+ uint16_t pid_start;
+};
+
+struct RTPVideoHeaderVP9 {
+ void InitRTPVideoHeaderVP9() {
+ inter_pic_predicted = false;
+ flexible_mode = false;
+ beginning_of_frame = false;
+ end_of_frame = false;
+ ss_data_available = false;
+ non_ref_for_inter_layer_pred = false;
+ picture_id = kNoPictureId;
+ max_picture_id = kMaxTwoBytePictureId;
+ tl0_pic_idx = kNoTl0PicIdx;
+ temporal_idx = kNoTemporalIdx;
+ spatial_idx = kNoSpatialIdx;
+ temporal_up_switch = false;
+ inter_layer_predicted = false;
+ gof_idx = kNoGofIdx;
+ num_ref_pics = 0;
+ num_spatial_layers = 1;
+ first_active_layer = 0;
+ end_of_picture = true;
+ }
+
+ bool inter_pic_predicted; // This layer frame is dependent on previously
+ // coded frame(s).
+ bool flexible_mode; // This frame is in flexible mode.
+ bool beginning_of_frame; // True if this packet is the first in a VP9 layer
+ // frame.
+ bool end_of_frame; // True if this packet is the last in a VP9 layer frame.
+ bool ss_data_available; // True if SS data is available in this payload
+ // descriptor.
+ bool non_ref_for_inter_layer_pred; // True for frame which is not used as
+ // reference for inter-layer prediction.
+ int16_t picture_id; // PictureID index, 15 bits;
+ // kNoPictureId if PictureID does not exist.
+ int16_t max_picture_id; // Maximum picture ID index; either 0x7F or 0x7FFF;
+ int16_t tl0_pic_idx; // TL0PIC_IDX, 8 bits;
+ // kNoTl0PicIdx means no value provided.
+ uint8_t temporal_idx; // Temporal layer index, or kNoTemporalIdx.
+ uint8_t spatial_idx; // Spatial layer index, or kNoSpatialIdx.
+ bool temporal_up_switch; // True if upswitch to higher frame rate is possible
+ // meaning subsequent higher temporal layer pictures
+ // will not depend on any picture before the current
+ // picture (in coding order) with temporal layer ID
+ // greater than `temporal_idx` of this frame.
+ bool inter_layer_predicted; // Frame is dependent on directly lower spatial
+ // layer frame.
+
+ uint8_t gof_idx; // Index to predefined temporal frame info in SS data.
+
+ uint8_t num_ref_pics; // Number of reference pictures used by this layer
+ // frame.
+ uint8_t pid_diff[kMaxVp9RefPics]; // P_DIFF signaled to derive the PictureID
+ // of the reference pictures.
+ int16_t ref_picture_id[kMaxVp9RefPics]; // PictureID of reference pictures.
+
+ // SS data.
+ size_t num_spatial_layers; // Always populated.
+ size_t first_active_layer; // Not sent on wire, used to adjust ss data.
+ bool spatial_layer_resolution_present;
+ uint16_t width[kMaxVp9NumberOfSpatialLayers];
+ uint16_t height[kMaxVp9NumberOfSpatialLayers];
+ GofInfoVP9 gof;
+
+ bool end_of_picture; // This frame is the last frame in picture.
+};
+
+} // namespace webrtc
+
+#endif // MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_GLOBALS_H_
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc
new file mode 100644
index 0000000000..a981f259cf
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc
@@ -0,0 +1,403 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#ifdef RTC_ENABLE_VP9
+
+#include "modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h"
+
+#include <algorithm>
+
+#include "absl/strings/match.h"
+#include "api/transport/field_trial_based_config.h"
+#include "api/video/color_space.h"
+#include "api/video/i010_buffer.h"
+#include "common_video/include/video_frame_buffer.h"
+#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "third_party/libyuv/include/libyuv/convert.h"
+#include "vpx/vp8dx.h"
+#include "vpx/vpx_decoder.h"
+
+namespace webrtc {
+namespace {
+
+// Helper function for extracting VP9 colorspace.
+ColorSpace ExtractVP9ColorSpace(vpx_color_space_t space_t,
+ vpx_color_range_t range_t,
+ unsigned int bit_depth) {
+ ColorSpace::PrimaryID primaries = ColorSpace::PrimaryID::kUnspecified;
+ ColorSpace::TransferID transfer = ColorSpace::TransferID::kUnspecified;
+ ColorSpace::MatrixID matrix = ColorSpace::MatrixID::kUnspecified;
+ switch (space_t) {
+ case VPX_CS_BT_601:
+ case VPX_CS_SMPTE_170:
+ primaries = ColorSpace::PrimaryID::kSMPTE170M;
+ transfer = ColorSpace::TransferID::kSMPTE170M;
+ matrix = ColorSpace::MatrixID::kSMPTE170M;
+ break;
+ case VPX_CS_SMPTE_240:
+ primaries = ColorSpace::PrimaryID::kSMPTE240M;
+ transfer = ColorSpace::TransferID::kSMPTE240M;
+ matrix = ColorSpace::MatrixID::kSMPTE240M;
+ break;
+ case VPX_CS_BT_709:
+ primaries = ColorSpace::PrimaryID::kBT709;
+ transfer = ColorSpace::TransferID::kBT709;
+ matrix = ColorSpace::MatrixID::kBT709;
+ break;
+ case VPX_CS_BT_2020:
+ primaries = ColorSpace::PrimaryID::kBT2020;
+ switch (bit_depth) {
+ case 8:
+ transfer = ColorSpace::TransferID::kBT709;
+ break;
+ case 10:
+ transfer = ColorSpace::TransferID::kBT2020_10;
+ break;
+ default:
+ RTC_DCHECK_NOTREACHED();
+ break;
+ }
+ matrix = ColorSpace::MatrixID::kBT2020_NCL;
+ break;
+ case VPX_CS_SRGB:
+ primaries = ColorSpace::PrimaryID::kBT709;
+ transfer = ColorSpace::TransferID::kIEC61966_2_1;
+ matrix = ColorSpace::MatrixID::kBT709;
+ break;
+ default:
+ break;
+ }
+
+ ColorSpace::RangeID range = ColorSpace::RangeID::kInvalid;
+ switch (range_t) {
+ case VPX_CR_STUDIO_RANGE:
+ range = ColorSpace::RangeID::kLimited;
+ break;
+ case VPX_CR_FULL_RANGE:
+ range = ColorSpace::RangeID::kFull;
+ break;
+ default:
+ break;
+ }
+ return ColorSpace(primaries, transfer, matrix, range);
+}
+
+} // namespace
+
+LibvpxVp9Decoder::LibvpxVp9Decoder()
+ : decode_complete_callback_(nullptr),
+ inited_(false),
+ decoder_(nullptr),
+ key_frame_required_(true) {}
+
+LibvpxVp9Decoder::~LibvpxVp9Decoder() {
+ inited_ = true; // in order to do the actual release
+ Release();
+ int num_buffers_in_use = libvpx_buffer_pool_.GetNumBuffersInUse();
+ if (num_buffers_in_use > 0) {
+ // The frame buffers are reference counted and frames are exposed after
+ // decoding. There may be valid usage cases where previous frames are still
+ // referenced after ~LibvpxVp9Decoder that is not a leak.
+ RTC_LOG(LS_INFO) << num_buffers_in_use
+ << " Vp9FrameBuffers are still "
+ "referenced during ~LibvpxVp9Decoder.";
+ }
+}
+
+bool LibvpxVp9Decoder::Configure(const Settings& settings) {
+ if (Release() < 0) {
+ return false;
+ }
+
+ if (decoder_ == nullptr) {
+ decoder_ = new vpx_codec_ctx_t;
+ memset(decoder_, 0, sizeof(*decoder_));
+ }
+ vpx_codec_dec_cfg_t cfg;
+ memset(&cfg, 0, sizeof(cfg));
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ // We focus on webrtc fuzzing here, not libvpx itself. Use single thread for
+ // fuzzing, because:
+ // - libvpx's VP9 single thread decoder is more fuzzer friendly. It detects
+ // errors earlier than the multi-threads version.
+ // - Make peak CPU usage under control (not depending on input)
+ cfg.threads = 1;
+#else
+ const RenderResolution& resolution = settings.max_render_resolution();
+ if (!resolution.Valid()) {
+ // Postpone configuring number of threads until resolution is known.
+ cfg.threads = 1;
+ } else {
+ // We want to use multithreading when decoding high resolution videos. But
+    // not too many in order to avoid overhead when many streams are decoded
+ // concurrently.
+ // Set 2 thread as target for 1280x720 pixel count, and then scale up
+ // linearly from there - but cap at physical core count.
+ // For common resolutions this results in:
+ // 1 for 360p
+ // 2 for 720p
+ // 4 for 1080p
+ // 8 for 1440p
+ // 18 for 4K
+ int num_threads = std::max(
+ 1, 2 * resolution.Width() * resolution.Height() / (1280 * 720));
+ cfg.threads = std::min(settings.number_of_cores(), num_threads);
+ }
+#endif
+
+ current_settings_ = settings;
+
+ vpx_codec_flags_t flags = 0;
+ if (vpx_codec_dec_init(decoder_, vpx_codec_vp9_dx(), &cfg, flags)) {
+ return false;
+ }
+
+ if (!libvpx_buffer_pool_.InitializeVpxUsePool(decoder_)) {
+ return false;
+ }
+
+ inited_ = true;
+ // Always start with a complete key frame.
+ key_frame_required_ = true;
+ if (absl::optional<int> buffer_pool_size = settings.buffer_pool_size()) {
+ if (!libvpx_buffer_pool_.Resize(*buffer_pool_size)) {
+ return false;
+ }
+ }
+
+ vpx_codec_err_t status =
+ vpx_codec_control(decoder_, VP9D_SET_LOOP_FILTER_OPT, 1);
+ if (status != VPX_CODEC_OK) {
+ RTC_LOG(LS_ERROR) << "Failed to enable VP9D_SET_LOOP_FILTER_OPT. "
+ << vpx_codec_error(decoder_);
+ return false;
+ }
+
+ return true;
+}
+
+int LibvpxVp9Decoder::Decode(const EncodedImage& input_image,
+ bool missing_frames,
+ int64_t /*render_time_ms*/) {
+ if (!inited_) {
+ return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
+ }
+ if (decode_complete_callback_ == nullptr) {
+ return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
+ }
+
+ if (input_image._frameType == VideoFrameType::kVideoFrameKey) {
+ absl::optional<Vp9UncompressedHeader> frame_info =
+ ParseUncompressedVp9Header(
+ rtc::MakeArrayView(input_image.data(), input_image.size()));
+ if (frame_info) {
+ RenderResolution frame_resolution(frame_info->frame_width,
+ frame_info->frame_height);
+ if (frame_resolution != current_settings_.max_render_resolution()) {
+ // Resolution has changed, tear down and re-init a new decoder in
+ // order to get correct sizing.
+ Release();
+ current_settings_.set_max_render_resolution(frame_resolution);
+ if (!Configure(current_settings_)) {
+ RTC_LOG(LS_WARNING) << "Failed to re-init decoder.";
+ return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
+ }
+ }
+ } else {
+ RTC_LOG(LS_WARNING) << "Failed to parse VP9 header from key-frame.";
+ }
+ }
+
+ // Always start with a complete key frame.
+ if (key_frame_required_) {
+ if (input_image._frameType != VideoFrameType::kVideoFrameKey)
+ return WEBRTC_VIDEO_CODEC_ERROR;
+ key_frame_required_ = false;
+ }
+ vpx_codec_iter_t iter = nullptr;
+ vpx_image_t* img;
+ const uint8_t* buffer = input_image.data();
+ if (input_image.size() == 0) {
+ buffer = nullptr; // Triggers full frame concealment.
+ }
+ // During decode libvpx may get and release buffers from
+ // `libvpx_buffer_pool_`. In practice libvpx keeps a few (~3-4) buffers alive
+ // at a time.
+ if (vpx_codec_decode(decoder_, buffer,
+ static_cast<unsigned int>(input_image.size()), 0,
+ VPX_DL_REALTIME)) {
+ return WEBRTC_VIDEO_CODEC_ERROR;
+ }
+ // `img->fb_priv` contains the image data, a reference counted Vp9FrameBuffer.
+ // It may be released by libvpx during future vpx_codec_decode or
+ // vpx_codec_destroy calls.
+ img = vpx_codec_get_frame(decoder_, &iter);
+ int qp;
+ vpx_codec_err_t vpx_ret =
+ vpx_codec_control(decoder_, VPXD_GET_LAST_QUANTIZER, &qp);
+ RTC_DCHECK_EQ(vpx_ret, VPX_CODEC_OK);
+ int ret =
+ ReturnFrame(img, input_image.Timestamp(), qp, input_image.ColorSpace());
+ if (ret != 0) {
+ return ret;
+ }
+ return WEBRTC_VIDEO_CODEC_OK;
+}
+
+int LibvpxVp9Decoder::ReturnFrame(
+ const vpx_image_t* img,
+ uint32_t timestamp,
+ int qp,
+ const webrtc::ColorSpace* explicit_color_space) {
+ if (img == nullptr) {
+ // Decoder OK and nullptr image => No show frame.
+ return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
+ }
+
+ // This buffer contains all of `img`'s image data, a reference counted
+ // Vp9FrameBuffer. (libvpx is done with the buffers after a few
+ // vpx_codec_decode calls or vpx_codec_destroy).
+ rtc::scoped_refptr<Vp9FrameBufferPool::Vp9FrameBuffer> img_buffer(
+ static_cast<Vp9FrameBufferPool::Vp9FrameBuffer*>(img->fb_priv));
+
+ // The buffer can be used directly by the VideoFrame (without copy) by
+ // using a Wrapped*Buffer.
+ rtc::scoped_refptr<VideoFrameBuffer> img_wrapped_buffer;
+ switch (img->fmt) {
+ case VPX_IMG_FMT_I420:
+ img_wrapped_buffer = WrapI420Buffer(
+ img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
+ img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
+ img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
+ img->stride[VPX_PLANE_V],
+ // WrappedI420Buffer's mechanism for allowing the release of its
+ // frame buffer is through a callback function. This is where we
+ // should release `img_buffer`.
+ [img_buffer] {});
+ break;
+ case VPX_IMG_FMT_I422:
+ img_wrapped_buffer = WrapI422Buffer(
+ img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
+ img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
+ img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
+ img->stride[VPX_PLANE_V],
+          // WrappedI422Buffer's mechanism for allowing the release of its
+ // frame buffer is through a callback function. This is where we
+ // should release `img_buffer`.
+ [img_buffer] {});
+ break;
+ case VPX_IMG_FMT_I444:
+ img_wrapped_buffer = WrapI444Buffer(
+ img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
+ img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
+ img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
+ img->stride[VPX_PLANE_V],
+ // WrappedI444Buffer's mechanism for allowing the release of its
+ // frame buffer is through a callback function. This is where we
+ // should release `img_buffer`.
+ [img_buffer] {});
+ break;
+ case VPX_IMG_FMT_I42016:
+ img_wrapped_buffer = WrapI010Buffer(
+ img->d_w, img->d_h,
+ reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_Y]),
+ img->stride[VPX_PLANE_Y] / 2,
+ reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_U]),
+ img->stride[VPX_PLANE_U] / 2,
+ reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_V]),
+ img->stride[VPX_PLANE_V] / 2, [img_buffer] {});
+ break;
+ case VPX_IMG_FMT_I42216:
+ img_wrapped_buffer = WrapI210Buffer(
+ img->d_w, img->d_h,
+ reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_Y]),
+ img->stride[VPX_PLANE_Y] / 2,
+ reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_U]),
+ img->stride[VPX_PLANE_U] / 2,
+ reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_V]),
+ img->stride[VPX_PLANE_V] / 2, [img_buffer] {});
+ break;
+ case VPX_IMG_FMT_I44416:
+ img_wrapped_buffer = WrapI410Buffer(
+ img->d_w, img->d_h,
+ reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_Y]),
+ img->stride[VPX_PLANE_Y] / 2,
+ reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_U]),
+ img->stride[VPX_PLANE_U] / 2,
+ reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_V]),
+ img->stride[VPX_PLANE_V] / 2, [img_buffer] {});
+ break;
+ default:
+ RTC_LOG(LS_ERROR) << "Unsupported pixel format produced by the decoder: "
+ << static_cast<int>(img->fmt);
+ return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
+ }
+
+ auto builder = VideoFrame::Builder()
+ .set_video_frame_buffer(img_wrapped_buffer)
+ .set_timestamp_rtp(timestamp);
+ if (explicit_color_space) {
+ builder.set_color_space(*explicit_color_space);
+ } else {
+ builder.set_color_space(
+ ExtractVP9ColorSpace(img->cs, img->range, img->bit_depth));
+ }
+ VideoFrame decoded_image = builder.build();
+
+ decode_complete_callback_->Decoded(decoded_image, absl::nullopt, qp);
+ return WEBRTC_VIDEO_CODEC_OK;
+}
+
+int LibvpxVp9Decoder::RegisterDecodeCompleteCallback(
+ DecodedImageCallback* callback) {
+ decode_complete_callback_ = callback;
+ return WEBRTC_VIDEO_CODEC_OK;
+}
+
+int LibvpxVp9Decoder::Release() {
+ int ret_val = WEBRTC_VIDEO_CODEC_OK;
+
+ if (decoder_ != nullptr) {
+ if (inited_) {
+ // When a codec is destroyed libvpx will release any buffers of
+ // `libvpx_buffer_pool_` it is currently using.
+ if (vpx_codec_destroy(decoder_)) {
+ ret_val = WEBRTC_VIDEO_CODEC_MEMORY;
+ }
+ }
+ delete decoder_;
+ decoder_ = nullptr;
+ }
+ // Releases buffers from the pool. Any buffers not in use are deleted. Buffers
+ // still referenced externally are deleted once fully released, not returning
+ // to the pool.
+ libvpx_buffer_pool_.ClearPool();
+ inited_ = false;
+ return ret_val;
+}
+
+VideoDecoder::DecoderInfo LibvpxVp9Decoder::GetDecoderInfo() const {
+ DecoderInfo info;
+ info.implementation_name = "libvpx";
+ info.is_hardware_accelerated = false;
+ return info;
+}
+
+const char* LibvpxVp9Decoder::ImplementationName() const {
+ return "libvpx";
+}
+
+} // namespace webrtc
+
+#endif // RTC_ENABLE_VP9
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h
new file mode 100644
index 0000000000..65fc553b82
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#ifndef MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_DECODER_H_
+#define MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_DECODER_H_
+
+#ifdef RTC_ENABLE_VP9
+
+#include "api/video_codecs/video_decoder.h"
+#include "modules/video_coding/codecs/vp9/include/vp9.h"
+#include "modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h"
+#include "vpx/vp8cx.h"
+
+namespace webrtc {
+
+class LibvpxVp9Decoder : public VP9Decoder {
+ public:
+ LibvpxVp9Decoder();
+ virtual ~LibvpxVp9Decoder();
+
+ bool Configure(const Settings& settings) override;
+
+ int Decode(const EncodedImage& input_image,
+ bool missing_frames,
+ int64_t /*render_time_ms*/) override;
+
+ int RegisterDecodeCompleteCallback(DecodedImageCallback* callback) override;
+
+ int Release() override;
+
+ DecoderInfo GetDecoderInfo() const override;
+ const char* ImplementationName() const override;
+
+ private:
+ int ReturnFrame(const vpx_image_t* img,
+ uint32_t timestamp,
+ int qp,
+ const webrtc::ColorSpace* explicit_color_space);
+
+ // Memory pool used to share buffers between libvpx and webrtc.
+ Vp9FrameBufferPool libvpx_buffer_pool_;
+ DecodedImageCallback* decode_complete_callback_;
+ bool inited_;
+ vpx_codec_ctx_t* decoder_;
+ bool key_frame_required_;
+ Settings current_settings_;
+};
+} // namespace webrtc
+
+#endif // RTC_ENABLE_VP9
+
+#endif // MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_DECODER_H_
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
new file mode 100644
index 0000000000..5877373b76
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
@@ -0,0 +1,2194 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include <memory>
+#ifdef RTC_ENABLE_VP9
+
+#include <algorithm>
+#include <limits>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "absl/algorithm/container.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/match.h"
+#include "absl/types/optional.h"
+#include "api/video/color_space.h"
+#include "api/video/i010_buffer.h"
+#include "api/video_codecs/scalability_mode.h"
+#include "common_video/include/video_frame_buffer.h"
+#include "common_video/libyuv/include/webrtc_libyuv.h"
+#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+#include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h"
+#include "modules/video_coding/svc/create_scalability_structure.h"
+#include "modules/video_coding/svc/scalability_mode_util.h"
+#include "modules/video_coding/svc/scalable_video_controller.h"
+#include "modules/video_coding/svc/scalable_video_controller_no_layering.h"
+#include "modules/video_coding/svc/svc_rate_allocator.h"
+#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/experiments/field_trial_list.h"
+#include "rtc_base/experiments/field_trial_parser.h"
+#include "rtc_base/experiments/rate_control_settings.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/strings/string_builder.h"
+#include "rtc_base/time_utils.h"
+#include "rtc_base/trace_event.h"
+#include "third_party/libyuv/include/libyuv/convert.h"
+#include "vpx/vp8cx.h"
+#include "vpx/vpx_encoder.h"
+
+namespace webrtc {
+
+namespace {
+// Maps from gof_idx to encoder internal reference frame buffer index. These
+// maps work for 1,2 and 3 temporal layers with GOF length of 1,2 and 4 frames.
+uint8_t kRefBufIdx[4] = {0, 0, 0, 1};
+uint8_t kUpdBufIdx[4] = {0, 0, 1, 0};
+
+// Maximum allowed PID difference for different per-layer frame-rate case.
+const int kMaxAllowedPidDiff = 30;
+
+// TODO(ilink): Tune these thresholds further.
+// Selected using ConverenceMotion_1280_720_50.yuv clip.
+// No toggling observed on any link capacity from 100-2000kbps.
+// HD was reached consistently when link capacity was 1500kbps.
+// Set resolutions are a bit more conservative than svc_config.cc sets, e.g.
+// for 300kbps resolution converged to 270p instead of 360p.
+constexpr int kLowVp9QpThreshold = 149;
+constexpr int kHighVp9QpThreshold = 205;
+
+// Returns the half-open range [first, last) of contiguous spatial layer
+// indices whose bitrate sum in `allocation` is non-zero, starting at the
+// lowest active layer. Returns {0, 0} when no layer is active. Layers above
+// a gap (inactive layer) are not included in the range.
+std::pair<size_t, size_t> GetActiveLayers(
+    const VideoBitrateAllocation& allocation) {
+  for (size_t sl_idx = 0; sl_idx < kMaxSpatialLayers; ++sl_idx) {
+    if (allocation.GetSpatialLayerSum(sl_idx) > 0) {
+      size_t last_layer = sl_idx + 1;
+      while (last_layer < kMaxSpatialLayers &&
+             allocation.GetSpatialLayerSum(last_layer) > 0) {
+        ++last_layer;
+      }
+      return std::make_pair(sl_idx, last_layer);
+    }
+  }
+  return {0, 0};
+}
+
+using Vp9ScalabilityStructure =
+    std::tuple<std::unique_ptr<ScalableVideoController>, ScalabilityMode>;
+// Derives a scalability structure name (e.g. "L2T3", "L3T1_KEY", "S2T1",
+// optionally with an "h" suffix for a 2:3 spatial ratio) from the legacy
+// VP9-specific codec settings, validates the spatial layer ratios, and
+// creates the matching ScalableVideoController. Returns absl::nullopt for
+// unsupported configurations (screenshare mode, invalid ratios, unknown
+// mode names).
+absl::optional<Vp9ScalabilityStructure> CreateVp9ScalabilityStructure(
+    const VideoCodec& codec) {
+  int num_spatial_layers = codec.VP9().numberOfSpatialLayers;
+  int num_temporal_layers =
+      std::max(1, int{codec.VP9().numberOfTemporalLayers});
+  // Single layer: no SVC structure needed.
+  if (num_spatial_layers == 1 && num_temporal_layers == 1) {
+    return absl::make_optional<Vp9ScalabilityStructure>(
+        std::make_unique<ScalableVideoControllerNoLayering>(),
+        ScalabilityMode::kL1T1);
+  }
+
+  char name[20];
+  rtc::SimpleStringBuilder ss(name);
+  if (codec.mode == VideoCodecMode::kScreensharing) {
+    // TODO(bugs.webrtc.org/11999): Compose names of the structures when they
+    // are implemented.
+    return absl::nullopt;
+  } else if (codec.VP9().interLayerPred == InterLayerPredMode::kOn ||
+             num_spatial_layers == 1) {
+    ss << "L" << num_spatial_layers << "T" << num_temporal_layers;
+  } else if (codec.VP9().interLayerPred == InterLayerPredMode::kOnKeyPic) {
+    ss << "L" << num_spatial_layers << "T" << num_temporal_layers << "_KEY";
+  } else {
+    RTC_DCHECK_EQ(codec.VP9().interLayerPred, InterLayerPredMode::kOff);
+    ss << "S" << num_spatial_layers << "T" << num_temporal_layers;
+  }
+
+  // Check spatial ratio.
+  if (num_spatial_layers > 1 && codec.spatialLayers[0].targetBitrate > 0) {
+    if (codec.width != codec.spatialLayers[num_spatial_layers - 1].width ||
+        codec.height != codec.spatialLayers[num_spatial_layers - 1].height) {
+      RTC_LOG(LS_WARNING)
+          << "Top layer resolution expected to match overall resolution";
+      return absl::nullopt;
+    }
+    // Check if the ratio is one of the supported.
+    int numerator;
+    int denominator;
+    if (codec.spatialLayers[1].width == 2 * codec.spatialLayers[0].width) {
+      numerator = 1;
+      denominator = 2;
+      // no suffix for 1:2 ratio.
+    } else if (2 * codec.spatialLayers[1].width ==
+               3 * codec.spatialLayers[0].width) {
+      numerator = 2;
+      denominator = 3;
+      ss << "h";
+    } else {
+      RTC_LOG(LS_WARNING) << "Unsupported scalability ratio "
+                          << codec.spatialLayers[0].width << ":"
+                          << codec.spatialLayers[1].width;
+      return absl::nullopt;
+    }
+    // Validate ratio is consistent for all spatial layer transitions.
+    for (int sid = 1; sid < num_spatial_layers; ++sid) {
+      if (codec.spatialLayers[sid].width * numerator !=
+              codec.spatialLayers[sid - 1].width * denominator ||
+          codec.spatialLayers[sid].height * numerator !=
+              codec.spatialLayers[sid - 1].height * denominator) {
+        RTC_LOG(LS_WARNING) << "Inconsistent scalability ratio " << numerator
+                            << ":" << denominator;
+        return absl::nullopt;
+      }
+    }
+  }
+
+  absl::optional<ScalabilityMode> scalability_mode =
+      ScalabilityModeFromString(name);
+  if (!scalability_mode.has_value()) {
+    RTC_LOG(LS_WARNING) << "Invalid scalability mode " << name;
+    return absl::nullopt;
+  }
+  auto scalability_structure_controller =
+      CreateScalabilityStructure(*scalability_mode);
+  if (scalability_structure_controller == nullptr) {
+    RTC_LOG(LS_WARNING) << "Unsupported scalability structure " << name;
+  } else {
+    RTC_LOG(LS_INFO) << "Created scalability structure " << name;
+  }
+  // NOTE(review): when the controller is nullptr the optional is still
+  // engaged with a null controller — callers are expected to handle that
+  // (InitEncode falls back); confirm this is intended rather than returning
+  // absl::nullopt here.
+  return absl::make_optional<Vp9ScalabilityStructure>(
+      std::move(scalability_structure_controller), *scalability_mode);
+}
+
+// Translates per-layer frame configurations produced by the
+// ScalableVideoController into the libvpx external reference-frame config.
+// For each spatial layer, buffer 0 maps to LAST, buffer 1 to GOLDEN and
+// buffer 2 to ALT_REF; updated buffers are marked in `update_buffer_slot`.
+vpx_svc_ref_frame_config_t Vp9References(
+    rtc::ArrayView<const ScalableVideoController::LayerFrameConfig> layers) {
+  vpx_svc_ref_frame_config_t ref_config = {};
+  for (const ScalableVideoController::LayerFrameConfig& layer_frame : layers) {
+    const auto& buffers = layer_frame.Buffers();
+    // VP9 supports at most 3 references per frame (last/golden/alt-ref).
+    RTC_DCHECK_LE(buffers.size(), 3);
+    int sid = layer_frame.SpatialId();
+    if (!buffers.empty()) {
+      ref_config.lst_fb_idx[sid] = buffers[0].id;
+      ref_config.reference_last[sid] = buffers[0].referenced;
+      if (buffers[0].updated) {
+        ref_config.update_buffer_slot[sid] |= (1 << buffers[0].id);
+      }
+    }
+    if (buffers.size() > 1) {
+      ref_config.gld_fb_idx[sid] = buffers[1].id;
+      ref_config.reference_golden[sid] = buffers[1].referenced;
+      if (buffers[1].updated) {
+        ref_config.update_buffer_slot[sid] |= (1 << buffers[1].id);
+      }
+    }
+    if (buffers.size() > 2) {
+      ref_config.alt_fb_idx[sid] = buffers[2].id;
+      ref_config.reference_alt_ref[sid] = buffers[2].referenced;
+      if (buffers[2].updated) {
+        ref_config.update_buffer_slot[sid] |= (1 << buffers[2].id);
+      }
+    }
+  }
+  // TODO(bugs.webrtc.org/11999): Fill ref_config.duration
+  return ref_config;
+}
+
+// Returns true if the libvpx denoiser may be enabled on this platform.
+bool AllowDenoising() {
+  // Do not enable the denoiser on ARM since optimization is pending.
+  // Denoiser is on by default on other platforms.
+#if !defined(WEBRTC_ARCH_ARM) && !defined(WEBRTC_ARCH_ARM64) && \
+    !defined(ANDROID)
+  return true;
+#else
+  return false;
+#endif
+}
+
+} // namespace
+
+// Static trampoline registered with libvpx (VP9E_REGISTER_CX_CALLBACK):
+// forwards each coded packet to the encoder instance passed as `user_data`.
+void LibvpxVp9Encoder::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
+                                                        void* user_data) {
+  LibvpxVp9Encoder* enc = static_cast<LibvpxVp9Encoder*>(user_data);
+  enc->GetEncodedLayerFrame(pkt);
+}
+
+// Constructs the encoder wrapper. Heavy initialization is deferred to
+// InitEncode(); here only field-trial flags are parsed and members are
+// zero-initialized. `interface` abstracts the libvpx C API (injectable for
+// tests).
+LibvpxVp9Encoder::LibvpxVp9Encoder(const cricket::VideoCodec& codec,
+                                   std::unique_ptr<LibvpxInterface> interface,
+                                   const FieldTrialsView& trials)
+    : libvpx_(std::move(interface)),
+      encoded_image_(),
+      encoded_complete_callback_(nullptr),
+      // VP9 profile is negotiated via SDP; default to profile 0 (8-bit 4:2:0).
+      profile_(
+          ParseSdpForVP9Profile(codec.params).value_or(VP9Profile::kProfile0)),
+      inited_(false),
+      timestamp_(0),
+      rc_max_intra_target_(0),
+      encoder_(nullptr),
+      config_(nullptr),
+      raw_(nullptr),
+      input_image_(nullptr),
+      force_key_frame_(true),
+      pics_since_key_(0),
+      num_temporal_layers_(0),
+      num_spatial_layers_(0),
+      num_active_spatial_layers_(0),
+      first_active_layer_(0),
+      layer_deactivation_requires_key_frame_(absl::StartsWith(
+          trials.Lookup("WebRTC-Vp9IssueKeyFrameOnLayerDeactivation"),
+          "Enabled")),
+      is_svc_(false),
+      inter_layer_pred_(InterLayerPredMode::kOn),
+      external_ref_control_(false),  // Set in InitEncode because of tests.
+      trusted_rate_controller_(
+          RateControlSettings::ParseFromKeyValueConfig(&trials)
+              .LibvpxVp9TrustedRateController()),
+      layer_buffering_(false),
+      full_superframe_drop_(true),
+      first_frame_in_picture_(true),
+      ss_info_needed_(false),
+      force_all_active_layers_(false),
+      num_cores_(0),
+      is_flexible_mode_(false),
+      variable_framerate_experiment_(ParseVariableFramerateConfig(trials)),
+      variable_framerate_controller_(
+          variable_framerate_experiment_.framerate_limit),
+      quality_scaler_experiment_(ParseQualityScalerConfig(trials)),
+      // External ref control is opt-out via field trial.
+      external_ref_ctrl_(
+          !absl::StartsWith(trials.Lookup("WebRTC-Vp9ExternalRefCtrl"),
+                            "Disabled")),
+      performance_flags_(ParsePerformanceFlagsFromTrials(trials)),
+      num_steady_state_frames_(0),
+      config_changed_(true) {
+  codec_ = {};
+  memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));
+}
+
+// Destructor releases all libvpx resources via Release().
+LibvpxVp9Encoder::~LibvpxVp9Encoder() {
+  Release();
+}
+
+// FEC control is not supported by this encoder implementation.
+void LibvpxVp9Encoder::SetFecControllerOverride(FecControllerOverride*) {
+  // Ignored.
+}
+
+// Frees the libvpx codec context, encoder configuration and the raw image
+// wrapper. Safe to call multiple times and from the destructor. Returns
+// WEBRTC_VIDEO_CODEC_OK, or WEBRTC_VIDEO_CODEC_MEMORY if codec destruction
+// failed.
+int LibvpxVp9Encoder::Release() {
+  int ret_val = WEBRTC_VIDEO_CODEC_OK;
+
+  if (encoder_ != nullptr) {
+    // Only destroy the codec context if it was successfully initialized.
+    if (inited_) {
+      if (libvpx_->codec_destroy(encoder_)) {
+        ret_val = WEBRTC_VIDEO_CODEC_MEMORY;
+      }
+    }
+    delete encoder_;
+    encoder_ = nullptr;
+  }
+  if (config_ != nullptr) {
+    delete config_;
+    config_ = nullptr;
+  }
+  if (raw_ != nullptr) {
+    libvpx_->img_free(raw_);
+    raw_ = nullptr;
+  }
+  inited_ = false;
+  return ret_val;
+}
+
+// Returns true when the application supplied per-spatial-layer bitrates.
+bool LibvpxVp9Encoder::ExplicitlyConfiguredSpatialLayers() const {
+  // We check target_bitrate_bps of the 0th layer to see if the spatial layers
+  // (i.e. bitrates) were explicitly configured.
+  return codec_.spatialLayers[0].targetBitrate > 0;
+}
+
+// Applies `bitrate_allocation` to the libvpx SVC configuration. Also decides
+// whether the change in active layers requires a key frame or a new SS info,
+// recomputes per-layer target bitrates, updates the active-layer bookkeeping
+// and informs the svc controller (if any). Returns false on inconsistent
+// configuration (missing scaling factors, unsupported temporal layer count).
+bool LibvpxVp9Encoder::SetSvcRates(
+    const VideoBitrateAllocation& bitrate_allocation) {
+  std::pair<size_t, size_t> current_layers =
+      GetActiveLayers(current_bitrate_allocation_);
+  std::pair<size_t, size_t> new_layers = GetActiveLayers(bitrate_allocation);
+
+  // Without full inter-layer prediction, a newly enabled layer has no valid
+  // spatial reference and therefore needs a key frame.
+  const bool layer_activation_requires_key_frame =
+      inter_layer_pred_ == InterLayerPredMode::kOff ||
+      inter_layer_pred_ == InterLayerPredMode::kOnKeyPic;
+  const bool lower_layers_enabled = new_layers.first < current_layers.first;
+  const bool higher_layers_enabled = new_layers.second > current_layers.second;
+  const bool disabled_layers = new_layers.first > current_layers.first ||
+                               new_layers.second < current_layers.second;
+
+  if (lower_layers_enabled ||
+      (higher_layers_enabled && layer_activation_requires_key_frame) ||
+      (disabled_layers && layer_deactivation_requires_key_frame_)) {
+    force_key_frame_ = true;
+  }
+
+  if (current_layers != new_layers) {
+    ss_info_needed_ = true;
+  }
+
+  config_->rc_target_bitrate = bitrate_allocation.get_sum_kbps();
+
+  if (ExplicitlyConfiguredSpatialLayers()) {
+    // Per-layer bitrates are taken directly from the allocation.
+    for (size_t sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
+      const bool was_layer_active = (config_->ss_target_bitrate[sl_idx] > 0);
+      config_->ss_target_bitrate[sl_idx] =
+          bitrate_allocation.GetSpatialLayerSum(sl_idx) / 1000;
+
+      for (size_t tl_idx = 0; tl_idx < num_temporal_layers_; ++tl_idx) {
+        config_->layer_target_bitrate[sl_idx * num_temporal_layers_ + tl_idx] =
+            bitrate_allocation.GetTemporalLayerSum(sl_idx, tl_idx) / 1000;
+      }
+
+      if (!was_layer_active) {
+        // Reset frame rate controller if layer is resumed after pause.
+        framerate_controller_[sl_idx].Reset();
+      }
+
+      framerate_controller_[sl_idx].SetTargetRate(
+          codec_.spatialLayers[sl_idx].maxFramerate);
+    }
+  } else {
+    // No explicit per-layer bitrates: distribute the total in proportion to
+    // each layer's spatial scaling factor.
+    float rate_ratio[VPX_MAX_LAYERS] = {0};
+    float total = 0;
+    for (int i = 0; i < num_spatial_layers_; ++i) {
+      if (svc_params_.scaling_factor_num[i] <= 0 ||
+          svc_params_.scaling_factor_den[i] <= 0) {
+        RTC_LOG(LS_ERROR) << "Scaling factors not specified!";
+        return false;
+      }
+      rate_ratio[i] = static_cast<float>(svc_params_.scaling_factor_num[i]) /
+                      svc_params_.scaling_factor_den[i];
+      total += rate_ratio[i];
+    }
+
+    for (int i = 0; i < num_spatial_layers_; ++i) {
+      RTC_CHECK_GT(total, 0);
+      config_->ss_target_bitrate[i] = static_cast<unsigned int>(
+          config_->rc_target_bitrate * rate_ratio[i] / total);
+      // Temporal split: 2 layers -> TL0 gets 2/3; 3 layers -> TL0 gets 1/2
+      // and TL0+TL1 get 3/4. `layer_target_bitrate` values are cumulative.
+      if (num_temporal_layers_ == 1) {
+        config_->layer_target_bitrate[i] = config_->ss_target_bitrate[i];
+      } else if (num_temporal_layers_ == 2) {
+        config_->layer_target_bitrate[i * num_temporal_layers_] =
+            config_->ss_target_bitrate[i] * 2 / 3;
+        config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
+            config_->ss_target_bitrate[i];
+      } else if (num_temporal_layers_ == 3) {
+        config_->layer_target_bitrate[i * num_temporal_layers_] =
+            config_->ss_target_bitrate[i] / 2;
+        config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
+            config_->layer_target_bitrate[i * num_temporal_layers_] +
+            (config_->ss_target_bitrate[i] / 4);
+        config_->layer_target_bitrate[i * num_temporal_layers_ + 2] =
+            config_->ss_target_bitrate[i];
+      } else {
+        RTC_LOG(LS_ERROR) << "Unsupported number of temporal layers: "
+                          << num_temporal_layers_;
+        return false;
+      }
+
+      framerate_controller_[i].SetTargetRate(codec_.maxFramerate);
+    }
+  }
+
+  // Recompute the contiguous active-layer range from the configured bitrates.
+  num_active_spatial_layers_ = 0;
+  first_active_layer_ = 0;
+  bool seen_active_layer = false;
+  bool expect_no_more_active_layers = false;
+  for (int i = 0; i < num_spatial_layers_; ++i) {
+    if (config_->ss_target_bitrate[i] > 0) {
+      RTC_DCHECK(!expect_no_more_active_layers) << "Only middle layer is "
+                                                   "deactivated.";
+      if (!seen_active_layer) {
+        first_active_layer_ = i;
+      }
+      num_active_spatial_layers_ = i + 1;
+      seen_active_layer = true;
+    } else {
+      expect_no_more_active_layers = seen_active_layer;
+    }
+  }
+
+  if (seen_active_layer && performance_flags_.use_per_layer_speed) {
+    // Denoising depends on the highest active layer's performance flags.
+    bool denoiser_on =
+        AllowDenoising() && codec_.VP9()->denoisingOn &&
+        performance_flags_by_spatial_index_[num_active_spatial_layers_ - 1]
+            .allow_denoising;
+    libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
+                           denoiser_on ? 1 : 0);
+  }
+
+  if (higher_layers_enabled && !force_key_frame_) {
+    // Prohibit drop of all layers for the next frame, so newly enabled
+    // layer would have a valid spatial reference.
+    for (size_t i = 0; i < num_spatial_layers_; ++i) {
+      svc_drop_frame_.framedrop_thresh[i] = 0;
+    }
+    force_all_active_layers_ = true;
+  }
+
+  if (svc_controller_) {
+    for (int sid = 0; sid < num_spatial_layers_; ++sid) {
+      // Bitrates in `layer_target_bitrate` are accumulated for each temporal
+      // layer but in `VideoBitrateAllocation` they should be separated.
+      int previous_bitrate_kbps = 0;
+      for (int tid = 0; tid < num_temporal_layers_; ++tid) {
+        int accumulated_bitrate_kbps =
+            config_->layer_target_bitrate[sid * num_temporal_layers_ + tid];
+        int single_layer_bitrate_kbps =
+            accumulated_bitrate_kbps - previous_bitrate_kbps;
+        RTC_DCHECK_GE(single_layer_bitrate_kbps, 0);
+        current_bitrate_allocation_.SetBitrate(
+            sid, tid, single_layer_bitrate_kbps * 1'000);
+        previous_bitrate_kbps = accumulated_bitrate_kbps;
+      }
+    }
+    svc_controller_->OnRatesUpdated(current_bitrate_allocation_);
+  } else {
+    current_bitrate_allocation_ = bitrate_allocation;
+  }
+  config_changed_ = true;
+  return true;
+}
+
+// Zeroes all target bitrates of spatial layer `sid`, which signals libvpx
+// not to produce that layer. No-op if the layer is already disabled.
+void LibvpxVp9Encoder::DisableSpatialLayer(int sid) {
+  RTC_DCHECK_LT(sid, num_spatial_layers_);
+  if (config_->ss_target_bitrate[sid] == 0) {
+    return;
+  }
+  config_->ss_target_bitrate[sid] = 0;
+  for (int tid = 0; tid < num_temporal_layers_; ++tid) {
+    config_->layer_target_bitrate[sid * num_temporal_layers_ + tid] = 0;
+  }
+  config_changed_ = true;
+}
+
+// Restores the target bitrates of spatial layer `sid` from the current
+// bitrate allocation, re-enabling its production by libvpx. No-op if the
+// layer is already enabled.
+void LibvpxVp9Encoder::EnableSpatialLayer(int sid) {
+  RTC_DCHECK_LT(sid, num_spatial_layers_);
+  if (config_->ss_target_bitrate[sid] > 0) {
+    return;
+  }
+  for (int tid = 0; tid < num_temporal_layers_; ++tid) {
+    config_->layer_target_bitrate[sid * num_temporal_layers_ + tid] =
+        current_bitrate_allocation_.GetTemporalLayerSum(sid, tid) / 1000;
+  }
+  config_->ss_target_bitrate[sid] =
+      current_bitrate_allocation_.GetSpatialLayerSum(sid) / 1000;
+  RTC_DCHECK_GT(config_->ss_target_bitrate[sid], 0);
+  config_changed_ = true;
+}
+
+// Enables exactly the spatial layers the svc controller scheduled for the
+// next frame (`layer_frames_`) and disables the rest.
+void LibvpxVp9Encoder::SetActiveSpatialLayers() {
+  // Svc controller may decide to skip a frame at certain spatial layer even
+  // when bitrate for it is non-zero, however libvpx uses configured bitrate as
+  // a signal which layers should be produced.
+  RTC_DCHECK(svc_controller_);
+  RTC_DCHECK(!layer_frames_.empty());
+  RTC_DCHECK(absl::c_is_sorted(
+      layer_frames_, [](const ScalableVideoController::LayerFrameConfig& lhs,
+                        const ScalableVideoController::LayerFrameConfig& rhs) {
+        return lhs.SpatialId() < rhs.SpatialId();
+      }));
+
+  // `layer_frames_` is sorted by spatial id, so a single merge-style pass
+  // over [0, num_spatial_layers_) is enough.
+  auto frame_it = layer_frames_.begin();
+  for (int sid = 0; sid < num_spatial_layers_; ++sid) {
+    if (frame_it != layer_frames_.end() && frame_it->SpatialId() == sid) {
+      EnableSpatialLayer(sid);
+      ++frame_it;
+    } else {
+      DisableSpatialLayer(sid);
+    }
+  }
+}
+
+// VideoEncoder rate-control entry point: updates the target frame rate and
+// forwards the new bitrate allocation to SetSvcRates(). Invalid calls
+// (uninitialized encoder, encoder error, framerate < 1 fps) are logged and
+// ignored.
+void LibvpxVp9Encoder::SetRates(const RateControlParameters& parameters) {
+  if (!inited_) {
+    RTC_LOG(LS_WARNING) << "SetRates() called while uninitialized.";
+    return;
+  }
+  if (encoder_->err) {
+    RTC_LOG(LS_WARNING) << "Encoder in error state: " << encoder_->err;
+    return;
+  }
+  if (parameters.framerate_fps < 1.0) {
+    RTC_LOG(LS_WARNING) << "Unsupported framerate: "
+                        << parameters.framerate_fps;
+    return;
+  }
+
+  // Round to nearest integer fps.
+  codec_.maxFramerate = static_cast<uint32_t>(parameters.framerate_fps + 0.5);
+
+  bool res = SetSvcRates(parameters.bitrate);
+  RTC_DCHECK(res) << "Failed to set new bitrate allocation";
+  config_changed_ = true;
+}
+
+// TODO(eladalon): s/inst/codec_settings/g.
+// Validates the codec settings, (re)creates the libvpx encoder context,
+// derives the SVC structure (from `ScalabilityMode` when present, otherwise
+// from the legacy VP9-specific settings), fills the vpx_codec_enc_cfg_t, and
+// finishes via InitAndSetControlSettings(). Returns a WEBRTC_VIDEO_CODEC_*
+// status code.
+int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
+                                 const Settings& settings) {
+  if (inst == nullptr) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+  if (inst->maxFramerate < 1) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+  // Allow zero to represent an unspecified maxBitRate
+  if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+  if (inst->width < 1 || inst->height < 1) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+  if (settings.number_of_cores < 1) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+  if (inst->VP9().numberOfTemporalLayers > 3) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+  // libvpx probably does not support more than 3 spatial layers.
+  if (inst->VP9().numberOfSpatialLayers > 3) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+
+  // Remember the previous raw image format so a re-init can keep using it
+  // (Release() below frees `raw_`).
+  absl::optional<vpx_img_fmt_t> previous_img_fmt =
+      raw_ ? absl::make_optional<vpx_img_fmt_t>(raw_->fmt) : absl::nullopt;
+
+  int ret_val = Release();
+  if (ret_val < 0) {
+    return ret_val;
+  }
+  if (encoder_ == nullptr) {
+    encoder_ = new vpx_codec_ctx_t;
+    memset(encoder_, 0, sizeof(*encoder_));
+  }
+  if (config_ == nullptr) {
+    config_ = new vpx_codec_enc_cfg_t;
+    memset(config_, 0, sizeof(*config_));
+  }
+  timestamp_ = 0;
+  if (&codec_ != inst) {
+    codec_ = *inst;
+  }
+  memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));
+
+  force_key_frame_ = true;
+  pics_since_key_ = 0;
+  num_cores_ = settings.number_of_cores;
+
+  scalability_mode_ = inst->GetScalabilityMode();
+  if (scalability_mode_.has_value()) {
+    // Use settings from `ScalabilityMode` identifier.
+    RTC_LOG(LS_INFO) << "Create scalability structure "
+                     << ScalabilityModeToString(*scalability_mode_);
+    svc_controller_ = CreateScalabilityStructure(*scalability_mode_);
+    if (!svc_controller_) {
+      RTC_LOG(LS_WARNING) << "Failed to create scalability structure.";
+      return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+    }
+    ScalableVideoController::StreamLayersConfig info =
+        svc_controller_->StreamConfig();
+    num_spatial_layers_ = info.num_spatial_layers;
+    num_temporal_layers_ = info.num_temporal_layers;
+    inter_layer_pred_ = ScalabilityModeToInterLayerPredMode(*scalability_mode_);
+  } else {
+    // Legacy path: derive the structure from the VP9-specific settings.
+    num_spatial_layers_ = inst->VP9().numberOfSpatialLayers;
+    RTC_DCHECK_GT(num_spatial_layers_, 0);
+    num_temporal_layers_ = inst->VP9().numberOfTemporalLayers;
+    if (num_temporal_layers_ == 0) {
+      num_temporal_layers_ = 1;
+    }
+    inter_layer_pred_ = inst->VP9().interLayerPred;
+    auto vp9_scalability = CreateVp9ScalabilityStructure(*inst);
+    if (vp9_scalability.has_value()) {
+      std::tie(svc_controller_, scalability_mode_) =
+          std::move(vp9_scalability.value());
+    } else {
+      svc_controller_ = nullptr;
+      scalability_mode_ = absl::nullopt;
+    }
+  }
+
+  framerate_controller_ = std::vector<FramerateControllerDeprecated>(
+      num_spatial_layers_, FramerateControllerDeprecated(codec_.maxFramerate));
+
+  is_svc_ = (num_spatial_layers_ > 1 || num_temporal_layers_ > 1);
+
+  // Populate encoder configuration with default values.
+  if (libvpx_->codec_enc_config_default(vpx_codec_vp9_cx(), config_, 0)) {
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+
+  // Pick pixel format and bit depth from the negotiated VP9 profile.
+  vpx_img_fmt img_fmt = VPX_IMG_FMT_NONE;
+  unsigned int bits_for_storage = 8;
+  switch (profile_) {
+    case VP9Profile::kProfile0:
+      img_fmt = previous_img_fmt.value_or(VPX_IMG_FMT_I420);
+      bits_for_storage = 8;
+      config_->g_bit_depth = VPX_BITS_8;
+      config_->g_profile = 0;
+      config_->g_input_bit_depth = 8;
+      break;
+    case VP9Profile::kProfile1:
+      // Encoding of profile 1 is not implemented. It would require extended
+      // support for I444, I422, and I440 buffers.
+      RTC_DCHECK_NOTREACHED();
+      break;
+    case VP9Profile::kProfile2:
+      img_fmt = VPX_IMG_FMT_I42016;
+      bits_for_storage = 16;
+      config_->g_bit_depth = VPX_BITS_10;
+      config_->g_profile = 2;
+      config_->g_input_bit_depth = 10;
+      break;
+    case VP9Profile::kProfile3:
+      // Encoding of profile 3 is not implemented.
+      RTC_DCHECK_NOTREACHED();
+      break;
+  }
+
+  // Creating a wrapper to the image - setting image data to nullptr. Actual
+  // pointer will be set in encode. Setting align to 1, as it is meaningless
+  // (actual memory is not allocated).
+  raw_ = libvpx_->img_wrap(nullptr, img_fmt, codec_.width, codec_.height, 1,
+                           nullptr);
+  raw_->bit_depth = bits_for_storage;
+
+  config_->g_w = codec_.width;
+  config_->g_h = codec_.height;
+  config_->rc_target_bitrate = inst->startBitrate;  // in kbit/s
+  config_->g_error_resilient = is_svc_ ? VPX_ERROR_RESILIENT_DEFAULT : 0;
+  // Setting the time base of the codec.
+  config_->g_timebase.num = 1;
+  config_->g_timebase.den = 90000;
+  config_->g_lag_in_frames = 0;  // 0- no frame lagging
+  config_->g_threads = 1;
+  // Rate control settings.
+  config_->rc_dropframe_thresh = inst->GetFrameDropEnabled() ? 30 : 0;
+  config_->rc_end_usage = VPX_CBR;
+  config_->g_pass = VPX_RC_ONE_PASS;
+  config_->rc_min_quantizer =
+      codec_.mode == VideoCodecMode::kScreensharing ? 8 : 2;
+  config_->rc_max_quantizer = 52;
+  config_->rc_undershoot_pct = 50;
+  config_->rc_overshoot_pct = 50;
+  config_->rc_buf_initial_sz = 500;
+  config_->rc_buf_optimal_sz = 600;
+  config_->rc_buf_sz = 1000;
+  // Set the maximum target size of any key-frame.
+  rc_max_intra_target_ = MaxIntraTarget(config_->rc_buf_optimal_sz);
+  // Key-frame interval is enforced manually by this wrapper.
+  config_->kf_mode = VPX_KF_DISABLED;
+  // TODO(webm:1592): work-around for libvpx issue, as it can still
+  // put some key-frames at will even in VPX_KF_DISABLED kf_mode.
+  config_->kf_max_dist = inst->VP9().keyFrameInterval;
+  config_->kf_min_dist = config_->kf_max_dist;
+  if (quality_scaler_experiment_.enabled) {
+    // In that experiment webrtc wide quality scaler is used instead of libvpx
+    // internal scaler.
+    config_->rc_resize_allowed = 0;
+  } else {
+    config_->rc_resize_allowed = inst->VP9().automaticResizeOn ? 1 : 0;
+  }
+  // Determine number of threads based on the image size and #cores.
+  config_->g_threads =
+      NumberOfThreads(config_->g_w, config_->g_h, settings.number_of_cores);
+
+  is_flexible_mode_ = inst->VP9().flexibleMode;
+
+  if (num_spatial_layers_ > 1 &&
+      codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) {
+    RTC_LOG(LS_ERROR) << "Flexible mode is required for screenshare with "
+                         "several spatial layers";
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+
+  // External reference control is required for different frame rate on spatial
+  // layers because libvpx generates rtp incompatible references in this case.
+  external_ref_control_ = external_ref_ctrl_ ||
+                          (num_spatial_layers_ > 1 &&
+                           codec_.mode == VideoCodecMode::kScreensharing) ||
+                          inter_layer_pred_ == InterLayerPredMode::kOn;
+
+  // Configure the temporal layering pattern (GOF) and the matching libvpx
+  // rate decimators / layer ids for 1, 2 or 3 temporal layers.
+  if (num_temporal_layers_ == 1) {
+    gof_.SetGofInfoVP9(kTemporalStructureMode1);
+    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
+    config_->ts_number_layers = 1;
+    config_->ts_rate_decimator[0] = 1;
+    config_->ts_periodicity = 1;
+    config_->ts_layer_id[0] = 0;
+  } else if (num_temporal_layers_ == 2) {
+    gof_.SetGofInfoVP9(kTemporalStructureMode2);
+    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101;
+    config_->ts_number_layers = 2;
+    config_->ts_rate_decimator[0] = 2;
+    config_->ts_rate_decimator[1] = 1;
+    config_->ts_periodicity = 2;
+    config_->ts_layer_id[0] = 0;
+    config_->ts_layer_id[1] = 1;
+  } else if (num_temporal_layers_ == 3) {
+    gof_.SetGofInfoVP9(kTemporalStructureMode3);
+    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212;
+    config_->ts_number_layers = 3;
+    config_->ts_rate_decimator[0] = 4;
+    config_->ts_rate_decimator[1] = 2;
+    config_->ts_rate_decimator[2] = 1;
+    config_->ts_periodicity = 4;
+    config_->ts_layer_id[0] = 0;
+    config_->ts_layer_id[1] = 2;
+    config_->ts_layer_id[2] = 1;
+    config_->ts_layer_id[3] = 2;
+  } else {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+
+  if (external_ref_control_) {
+    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+    if (num_temporal_layers_ > 1 && num_spatial_layers_ > 1 &&
+        codec_.mode == VideoCodecMode::kScreensharing) {
+      // External reference control for several temporal layers with different
+      // frame rates on spatial layers is not implemented yet.
+      return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+    }
+  }
+  ref_buf_ = {};
+
+  return InitAndSetControlSettings(inst);
+}
+
+// Chooses the libvpx thread count from resolution and available cores:
+// 4 for >=720p, 2 for >=360p (and for >=180p on ARM/Android), else 1.
+int LibvpxVp9Encoder::NumberOfThreads(int width,
+                                      int height,
+                                      int number_of_cores) {
+  // Keep the number of encoder threads equal to the possible number of column
+  // tiles, which is (1, 2, 4, 8). See comments below for VP9E_SET_TILE_COLUMNS.
+  if (width * height >= 1280 * 720 && number_of_cores > 4) {
+    return 4;
+  } else if (width * height >= 640 * 360 && number_of_cores > 2) {
+    return 2;
+  } else {
+// Use 2 threads for low res on ARM.
+#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || \
+    defined(WEBRTC_ANDROID)
+    if (width * height >= 320 * 180 && number_of_cores > 2) {
+      return 2;
+    }
+#endif
+    // 1 thread less than VGA.
+    return 1;
+  }
+}
+
+int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) {
+ // Set QP-min/max per spatial and temporal layer.
+ int tot_num_layers = num_spatial_layers_ * num_temporal_layers_;
+ for (int i = 0; i < tot_num_layers; ++i) {
+ svc_params_.max_quantizers[i] = config_->rc_max_quantizer;
+ svc_params_.min_quantizers[i] = config_->rc_min_quantizer;
+ }
+ config_->ss_number_layers = num_spatial_layers_;
+ if (svc_controller_) {
+ auto stream_config = svc_controller_->StreamConfig();
+ for (int i = 0; i < stream_config.num_spatial_layers; ++i) {
+ svc_params_.scaling_factor_num[i] = stream_config.scaling_factor_num[i];
+ svc_params_.scaling_factor_den[i] = stream_config.scaling_factor_den[i];
+ }
+ } else if (ExplicitlyConfiguredSpatialLayers()) {
+ for (int i = 0; i < num_spatial_layers_; ++i) {
+ const auto& layer = codec_.spatialLayers[i];
+ RTC_CHECK_GT(layer.width, 0);
+ const int scale_factor = codec_.width / layer.width;
+ RTC_DCHECK_GT(scale_factor, 0);
+
+ // Ensure scaler factor is integer.
+ if (scale_factor * layer.width != codec_.width) {
+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+ }
+
+ // Ensure scale factor is the same in both dimensions.
+ if (scale_factor * layer.height != codec_.height) {
+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+ }
+
+ // Ensure scale factor is power of two.
+ const bool is_pow_of_two = (scale_factor & (scale_factor - 1)) == 0;
+ if (!is_pow_of_two) {
+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+ }
+
+ svc_params_.scaling_factor_num[i] = 1;
+ svc_params_.scaling_factor_den[i] = scale_factor;
+
+ RTC_DCHECK_GT(codec_.spatialLayers[i].maxFramerate, 0);
+ RTC_DCHECK_LE(codec_.spatialLayers[i].maxFramerate, codec_.maxFramerate);
+ if (i > 0) {
+ // Frame rate of high spatial layer is supposed to be equal or higher
+ // than frame rate of low spatial layer.
+ RTC_DCHECK_GE(codec_.spatialLayers[i].maxFramerate,
+ codec_.spatialLayers[i - 1].maxFramerate);
+ }
+ }
+ } else {
+ int scaling_factor_num = 256;
+ for (int i = num_spatial_layers_ - 1; i >= 0; --i) {
+ // 1:2 scaling in each dimension.
+ svc_params_.scaling_factor_num[i] = scaling_factor_num;
+ svc_params_.scaling_factor_den[i] = 256;
+ }
+ }
+
+ UpdatePerformanceFlags();
+ RTC_DCHECK_EQ(performance_flags_by_spatial_index_.size(),
+ static_cast<size_t>(num_spatial_layers_));
+
+ SvcRateAllocator init_allocator(codec_);
+ current_bitrate_allocation_ =
+ init_allocator.Allocate(VideoBitrateAllocationParameters(
+ inst->startBitrate * 1000, inst->maxFramerate));
+ if (!SetSvcRates(current_bitrate_allocation_)) {
+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+ }
+
+ const vpx_codec_err_t rv = libvpx_->codec_enc_init(
+ encoder_, vpx_codec_vp9_cx(), config_,
+ config_->g_bit_depth == VPX_BITS_8 ? 0 : VPX_CODEC_USE_HIGHBITDEPTH);
+ if (rv != VPX_CODEC_OK) {
+ RTC_LOG(LS_ERROR) << "Init error: " << libvpx_->codec_err_to_string(rv);
+ return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
+ }
+
+ if (performance_flags_.use_per_layer_speed) {
+ for (int si = 0; si < num_spatial_layers_; ++si) {
+ svc_params_.speed_per_layer[si] =
+ performance_flags_by_spatial_index_[si].base_layer_speed;
+ svc_params_.loopfilter_ctrl[si] =
+ performance_flags_by_spatial_index_[si].deblock_mode;
+ }
+ bool denoiser_on =
+ AllowDenoising() && inst->VP9().denoisingOn &&
+ performance_flags_by_spatial_index_[num_spatial_layers_ - 1]
+ .allow_denoising;
+ libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
+ denoiser_on ? 1 : 0);
+ }
+
+ libvpx_->codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT,
+ rc_max_intra_target_);
+ libvpx_->codec_control(encoder_, VP9E_SET_AQ_MODE,
+ inst->VP9().adaptiveQpMode ? 3 : 0);
+
+ libvpx_->codec_control(encoder_, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
+ libvpx_->codec_control(encoder_, VP9E_SET_SVC_GF_TEMPORAL_REF, 0);
+
+ if (is_svc_) {
+ libvpx_->codec_control(encoder_, VP9E_SET_SVC, 1);
+ libvpx_->codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_);
+ }
+ if (!is_svc_ || !performance_flags_.use_per_layer_speed) {
+ libvpx_->codec_control(
+ encoder_, VP8E_SET_CPUUSED,
+ performance_flags_by_spatial_index_.rbegin()->base_layer_speed);
+ }
+
+ if (num_spatial_layers_ > 1) {
+ switch (inter_layer_pred_) {
+ case InterLayerPredMode::kOn:
+ libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 0);
+ break;
+ case InterLayerPredMode::kOff:
+ libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 1);
+ break;
+ case InterLayerPredMode::kOnKeyPic:
+ libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 2);
+ break;
+ default:
+ RTC_DCHECK_NOTREACHED();
+ }
+
+ memset(&svc_drop_frame_, 0, sizeof(svc_drop_frame_));
+ const bool reverse_constrained_drop_mode =
+ inter_layer_pred_ == InterLayerPredMode::kOn &&
+ codec_.mode == VideoCodecMode::kScreensharing &&
+ num_spatial_layers_ > 1;
+ if (reverse_constrained_drop_mode) {
+ // Screenshare dropping mode: drop a layer only together with all lower
+ // layers. This ensures that drops on lower layers won't reduce frame-rate
+ // for higher layers and reference structure is RTP-compatible.
+ svc_drop_frame_.framedrop_mode = CONSTRAINED_FROM_ABOVE_DROP;
+ svc_drop_frame_.max_consec_drop = 5;
+ for (size_t i = 0; i < num_spatial_layers_; ++i) {
+ svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
+ }
+ // No buffering is needed because the highest layer is always present in
+ // all frames in CONSTRAINED_FROM_ABOVE drop mode.
+ layer_buffering_ = false;
+ } else {
+ // Configure encoder to drop entire superframe whenever it needs to drop
+ // a layer. This mode is preferred over per-layer dropping which causes
+ // quality flickering and is not compatible with RTP non-flexible mode.
+ svc_drop_frame_.framedrop_mode =
+ full_superframe_drop_ ? FULL_SUPERFRAME_DROP : CONSTRAINED_LAYER_DROP;
+ // Buffering is needed only for constrained layer drop, as it's not clear
+ // which frame is the last.
+ layer_buffering_ = !full_superframe_drop_;
+ svc_drop_frame_.max_consec_drop = std::numeric_limits<int>::max();
+ for (size_t i = 0; i < num_spatial_layers_; ++i) {
+ svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
+ }
+ }
+ libvpx_->codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
+ &svc_drop_frame_);
+ }
+
+ // Register callback for getting each spatial layer.
+ vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = {
+ LibvpxVp9Encoder::EncoderOutputCodedPacketCallback,
+ reinterpret_cast<void*>(this)};
+ libvpx_->codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK,
+ reinterpret_cast<void*>(&cbp));
+
+ // Control function to set the number of column tiles in encoding a frame, in
+ // log2 unit: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile columns.
+ // The number tile columns will be capped by the encoder based on image size
+ // (minimum width of tile column is 256 pixels, maximum is 4096).
+ libvpx_->codec_control(encoder_, VP9E_SET_TILE_COLUMNS,
+ static_cast<int>((config_->g_threads >> 1)));
+
+ // Turn on row-based multithreading.
+ libvpx_->codec_control(encoder_, VP9E_SET_ROW_MT, 1);
+
+ if (AllowDenoising() && !performance_flags_.use_per_layer_speed) {
+ libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
+ inst->VP9().denoisingOn ? 1 : 0);
+ }
+
+ if (codec_.mode == VideoCodecMode::kScreensharing) {
+ // Adjust internal parameters to screen content.
+ libvpx_->codec_control(encoder_, VP9E_SET_TUNE_CONTENT, 1);
+ }
+ // Enable encoder skip of static/low content blocks.
+ libvpx_->codec_control(encoder_, VP8E_SET_STATIC_THRESHOLD, 1);
+ inited_ = true;
+ config_changed_ = true;
+ return WEBRTC_VIDEO_CODEC_OK;
+}
+
+// Computes the max key-frame size target, expressed as a percentage of the
+// per-frame bandwidth (perFrameBw = targetBR[Kbps] * 1000 / framerate).
+// The cap is the optimal buffer level (normalized by target bitrate) scaled
+// by `kScalePar`:
+//   max target size = kScalePar * optimal_buffer_size * targetBR[Kbps].
+uint32_t LibvpxVp9Encoder::MaxIntraTarget(uint32_t optimal_buffer_size) {
+  const float kScalePar = 0.5f;
+  const uint32_t target_pct =
+      optimal_buffer_size * kScalePar * codec_.maxFramerate / 10;
+  // Never go below 3x the per-frame bandwidth (i.e. 300%).
+  const uint32_t kMinIntraSizePct = 300;
+  if (target_pct < kMinIntraSizePct) {
+    return kMinIntraSizePct;
+  }
+  return target_pct;
+}
+
+// Encodes `input_image`. Only the first entry of `frame_types` is inspected
+// (single stream); a key-frame request there sets `force_key_frame_`.
+// Returns WEBRTC_VIDEO_CODEC_OK on success (including deliberate frame
+// drops), WEBRTC_VIDEO_CODEC_UNINITIALIZED when the encoder or the
+// encoded-complete callback is not set up, and WEBRTC_VIDEO_CODEC_ERROR on
+// libvpx or buffer-conversion failures. The actual encoded output is
+// delivered through GetEncodedLayerFrame()/DeliverBufferedFrame().
+int LibvpxVp9Encoder::Encode(const VideoFrame& input_image,
+                             const std::vector<VideoFrameType>* frame_types) {
+  if (!inited_) {
+    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
+  }
+  if (encoded_complete_callback_ == nullptr) {
+    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
+  }
+  if (num_active_spatial_layers_ == 0) {
+    // All spatial layers are disabled, return without encoding anything.
+    return WEBRTC_VIDEO_CODEC_OK;
+  }
+
+  // We only support one stream at the moment.
+  if (frame_types && !frame_types->empty()) {
+    if ((*frame_types)[0] == VideoFrameType::kVideoFrameKey) {
+      force_key_frame_ = true;
+    }
+  }
+
+  // Honor the configured periodic key-frame interval.
+  if (pics_since_key_ + 1 ==
+      static_cast<size_t>(codec_.VP9()->keyFrameInterval)) {
+    force_key_frame_ = true;
+  }
+
+  // Ask the SVC controller (when present) for this frame's layer structure;
+  // it may itself demand a key frame.
+  if (svc_controller_) {
+    layer_frames_ = svc_controller_->NextFrameConfig(force_key_frame_);
+    if (layer_frames_.empty()) {
+      return WEBRTC_VIDEO_CODEC_ERROR;
+    }
+    if (layer_frames_.front().IsKeyframe()) {
+      force_key_frame_ = true;
+    }
+  }
+
+  // Pick the temporal layer from the GOF pattern and, for screensharing,
+  // apply per-layer frame-rate limiting (which may drop the whole picture).
+  vpx_svc_layer_id_t layer_id = {0};
+  if (!force_key_frame_) {
+    const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof;
+    layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx];
+
+    if (codec_.mode == VideoCodecMode::kScreensharing) {
+      const uint32_t frame_timestamp_ms =
+          1000 * input_image.timestamp() / kVideoPayloadTypeFrequency;
+
+      // To ensure that several rate-limiters with different limits don't
+      // interfere, they must be queried in order of increasing limit.
+
+      bool use_steady_state_limiter =
+          variable_framerate_experiment_.enabled &&
+          input_image.update_rect().IsEmpty() &&
+          num_steady_state_frames_ >=
+              variable_framerate_experiment_.frames_before_steady_state;
+
+      // Need to check all frame limiters, even if lower layers are disabled,
+      // because variable frame-rate limiter should be checked after the first
+      // layer. It's easier to overwrite active layers after, then check all
+      // cases.
+      for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
+        const float layer_fps =
+            framerate_controller_[layer_id.spatial_layer_id].GetTargetRate();
+        // Use steady state rate-limiter at the correct place.
+        if (use_steady_state_limiter &&
+            layer_fps > variable_framerate_experiment_.framerate_limit - 1e-9) {
+          if (variable_framerate_controller_.DropFrame(frame_timestamp_ms)) {
+            layer_id.spatial_layer_id = num_active_spatial_layers_;
+          }
+          // Break always: if rate limiter triggered frame drop, no need to
+          // continue; otherwise, the rate is less than the next limiters.
+          break;
+        }
+        if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) {
+          ++layer_id.spatial_layer_id;
+        } else {
+          break;
+        }
+      }
+
+      if (use_steady_state_limiter &&
+          layer_id.spatial_layer_id < num_active_spatial_layers_) {
+        variable_framerate_controller_.AddFrame(frame_timestamp_ms);
+      }
+    }
+
+    if (force_all_active_layers_) {
+      layer_id.spatial_layer_id = first_active_layer_;
+      force_all_active_layers_ = false;
+    }
+
+    RTC_DCHECK_LE(layer_id.spatial_layer_id, num_active_spatial_layers_);
+    if (layer_id.spatial_layer_id >= num_active_spatial_layers_) {
+      // Drop entire picture.
+      return WEBRTC_VIDEO_CODEC_OK;
+    }
+  }
+
+  // Need to set temporal layer id on ALL layers, even disabled ones.
+  // Otherwise libvpx might produce frames on a disabled layer:
+  // http://crbug.com/1051476
+  for (int sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
+    layer_id.temporal_layer_id_per_spatial[sl_idx] = layer_id.temporal_layer_id;
+  }
+
+  if (layer_id.spatial_layer_id < first_active_layer_) {
+    layer_id.spatial_layer_id = first_active_layer_;
+  }
+
+  // With an SVC controller, the layer ids come from its frame config instead
+  // of the GOF-derived values computed above.
+  if (svc_controller_) {
+    layer_id.spatial_layer_id = layer_frames_.front().SpatialId();
+    layer_id.temporal_layer_id = layer_frames_.front().TemporalId();
+    for (const auto& layer : layer_frames_) {
+      layer_id.temporal_layer_id_per_spatial[layer.SpatialId()] =
+          layer.TemporalId();
+    }
+    SetActiveSpatialLayers();
+  }
+
+  if (is_svc_ && performance_flags_.use_per_layer_speed) {
+    // Update speed settings that might depend on temporal index.
+    bool speed_updated = false;
+    for (int sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
+      const int target_speed =
+          layer_id.temporal_layer_id_per_spatial[sl_idx] == 0
+              ? performance_flags_by_spatial_index_[sl_idx].base_layer_speed
+              : performance_flags_by_spatial_index_[sl_idx].high_layer_speed;
+      if (svc_params_.speed_per_layer[sl_idx] != target_speed) {
+        svc_params_.speed_per_layer[sl_idx] = target_speed;
+        speed_updated = true;
+      }
+    }
+    if (speed_updated) {
+      libvpx_->codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_);
+    }
+  }
+
+  libvpx_->codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);
+
+  if (num_spatial_layers_ > 1) {
+    // Update frame dropping settings as they may change on per-frame basis.
+    libvpx_->codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
+                           &svc_drop_frame_);
+  }
+
+  if (config_changed_) {
+    if (libvpx_->codec_enc_config_set(encoder_, config_)) {
+      return WEBRTC_VIDEO_CODEC_ERROR;
+    }
+
+    if (!performance_flags_.use_per_layer_speed) {
+      // Not setting individual speeds per layer, find the highest active
+      // resolution instead and base the speed on that.
+      for (int i = num_spatial_layers_ - 1; i >= 0; --i) {
+        if (config_->ss_target_bitrate[i] > 0) {
+          int width = (svc_params_.scaling_factor_num[i] * config_->g_w) /
+                      svc_params_.scaling_factor_den[i];
+          int height = (svc_params_.scaling_factor_num[i] * config_->g_h) /
+                       svc_params_.scaling_factor_den[i];
+          int speed =
+              std::prev(performance_flags_.settings_by_resolution.lower_bound(
+                  width * height))
+                  ->second.base_layer_speed;
+          libvpx_->codec_control(encoder_, VP8E_SET_CPUUSED, speed);
+          break;
+        }
+      }
+    }
+    config_changed_ = false;
+  }
+
+  // Input resolution changed: reinitialize the encoder at the new size
+  // (see UpdateCodecFrameSize, a Mozilla-specific path).
+  if (input_image.width() != codec_.width ||
+      input_image.height() != codec_.height) {
+    int ret = UpdateCodecFrameSize(input_image);
+    if (ret < 0) {
+      return ret;
+    }
+  }
+
+  RTC_DCHECK_EQ(input_image.width(), raw_->d_w);
+  RTC_DCHECK_EQ(input_image.height(), raw_->d_h);
+
+  // Set input image for use in the callback.
+  // This was necessary since you need some information from input_image.
+  // You can save only the necessary information (such as timestamp) instead of
+  // doing this.
+  input_image_ = &input_image;
+
+  // In case we need to map the buffer, `mapped_buffer` is used to keep it alive
+  // through reference counting until after encoding has finished.
+  rtc::scoped_refptr<const VideoFrameBuffer> mapped_buffer;
+  const I010BufferInterface* i010_buffer;
+  rtc::scoped_refptr<const I010BufferInterface> i010_copy;
+  switch (profile_) {
+    case VP9Profile::kProfile0: {
+      mapped_buffer =
+          PrepareBufferForProfile0(input_image.video_frame_buffer());
+      if (!mapped_buffer) {
+        return WEBRTC_VIDEO_CODEC_ERROR;
+      }
+      break;
+    }
+    case VP9Profile::kProfile1: {
+      RTC_DCHECK_NOTREACHED();
+      break;
+    }
+    case VP9Profile::kProfile2: {
+      // We can inject kI010 frames directly for encode. All other formats
+      // should be converted to it.
+      switch (input_image.video_frame_buffer()->type()) {
+        case VideoFrameBuffer::Type::kI010: {
+          i010_buffer = input_image.video_frame_buffer()->GetI010();
+          break;
+        }
+        default: {
+          auto i420_buffer = input_image.video_frame_buffer()->ToI420();
+          if (!i420_buffer) {
+            RTC_LOG(LS_ERROR) << "Failed to convert "
+                              << VideoFrameBufferTypeToString(
+                                     input_image.video_frame_buffer()->type())
+                              << " image to I420. Can't encode frame.";
+            return WEBRTC_VIDEO_CODEC_ERROR;
+          }
+          i010_copy = I010Buffer::Copy(*i420_buffer);
+          i010_buffer = i010_copy.get();
+        }
+      }
+      // I010 samples are 16 bits wide; the * 2 converts pixel strides to the
+      // byte strides libvpx expects.
+      raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(
+          reinterpret_cast<const uint8_t*>(i010_buffer->DataY()));
+      raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(
+          reinterpret_cast<const uint8_t*>(i010_buffer->DataU()));
+      raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(
+          reinterpret_cast<const uint8_t*>(i010_buffer->DataV()));
+      raw_->stride[VPX_PLANE_Y] = i010_buffer->StrideY() * 2;
+      raw_->stride[VPX_PLANE_U] = i010_buffer->StrideU() * 2;
+      raw_->stride[VPX_PLANE_V] = i010_buffer->StrideV() * 2;
+      break;
+    }
+    case VP9Profile::kProfile3: {
+      RTC_DCHECK_NOTREACHED();
+      break;
+    }
+  }
+
+  vpx_enc_frame_flags_t flags = 0;
+  if (force_key_frame_) {
+    flags = VPX_EFLAG_FORCE_KF;
+  }
+
+  // Hand libvpx the per-layer reference configuration for this frame.
+  if (svc_controller_) {
+    vpx_svc_ref_frame_config_t ref_config = Vp9References(layer_frames_);
+    libvpx_->codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG,
+                           &ref_config);
+  } else if (external_ref_control_) {
+    vpx_svc_ref_frame_config_t ref_config =
+        SetReferences(force_key_frame_, layer_id.spatial_layer_id);
+
+    if (VideoCodecMode::kScreensharing == codec_.mode) {
+      for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
+        ref_config.duration[sl_idx] = static_cast<int64_t>(
+            90000 / (std::min(static_cast<float>(codec_.maxFramerate),
+                              framerate_controller_[sl_idx].GetTargetRate())));
+      }
+    }
+
+    libvpx_->codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG,
+                           &ref_config);
+  }
+
+  first_frame_in_picture_ = true;
+
+  // TODO(ssilkin): Frame duration should be specified per spatial layer
+  // since their frame rate can be different. For now calculate frame duration
+  // based on target frame rate of the highest spatial layer, which frame rate
+  // is supposed to be equal or higher than frame rate of low spatial layers.
+  // Also, timestamp should represent actual time passed since previous frame
+  // (not 'expected' time). Then rate controller can drain buffer more
+  // accurately.
+  RTC_DCHECK_GE(framerate_controller_.size(), num_active_spatial_layers_);
+  float target_framerate_fps =
+      (codec_.mode == VideoCodecMode::kScreensharing)
+          ? std::min(static_cast<float>(codec_.maxFramerate),
+                     framerate_controller_[num_active_spatial_layers_ - 1]
+                         .GetTargetRate())
+          : codec_.maxFramerate;
+  uint32_t duration = static_cast<uint32_t>(90000 / target_framerate_fps);
+  const vpx_codec_err_t rv = libvpx_->codec_encode(
+      encoder_, raw_, timestamp_, duration, flags, VPX_DL_REALTIME);
+  if (rv != VPX_CODEC_OK) {
+    RTC_LOG(LS_ERROR) << "Encoding error: " << libvpx_->codec_err_to_string(rv)
+                      << "\n"
+                         "Details: "
+                      << libvpx_->codec_error(encoder_) << "\n"
+                      << libvpx_->codec_error_detail(encoder_);
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+  timestamp_ += duration;
+
+  if (layer_buffering_) {
+    // Flush the last spatial layer of this picture, which the output callback
+    // left buffered.
+    const bool end_of_picture = true;
+    DeliverBufferedFrame(end_of_picture);
+  }
+
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+// Mozilla-specific: reconfigures the encoder for a new input resolution by
+// tearing down and re-initializing the libvpx context (see the NOTE below on
+// why vpx_codec_enc_config_set() is not used).
+// NOTE(review): re-initialization loses rate-control state; on a successful
+// re-init this intentionally hits RTC_CHECK(false) below until bitrate/
+// framerate state restoration is implemented (see the Mozilla TODOs).
+int LibvpxVp9Encoder::UpdateCodecFrameSize(
+    const VideoFrame& input_image) {
+  RTC_LOG(LS_INFO) << "Reconfiguring VP9 from " <<
+      codec_.width << "x" << codec_.height << " to " <<
+      input_image.width() << "x" << input_image.height();
+  // Preserve latest bitrate/framerate setting
+  // TODO: Mozilla - see below, we need to save more state here.
+  //uint32_t old_bitrate_kbit = config_->rc_target_bitrate;
+  //uint32_t old_framerate = codec_.maxFramerate;
+
+  codec_.width = input_image.width();
+  codec_.height = input_image.height();
+
+  // Re-wrap the raw image descriptor at the new dimensions (planes are set
+  // per-frame in Encode()).
+  vpx_img_free(raw_);
+  raw_ = vpx_img_wrap(NULL, VPX_IMG_FMT_I420, codec_.width, codec_.height,
+                      1, NULL);
+  // Update encoder context for new frame size.
+  config_->g_w = codec_.width;
+  config_->g_h = codec_.height;
+
+  // Determine number of threads based on the image size and #cores.
+  config_->g_threads = NumberOfThreads(codec_.width, codec_.height,
+                                       num_cores_);
+
+  // NOTE: We would like to do this the same way vp8 does it
+  // (with vpx_codec_enc_config_set()), but that causes asserts
+  // in AQ 3 (cyclic); and in AQ 0 it works, but on a resize to smaller
+  // than 1/2 x 1/2 original it asserts in convolve(). Given these
+  // bugs in trying to do it the "right" way, we basically re-do
+  // the initialization.
+  vpx_codec_destroy(encoder_); // clean up old state
+  int result = InitAndSetControlSettings(&codec_);
+  if (result == WEBRTC_VIDEO_CODEC_OK) {
+    // TODO: Mozilla rates have become much more complicated, we need to store
+    // more state or find another way of doing this.
+    //return SetRates(old_bitrate_kbit, old_framerate);
+    RTC_CHECK(false);
+    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
+  }
+  return result;
+}
+
+// Fills the VP9 codec-specific info for encoded packet `pkt` and reports the
+// frame's spatial/temporal indices via the out-params (absl::nullopt when
+// only one layer of that kind exists). Maintains `pics_since_key_` and emits
+// SS (scalability structure) data when needed. Returns false when the
+// encoder produced a frame the SVC controller did not request; the caller
+// (GetEncodedLayerFrame) drops such frames.
+bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
+                                             absl::optional<int>* spatial_idx,
+                                             absl::optional<int>* temporal_idx,
+                                             const vpx_codec_cx_pkt& pkt) {
+  RTC_CHECK(codec_specific != nullptr);
+  codec_specific->codecType = kVideoCodecVP9;
+  CodecSpecificInfoVP9* vp9_info = &(codec_specific->codecSpecific.VP9);
+
+  vp9_info->first_frame_in_picture = first_frame_in_picture_;
+  vp9_info->flexible_mode = is_flexible_mode_;
+
+  // Track pictures since the last key frame; advance only once per picture
+  // (on its first spatial-layer frame).
+  if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
+    pics_since_key_ = 0;
+  } else if (first_frame_in_picture_) {
+    ++pics_since_key_;
+  }
+
+  vpx_svc_layer_id_t layer_id = {0};
+  libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
+
+  // Can't have keyframe with non-zero temporal layer.
+  RTC_DCHECK(pics_since_key_ != 0 || layer_id.temporal_layer_id == 0);
+
+  RTC_CHECK_GT(num_temporal_layers_, 0);
+  RTC_CHECK_GT(num_active_spatial_layers_, 0);
+  if (num_temporal_layers_ == 1) {
+    RTC_CHECK_EQ(layer_id.temporal_layer_id, 0);
+    vp9_info->temporal_idx = kNoTemporalIdx;
+    *temporal_idx = absl::nullopt;
+  } else {
+    vp9_info->temporal_idx = layer_id.temporal_layer_id;
+    *temporal_idx = layer_id.temporal_layer_id;
+  }
+  if (num_active_spatial_layers_ == 1) {
+    RTC_CHECK_EQ(layer_id.spatial_layer_id, 0);
+    *spatial_idx = absl::nullopt;
+  } else {
+    *spatial_idx = layer_id.spatial_layer_id;
+  }
+
+  const bool is_key_pic = (pics_since_key_ == 0);
+  const bool is_inter_layer_pred_allowed =
+      (inter_layer_pred_ == InterLayerPredMode::kOn ||
+       (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic));
+
+  // Always set inter_layer_predicted to true on high layer frame if inter-layer
+  // prediction (ILP) is allowed even if encoder didn't actually use it.
+  // Setting inter_layer_predicted to false would allow receiver to decode high
+  // layer frame without decoding low layer frame. If that would happen (e.g.
+  // if low layer frame is lost) then receiver won't be able to decode next high
+  // layer frame which uses ILP.
+  vp9_info->inter_layer_predicted =
+      first_frame_in_picture_ ? false : is_inter_layer_pred_allowed;
+
+  // Mark all low spatial layer frames as references (not just frames of
+  // active low spatial layers) if inter-layer prediction is enabled since
+  // these frames are indirect references of high spatial layer, which can
+  // later be enabled without key frame.
+  vp9_info->non_ref_for_inter_layer_pred =
+      !is_inter_layer_pred_allowed ||
+      layer_id.spatial_layer_id + 1 == num_spatial_layers_;
+
+  // Always populate this, so that the packetizer can properly set the marker
+  // bit.
+  vp9_info->num_spatial_layers = num_active_spatial_layers_;
+  vp9_info->first_active_layer = first_active_layer_;
+
+  vp9_info->num_ref_pics = 0;
+  FillReferenceIndices(pkt, pics_since_key_, vp9_info->inter_layer_predicted,
+                       vp9_info);
+  if (vp9_info->flexible_mode) {
+    vp9_info->gof_idx = kNoGofIdx;
+    if (!svc_controller_) {
+      if (num_temporal_layers_ == 1) {
+        vp9_info->temporal_up_switch = true;
+      } else {
+        // In flexible mode with > 1 temporal layer but no SVC controller we
+        // can't technically determine if a frame is an upswitch point, use
+        // gof-based data as proxy for now.
+        // TODO(sprang): Remove once SVC controller is the only choice.
+        vp9_info->gof_idx =
+            static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
+        vp9_info->temporal_up_switch =
+            gof_.temporal_up_switch[vp9_info->gof_idx];
+      }
+    }
+  } else {
+    vp9_info->gof_idx =
+        static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
+    vp9_info->temporal_up_switch = gof_.temporal_up_switch[vp9_info->gof_idx];
+    RTC_DCHECK(vp9_info->num_ref_pics == gof_.num_ref_pics[vp9_info->gof_idx] ||
+               vp9_info->num_ref_pics == 0);
+  }
+
+  vp9_info->inter_pic_predicted = (!is_key_pic && vp9_info->num_ref_pics > 0);
+
+  // Write SS on key frame of independently coded spatial layers and on base
+  // temporal/spatial layer frame if number of layers changed without issuing
+  // of key picture (inter-layer prediction is enabled).
+  const bool is_key_frame = is_key_pic && !vp9_info->inter_layer_predicted;
+  if (is_key_frame || (ss_info_needed_ && layer_id.temporal_layer_id == 0 &&
+                       layer_id.spatial_layer_id == first_active_layer_)) {
+    vp9_info->ss_data_available = true;
+    vp9_info->spatial_layer_resolution_present = true;
+    // Signal disabled layers.
+    for (size_t i = 0; i < first_active_layer_; ++i) {
+      vp9_info->width[i] = 0;
+      vp9_info->height[i] = 0;
+    }
+    for (size_t i = first_active_layer_; i < num_active_spatial_layers_; ++i) {
+      vp9_info->width[i] = codec_.width * svc_params_.scaling_factor_num[i] /
+                           svc_params_.scaling_factor_den[i];
+      vp9_info->height[i] = codec_.height * svc_params_.scaling_factor_num[i] /
+                            svc_params_.scaling_factor_den[i];
+    }
+    if (vp9_info->flexible_mode) {
+      vp9_info->gof.num_frames_in_gof = 0;
+    } else {
+      vp9_info->gof.CopyGofInfoVP9(gof_);
+    }
+
+    ss_info_needed_ = false;
+  } else {
+    vp9_info->ss_data_available = false;
+  }
+
+  first_frame_in_picture_ = false;
+
+  // Populate codec-agnostic section in the codec specific structure.
+  if (svc_controller_) {
+    auto it = absl::c_find_if(
+        layer_frames_,
+        [&](const ScalableVideoController::LayerFrameConfig& config) {
+          return config.SpatialId() == layer_id.spatial_layer_id;
+        });
+    if (it == layer_frames_.end()) {
+      RTC_LOG(LS_ERROR) << "Encoder produced a frame for layer S"
+                        << layer_id.spatial_layer_id << "T"
+                        << layer_id.temporal_layer_id
+                        << " that wasn't requested.";
+      return false;
+    }
+    codec_specific->generic_frame_info = svc_controller_->OnEncodeDone(*it);
+    if (is_key_frame) {
+      codec_specific->template_structure =
+          svc_controller_->DependencyStructure();
+      auto& resolutions = codec_specific->template_structure->resolutions;
+      resolutions.resize(num_spatial_layers_);
+      for (int sid = 0; sid < num_spatial_layers_; ++sid) {
+        resolutions[sid] = RenderResolution(
+            /*width=*/codec_.width * svc_params_.scaling_factor_num[sid] /
+                svc_params_.scaling_factor_den[sid],
+            /*height=*/codec_.height * svc_params_.scaling_factor_num[sid] /
+                svc_params_.scaling_factor_den[sid]);
+      }
+    }
+    if (is_flexible_mode_) {
+      // Populate data for legacy temporal-upswitch state.
+      // We can switch up to a higher temporal layer only if all temporal layers
+      // higher than this (within the current spatial layer) are switch points.
+      vp9_info->temporal_up_switch = true;
+      for (int i = layer_id.temporal_layer_id + 1; i < num_temporal_layers_;
+           ++i) {
+        // Assumes decode targets are always ordered first by spatial then by
+        // temporal id.
+        size_t dti_index =
+            (layer_id.spatial_layer_id * num_temporal_layers_) + i;
+        vp9_info->temporal_up_switch &=
+            (codec_specific->generic_frame_info
+                 ->decode_target_indications[dti_index] ==
+             DecodeTargetIndication::kSwitch);
+      }
+    }
+  }
+  codec_specific->scalability_mode = scalability_mode_;
+  return true;
+}
+
+// Fills `vp9_info->num_ref_pics` / `p_diff` with the picture-id differences
+// of the temporal references actually used for this frame, as reported by
+// libvpx (SVC) or assumed to be the previous frame (non-SVC). Duplicate
+// references and inter-layer (same-picture) references are filtered out per
+// RTP payload constraints.
+//   `pic_num` - picture id of the frame being described.
+//   `inter_layer_predicted` - whether this frame is marked as inter-layer
+//       predicted (used only to validate same-picture references).
+void LibvpxVp9Encoder::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
+                                            const size_t pic_num,
+                                            const bool inter_layer_predicted,
+                                            CodecSpecificInfoVP9* vp9_info) {
+  vpx_svc_layer_id_t layer_id = {0};
+  libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
+
+  const bool is_key_frame =
+      (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
+
+  std::vector<RefFrameBuffer> ref_buf_list;
+
+  if (is_svc_) {
+    vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
+    libvpx_->codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG,
+                           &enc_layer_conf);
+    char ref_buf_flags[] = "00000000";
+    // There should be one character per buffer + 1 termination '\0'.
+    static_assert(sizeof(ref_buf_flags) == kNumVp9Buffers + 1);
+
+    // Collects buffer `fb_idx` into `ref_buf_list` (deduplicated) and marks
+    // it in `ref_buf_flags` for the verbose log below. Factored out of the
+    // three identical last/alt_ref/golden branches.
+    auto add_ref_buffer = [&](size_t fb_idx) {
+      RTC_DCHECK_LT(fb_idx, ref_buf_.size());
+      if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
+                    ref_buf_[fb_idx]) == ref_buf_list.end()) {
+        ref_buf_list.push_back(ref_buf_[fb_idx]);
+        ref_buf_flags[fb_idx] = '1';
+      }
+    };
+
+    if (enc_layer_conf.reference_last[layer_id.spatial_layer_id]) {
+      add_ref_buffer(enc_layer_conf.lst_fb_idx[layer_id.spatial_layer_id]);
+    }
+    if (enc_layer_conf.reference_alt_ref[layer_id.spatial_layer_id]) {
+      add_ref_buffer(enc_layer_conf.alt_fb_idx[layer_id.spatial_layer_id]);
+    }
+    if (enc_layer_conf.reference_golden[layer_id.spatial_layer_id]) {
+      add_ref_buffer(enc_layer_conf.gld_fb_idx[layer_id.spatial_layer_id]);
+    }
+
+    RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl "
+                        << layer_id.spatial_layer_id << " tl "
+                        << layer_id.temporal_layer_id << " refered buffers "
+                        << ref_buf_flags;
+
+  } else if (!is_key_frame) {
+    RTC_DCHECK_EQ(num_spatial_layers_, 1);
+    RTC_DCHECK_EQ(num_temporal_layers_, 1);
+    // In non-SVC mode encoder doesn't provide reference list. Assume each frame
+    // refers previous one, which is stored in buffer 0.
+    ref_buf_list.push_back(ref_buf_[0]);
+  }
+
+  std::vector<size_t> ref_pid_list;
+
+  vp9_info->num_ref_pics = 0;
+  for (const RefFrameBuffer& ref_buf : ref_buf_list) {
+    RTC_DCHECK_LE(ref_buf.pic_num, pic_num);
+    if (ref_buf.pic_num < pic_num) {
+      if (inter_layer_pred_ != InterLayerPredMode::kOn) {
+        // RTP spec limits temporal prediction to the same spatial layer.
+        // It is safe to ignore this requirement if inter-layer prediction is
+        // enabled for all frames when all base frames are relayed to receiver.
+        RTC_DCHECK_EQ(ref_buf.spatial_layer_id, layer_id.spatial_layer_id);
+      } else {
+        RTC_DCHECK_LE(ref_buf.spatial_layer_id, layer_id.spatial_layer_id);
+      }
+      RTC_DCHECK_LE(ref_buf.temporal_layer_id, layer_id.temporal_layer_id);
+
+      // Encoder may reference several spatial layers on the same previous
+      // frame in case if some spatial layers are skipped on the current frame.
+      // We shouldn't put duplicate references as it may break some old
+      // clients and isn't RTP compatible.
+      if (std::find(ref_pid_list.begin(), ref_pid_list.end(),
+                    ref_buf.pic_num) != ref_pid_list.end()) {
+        continue;
+      }
+      ref_pid_list.push_back(ref_buf.pic_num);
+
+      const size_t p_diff = pic_num - ref_buf.pic_num;
+      RTC_DCHECK_LE(p_diff, 127UL);
+
+      vp9_info->p_diff[vp9_info->num_ref_pics] = static_cast<uint8_t>(p_diff);
+      ++vp9_info->num_ref_pics;
+    } else {
+      RTC_DCHECK(inter_layer_predicted);
+      // RTP spec only allows to use previous spatial layer for inter-layer
+      // prediction.
+      RTC_DCHECK_EQ(ref_buf.spatial_layer_id + 1, layer_id.spatial_layer_id);
+    }
+  }
+}
+
+// Records which reference-buffer slots were overwritten by the frame just
+// encoded, so later frames can map libvpx buffer indices back to picture ids
+// and layer ids.
+void LibvpxVp9Encoder::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
+                                              const size_t pic_num) {
+  vpx_svc_layer_id_t layer_id = {0};
+  libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
+
+  const RefFrameBuffer this_frame = {
+      .pic_num = pic_num,
+      .spatial_layer_id = layer_id.spatial_layer_id,
+      .temporal_layer_id = layer_id.temporal_layer_id};
+
+  if (!is_svc_) {
+    RTC_DCHECK_EQ(num_spatial_layers_, 1);
+    RTC_DCHECK_EQ(num_temporal_layers_, 1);
+    // In non-svc mode encoder doesn't provide reference list. Assume each
+    // frame is reference and stored in buffer 0.
+    ref_buf_[0] = this_frame;
+    return;
+  }
+
+  vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
+  libvpx_->codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG,
+                         &enc_layer_conf);
+  const int update_buffer_slot =
+      enc_layer_conf.update_buffer_slot[layer_id.spatial_layer_id];
+
+  // Store this frame in every slot the encoder updated, building a per-slot
+  // '1'/'0' string for the verbose log at the same time.
+  char updated_flags[] = "00000000";
+  static_assert(sizeof(updated_flags) == kNumVp9Buffers + 1);
+  for (size_t i = 0; i < ref_buf_.size(); ++i) {
+    if (update_buffer_slot & (1 << i)) {
+      ref_buf_[i] = this_frame;
+      updated_flags[i] = '1';
+    }
+  }
+
+  RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl "
+                      << layer_id.spatial_layer_id << " tl "
+                      << layer_id.temporal_layer_id << " updated buffers "
+                      << updated_flags;
+}
+
+// Builds the reference configuration passed to libvpx via
+// VP9E_SET_SVC_REF_FRAME_CONFIG when external reference control is enabled
+// and no SVC controller is used.
+//   `is_key_pic` - the upcoming picture is a key picture.
+//   `first_active_spatial_layer_id` - lowest spatial layer being encoded.
+vpx_svc_ref_frame_config_t LibvpxVp9Encoder::SetReferences(
+    bool is_key_pic,
+    int first_active_spatial_layer_id) {
+  // kRefBufIdx, kUpdBufIdx need to be updated to support longer GOFs.
+  RTC_DCHECK_LE(gof_.num_frames_in_gof, 4);
+
+  vpx_svc_ref_frame_config_t ref_config;
+  memset(&ref_config, 0, sizeof(ref_config));
+
+  const size_t num_temporal_refs = std::max(1, num_temporal_layers_ - 1);
+  const bool is_inter_layer_pred_allowed =
+      inter_layer_pred_ == InterLayerPredMode::kOn ||
+      (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic);
+  absl::optional<int> last_updated_buf_idx;
+
+  // Put temporal reference to LAST and spatial reference to GOLDEN. Update
+  // frame buffer (i.e. store encoded frame) if current frame is a temporal
+  // reference (i.e. it belongs to a low temporal layer) or it is a spatial
+  // reference. In later case, always store spatial reference in the last
+  // reference frame buffer.
+  // For the case of 3 temporal and 3 spatial layers we need 6 frame buffers
+  // for temporal references plus 1 buffer for spatial reference. 7 buffers
+  // in total.
+
+  for (int sl_idx = first_active_spatial_layer_id;
+       sl_idx < num_active_spatial_layers_; ++sl_idx) {
+    const size_t curr_pic_num = is_key_pic ? 0 : pics_since_key_ + 1;
+    const size_t gof_idx = curr_pic_num % gof_.num_frames_in_gof;
+
+    if (!is_key_pic) {
+      // Set up temporal reference.
+      const int buf_idx = sl_idx * num_temporal_refs + kRefBufIdx[gof_idx];
+
+      // Last reference frame buffer is reserved for spatial reference. It is
+      // not supposed to be used for temporal prediction.
+      RTC_DCHECK_LT(buf_idx, kNumVp9Buffers - 1);
+
+      const int pid_diff = curr_pic_num - ref_buf_[buf_idx].pic_num;
+      // Incorrect spatial layer may be in the buffer due to a key-frame.
+      const bool same_spatial_layer =
+          ref_buf_[buf_idx].spatial_layer_id == sl_idx;
+      bool correct_pid = false;
+      if (is_flexible_mode_) {
+        correct_pid = pid_diff > 0 && pid_diff < kMaxAllowedPidDiff;
+      } else {
+        // Below code assumes single temporal reference.
+        RTC_DCHECK_EQ(gof_.num_ref_pics[gof_idx], 1);
+        correct_pid = pid_diff == gof_.pid_diff[gof_idx][0];
+      }
+
+      if (same_spatial_layer && correct_pid) {
+        ref_config.lst_fb_idx[sl_idx] = buf_idx;
+        ref_config.reference_last[sl_idx] = 1;
+      } else {
+        // This reference doesn't match with one specified by GOF. This can
+        // only happen if spatial layer is enabled dynamically without key
+        // frame. Spatial prediction is supposed to be enabled in this case.
+        RTC_DCHECK(is_inter_layer_pred_allowed &&
+                   sl_idx > first_active_spatial_layer_id);
+      }
+    }
+
+    if (is_inter_layer_pred_allowed && sl_idx > first_active_spatial_layer_id) {
+      // Set up spatial reference.
+      RTC_DCHECK(last_updated_buf_idx);
+      ref_config.gld_fb_idx[sl_idx] = *last_updated_buf_idx;
+      ref_config.reference_golden[sl_idx] = 1;
+    } else {
+      RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 ||
+                 sl_idx == first_active_spatial_layer_id ||
+                 inter_layer_pred_ == InterLayerPredMode::kOff);
+    }
+
+    last_updated_buf_idx.reset();
+
+    // Decide which buffer (if any) this layer frame should be stored in:
+    // low temporal layers go to their GOF-assigned slot, otherwise the
+    // reserved last buffer when it may serve as a spatial reference.
+    if (gof_.temporal_idx[gof_idx] < num_temporal_layers_ - 1 ||
+        num_temporal_layers_ == 1) {
+      last_updated_buf_idx = sl_idx * num_temporal_refs + kUpdBufIdx[gof_idx];
+
+      // Ensure last frame buffer is not used for temporal prediction (it is
+      // reserved for spatial reference).
+      RTC_DCHECK_LT(*last_updated_buf_idx, kNumVp9Buffers - 1);
+    } else if (is_inter_layer_pred_allowed) {
+      last_updated_buf_idx = kNumVp9Buffers - 1;
+    }
+
+    if (last_updated_buf_idx) {
+      ref_config.update_buffer_slot[sl_idx] = 1 << *last_updated_buf_idx;
+    }
+  }
+
+  return ref_config;
+}
+
+// Output handler for each encoded spatial-layer packet
+// (VPX_CODEC_CX_FRAME_PKT), invoked via the callback registered with
+// VP9E_REGISTER_CX_CALLBACK. Copies the payload into `encoded_image_`,
+// populates codec-specific info and reference bookkeeping, and either
+// delivers the frame immediately or (when `layer_buffering_` is set) leaves
+// it buffered until the next layer arrives or the picture ends.
+void LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
+  RTC_DCHECK_EQ(pkt->kind, VPX_CODEC_CX_FRAME_PKT);
+
+  if (pkt->data.frame.sz == 0) {
+    // Ignore dropped frame.
+    return;
+  }
+
+  vpx_svc_layer_id_t layer_id = {0};
+  libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
+
+  if (layer_buffering_) {
+    // Deliver buffered low spatial layer frame.
+    const bool end_of_picture = false;
+    DeliverBufferedFrame(end_of_picture);
+  }
+
+  // Copy the payload; `encoded_image_` now describes this layer frame.
+  encoded_image_.SetEncodedData(EncodedImageBuffer::Create(
+      static_cast<const uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz));
+
+  codec_specific_ = {};
+  absl::optional<int> spatial_index;
+  absl::optional<int> temporal_index;
+  if (!PopulateCodecSpecific(&codec_specific_, &spatial_index, &temporal_index,
+                             *pkt)) {
+    // Drop the frame.
+    encoded_image_.set_size(0);
+    return;
+  }
+  encoded_image_.SetSpatialIndex(spatial_index);
+  encoded_image_.SetTemporalIndex(temporal_index);
+
+  // A layer frame only counts as a key frame if it is not inter-layer
+  // predicted (i.e. decodable without a lower layer).
+  const bool is_key_frame =
+      ((pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false) &&
+      !codec_specific_.codecSpecific.VP9.inter_layer_predicted;
+
+  // Ensure encoder issued key frame on request.
+  RTC_DCHECK(is_key_frame || !force_key_frame_);
+
+  // Check if encoded frame is a key frame.
+  encoded_image_._frameType = VideoFrameType::kVideoFrameDelta;
+  if (is_key_frame) {
+    encoded_image_._frameType = VideoFrameType::kVideoFrameKey;
+    force_key_frame_ = false;
+  }
+
+  UpdateReferenceBuffers(*pkt, pics_since_key_);
+
+  TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_.size());
+  encoded_image_.SetTimestamp(input_image_->timestamp());
+  encoded_image_.SetColorSpace(input_image_->color_space());
+  encoded_image_._encodedHeight =
+      pkt->data.frame.height[layer_id.spatial_layer_id];
+  encoded_image_._encodedWidth =
+      pkt->data.frame.width[layer_id.spatial_layer_id];
+  int qp = -1;
+  libvpx_->codec_control(encoder_, VP8E_GET_LAST_QUANTIZER, &qp);
+  encoded_image_.qp_ = qp;
+
+  if (!layer_buffering_) {
+    // No buffering: deliver right away, flagging end-of-picture when this is
+    // the highest active spatial layer.
+    const bool end_of_picture = encoded_image_.SpatialIndex().value_or(0) + 1 ==
+                                num_active_spatial_layers_;
+    DeliverBufferedFrame(end_of_picture);
+  }
+}
+
+void LibvpxVp9Encoder::DeliverBufferedFrame(bool end_of_picture) {
+ if (encoded_image_.size() > 0) {
+ if (num_spatial_layers_ > 1) {
+      // Restore frame dropping settings, as dropping may be temporarily
+      // forbidden due to dynamically enabled layers.
+ for (size_t i = 0; i < num_spatial_layers_; ++i) {
+ svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
+ }
+ }
+
+ codec_specific_.end_of_picture = end_of_picture;
+
+ encoded_complete_callback_->OnEncodedImage(encoded_image_,
+ &codec_specific_);
+
+ if (codec_.mode == VideoCodecMode::kScreensharing) {
+ const uint8_t spatial_idx = encoded_image_.SpatialIndex().value_or(0);
+ const uint32_t frame_timestamp_ms =
+ 1000 * encoded_image_.Timestamp() / kVideoPayloadTypeFrequency;
+ framerate_controller_[spatial_idx].AddFrame(frame_timestamp_ms);
+
+ const size_t steady_state_size = SteadyStateSize(
+ spatial_idx, codec_specific_.codecSpecific.VP9.temporal_idx);
+
+      // Only frames on spatial layers that may be limited in a steady state
+      // are considered for steady-state detection.
+ if (framerate_controller_[spatial_idx].GetTargetRate() >
+ variable_framerate_experiment_.framerate_limit + 1e-9) {
+ if (encoded_image_.qp_ <=
+ variable_framerate_experiment_.steady_state_qp &&
+ encoded_image_.size() <= steady_state_size) {
+ ++num_steady_state_frames_;
+ } else {
+ num_steady_state_frames_ = 0;
+ }
+ }
+ }
+ encoded_image_.set_size(0);
+ }
+}
+
+int LibvpxVp9Encoder::RegisterEncodeCompleteCallback(
+ EncodedImageCallback* callback) {
+ encoded_complete_callback_ = callback;
+ return WEBRTC_VIDEO_CODEC_OK;
+}
+
+VideoEncoder::EncoderInfo LibvpxVp9Encoder::GetEncoderInfo() const {
+ EncoderInfo info;
+ info.supports_native_handle = false;
+ info.implementation_name = "libvpx";
+ if (quality_scaler_experiment_.enabled && inited_ &&
+ codec_.VP9().automaticResizeOn) {
+ info.scaling_settings = VideoEncoder::ScalingSettings(
+ quality_scaler_experiment_.low_qp, quality_scaler_experiment_.high_qp);
+ } else {
+ info.scaling_settings = VideoEncoder::ScalingSettings::kOff;
+ }
+ info.has_trusted_rate_controller = trusted_rate_controller_;
+ info.is_hardware_accelerated = false;
+ if (inited_) {
+ // Find the max configured fps of any active spatial layer.
+ float max_fps = 0.0;
+ for (size_t si = 0; si < num_spatial_layers_; ++si) {
+ if (codec_.spatialLayers[si].active &&
+ codec_.spatialLayers[si].maxFramerate > max_fps) {
+ max_fps = codec_.spatialLayers[si].maxFramerate;
+ }
+ }
+
+ for (size_t si = 0; si < num_spatial_layers_; ++si) {
+ info.fps_allocation[si].clear();
+ if (!codec_.spatialLayers[si].active) {
+ continue;
+ }
+
+ // This spatial layer may already use a fraction of the total frame rate.
+ const float sl_fps_fraction =
+ codec_.spatialLayers[si].maxFramerate / max_fps;
+ for (size_t ti = 0; ti < num_temporal_layers_; ++ti) {
+ const uint32_t decimator =
+ num_temporal_layers_ <= 1 ? 1 : config_->ts_rate_decimator[ti];
+ RTC_DCHECK_GT(decimator, 0);
+ info.fps_allocation[si].push_back(
+ rtc::saturated_cast<uint8_t>(EncoderInfo::kMaxFramerateFraction *
+ (sl_fps_fraction / decimator)));
+ }
+ }
+ if (profile_ == VP9Profile::kProfile0) {
+ info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420,
+ VideoFrameBuffer::Type::kNV12};
+ }
+ }
+ if (!encoder_info_override_.resolution_bitrate_limits().empty()) {
+ info.resolution_bitrate_limits =
+ encoder_info_override_.resolution_bitrate_limits();
+ }
+ return info;
+}
+
+size_t LibvpxVp9Encoder::SteadyStateSize(int sid, int tid) {
+ const size_t bitrate_bps = current_bitrate_allocation_.GetBitrate(
+ sid, tid == kNoTemporalIdx ? 0 : tid);
+ const float fps = (codec_.mode == VideoCodecMode::kScreensharing)
+ ? std::min(static_cast<float>(codec_.maxFramerate),
+ framerate_controller_[sid].GetTargetRate())
+ : codec_.maxFramerate;
+ return static_cast<size_t>(
+ bitrate_bps / (8 * fps) *
+ (100 -
+ variable_framerate_experiment_.steady_state_undershoot_percentage) /
+ 100 +
+ 0.5);
+}
+
+// static
+LibvpxVp9Encoder::VariableFramerateExperiment
+LibvpxVp9Encoder::ParseVariableFramerateConfig(const FieldTrialsView& trials) {
+ FieldTrialFlag enabled = FieldTrialFlag("Enabled");
+ FieldTrialParameter<double> framerate_limit("min_fps", 5.0);
+ FieldTrialParameter<int> qp("min_qp", 32);
+ FieldTrialParameter<int> undershoot_percentage("undershoot", 30);
+ FieldTrialParameter<int> frames_before_steady_state(
+ "frames_before_steady_state", 5);
+ ParseFieldTrial({&enabled, &framerate_limit, &qp, &undershoot_percentage,
+ &frames_before_steady_state},
+ trials.Lookup("WebRTC-VP9VariableFramerateScreenshare"));
+ VariableFramerateExperiment config;
+ config.enabled = enabled.Get();
+ config.framerate_limit = framerate_limit.Get();
+ config.steady_state_qp = qp.Get();
+ config.steady_state_undershoot_percentage = undershoot_percentage.Get();
+ config.frames_before_steady_state = frames_before_steady_state.Get();
+
+ return config;
+}
+
+// static
+LibvpxVp9Encoder::QualityScalerExperiment
+LibvpxVp9Encoder::ParseQualityScalerConfig(const FieldTrialsView& trials) {
+ FieldTrialFlag disabled = FieldTrialFlag("Disabled");
+ FieldTrialParameter<int> low_qp("low_qp", kLowVp9QpThreshold);
+  FieldTrialParameter<int> high_qp("hihg_qp", kHighVp9QpThreshold);  // NOTE: "hihg_qp" typo is the established trial key; do not rename.
+ ParseFieldTrial({&disabled, &low_qp, &high_qp},
+ trials.Lookup("WebRTC-VP9QualityScaler"));
+ QualityScalerExperiment config;
+ config.enabled = !disabled.Get();
+ RTC_LOG(LS_INFO) << "Webrtc quality scaler for vp9 is "
+ << (config.enabled ? "enabled." : "disabled");
+ config.low_qp = low_qp.Get();
+ config.high_qp = high_qp.Get();
+
+ return config;
+}
+
+void LibvpxVp9Encoder::UpdatePerformanceFlags() {
+ flat_map<int, PerformanceFlags::ParameterSet> params_by_resolution;
+ if (codec_.GetVideoEncoderComplexity() ==
+ VideoCodecComplexity::kComplexityLow) {
+ // For low tier devices, always use speed 9. Only disable upper
+ // layer deblocking below QCIF.
+ params_by_resolution[0] = {.base_layer_speed = 9,
+ .high_layer_speed = 9,
+ .deblock_mode = 1,
+ .allow_denoising = true};
+ params_by_resolution[352 * 288] = {.base_layer_speed = 9,
+ .high_layer_speed = 9,
+ .deblock_mode = 0,
+ .allow_denoising = true};
+ } else {
+ params_by_resolution = performance_flags_.settings_by_resolution;
+ }
+
+ const auto find_speed = [&](int min_pixel_count) {
+ RTC_DCHECK(!params_by_resolution.empty());
+ auto it = params_by_resolution.upper_bound(min_pixel_count);
+ return std::prev(it)->second;
+ };
+ performance_flags_by_spatial_index_.clear();
+
+ if (is_svc_) {
+ for (int si = 0; si < num_spatial_layers_; ++si) {
+ performance_flags_by_spatial_index_.push_back(find_speed(
+ codec_.spatialLayers[si].width * codec_.spatialLayers[si].height));
+ }
+ } else {
+ performance_flags_by_spatial_index_.push_back(
+ find_speed(codec_.width * codec_.height));
+ }
+}
+
+// static
+LibvpxVp9Encoder::PerformanceFlags
+LibvpxVp9Encoder::ParsePerformanceFlagsFromTrials(
+ const FieldTrialsView& trials) {
+ struct Params : public PerformanceFlags::ParameterSet {
+ int min_pixel_count = 0;
+ };
+
+ FieldTrialStructList<Params> trials_list(
+ {FieldTrialStructMember("min_pixel_count",
+ [](Params* p) { return &p->min_pixel_count; }),
+ FieldTrialStructMember("high_layer_speed",
+ [](Params* p) { return &p->high_layer_speed; }),
+ FieldTrialStructMember("base_layer_speed",
+ [](Params* p) { return &p->base_layer_speed; }),
+ FieldTrialStructMember("deblock_mode",
+ [](Params* p) { return &p->deblock_mode; }),
+ FieldTrialStructMember("denoiser",
+ [](Params* p) { return &p->allow_denoising; })},
+ {});
+
+ FieldTrialFlag per_layer_speed("use_per_layer_speed");
+
+ ParseFieldTrial({&trials_list, &per_layer_speed},
+ trials.Lookup("WebRTC-VP9-PerformanceFlags"));
+
+ PerformanceFlags flags;
+ flags.use_per_layer_speed = per_layer_speed.Get();
+
+ constexpr int kMinSpeed = 1;
+ constexpr int kMaxSpeed = 9;
+ for (auto& f : trials_list.Get()) {
+ if (f.base_layer_speed < kMinSpeed || f.base_layer_speed > kMaxSpeed ||
+ f.high_layer_speed < kMinSpeed || f.high_layer_speed > kMaxSpeed ||
+ f.deblock_mode < 0 || f.deblock_mode > 2) {
+ RTC_LOG(LS_WARNING) << "Ignoring invalid performance flags: "
+ << "min_pixel_count = " << f.min_pixel_count
+ << ", high_layer_speed = " << f.high_layer_speed
+ << ", base_layer_speed = " << f.base_layer_speed
+ << ", deblock_mode = " << f.deblock_mode;
+ continue;
+ }
+ flags.settings_by_resolution[f.min_pixel_count] = f;
+ }
+
+ if (flags.settings_by_resolution.empty()) {
+ return GetDefaultPerformanceFlags();
+ }
+
+ return flags;
+}
+
+// static
+LibvpxVp9Encoder::PerformanceFlags
+LibvpxVp9Encoder::GetDefaultPerformanceFlags() {
+ PerformanceFlags flags;
+ flags.use_per_layer_speed = true;
+#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || defined(ANDROID)
+ // Speed 8 on all layers for all resolutions.
+ flags.settings_by_resolution[0] = {.base_layer_speed = 8,
+ .high_layer_speed = 8,
+ .deblock_mode = 0,
+ .allow_denoising = true};
+#else
+
+ // For smaller resolutions, use lower speed setting for the temporal base
+ // layer (get some coding gain at the cost of increased encoding complexity).
+ // Set encoder Speed 5 for TL0, encoder Speed 8 for upper temporal layers, and
+ // disable deblocking for upper-most temporal layers.
+ flags.settings_by_resolution[0] = {.base_layer_speed = 5,
+ .high_layer_speed = 8,
+ .deblock_mode = 1,
+ .allow_denoising = true};
+
+ // Use speed 7 for QCIF and above.
+ // Set encoder Speed 7 for TL0, encoder Speed 8 for upper temporal layers, and
+ // enable deblocking for all temporal layers.
+ flags.settings_by_resolution[352 * 288] = {.base_layer_speed = 7,
+ .high_layer_speed = 8,
+ .deblock_mode = 0,
+ .allow_denoising = true};
+
+ // For very high resolution (1080p and up), turn the speed all the way up
+ // since this is very CPU intensive. Also disable denoising to save CPU, at
+  // these resolutions denoising appears less effective and hopefully you also
+ // have a less noisy video source at this point.
+ flags.settings_by_resolution[1920 * 1080] = {.base_layer_speed = 9,
+ .high_layer_speed = 9,
+ .deblock_mode = 0,
+ .allow_denoising = false};
+
+#endif
+ return flags;
+}
+
+void LibvpxVp9Encoder::MaybeRewrapRawWithFormat(const vpx_img_fmt fmt) {
+ if (!raw_) {
+ raw_ = libvpx_->img_wrap(nullptr, fmt, codec_.width, codec_.height, 1,
+ nullptr);
+ } else if (raw_->fmt != fmt) {
+ RTC_LOG(LS_INFO) << "Switching VP9 encoder pixel format to "
+ << (fmt == VPX_IMG_FMT_NV12 ? "NV12" : "I420");
+ libvpx_->img_free(raw_);
+ raw_ = libvpx_->img_wrap(nullptr, fmt, codec_.width, codec_.height, 1,
+ nullptr);
+ }
+ // else no-op since the image is already in the right format.
+}
+
+rtc::scoped_refptr<VideoFrameBuffer> LibvpxVp9Encoder::PrepareBufferForProfile0(
+ rtc::scoped_refptr<VideoFrameBuffer> buffer) {
+ absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
+ supported_formats = {VideoFrameBuffer::Type::kI420,
+ VideoFrameBuffer::Type::kNV12};
+
+ rtc::scoped_refptr<VideoFrameBuffer> mapped_buffer;
+ if (buffer->type() != VideoFrameBuffer::Type::kNative) {
+ // `buffer` is already mapped.
+ mapped_buffer = buffer;
+ } else {
+ // Attempt to map to one of the supported formats.
+ mapped_buffer = buffer->GetMappedFrameBuffer(supported_formats);
+ }
+ if (!mapped_buffer ||
+ (absl::c_find(supported_formats, mapped_buffer->type()) ==
+ supported_formats.end() &&
+ mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) {
+ // Unknown pixel format or unable to map, convert to I420 and prepare that
+ // buffer instead to ensure Scale() is safe to use.
+ auto converted_buffer = buffer->ToI420();
+ if (!converted_buffer) {
+ RTC_LOG(LS_ERROR) << "Failed to convert "
+ << VideoFrameBufferTypeToString(buffer->type())
+ << " image to I420. Can't encode frame.";
+ return {};
+ }
+ RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 ||
+ converted_buffer->type() == VideoFrameBuffer::Type::kI420A);
+
+ // Because `buffer` had to be converted, use `converted_buffer` instead.
+ buffer = mapped_buffer = converted_buffer;
+ }
+
+ // Prepare `raw_` from `mapped_buffer`.
+ switch (mapped_buffer->type()) {
+ case VideoFrameBuffer::Type::kI420:
+ case VideoFrameBuffer::Type::kI420A: {
+ MaybeRewrapRawWithFormat(VPX_IMG_FMT_I420);
+ const I420BufferInterface* i420_buffer = mapped_buffer->GetI420();
+ RTC_DCHECK(i420_buffer);
+ raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(i420_buffer->DataY());
+ raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(i420_buffer->DataU());
+ raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(i420_buffer->DataV());
+ raw_->stride[VPX_PLANE_Y] = i420_buffer->StrideY();
+ raw_->stride[VPX_PLANE_U] = i420_buffer->StrideU();
+ raw_->stride[VPX_PLANE_V] = i420_buffer->StrideV();
+ break;
+ }
+ case VideoFrameBuffer::Type::kNV12: {
+ MaybeRewrapRawWithFormat(VPX_IMG_FMT_NV12);
+ const NV12BufferInterface* nv12_buffer = mapped_buffer->GetNV12();
+ RTC_DCHECK(nv12_buffer);
+ raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(nv12_buffer->DataY());
+ raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(nv12_buffer->DataUV());
+ raw_->planes[VPX_PLANE_V] = raw_->planes[VPX_PLANE_U] + 1;
+ raw_->stride[VPX_PLANE_Y] = nv12_buffer->StrideY();
+ raw_->stride[VPX_PLANE_U] = nv12_buffer->StrideUV();
+ raw_->stride[VPX_PLANE_V] = nv12_buffer->StrideUV();
+ break;
+ }
+ default:
+ RTC_DCHECK_NOTREACHED();
+ }
+ return mapped_buffer;
+}
+
+} // namespace webrtc
+
+#endif // RTC_ENABLE_VP9
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h
new file mode 100644
index 0000000000..bb871f8498
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#ifndef MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_ENCODER_H_
+#define MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_ENCODER_H_
+
+#ifdef RTC_ENABLE_VP9
+
+#include <array>
+#include <memory>
+#include <vector>
+
+#include "api/fec_controller_override.h"
+#include "api/field_trials_view.h"
+#include "api/video_codecs/scalability_mode.h"
+#include "api/video_codecs/video_encoder.h"
+#include "api/video_codecs/vp9_profile.h"
+#include "common_video/include/video_frame_buffer_pool.h"
+#include "modules/video_coding/codecs/interface/libvpx_interface.h"
+#include "modules/video_coding/codecs/vp9/include/vp9.h"
+#include "modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h"
+#include "modules/video_coding/svc/scalable_video_controller.h"
+#include "modules/video_coding/utility/framerate_controller_deprecated.h"
+#include "rtc_base/containers/flat_map.h"
+#include "rtc_base/experiments/encoder_info_settings.h"
+#include "vpx/vp8cx.h"
+
+namespace webrtc {
+
+class LibvpxVp9Encoder : public VP9Encoder {
+ public:
+ LibvpxVp9Encoder(const cricket::VideoCodec& codec,
+ std::unique_ptr<LibvpxInterface> interface,
+ const FieldTrialsView& trials);
+
+ ~LibvpxVp9Encoder() override;
+
+ void SetFecControllerOverride(
+ FecControllerOverride* fec_controller_override) override;
+
+ int Release() override;
+
+ int InitEncode(const VideoCodec* codec_settings,
+ const Settings& settings) override;
+
+ int Encode(const VideoFrame& input_image,
+ const std::vector<VideoFrameType>* frame_types) override;
+
+ int RegisterEncodeCompleteCallback(EncodedImageCallback* callback) override;
+
+ void SetRates(const RateControlParameters& parameters) override;
+
+ EncoderInfo GetEncoderInfo() const override;
+
+ private:
+ // Determine number of encoder threads to use.
+ int NumberOfThreads(int width, int height, int number_of_cores);
+
+ // Call encoder initialize function and set control settings.
+ int InitAndSetControlSettings(const VideoCodec* inst);
+
+ // Update frame size for codec.
+ int UpdateCodecFrameSize(const VideoFrame& input_image);
+
+ bool PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
+ absl::optional<int>* spatial_idx,
+ absl::optional<int>* temporal_idx,
+ const vpx_codec_cx_pkt& pkt);
+ void FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
+ size_t pic_num,
+ bool inter_layer_predicted,
+ CodecSpecificInfoVP9* vp9_info);
+ void UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt, size_t pic_num);
+ vpx_svc_ref_frame_config_t SetReferences(bool is_key_pic,
+ int first_active_spatial_layer_id);
+
+ bool ExplicitlyConfiguredSpatialLayers() const;
+ bool SetSvcRates(const VideoBitrateAllocation& bitrate_allocation);
+
+ // Configures which spatial layers libvpx should encode according to
+ // configuration provided by svc_controller_.
+ void EnableSpatialLayer(int sid);
+ void DisableSpatialLayer(int sid);
+ void SetActiveSpatialLayers();
+
+ void GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt);
+
+ // Callback function for outputting packets per spatial layer.
+ static void EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
+ void* user_data);
+
+ void DeliverBufferedFrame(bool end_of_picture);
+
+ bool DropFrame(uint8_t spatial_idx, uint32_t rtp_timestamp);
+
+ // Determine maximum target for Intra frames
+ //
+ // Input:
+ // - optimal_buffer_size : Optimal buffer size
+ // Return Value : Max target size for Intra frames represented as
+ // percentage of the per frame bandwidth
+ uint32_t MaxIntraTarget(uint32_t optimal_buffer_size);
+
+ size_t SteadyStateSize(int sid, int tid);
+
+ void MaybeRewrapRawWithFormat(vpx_img_fmt fmt);
+ // Prepares `raw_` to reference image data of `buffer`, or of mapped or scaled
+ // versions of `buffer`. Returns the buffer that got referenced as a result,
+ // allowing the caller to keep a reference to it until after encoding has
+ // finished. On failure to convert the buffer, null is returned.
+ rtc::scoped_refptr<VideoFrameBuffer> PrepareBufferForProfile0(
+ rtc::scoped_refptr<VideoFrameBuffer> buffer);
+
+ const std::unique_ptr<LibvpxInterface> libvpx_;
+ EncodedImage encoded_image_;
+ CodecSpecificInfo codec_specific_;
+ EncodedImageCallback* encoded_complete_callback_;
+ VideoCodec codec_;
+ const VP9Profile profile_;
+ bool inited_;
+ int64_t timestamp_;
+ uint32_t rc_max_intra_target_;
+ vpx_codec_ctx_t* encoder_;
+ vpx_codec_enc_cfg_t* config_;
+ vpx_image_t* raw_;
+ vpx_svc_extra_cfg_t svc_params_;
+ const VideoFrame* input_image_;
+ GofInfoVP9 gof_; // Contains each frame's temporal information for
+ // non-flexible mode.
+ bool force_key_frame_;
+ size_t pics_since_key_;
+ uint8_t num_temporal_layers_;
+ uint8_t num_spatial_layers_; // Number of configured SLs
+ uint8_t num_active_spatial_layers_; // Number of actively encoded SLs
+ uint8_t first_active_layer_;
+ bool layer_deactivation_requires_key_frame_;
+ bool is_svc_;
+ InterLayerPredMode inter_layer_pred_;
+ bool external_ref_control_;
+ const bool trusted_rate_controller_;
+ bool layer_buffering_;
+ const bool full_superframe_drop_;
+ vpx_svc_frame_drop_t svc_drop_frame_;
+ bool first_frame_in_picture_;
+ VideoBitrateAllocation current_bitrate_allocation_;
+ bool ss_info_needed_;
+ bool force_all_active_layers_;
+ uint8_t num_cores_;
+
+ std::unique_ptr<ScalableVideoController> svc_controller_;
+ absl::optional<ScalabilityMode> scalability_mode_;
+ std::vector<FramerateControllerDeprecated> framerate_controller_;
+
+ // Used for flexible mode.
+ bool is_flexible_mode_;
+ struct RefFrameBuffer {
+ bool operator==(const RefFrameBuffer& o) {
+ return pic_num == o.pic_num && spatial_layer_id == o.spatial_layer_id &&
+ temporal_layer_id == o.temporal_layer_id;
+ }
+
+ size_t pic_num = 0;
+ int spatial_layer_id = 0;
+ int temporal_layer_id = 0;
+ };
+ std::array<RefFrameBuffer, kNumVp9Buffers> ref_buf_;
+ std::vector<ScalableVideoController::LayerFrameConfig> layer_frames_;
+
+ // Variable frame-rate related fields and methods.
+ const struct VariableFramerateExperiment {
+ bool enabled;
+ // Framerate is limited to this value in steady state.
+ float framerate_limit;
+ // This qp or below is considered a steady state.
+ int steady_state_qp;
+ // Frames of at least this percentage below ideal for configured bitrate are
+ // considered in a steady state.
+ int steady_state_undershoot_percentage;
+ // Number of consecutive frames with good QP and size required to detect
+ // the steady state.
+ int frames_before_steady_state;
+ } variable_framerate_experiment_;
+ static VariableFramerateExperiment ParseVariableFramerateConfig(
+ const FieldTrialsView& trials);
+ FramerateControllerDeprecated variable_framerate_controller_;
+
+ const struct QualityScalerExperiment {
+ int low_qp;
+ int high_qp;
+ bool enabled;
+ } quality_scaler_experiment_;
+ static QualityScalerExperiment ParseQualityScalerConfig(
+ const FieldTrialsView& trials);
+ const bool external_ref_ctrl_;
+
+  // Flags that can affect speed vs quality tradeoff, and are configurable per
+  // resolution ranges.
+ struct PerformanceFlags {
+    // If false, a lookup will be made in `settings_by_resolution` based on the
+    // highest currently active resolution, and the overall speed is then set
+    // to the `base_layer_speed` matching that entry.
+    // If true, each active resolution will have its speed and deblock_mode set
+    // based on its resolution, and the high layer speed configured for non
+    // base temporal layer frames.
+ bool use_per_layer_speed = false;
+
+ struct ParameterSet {
+ int base_layer_speed = -1; // Speed setting for TL0.
+ int high_layer_speed = -1; // Speed setting for TL1-TL3.
+ // 0 = deblock all temporal layers (TL)
+ // 1 = disable deblock for top-most TL
+ // 2 = disable deblock for all TLs
+ int deblock_mode = 0;
+ bool allow_denoising = true;
+ };
+ // Map from min pixel count to settings for that resolution and above.
+ // E.g. if you want some settings A if below wvga (640x360) and some other
+ // setting B at wvga and above, you'd use map {{0, A}, {230400, B}}.
+ flat_map<int, ParameterSet> settings_by_resolution;
+ };
+ // Performance flags, ordered by `min_pixel_count`.
+ const PerformanceFlags performance_flags_;
+  // Caching of `speed_configs_`, where index i maps to the resolution as
+ // specified in `codec_.spatialLayer[i]`.
+ std::vector<PerformanceFlags::ParameterSet>
+ performance_flags_by_spatial_index_;
+ void UpdatePerformanceFlags();
+ static PerformanceFlags ParsePerformanceFlagsFromTrials(
+ const FieldTrialsView& trials);
+ static PerformanceFlags GetDefaultPerformanceFlags();
+
+ int num_steady_state_frames_;
+ // Only set config when this flag is set.
+ bool config_changed_;
+
+ const LibvpxVp9EncoderInfoSettings encoder_info_override_;
+};
+
+} // namespace webrtc
+
+#endif // RTC_ENABLE_VP9
+
+#endif // MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_ENCODER_H_
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.cc
new file mode 100644
index 0000000000..3a32a43622
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.cc
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/codecs/vp9/svc_config.h"
+
+#include <algorithm>
+#include <cmath>
+#include <memory>
+#include <vector>
+
+#include "media/base/video_common.h"
+#include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
+#include "modules/video_coding/svc/create_scalability_structure.h"
+#include "modules/video_coding/svc/scalability_mode_util.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+namespace {
+const size_t kMinVp9SvcBitrateKbps = 30;
+
+const size_t kMaxNumLayersForScreenSharing = 3;
+const float kMaxScreenSharingLayerFramerateFps[] = {5.0, 10.0, 30.0};
+const size_t kMinScreenSharingLayerBitrateKbps[] = {30, 200, 500};
+const size_t kTargetScreenSharingLayerBitrateKbps[] = {150, 350, 950};
+const size_t kMaxScreenSharingLayerBitrateKbps[] = {250, 500, 950};
+
+// Gets limited number of layers for given resolution.
+size_t GetLimitedNumSpatialLayers(size_t width, size_t height) {
+ const bool is_landscape = width >= height;
+ const size_t min_width = is_landscape ? kMinVp9SpatialLayerLongSideLength
+ : kMinVp9SpatialLayerShortSideLength;
+ const size_t min_height = is_landscape ? kMinVp9SpatialLayerShortSideLength
+ : kMinVp9SpatialLayerLongSideLength;
+ const size_t num_layers_fit_horz = static_cast<size_t>(
+ std::floor(1 + std::max(0.0f, std::log2(1.0f * width / min_width))));
+ const size_t num_layers_fit_vert = static_cast<size_t>(
+ std::floor(1 + std::max(0.0f, std::log2(1.0f * height / min_height))));
+ return std::min(num_layers_fit_horz, num_layers_fit_vert);
+}
+} // namespace
+
+std::vector<SpatialLayer> ConfigureSvcScreenSharing(size_t input_width,
+ size_t input_height,
+ float max_framerate_fps,
+ size_t num_spatial_layers) {
+ num_spatial_layers =
+ std::min(num_spatial_layers, kMaxNumLayersForScreenSharing);
+ std::vector<SpatialLayer> spatial_layers;
+
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ SpatialLayer spatial_layer = {0};
+ spatial_layer.width = input_width;
+ spatial_layer.height = input_height;
+ spatial_layer.maxFramerate =
+ std::min(kMaxScreenSharingLayerFramerateFps[sl_idx], max_framerate_fps);
+ spatial_layer.numberOfTemporalLayers = 1;
+ spatial_layer.minBitrate =
+ static_cast<int>(kMinScreenSharingLayerBitrateKbps[sl_idx]);
+ spatial_layer.maxBitrate =
+ static_cast<int>(kMaxScreenSharingLayerBitrateKbps[sl_idx]);
+ spatial_layer.targetBitrate =
+ static_cast<int>(kTargetScreenSharingLayerBitrateKbps[sl_idx]);
+ spatial_layer.active = true;
+ spatial_layers.push_back(spatial_layer);
+ }
+
+ return spatial_layers;
+}
+
+std::vector<SpatialLayer> ConfigureSvcNormalVideo(
+ size_t input_width,
+ size_t input_height,
+ float max_framerate_fps,
+ size_t first_active_layer,
+ size_t num_spatial_layers,
+ size_t num_temporal_layers,
+ absl::optional<ScalableVideoController::StreamLayersConfig> config) {
+ RTC_DCHECK_LT(first_active_layer, num_spatial_layers);
+
+ // Limit number of layers for given resolution.
+ size_t limited_num_spatial_layers =
+ GetLimitedNumSpatialLayers(input_width, input_height);
+ if (limited_num_spatial_layers < num_spatial_layers) {
+ RTC_LOG(LS_WARNING) << "Reducing number of spatial layers from "
+ << num_spatial_layers << " to "
+ << limited_num_spatial_layers
+ << " due to low input resolution.";
+ num_spatial_layers = limited_num_spatial_layers;
+ }
+
+ // First active layer must be configured.
+ num_spatial_layers = std::max(num_spatial_layers, first_active_layer + 1);
+
+  // Ensure top-layer dimensions are divisible as required by the layer scaling.
+ int required_divisiblity = 1 << (num_spatial_layers - first_active_layer - 1);
+ if (config) {
+ required_divisiblity = 1;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ required_divisiblity = cricket::LeastCommonMultiple(
+ required_divisiblity, config->scaling_factor_den[sl_idx]);
+ }
+ }
+ input_width = input_width - input_width % required_divisiblity;
+ input_height = input_height - input_height % required_divisiblity;
+
+ std::vector<SpatialLayer> spatial_layers;
+ for (size_t sl_idx = first_active_layer; sl_idx < num_spatial_layers;
+ ++sl_idx) {
+ SpatialLayer spatial_layer = {0};
+ spatial_layer.width = input_width >> (num_spatial_layers - sl_idx - 1);
+ spatial_layer.height = input_height >> (num_spatial_layers - sl_idx - 1);
+ spatial_layer.maxFramerate = max_framerate_fps;
+ spatial_layer.numberOfTemporalLayers = num_temporal_layers;
+ spatial_layer.active = true;
+
+ if (config) {
+ spatial_layer.width = input_width * config->scaling_factor_num[sl_idx] /
+ config->scaling_factor_den[sl_idx];
+ spatial_layer.height = input_height * config->scaling_factor_num[sl_idx] /
+ config->scaling_factor_den[sl_idx];
+ }
+
+ // minBitrate and maxBitrate formulas were derived from
+  // subjective-quality data to determine bit rates below which video
+ // quality is unacceptable and above which additional bits do not provide
+ // benefit. The formulas express rate in units of kbps.
+
+ // TODO(ssilkin): Add to the comment PSNR/SSIM we get at encoding certain
+ // video to min/max bitrate specified by those formulas.
+ const size_t num_pixels = spatial_layer.width * spatial_layer.height;
+ int min_bitrate =
+ static_cast<int>((600. * std::sqrt(num_pixels) - 95000.) / 1000.);
+ min_bitrate = std::max(min_bitrate, 0);
+ spatial_layer.minBitrate =
+ std::max(static_cast<size_t>(min_bitrate), kMinVp9SvcBitrateKbps);
+ spatial_layer.maxBitrate =
+ static_cast<int>((1.6 * num_pixels + 50 * 1000) / 1000);
+ spatial_layer.targetBitrate =
+ (spatial_layer.minBitrate + spatial_layer.maxBitrate) / 2;
+ spatial_layers.push_back(spatial_layer);
+ }
+
+ // A workaround for situation when single HD layer is left with minBitrate
+ // about 500kbps. This would mean that there will always be at least 500kbps
+ // allocated to video regardless of how low is the actual BWE.
+ // Also, boost maxBitrate for the first layer to account for lost ability to
+ // predict from previous layers.
+ if (first_active_layer > 0) {
+ spatial_layers[0].minBitrate = kMinVp9SvcBitrateKbps;
+ // TODO(ilnik): tune this value or come up with a different formula to
+ // ensure that all singlecast configurations look good and not too much
+ // bitrate is added.
+ spatial_layers[0].maxBitrate *= 1.1;
+ }
+
+ return spatial_layers;
+}
+
+// Uses scalability mode to configure spatial layers.
+std::vector<SpatialLayer> GetVp9SvcConfig(VideoCodec& codec) {
+ RTC_DCHECK_EQ(codec.codecType, kVideoCodecVP9);
+
+ absl::optional<ScalabilityMode> scalability_mode = codec.GetScalabilityMode();
+ RTC_DCHECK(scalability_mode.has_value());
+
+ // Limit number of spatial layers for given resolution.
+ int limited_num_spatial_layers =
+ GetLimitedNumSpatialLayers(codec.width, codec.height);
+ if (limited_num_spatial_layers <
+ ScalabilityModeToNumSpatialLayers(*scalability_mode)) {
+ ScalabilityMode limited_scalability_mode =
+ LimitNumSpatialLayers(*scalability_mode, limited_num_spatial_layers);
+ RTC_LOG(LS_WARNING)
+ << "Reducing number of spatial layers due to low input resolution: "
+ << ScalabilityModeToString(*scalability_mode) << " to "
+ << ScalabilityModeToString(limited_scalability_mode);
+ scalability_mode = limited_scalability_mode;
+ codec.SetScalabilityMode(limited_scalability_mode);
+ }
+
+ absl::optional<ScalableVideoController::StreamLayersConfig> info =
+ ScalabilityStructureConfig(*scalability_mode);
+ if (!info.has_value()) {
+ RTC_LOG(LS_WARNING) << "Failed to create structure "
+ << ScalabilityModeToString(*scalability_mode);
+ return {};
+ }
+
+ // TODO(bugs.webrtc.org/11607): Add support for screensharing.
+ std::vector<SpatialLayer> spatial_layers =
+ GetSvcConfig(codec.width, codec.height, codec.maxFramerate,
+ /*first_active_layer=*/0, info->num_spatial_layers,
+ info->num_temporal_layers, /*is_screen_sharing=*/false,
+ codec.GetScalabilityMode() ? info : absl::nullopt);
+ RTC_DCHECK(!spatial_layers.empty());
+
+ // Use codec bitrate limits if spatial layering is not requested.
+ if (info->num_spatial_layers == 1) {
+ spatial_layers.back().minBitrate = codec.minBitrate;
+ spatial_layers.back().targetBitrate = codec.maxBitrate;
+ spatial_layers.back().maxBitrate = codec.maxBitrate;
+ }
+
+ return spatial_layers;
+}
+
+std::vector<SpatialLayer> GetSvcConfig(
+ size_t input_width,
+ size_t input_height,
+ float max_framerate_fps,
+ size_t first_active_layer,
+ size_t num_spatial_layers,
+ size_t num_temporal_layers,
+ bool is_screen_sharing,
+ absl::optional<ScalableVideoController::StreamLayersConfig> config) {
+ RTC_DCHECK_GT(input_width, 0);
+ RTC_DCHECK_GT(input_height, 0);
+ RTC_DCHECK_GT(num_spatial_layers, 0);
+ RTC_DCHECK_GT(num_temporal_layers, 0);
+
+ if (is_screen_sharing) {
+ return ConfigureSvcScreenSharing(input_width, input_height,
+ max_framerate_fps, num_spatial_layers);
+ } else {
+ return ConfigureSvcNormalVideo(input_width, input_height, max_framerate_fps,
+ first_active_layer, num_spatial_layers,
+ num_temporal_layers, config);
+ }
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.h
new file mode 100644
index 0000000000..adeaf0f161
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.h
@@ -0,0 +1,39 @@
+/* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_VIDEO_CODING_CODECS_VP9_SVC_CONFIG_H_
+#define MODULES_VIDEO_CODING_CODECS_VP9_SVC_CONFIG_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "api/video_codecs/spatial_layer.h"
+#include "api/video_codecs/video_codec.h"
+#include "modules/video_coding/svc/scalable_video_controller.h"
+
+namespace webrtc {
+
+// Uses scalability mode to configure spatial layers.
+std::vector<SpatialLayer> GetVp9SvcConfig(VideoCodec& video_codec);
+
+std::vector<SpatialLayer> GetSvcConfig(
+ size_t input_width,
+ size_t input_height,
+ float max_framerate_fps,
+ size_t first_active_layer,
+ size_t num_spatial_layers,
+ size_t num_temporal_layers,
+ bool is_screen_sharing,
+ absl::optional<ScalableVideoController::StreamLayersConfig> config =
+ absl::nullopt);
+
+} // namespace webrtc
+
+#endif // MODULES_VIDEO_CODING_CODECS_VP9_SVC_CONFIG_H_
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config_unittest.cc
new file mode 100644
index 0000000000..762fd39287
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config_unittest.cc
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/codecs/vp9/svc_config.h"
+
+#include <cstddef>
+#include <vector>
+
+#include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+using ::testing::ElementsAre;
+using ::testing::Field;
+
+namespace webrtc {
+TEST(SvcConfig, NumSpatialLayers) {
+ const size_t max_num_spatial_layers = 6;
+ const size_t first_active_layer = 0;
+ const size_t num_spatial_layers = 2;
+
+ std::vector<SpatialLayer> spatial_layers = GetSvcConfig(
+ kMinVp9SpatialLayerLongSideLength << (num_spatial_layers - 1),
+ kMinVp9SpatialLayerShortSideLength << (num_spatial_layers - 1), 30,
+ first_active_layer, max_num_spatial_layers, 1, false);
+
+ EXPECT_EQ(spatial_layers.size(), num_spatial_layers);
+}
+
+TEST(SvcConfig, NumSpatialLayersPortrait) {
+ const size_t max_num_spatial_layers = 6;
+ const size_t first_active_layer = 0;
+ const size_t num_spatial_layers = 2;
+
+ std::vector<SpatialLayer> spatial_layers = GetSvcConfig(
+ kMinVp9SpatialLayerShortSideLength << (num_spatial_layers - 1),
+ kMinVp9SpatialLayerLongSideLength << (num_spatial_layers - 1), 30,
+ first_active_layer, max_num_spatial_layers, 1, false);
+
+ EXPECT_EQ(spatial_layers.size(), num_spatial_layers);
+}
+
+TEST(SvcConfig, NumSpatialLayersWithScalabilityMode) {
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 960;
+ codec.height = 540;
+ codec.SetScalabilityMode(ScalabilityMode::kL3T3_KEY);
+
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135),
+ Field(&SpatialLayer::height, 270),
+ Field(&SpatialLayer::height, 540)));
+ EXPECT_THAT(spatial_layers,
+ ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 3),
+ Field(&SpatialLayer::numberOfTemporalLayers, 3),
+ Field(&SpatialLayer::numberOfTemporalLayers, 3)));
+ EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL3T3_KEY);
+}
+
+TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityMode) {
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 480;
+ codec.height = 270;
+ codec.SetScalabilityMode(ScalabilityMode::kL3T3_KEY);
+
+ // Scalability mode updated.
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135),
+ Field(&SpatialLayer::height, 270)));
+ EXPECT_THAT(spatial_layers,
+ ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 3),
+ Field(&SpatialLayer::numberOfTemporalLayers, 3)));
+ EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL2T3_KEY);
+}
+
+TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityModePortrait) {
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 270;
+ codec.height = 480;
+ codec.SetScalabilityMode(ScalabilityMode::kL3T1);
+
+ // Scalability mode updated.
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 135),
+ Field(&SpatialLayer::width, 270)));
+ EXPECT_THAT(spatial_layers,
+ ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1),
+ Field(&SpatialLayer::numberOfTemporalLayers, 1)));
+ EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL2T1);
+}
+
+TEST(SvcConfig, NumSpatialLayersWithScalabilityModeResolutionRatio1_5) {
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 270;
+ codec.height = 480;
+ codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1
+
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 180),
+ Field(&SpatialLayer::width, 270)));
+ EXPECT_THAT(spatial_layers,
+ ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1),
+ Field(&SpatialLayer::numberOfTemporalLayers, 1)));
+ EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL2T1h);
+}
+
+TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityModeResolutionRatio1_5) {
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 320;
+ codec.height = 180;
+ codec.SetScalabilityMode(ScalabilityMode::kL3T1h); // 1.5:1
+
+ // Scalability mode updated.
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 320)));
+ EXPECT_THAT(spatial_layers,
+ ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1)));
+ EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL1T1);
+}
+
+TEST(SvcConfig, AlwaysSendsAtLeastOneLayer) {
+ const size_t max_num_spatial_layers = 6;
+ const size_t first_active_layer = 5;
+
+ std::vector<SpatialLayer> spatial_layers = GetSvcConfig(
+ kMinVp9SpatialLayerLongSideLength, kMinVp9SpatialLayerShortSideLength, 30,
+ first_active_layer, max_num_spatial_layers, 1, false);
+ EXPECT_EQ(spatial_layers.size(), 1u);
+ EXPECT_EQ(spatial_layers.back().width, kMinVp9SpatialLayerLongSideLength);
+}
+
+TEST(SvcConfig, AlwaysSendsAtLeastOneLayerPortrait) {
+ const size_t max_num_spatial_layers = 6;
+ const size_t first_active_layer = 5;
+
+ std::vector<SpatialLayer> spatial_layers = GetSvcConfig(
+ kMinVp9SpatialLayerShortSideLength, kMinVp9SpatialLayerLongSideLength, 30,
+ first_active_layer, max_num_spatial_layers, 1, false);
+ EXPECT_EQ(spatial_layers.size(), 1u);
+ EXPECT_EQ(spatial_layers.back().width, kMinVp9SpatialLayerShortSideLength);
+}
+
+TEST(SvcConfig, EnforcesMinimalRequiredParity) {
+ const size_t max_num_spatial_layers = 3;
+ const size_t kOddSize = 1023;
+
+ std::vector<SpatialLayer> spatial_layers =
+ GetSvcConfig(kOddSize, kOddSize, 30,
+ /*first_active_layer=*/1, max_num_spatial_layers, 1, false);
+ // Since there are 2 layers total (1, 2), divisiblity by 2 is required.
+ EXPECT_EQ(spatial_layers.back().width, kOddSize - 1);
+ EXPECT_EQ(spatial_layers.back().width, kOddSize - 1);
+
+ spatial_layers =
+ GetSvcConfig(kOddSize, kOddSize, 30,
+ /*first_active_layer=*/0, max_num_spatial_layers, 1, false);
+ // Since there are 3 layers total (0, 1, 2), divisiblity by 4 is required.
+ EXPECT_EQ(spatial_layers.back().width, kOddSize - 3);
+ EXPECT_EQ(spatial_layers.back().width, kOddSize - 3);
+
+ spatial_layers =
+ GetSvcConfig(kOddSize, kOddSize, 30,
+ /*first_active_layer=*/2, max_num_spatial_layers, 1, false);
+ // Since there is only 1 layer active (2), divisiblity by 1 is required.
+ EXPECT_EQ(spatial_layers.back().width, kOddSize);
+ EXPECT_EQ(spatial_layers.back().width, kOddSize);
+}
+
+TEST(SvcConfig, EnforcesMinimalRequiredParityWithScalabilityMode) {
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 1023;
+ codec.height = 1023;
+ codec.SetScalabilityMode(ScalabilityMode::kL3T1);
+
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, // Divisiblity by 4 required.
+ ElementsAre(Field(&SpatialLayer::width, 255),
+ Field(&SpatialLayer::width, 510),
+ Field(&SpatialLayer::width, 1020)));
+
+ codec.SetScalabilityMode(ScalabilityMode::kL2T1);
+ spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, // Divisiblity by 2 required.
+ ElementsAre(Field(&SpatialLayer::width, 511),
+ Field(&SpatialLayer::width, 1022)));
+
+ codec.SetScalabilityMode(ScalabilityMode::kL1T1);
+ spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, // Divisiblity by 1 required.
+ ElementsAre(Field(&SpatialLayer::width, 1023)));
+}
+
+TEST(SvcConfig, EnforcesMinimalRequiredParityWithScalabilityModeResRatio1_5) {
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 1280;
+ codec.height = 1280;
+ codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1
+
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, // Divisiblity by 3 required.
+ ElementsAre(Field(&SpatialLayer::width, 852),
+ Field(&SpatialLayer::width, 1278)));
+}
+
+TEST(SvcConfig, SkipsInactiveLayers) {
+ const size_t num_spatial_layers = 4;
+ const size_t first_active_layer = 2;
+
+ std::vector<SpatialLayer> spatial_layers = GetSvcConfig(
+ kMinVp9SpatialLayerLongSideLength << (num_spatial_layers - 1),
+ kMinVp9SpatialLayerShortSideLength << (num_spatial_layers - 1), 30,
+ first_active_layer, num_spatial_layers, 1, false);
+ EXPECT_EQ(spatial_layers.size(), 2u);
+ EXPECT_EQ(spatial_layers.back().width,
+ kMinVp9SpatialLayerLongSideLength << (num_spatial_layers - 1));
+}
+
+TEST(SvcConfig, BitrateThresholds) {
+ const size_t first_active_layer = 0;
+ const size_t num_spatial_layers = 3;
+ std::vector<SpatialLayer> spatial_layers = GetSvcConfig(
+ kMinVp9SpatialLayerLongSideLength << (num_spatial_layers - 1),
+ kMinVp9SpatialLayerShortSideLength << (num_spatial_layers - 1), 30,
+ first_active_layer, num_spatial_layers, 1, false);
+
+ EXPECT_EQ(spatial_layers.size(), num_spatial_layers);
+
+ for (const SpatialLayer& layer : spatial_layers) {
+ EXPECT_LE(layer.minBitrate, layer.maxBitrate);
+ EXPECT_LE(layer.minBitrate, layer.targetBitrate);
+ EXPECT_LE(layer.targetBitrate, layer.maxBitrate);
+ }
+}
+
+TEST(SvcConfig, BitrateThresholdsWithScalabilityMode) {
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 960;
+ codec.height = 540;
+ codec.SetScalabilityMode(ScalabilityMode::kS3T3);
+
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135),
+ Field(&SpatialLayer::height, 270),
+ Field(&SpatialLayer::height, 540)));
+
+ for (const SpatialLayer& layer : spatial_layers) {
+ EXPECT_LE(layer.minBitrate, layer.maxBitrate);
+ EXPECT_LE(layer.minBitrate, layer.targetBitrate);
+ EXPECT_LE(layer.targetBitrate, layer.maxBitrate);
+ }
+}
+
+TEST(SvcConfig, ScreenSharing) {
+ std::vector<SpatialLayer> spatial_layers =
+ GetSvcConfig(1920, 1080, 30, 1, 3, 3, true);
+
+ EXPECT_EQ(spatial_layers.size(), 3UL);
+
+ for (size_t i = 0; i < 3; ++i) {
+ const SpatialLayer& layer = spatial_layers[i];
+ EXPECT_EQ(layer.width, 1920);
+ EXPECT_EQ(layer.height, 1080);
+ EXPECT_EQ(layer.maxFramerate, (i < 1) ? 5 : (i < 2 ? 10 : 30));
+ EXPECT_EQ(layer.numberOfTemporalLayers, 1);
+ EXPECT_LE(layer.minBitrate, layer.maxBitrate);
+ EXPECT_LE(layer.minBitrate, layer.targetBitrate);
+ EXPECT_LE(layer.targetBitrate, layer.maxBitrate);
+ }
+}
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
new file mode 100644
index 0000000000..b6293a342e
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -0,0 +1,2446 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "absl/memory/memory.h"
+#include "api/test/create_frame_generator.h"
+#include "api/test/frame_generator_interface.h"
+#include "api/test/mock_video_encoder.h"
+#include "api/video/color_space.h"
+#include "api/video/i420_buffer.h"
+#include "api/video_codecs/video_encoder.h"
+#include "api/video_codecs/vp9_profile.h"
+#include "common_video/libyuv/include/webrtc_libyuv.h"
+#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+#include "modules/video_coding/codecs/interface/libvpx_interface.h"
+#include "modules/video_coding/codecs/interface/mock_libvpx_interface.h"
+#include "modules/video_coding/codecs/test/encoded_video_frame_producer.h"
+#include "modules/video_coding/codecs/test/video_codec_unittest.h"
+#include "modules/video_coding/codecs/vp9/include/vp9.h"
+#include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h"
+#include "modules/video_coding/codecs/vp9/svc_config.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/explicit_key_value_config.h"
+#include "test/field_trial.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/mappable_native_buffer.h"
+#include "test/video_codec_settings.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::_;
+using ::testing::A;
+using ::testing::AllOf;
+using ::testing::An;
+using ::testing::AnyNumber;
+using ::testing::ByRef;
+using ::testing::DoAll;
+using ::testing::Each;
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::Field;
+using ::testing::IsEmpty;
+using ::testing::Mock;
+using ::testing::NiceMock;
+using ::testing::Return;
+using ::testing::SafeMatcherCast;
+using ::testing::SaveArgPointee;
+using ::testing::SetArgPointee;
+using ::testing::SizeIs;
+using ::testing::TypedEq;
+using ::testing::UnorderedElementsAreArray;
+using ::testing::WithArg;
+using EncoderInfo = webrtc::VideoEncoder::EncoderInfo;
+using FramerateFractions =
+ absl::InlinedVector<uint8_t, webrtc::kMaxTemporalStreams>;
+
+constexpr size_t kWidth = 1280;
+constexpr size_t kHeight = 720;
+
+const VideoEncoder::Capabilities kCapabilities(false);
+const VideoEncoder::Settings kSettings(kCapabilities,
+ /*number_of_cores=*/1,
+ /*max_payload_size=*/0);
+
+VideoCodec DefaultCodecSettings() {
+ VideoCodec codec_settings;
+ webrtc::test::CodecSettings(kVideoCodecVP9, &codec_settings);
+ codec_settings.width = kWidth;
+ codec_settings.height = kHeight;
+ codec_settings.VP9()->numberOfTemporalLayers = 1;
+ codec_settings.VP9()->numberOfSpatialLayers = 1;
+ return codec_settings;
+}
+
+void ConfigureSvc(VideoCodec& codec_settings,
+ int num_spatial_layers,
+ int num_temporal_layers = 1) {
+ codec_settings.VP9()->numberOfSpatialLayers = num_spatial_layers;
+ codec_settings.VP9()->numberOfTemporalLayers = num_temporal_layers;
+ codec_settings.SetFrameDropEnabled(false);
+
+ std::vector<SpatialLayer> layers = GetSvcConfig(
+ codec_settings.width, codec_settings.height, codec_settings.maxFramerate,
+ /*first_active_layer=*/0, num_spatial_layers, num_temporal_layers, false);
+ for (size_t i = 0; i < layers.size(); ++i) {
+ codec_settings.spatialLayers[i] = layers[i];
+ }
+}
+
+} // namespace
+
+class TestVp9Impl : public VideoCodecUnitTest {
+ protected:
+ std::unique_ptr<VideoEncoder> CreateEncoder() override {
+ return VP9Encoder::Create();
+ }
+
+ std::unique_ptr<VideoDecoder> CreateDecoder() override {
+ return VP9Decoder::Create();
+ }
+
+ void ModifyCodecSettings(VideoCodec* codec_settings) override {
+ webrtc::test::CodecSettings(kVideoCodecVP9, codec_settings);
+ codec_settings->width = kWidth;
+ codec_settings->height = kHeight;
+ codec_settings->VP9()->numberOfTemporalLayers = 1;
+ codec_settings->VP9()->numberOfSpatialLayers = 1;
+ }
+};
+
+class TestVp9ImplForPixelFormat
+ : public TestVp9Impl,
+ public ::testing::WithParamInterface<
+ test::FrameGeneratorInterface::OutputType> {
+ protected:
+ void SetUp() override {
+ input_frame_generator_ = test::CreateSquareFrameGenerator(
+ kWidth, kHeight, GetParam(), absl::optional<int>());
+ TestVp9Impl::SetUp();
+ }
+};
+
+// Disabled on ios as flake, see https://crbug.com/webrtc/7057
+#if defined(WEBRTC_IOS)
+TEST_P(TestVp9ImplForPixelFormat, DISABLED_EncodeDecode) {
+#else
+TEST_P(TestVp9ImplForPixelFormat, EncodeDecode) {
+#endif
+ VideoFrame input_frame = NextInputFrame();
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr));
+ EncodedImage encoded_frame;
+ CodecSpecificInfo codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+ // First frame should be a key frame.
+ encoded_frame._frameType = VideoFrameType::kVideoFrameKey;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0));
+ std::unique_ptr<VideoFrame> decoded_frame;
+ absl::optional<uint8_t> decoded_qp;
+ ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
+ ASSERT_TRUE(decoded_frame);
+ EXPECT_GT(I420PSNR(&input_frame, decoded_frame.get()), 36);
+
+ const ColorSpace color_space = *decoded_frame->color_space();
+ EXPECT_EQ(ColorSpace::PrimaryID::kUnspecified, color_space.primaries());
+ EXPECT_EQ(ColorSpace::TransferID::kUnspecified, color_space.transfer());
+ EXPECT_EQ(ColorSpace::MatrixID::kUnspecified, color_space.matrix());
+ EXPECT_EQ(ColorSpace::RangeID::kLimited, color_space.range());
+ EXPECT_EQ(ColorSpace::ChromaSiting::kUnspecified,
+ color_space.chroma_siting_horizontal());
+ EXPECT_EQ(ColorSpace::ChromaSiting::kUnspecified,
+ color_space.chroma_siting_vertical());
+}
+
+TEST_P(TestVp9ImplForPixelFormat, EncodeNativeBuffer) {
+ VideoFrame input_frame = NextInputFrame();
+ // Replace the input frame with a fake native buffer of the same size and
+ // underlying pixel format. Do not allow ToI420() for non-I420 buffers,
+ // ensuring zero-conversion.
+ input_frame = test::CreateMappableNativeFrame(
+ input_frame.ntp_time_ms(), input_frame.video_frame_buffer()->type(),
+ input_frame.width(), input_frame.height());
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr));
+ EncodedImage encoded_frame;
+ CodecSpecificInfo codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+
+ // After encoding, we would expect a single mapping to have happened.
+ rtc::scoped_refptr<test::MappableNativeBuffer> mappable_buffer =
+ test::GetMappableNativeBufferFromVideoFrame(input_frame);
+ std::vector<rtc::scoped_refptr<VideoFrameBuffer>> mapped_buffers =
+ mappable_buffer->GetMappedFramedBuffers();
+ ASSERT_EQ(mapped_buffers.size(), 1u);
+ EXPECT_EQ(mapped_buffers[0]->type(), mappable_buffer->mappable_type());
+ EXPECT_EQ(mapped_buffers[0]->width(), input_frame.width());
+ EXPECT_EQ(mapped_buffers[0]->height(), input_frame.height());
+ EXPECT_FALSE(mappable_buffer->DidConvertToI420());
+}
+
+TEST_P(TestVp9ImplForPixelFormat, DecodedColorSpaceFromBitstream) {
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ EncodedImage encoded_frame;
+ CodecSpecificInfo codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+
+ // Encoded frame without explicit color space information.
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0));
+ std::unique_ptr<VideoFrame> decoded_frame;
+ absl::optional<uint8_t> decoded_qp;
+ ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
+ ASSERT_TRUE(decoded_frame);
+ // Color space present from encoded bitstream.
+ ASSERT_TRUE(decoded_frame->color_space());
+ // No HDR metadata present.
+ EXPECT_FALSE(decoded_frame->color_space()->hdr_metadata());
+}
+
+TEST_P(TestVp9ImplForPixelFormat, DecodedQpEqualsEncodedQp) {
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ EncodedImage encoded_frame;
+ CodecSpecificInfo codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+ // First frame should be a key frame.
+ encoded_frame._frameType = VideoFrameType::kVideoFrameKey;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0));
+ std::unique_ptr<VideoFrame> decoded_frame;
+ absl::optional<uint8_t> decoded_qp;
+ ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
+ ASSERT_TRUE(decoded_frame);
+ ASSERT_TRUE(decoded_qp);
+ EXPECT_EQ(encoded_frame.qp_, *decoded_qp);
+}
+
+TEST_F(TestVp9Impl, SwitchInputPixelFormatsWithoutReconfigure) {
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ EncodedImage encoded_frame;
+ CodecSpecificInfo codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+
+ // Change the input frame type from I420 to NV12, encoding should still work.
+ input_frame_generator_ = test::CreateSquareFrameGenerator(
+ kWidth, kHeight, test::FrameGeneratorInterface::OutputType::kNV12,
+ absl::optional<int>());
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+
+ // Flipping back to I420, encoding should still work.
+ input_frame_generator_ = test::CreateSquareFrameGenerator(
+ kWidth, kHeight, test::FrameGeneratorInterface::OutputType::kI420,
+ absl::optional<int>());
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+}
+
+TEST(Vp9ImplTest, ParserQpEqualsEncodedQp) {
+ std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create();
+ VideoCodec codec_settings = DefaultCodecSettings();
+ encoder->InitEncode(&codec_settings, kSettings);
+
+ std::vector<EncodedVideoFrameProducer::EncodedFrame> frames =
+ EncodedVideoFrameProducer(*encoder)
+ .SetNumInputFrames(1)
+ .SetResolution({kWidth, kHeight})
+ .Encode();
+ ASSERT_THAT(frames, SizeIs(1));
+ const auto& encoded_frame = frames.front().encoded_image;
+ int qp = 0;
+ ASSERT_TRUE(vp9::GetQp(encoded_frame.data(), encoded_frame.size(), &qp));
+ EXPECT_EQ(encoded_frame.qp_, qp);
+}
+
+TEST(Vp9ImplTest, EncodeAttachesTemplateStructureWithSvcController) {
+ std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create();
+ VideoCodec codec_settings = DefaultCodecSettings();
+ EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings),
+ WEBRTC_VIDEO_CODEC_OK);
+
+ std::vector<EncodedVideoFrameProducer::EncodedFrame> frames =
+ EncodedVideoFrameProducer(*encoder)
+ .SetNumInputFrames(2)
+ .SetResolution({kWidth, kHeight})
+ .Encode();
+
+ ASSERT_THAT(frames, SizeIs(2));
+ EXPECT_TRUE(frames[0].codec_specific_info.template_structure);
+ EXPECT_TRUE(frames[0].codec_specific_info.generic_frame_info);
+
+ EXPECT_FALSE(frames[1].codec_specific_info.template_structure);
+ EXPECT_TRUE(frames[1].codec_specific_info.generic_frame_info);
+}
+
+TEST(Vp9ImplTest, EncoderWith2TemporalLayers) {
+ std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create();
+ VideoCodec codec_settings = DefaultCodecSettings();
+ codec_settings.VP9()->numberOfTemporalLayers = 2;
+ // Tl0PidIdx is only used in non-flexible mode.
+ codec_settings.VP9()->flexibleMode = false;
+ EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings),
+ WEBRTC_VIDEO_CODEC_OK);
+
+ std::vector<EncodedVideoFrameProducer::EncodedFrame> frames =
+ EncodedVideoFrameProducer(*encoder)
+ .SetNumInputFrames(4)
+ .SetResolution({kWidth, kHeight})
+ .Encode();
+
+ ASSERT_THAT(frames, SizeIs(4));
+ EXPECT_EQ(frames[0].codec_specific_info.codecSpecific.VP9.temporal_idx, 0);
+ EXPECT_EQ(frames[1].codec_specific_info.codecSpecific.VP9.temporal_idx, 1);
+ EXPECT_EQ(frames[2].codec_specific_info.codecSpecific.VP9.temporal_idx, 0);
+ EXPECT_EQ(frames[3].codec_specific_info.codecSpecific.VP9.temporal_idx, 1);
+}
+
+TEST(Vp9ImplTest, EncodeTemporalLayersWithSvcController) {
+ std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create();
+ VideoCodec codec_settings = DefaultCodecSettings();
+ codec_settings.VP9()->numberOfTemporalLayers = 2;
+ EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings),
+ WEBRTC_VIDEO_CODEC_OK);
+
+ std::vector<EncodedVideoFrameProducer::EncodedFrame> frames =
+ EncodedVideoFrameProducer(*encoder)
+ .SetNumInputFrames(4)
+ .SetResolution({kWidth, kHeight})
+ .Encode();
+
+ ASSERT_THAT(frames, SizeIs(4));
+ EXPECT_EQ(frames[0].codec_specific_info.codecSpecific.VP9.temporal_idx, 0);
+ EXPECT_EQ(frames[1].codec_specific_info.codecSpecific.VP9.temporal_idx, 1);
+ EXPECT_EQ(frames[2].codec_specific_info.codecSpecific.VP9.temporal_idx, 0);
+ EXPECT_EQ(frames[3].codec_specific_info.codecSpecific.VP9.temporal_idx, 1);
+ // Verify codec agnostic part
+ ASSERT_TRUE(frames[0].codec_specific_info.generic_frame_info);
+ ASSERT_TRUE(frames[1].codec_specific_info.generic_frame_info);
+ ASSERT_TRUE(frames[2].codec_specific_info.generic_frame_info);
+ ASSERT_TRUE(frames[3].codec_specific_info.generic_frame_info);
+ EXPECT_EQ(frames[0].codec_specific_info.generic_frame_info->temporal_id, 0);
+ EXPECT_EQ(frames[1].codec_specific_info.generic_frame_info->temporal_id, 1);
+ EXPECT_EQ(frames[2].codec_specific_info.generic_frame_info->temporal_id, 0);
+ EXPECT_EQ(frames[3].codec_specific_info.generic_frame_info->temporal_id, 1);
+}
+
+TEST(Vp9ImplTest, EncoderWith2SpatialLayers) {
+ std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create();
+ VideoCodec codec_settings = DefaultCodecSettings();
+ codec_settings.VP9()->numberOfSpatialLayers = 2;
+ EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings),
+ WEBRTC_VIDEO_CODEC_OK);
+
+ std::vector<EncodedVideoFrameProducer::EncodedFrame> frames =
+ EncodedVideoFrameProducer(*encoder)
+ .SetNumInputFrames(1)
+ .SetResolution({kWidth, kHeight})
+ .Encode();
+
+ ASSERT_THAT(frames, SizeIs(2));
+ EXPECT_EQ(frames[0].encoded_image.SpatialIndex(), 0);
+ EXPECT_EQ(frames[1].encoded_image.SpatialIndex(), 1);
+}
+
+TEST(Vp9ImplTest, EncodeSpatialLayersWithSvcController) {
+ std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create();
+ VideoCodec codec_settings = DefaultCodecSettings();
+ codec_settings.VP9()->numberOfSpatialLayers = 2;
+ EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings),
+ WEBRTC_VIDEO_CODEC_OK);
+
+ std::vector<EncodedVideoFrameProducer::EncodedFrame> frames =
+ EncodedVideoFrameProducer(*encoder)
+ .SetNumInputFrames(2)
+ .SetResolution({kWidth, kHeight})
+ .Encode();
+
+ ASSERT_THAT(frames, SizeIs(4));
+ EXPECT_EQ(frames[0].encoded_image.SpatialIndex(), 0);
+ EXPECT_EQ(frames[1].encoded_image.SpatialIndex(), 1);
+ EXPECT_EQ(frames[2].encoded_image.SpatialIndex(), 0);
+ EXPECT_EQ(frames[3].encoded_image.SpatialIndex(), 1);
+ // Verify codec agnostic part
+ ASSERT_TRUE(frames[0].codec_specific_info.generic_frame_info);
+ ASSERT_TRUE(frames[1].codec_specific_info.generic_frame_info);
+ ASSERT_TRUE(frames[2].codec_specific_info.generic_frame_info);
+ ASSERT_TRUE(frames[3].codec_specific_info.generic_frame_info);
+ EXPECT_EQ(frames[0].codec_specific_info.generic_frame_info->spatial_id, 0);
+ EXPECT_EQ(frames[1].codec_specific_info.generic_frame_info->spatial_id, 1);
+ EXPECT_EQ(frames[2].codec_specific_info.generic_frame_info->spatial_id, 0);
+ EXPECT_EQ(frames[3].codec_specific_info.generic_frame_info->spatial_id, 1);
+}
+
+TEST_F(TestVp9Impl, EncoderExplicitLayering) {
+ // Override default settings.
+ codec_settings_.VP9()->numberOfTemporalLayers = 1;
+ codec_settings_.VP9()->numberOfSpatialLayers = 2;
+
+ codec_settings_.width = 960;
+ codec_settings_.height = 540;
+ codec_settings_.spatialLayers[0].minBitrate = 200;
+ codec_settings_.spatialLayers[0].maxBitrate = 500;
+ codec_settings_.spatialLayers[0].targetBitrate =
+ (codec_settings_.spatialLayers[0].minBitrate +
+ codec_settings_.spatialLayers[0].maxBitrate) /
+ 2;
+ codec_settings_.spatialLayers[0].active = true;
+
+ codec_settings_.spatialLayers[1].minBitrate = 400;
+ codec_settings_.spatialLayers[1].maxBitrate = 1500;
+ codec_settings_.spatialLayers[1].targetBitrate =
+ (codec_settings_.spatialLayers[1].minBitrate +
+ codec_settings_.spatialLayers[1].maxBitrate) /
+ 2;
+ codec_settings_.spatialLayers[1].active = true;
+
+ codec_settings_.spatialLayers[0].width = codec_settings_.width / 2;
+ codec_settings_.spatialLayers[0].height = codec_settings_.height / 2;
+ codec_settings_.spatialLayers[0].maxFramerate = codec_settings_.maxFramerate;
+ codec_settings_.spatialLayers[1].width = codec_settings_.width;
+ codec_settings_.spatialLayers[1].height = codec_settings_.height;
+ codec_settings_.spatialLayers[1].maxFramerate = codec_settings_.maxFramerate;
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // Ensure it fails if scaling factors in horz/vert dimentions are different.
+ codec_settings_.spatialLayers[0].width = codec_settings_.width;
+ codec_settings_.spatialLayers[0].height = codec_settings_.height / 2;
+ codec_settings_.spatialLayers[1].width = codec_settings_.width;
+ codec_settings_.spatialLayers[1].height = codec_settings_.height;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_PARAMETER,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // Ensure it fails if scaling factor is not power of two.
+ codec_settings_.spatialLayers[0].width = codec_settings_.width / 3;
+ codec_settings_.spatialLayers[0].height = codec_settings_.height / 3;
+ codec_settings_.spatialLayers[1].width = codec_settings_.width;
+ codec_settings_.spatialLayers[1].height = codec_settings_.height;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_PARAMETER,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+}
+
+// Verifies that spatial layers can be activated bottom-up (by giving a layer a
+// non-zero bitrate) and deactivated top-down (bitrate set to 0), with exactly
+// one encoded frame produced per active layer and the scalability structure
+// (SS) data re-sent only on the first frame after each layer-set change.
+TEST_F(TestVp9Impl, EnableDisableSpatialLayers) {
+ // Configure encoder to produce N spatial layers. Encode frames of layer 0
+ // then enable layer 1 and encode more frames and so on until layer N-1.
+ // Then disable layers one by one in the same way.
+ // Note: bit rate allocation is high to avoid frame dropping due to rate
+ // control, the encoder should always produce a frame. A dropped
+ // frame indicates a problem and the test will fail.
+ const size_t num_spatial_layers = 3;
+ const size_t num_frames_to_encode = 5;
+
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+ codec_settings_.SetFrameDropEnabled(true);
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoBitrateAllocation bitrate_allocation;
+ // Activation pass: layer `sl_idx` becomes active once it receives a
+ // non-zero bitrate via SetRates().
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0,
+ codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 * 2);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ // Expect one encoded frame per active spatial layer.
+ SetWaitForEncodedFramesThreshold(sl_idx + 1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ // SS data must be present only on the first frame after the change.
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ }
+ }
+
+ // Deactivation pass: disable layers top-down, leaving the base layer on.
+ for (size_t i = 0; i < num_spatial_layers - 1; ++i) {
+ const size_t sl_idx = num_spatial_layers - i - 1;
+ bitrate_allocation.SetBitrate(sl_idx, 0, 0);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ // `sl_idx` layers remain active after disabling layer `sl_idx`.
+ SetWaitForEncodedFramesThreshold(sl_idx);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ }
+ }
+}
+
+// Same enable/disable scenario as EnableDisableSpatialLayers above, but driven
+// through EncodedVideoFrameProducer and validated via the generic frame info /
+// template structure produced by the SVC controller (dependency descriptor
+// path) instead of VP9-specific ss_data.
+TEST(Vp9ImplTest, EnableDisableSpatialLayersWithSvcController) {
+ // NOTE(review): `num_spatial_layers` is int while the loops below use
+ // size_t indices — harmless here (always positive) but mixes signedness.
+ const int num_spatial_layers = 3;
+ // Configure encoder to produce 3 spatial layers. Encode frames of layer 0
+ // then enable layer 1 and encode more frames and so on.
+ // Then disable layers one by one in the same way.
+ // Note: bit rate allocation is high to avoid frame dropping due to rate
+ // control, the encoder should always produce a frame. A dropped
+ // frame indicates a problem and the test will fail.
+ std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create();
+ VideoCodec codec_settings = DefaultCodecSettings();
+ ConfigureSvc(codec_settings, num_spatial_layers);
+ codec_settings.SetFrameDropEnabled(true);
+ EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings),
+ WEBRTC_VIDEO_CODEC_OK);
+
+ EncodedVideoFrameProducer producer(*encoder);
+ producer.SetResolution({kWidth, kHeight});
+
+ // Encode a key frame to validate all other frames are delta frames.
+ std::vector<EncodedVideoFrameProducer::EncodedFrame> frames =
+ producer.SetNumInputFrames(1).Encode();
+ ASSERT_THAT(frames, Not(IsEmpty()));
+ // template_structure is only attached to key frames.
+ EXPECT_TRUE(frames[0].codec_specific_info.template_structure);
+
+ const size_t num_frames_to_encode = 5;
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0,
+ codec_settings.spatialLayers[sl_idx].targetBitrate * 1000 * 2);
+ encoder->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings.maxFramerate));
+
+ frames = producer.SetNumInputFrames(num_frames_to_encode).Encode();
+ // With (sl_idx+1) spatial layers expect (sl_idx+1) frames per input frame.
+ ASSERT_THAT(frames, SizeIs(num_frames_to_encode * (sl_idx + 1)));
+ for (size_t i = 0; i < frames.size(); ++i) {
+ EXPECT_TRUE(frames[i].codec_specific_info.generic_frame_info);
+ // Enabling an upper layer must not retrigger a key frame here.
+ EXPECT_FALSE(frames[i].codec_specific_info.template_structure);
+ }
+ }
+
+ for (int sl_idx = num_spatial_layers - 1; sl_idx > 0; --sl_idx) {
+ bitrate_allocation.SetBitrate(sl_idx, 0, 0);
+ encoder->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings.maxFramerate));
+
+ frames = producer.SetNumInputFrames(num_frames_to_encode).Encode();
+ // With `sl_idx` spatial layer disabled, there are `sl_idx` spatial layers
+ // left.
+ ASSERT_THAT(frames, SizeIs(num_frames_to_encode * sl_idx));
+ for (size_t i = 0; i < frames.size(); ++i) {
+ EXPECT_TRUE(frames[i].codec_specific_info.generic_frame_info);
+ // Disabling a layer must not trigger a key frame either.
+ EXPECT_FALSE(frames[i].codec_specific_info.template_structure);
+ }
+ }
+}
+
+// gMock matcher: succeeds when the frame's generic_frame_info is present and
+// reports exactly (spatial_id, temporal_id). On mismatch it reports the actual
+// layer pair (or the missing generic_frame_info) to the result listener.
+MATCHER_P2(GenericLayerIs, spatial_id, temporal_id, "") {
+ if (arg.codec_specific_info.generic_frame_info == absl::nullopt) {
+ *result_listener << " miss generic_frame_info";
+ return false;
+ }
+ const auto& layer = *arg.codec_specific_info.generic_frame_info;
+ if (layer.spatial_id != spatial_id || layer.temporal_id != temporal_id) {
+ *result_listener << " frame from layer (" << layer.spatial_id << ", "
+ << layer.temporal_id << ")";
+ return false;
+ }
+ return true;
+}
+
+// Verifies that when an upper spatial layer is enabled mid-GOF (group of
+// frames), the encoder defers the spatial upswitch until the next T0 frame,
+// since a T2 frame would otherwise have no same-layer frame to reference.
+TEST(Vp9ImplTest, SpatialUpswitchNotAtGOFBoundary) {
+ std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create();
+ VideoCodec codec_settings = DefaultCodecSettings();
+ ConfigureSvc(codec_settings, /*num_spatial_layers=*/3,
+ /*num_temporal_layers=*/3);
+ codec_settings.SetFrameDropEnabled(true);
+ EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings),
+ WEBRTC_VIDEO_CODEC_OK);
+
+ EncodedVideoFrameProducer producer(*encoder);
+ producer.SetResolution({kWidth, kHeight});
+
+ // Disable all but spatial_layer = 0;
+ VideoBitrateAllocation bitrate_allocation;
+ int layer_bitrate_bps = codec_settings.spatialLayers[0].targetBitrate * 1000;
+ bitrate_allocation.SetBitrate(0, 0, layer_bitrate_bps);
+ bitrate_allocation.SetBitrate(0, 1, layer_bitrate_bps);
+ bitrate_allocation.SetBitrate(0, 2, layer_bitrate_bps);
+ encoder->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings.maxFramerate));
+ // The 3-temporal-layer pattern emits frames in order T0, T2, T1.
+ EXPECT_THAT(producer.SetNumInputFrames(3).Encode(),
+ ElementsAre(GenericLayerIs(0, 0), GenericLayerIs(0, 2),
+ GenericLayerIs(0, 1)));
+
+ // Upswitch to spatial_layer = 1
+ layer_bitrate_bps = codec_settings.spatialLayers[1].targetBitrate * 1000;
+ bitrate_allocation.SetBitrate(1, 0, layer_bitrate_bps);
+ bitrate_allocation.SetBitrate(1, 1, layer_bitrate_bps);
+ bitrate_allocation.SetBitrate(1, 2, layer_bitrate_bps);
+ encoder->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings.maxFramerate));
+ // Expect upswitch doesn't happen immediately since there is no S1 frame that
+ // S1T2 frame can reference.
+ EXPECT_THAT(producer.SetNumInputFrames(1).Encode(),
+ ElementsAre(GenericLayerIs(0, 2)));
+ // Expect spatial upswitch happens now, at T0 frame.
+ EXPECT_THAT(producer.SetNumInputFrames(1).Encode(),
+ ElementsAre(GenericLayerIs(0, 0), GenericLayerIs(1, 0)));
+}
+// TODO(bugs.webrtc.org/13442) Enable once a forward fix has landed in WebRTC.
+// Verifies key-frame policy around layer toggling (non-flexible mode,
+// kOnKeyPic inter-layer prediction, external ref control field trial):
+// disabling lower layers must NOT produce a key frame; re-enabling a lower
+// layer MUST produce one (and re-send SS data on that first frame).
+TEST_F(TestVp9Impl, DISABLED_DisableEnableBaseLayerTriggersKeyFrame) {
+ // Configure encoder to produce N spatial layers. Encode frames for all
+ // layers. Then disable all but the last layer. Then reenable all back again.
+ test::ScopedFieldTrials override_field_trials(
+ "WebRTC-Vp9ExternalRefCtrl/Enabled/");
+ const size_t num_spatial_layers = 3;
+ const size_t num_temporal_layers = 3;
+ // Must not be multiple of temporal period to exercise all code paths.
+ const size_t num_frames_to_encode = 5;
+
+ ConfigureSvc(codec_settings_, num_spatial_layers, num_temporal_layers);
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.VP9()->flexibleMode = false;
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOnKeyPic;
+ codec_settings_.mode = VideoCodecMode::kRealtimeVideo;
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ bitrate_allocation.SetBitrate(
+ sl_idx, tl_idx,
+ codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 * 2);
+ }
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Baseline: all layers active; SS data only on the very first frame.
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ }
+
+ // Disable all but top layer.
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) {
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ bitrate_allocation.SetBitrate(sl_idx, tl_idx, 0);
+ }
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ bool seen_ss_data = false;
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ // SS available immediately after switching on base temporal layer.
+ // Once the updated SS has been sent, it must not repeat.
+ if (seen_ss_data) {
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ false);
+ } else {
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ codec_specific_info[0].codecSpecific.VP9.temporal_idx == 0);
+ seen_ss_data |=
+ codec_specific_info[0].codecSpecific.VP9.ss_data_available;
+ }
+ // No key-frames generated for disabling layers.
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+ EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2);
+ }
+ EXPECT_TRUE(seen_ss_data);
+
+ // Force key-frame.
+ std::vector<VideoFrameType> frame_types = {VideoFrameType::kVideoFrameKey};
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), &frame_types));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ // Key-frame should be produced.
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameKey);
+ EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2);
+
+ // Encode some more frames.
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+ EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2);
+ }
+
+ // Enable the second layer back.
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ // NOTE(review): layer 1 is given layer-0's target bitrate (and layer 0
+ // below gets layer-1's) — presumably any sufficiently high value works,
+ // since the goal is only to avoid frame drops; verify against upstream.
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ bitrate_allocation.SetBitrate(
+ 1, tl_idx, codec_settings_.spatialLayers[0].targetBitrate * 1000 * 2);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(encoded_frame.size(), 2u);
+ // SS available immediately after switching on.
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ // Keyframe should be generated when enabling lower layers.
+ const VideoFrameType expected_type = frame_num == 0
+ ? VideoFrameType::kVideoFrameKey
+ : VideoFrameType::kVideoFrameDelta;
+ EXPECT_EQ(encoded_frame[0]._frameType, expected_type);
+ EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 1);
+ EXPECT_EQ(encoded_frame[1].SpatialIndex().value_or(-1), 2);
+ }
+
+ // Enable the first layer back.
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ bitrate_allocation.SetBitrate(
+ 0, tl_idx, codec_settings_.spatialLayers[1].targetBitrate * 1000 * 2);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(encoded_frame.size(), 3u);
+ // SS available immediately after switching on.
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ // Keyframe should be generated when enabling lower layers.
+ const VideoFrameType expected_type = frame_num == 0
+ ? VideoFrameType::kVideoFrameKey
+ : VideoFrameType::kVideoFrameDelta;
+ EXPECT_EQ(encoded_frame[0]._frameType, expected_type);
+ }
+}
+// TODO(bugs.webrtc.org/13442) Enable once a forward fix has landed in WebRTC.
+// SVC-controller counterpart of DISABLED_DisableEnableBaseLayerTriggersKeyFrame:
+// the same disable-lower-layers / re-enable scenario, validated through
+// template_structure (key-frame marker) and generic_frame_info spatial ids.
+TEST(Vp9ImplTest,
+ DISABLED_DisableEnableBaseLayerWithSvcControllerTriggersKeyFrame) {
+ // Configure encoder to produce N spatial layers. Encode frames for all
+ // layers. Then disable all but the last layer. Then reenable all back again.
+ const size_t num_spatial_layers = 3;
+ const size_t num_temporal_layers = 3;
+ // Must not be multiple of temporal period to exercise all code paths.
+ const size_t num_frames_to_encode = 5;
+
+ std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create();
+ VideoCodec codec_settings = DefaultCodecSettings();
+ ConfigureSvc(codec_settings, num_spatial_layers, num_temporal_layers);
+ codec_settings.SetFrameDropEnabled(false);
+ codec_settings.VP9()->flexibleMode = false;
+ codec_settings.VP9()->interLayerPred = InterLayerPredMode::kOnKeyPic;
+ codec_settings.mode = VideoCodecMode::kRealtimeVideo;
+
+ EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings),
+ WEBRTC_VIDEO_CODEC_OK);
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ bitrate_allocation.SetBitrate(
+ sl_idx, tl_idx,
+ codec_settings.spatialLayers[sl_idx].targetBitrate * 1000 * 2);
+ }
+ }
+ encoder->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings.maxFramerate));
+
+ EncodedVideoFrameProducer producer(*encoder);
+ producer.SetResolution({kWidth, kHeight});
+
+ // Baseline: all layers active, one frame per layer per input frame.
+ std::vector<EncodedVideoFrameProducer::EncodedFrame> frames =
+ producer.SetNumInputFrames(num_frames_to_encode).Encode();
+ ASSERT_THAT(frames, SizeIs(num_frames_to_encode * num_spatial_layers));
+
+ // Disable all but top spatial layer.
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) {
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ bitrate_allocation.SetBitrate(sl_idx, tl_idx, 0);
+ }
+ }
+ encoder->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings.maxFramerate));
+
+ frames = producer.SetNumInputFrames(num_frames_to_encode).Encode();
+ EXPECT_THAT(frames, SizeIs(num_frames_to_encode));
+ for (const auto& frame : frames) {
+ // Expect no key-frames generated.
+ EXPECT_FALSE(frame.codec_specific_info.template_structure);
+ ASSERT_TRUE(frame.codec_specific_info.generic_frame_info);
+ EXPECT_EQ(frame.codec_specific_info.generic_frame_info->spatial_id, 2);
+ }
+
+ frames = producer.ForceKeyFrame().SetNumInputFrames(1).Encode();
+ ASSERT_THAT(frames, SizeIs(1));
+ // Key-frame should be produced.
+ EXPECT_EQ(frames[0].encoded_image._frameType, VideoFrameType::kVideoFrameKey);
+ ASSERT_TRUE(frames[0].codec_specific_info.template_structure);
+ ASSERT_TRUE(frames[0].codec_specific_info.generic_frame_info);
+ EXPECT_EQ(frames[0].codec_specific_info.generic_frame_info->spatial_id, 2);
+
+ frames = producer.SetNumInputFrames(num_frames_to_encode).Encode();
+ ASSERT_THAT(frames, SizeIs(num_frames_to_encode));
+ for (const auto& frame : frames) {
+ EXPECT_EQ(frame.encoded_image._frameType, VideoFrameType::kVideoFrameDelta);
+ EXPECT_FALSE(frame.codec_specific_info.template_structure);
+ ASSERT_TRUE(frame.codec_specific_info.generic_frame_info);
+ EXPECT_EQ(frame.codec_specific_info.generic_frame_info->spatial_id, 2);
+ }
+
+ // Enable the second layer back.
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ // NOTE(review): layer 1 intentionally reuses layer-0's target bitrate here
+ // (and layer 0 below reuses layer-1's) — any high value suffices to avoid
+ // drops; verify against upstream if this is meaningful.
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ bitrate_allocation.SetBitrate(
+ 1, tl_idx, codec_settings.spatialLayers[0].targetBitrate * 1000 * 2);
+ }
+ encoder->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings.maxFramerate));
+
+ frames = producer.SetNumInputFrames(num_frames_to_encode).Encode();
+ ASSERT_THAT(frames, SizeIs(num_frames_to_encode * 2));
+ // Re-enabling a lower layer must trigger a key frame + new template.
+ EXPECT_EQ(frames[0].encoded_image._frameType, VideoFrameType::kVideoFrameKey);
+ EXPECT_TRUE(frames[0].codec_specific_info.template_structure);
+ ASSERT_TRUE(frames[0].codec_specific_info.generic_frame_info);
+ EXPECT_EQ(frames[0].codec_specific_info.generic_frame_info->spatial_id, 1);
+ for (size_t i = 1; i < frames.size(); ++i) {
+ EXPECT_EQ(frames[i].encoded_image._frameType,
+ VideoFrameType::kVideoFrameDelta);
+ EXPECT_FALSE(frames[i].codec_specific_info.template_structure);
+ ASSERT_TRUE(frames[i].codec_specific_info.generic_frame_info);
+ // Frames alternate between spatial layers 1 and 2.
+ EXPECT_EQ(frames[i].codec_specific_info.generic_frame_info->spatial_id,
+ 1 + static_cast<int>(i % 2));
+ }
+
+ // Enable the first layer back.
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ bitrate_allocation.SetBitrate(
+ 0, tl_idx, codec_settings.spatialLayers[1].targetBitrate * 1000 * 2);
+ }
+ encoder->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings.maxFramerate));
+
+ frames = producer.SetNumInputFrames(num_frames_to_encode).Encode();
+ ASSERT_THAT(frames, SizeIs(num_frames_to_encode * 3));
+ EXPECT_TRUE(frames[0].codec_specific_info.template_structure);
+ ASSERT_TRUE(frames[0].codec_specific_info.generic_frame_info);
+ EXPECT_EQ(frames[0].codec_specific_info.generic_frame_info->spatial_id, 0);
+ for (size_t i = 1; i < frames.size(); ++i) {
+ EXPECT_FALSE(frames[i].codec_specific_info.template_structure);
+ ASSERT_TRUE(frames[i].codec_specific_info.generic_frame_info);
+ // Frames cycle through spatial layers 0, 1, 2.
+ EXPECT_EQ(frames[i].codec_specific_info.generic_frame_info->spatial_id,
+ static_cast<int>(i % 3));
+ }
+}
+
+// Screenshare variant (flexible mode, kOn inter-layer prediction, single
+// temporal layer) of the disable/re-enable scenario: disabling lower spatial
+// layers produces no key frame, while re-enabling a lower layer does, with SS
+// data re-sent on the first frame after each change.
+TEST_F(TestVp9Impl, DisableEnableBaseLayerTriggersKeyFrameForScreenshare) {
+ // Configure encoder to produce N spatial layers. Encode frames for all
+ // layers. Then disable all but the last layer. Then reenable all back again.
+ const size_t num_spatial_layers = 3;
+ const size_t num_frames_to_encode = 5;
+
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.mode = VideoCodecMode::kScreensharing;
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ codec_settings_.VP9()->flexibleMode = true;
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0,
+ codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 * 2);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Baseline: all layers active; SS data only on the very first frame.
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ }
+
+ // Disable all but top layer.
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) {
+ bitrate_allocation.SetBitrate(sl_idx, 0, 0);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ // SS available immediately after switching off.
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ // No key-frames generated for disabling layers.
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+ EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2);
+ }
+
+ // Force key-frame.
+ std::vector<VideoFrameType> frame_types = {VideoFrameType::kVideoFrameKey};
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), &frame_types));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ // Key-frame should be produced.
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameKey);
+
+ // Enable the second layer back.
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ // NOTE(review): reuses layer-0's target bitrate for layer 1 (and layer-1's
+ // for layer 0 below) — any high value avoids drops; verify upstream intent.
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000 * 2);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(encoded_frame.size(), 2u);
+ // SS available immediately after switching on.
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ // Keyframe should be generated when enabling lower layers.
+ const VideoFrameType expected_type = frame_num == 0
+ ? VideoFrameType::kVideoFrameKey
+ : VideoFrameType::kVideoFrameDelta;
+ EXPECT_EQ(encoded_frame[0]._frameType, expected_type);
+ EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 1);
+ EXPECT_EQ(encoded_frame[1].SpatialIndex().value_or(-1), 2);
+ }
+
+ // Enable the first layer back.
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ bitrate_allocation.SetBitrate(
+ 0, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 * 2);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(encoded_frame.size(), 3u);
+ // SS available immediately after switching on.
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+ frame_num == 0);
+ // Keyframe should be generated when enabling lower layers.
+ const VideoFrameType expected_type = frame_num == 0
+ ? VideoFrameType::kVideoFrameKey
+ : VideoFrameType::kVideoFrameDelta;
+ EXPECT_EQ(encoded_frame[0]._frameType, expected_type);
+ }
+}
+
+// Verifies the end_of_picture flag: with two active spatial layers it is set
+// only on the topmost layer's frame of each superframe; with a single active
+// layer it is set on that (base) layer's frame.
+TEST_F(TestVp9Impl, EndOfPicture) {
+ const size_t num_spatial_layers = 2;
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // Encode both base and upper layers. Check that end-of-superframe flag is
+ // set on upper layer frame but not on base layer frame.
+ VideoBitrateAllocation bitrate_allocation;
+ bitrate_allocation.SetBitrate(
+ 0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000);
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+
+ std::vector<EncodedImage> frames;
+ std::vector<CodecSpecificInfo> codec_specific;
+ ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
+ EXPECT_FALSE(codec_specific[0].end_of_picture);
+ EXPECT_TRUE(codec_specific[1].end_of_picture);
+
+ // Encode only base layer. Check that end-of-superframe flag is
+ // set on base layer frame.
+ bitrate_allocation.SetBitrate(1, 0, 0);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+ // Re-init so the encoder starts fresh with only the base layer funded.
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+
+ ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
+ // With one active layer no spatial index is reported.
+ EXPECT_FALSE(frames[0].SpatialIndex());
+ EXPECT_TRUE(codec_specific[0].end_of_picture);
+}
+
+// Exercises all three InterLayerPredMode settings (kOff, kOn, kOnKeyPic) with
+// two spatial layers and checks, for the key frame and the first delta frame,
+// the VP9 codec-specific flags: inter_pic_predicted, inter_layer_predicted,
+// non_ref_for_inter_layer_pred and ss_data_available.
+TEST_F(TestVp9Impl, InterLayerPred) {
+ const size_t num_spatial_layers = 2;
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+ codec_settings_.SetFrameDropEnabled(false);
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t i = 0; i < num_spatial_layers; ++i) {
+ bitrate_allocation.SetBitrate(
+ i, 0, codec_settings_.spatialLayers[i].targetBitrate * 1000);
+ }
+
+ const std::vector<InterLayerPredMode> inter_layer_pred_modes = {
+ InterLayerPredMode::kOff, InterLayerPredMode::kOn,
+ InterLayerPredMode::kOnKeyPic};
+
+ // The encoder is re-initialized for each prediction mode.
+ for (const InterLayerPredMode inter_layer_pred : inter_layer_pred_modes) {
+ codec_settings_.VP9()->interLayerPred = inter_layer_pred;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+
+ std::vector<EncodedImage> frames;
+ std::vector<CodecSpecificInfo> codec_specific;
+ ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
+
+ // Key frame.
+ ASSERT_EQ(frames[0].SpatialIndex(), 0);
+ ASSERT_FALSE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
+ EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_layer_predicted);
+ // Base layer is a non-reference for inter-layer pred only when prediction
+ // between layers is fully disabled.
+ EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
+ inter_layer_pred == InterLayerPredMode::kOff);
+ EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.ss_data_available);
+
+ ASSERT_EQ(frames[1].SpatialIndex(), 1);
+ ASSERT_FALSE(codec_specific[1].codecSpecific.VP9.inter_pic_predicted);
+ // On the key picture, both kOn and kOnKeyPic predict from the layer below.
+ EXPECT_EQ(codec_specific[1].codecSpecific.VP9.inter_layer_predicted,
+ inter_layer_pred == InterLayerPredMode::kOn ||
+ inter_layer_pred == InterLayerPredMode::kOnKeyPic);
+ EXPECT_EQ(codec_specific[1].codecSpecific.VP9.ss_data_available,
+ inter_layer_pred == InterLayerPredMode::kOff);
+ EXPECT_TRUE(
+ codec_specific[1].codecSpecific.VP9.non_ref_for_inter_layer_pred);
+
+ // Delta frame.
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
+
+ ASSERT_EQ(frames[0].SpatialIndex(), 0);
+ ASSERT_TRUE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
+ EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_layer_predicted);
+ // Only kOn keeps predicting between layers on delta pictures.
+ EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
+ inter_layer_pred != InterLayerPredMode::kOn);
+ EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.ss_data_available);
+
+ ASSERT_EQ(frames[1].SpatialIndex(), 1);
+ ASSERT_TRUE(codec_specific[1].codecSpecific.VP9.inter_pic_predicted);
+ EXPECT_EQ(codec_specific[1].codecSpecific.VP9.inter_layer_predicted,
+ inter_layer_pred == InterLayerPredMode::kOn);
+ EXPECT_TRUE(
+ codec_specific[1].codecSpecific.VP9.non_ref_for_inter_layer_pred);
+ EXPECT_FALSE(codec_specific[1].codecSpecific.VP9.ss_data_available);
+ }
+}
+
+// Verifies that turning on an upper spatial layer produces a key frame when
+// inter-layer prediction is kOff or kOnKeyPic (the new layer cannot be
+// bootstrapped from the layer below), and only a delta frame when it is kOn.
+TEST_F(TestVp9Impl,
+ EnablingUpperLayerTriggersKeyFrameIfInterLayerPredIsDisabled) {
+ const size_t num_spatial_layers = 3;
+ const size_t num_frames_to_encode = 2;
+
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+ codec_settings_.SetFrameDropEnabled(false);
+
+ const std::vector<InterLayerPredMode> inter_layer_pred_modes = {
+ InterLayerPredMode::kOff, InterLayerPredMode::kOn,
+ InterLayerPredMode::kOnKeyPic};
+
+ for (const InterLayerPredMode inter_layer_pred : inter_layer_pred_modes) {
+ codec_settings_.VP9()->interLayerPred = inter_layer_pred;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoBitrateAllocation bitrate_allocation;
+ // Layers are activated one at a time, bottom-up.
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0,
+ codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode;
+ ++frame_num) {
+ SetWaitForEncodedFramesThreshold(sl_idx + 1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+
+ const bool is_first_upper_layer_frame = (sl_idx > 0 && frame_num == 0);
+ if (is_first_upper_layer_frame) {
+ // With kOn the new layer can be predicted from the layer below, so
+ // no key frame is required; kOff/kOnKeyPic need a key frame.
+ if (inter_layer_pred == InterLayerPredMode::kOn) {
+ EXPECT_EQ(encoded_frame[0]._frameType,
+ VideoFrameType::kVideoFrameDelta);
+ } else {
+ EXPECT_EQ(encoded_frame[0]._frameType,
+ VideoFrameType::kVideoFrameKey);
+ }
+ } else if (sl_idx == 0 && frame_num == 0) {
+ // Very first frame of the stream is always a key frame.
+ EXPECT_EQ(encoded_frame[0]._frameType,
+ VideoFrameType::kVideoFrameKey);
+ } else {
+ // Steady state: all active layers emit delta frames.
+ for (size_t i = 0; i <= sl_idx; ++i) {
+ EXPECT_EQ(encoded_frame[i]._frameType,
+ VideoFrameType::kVideoFrameDelta);
+ }
+ }
+ }
+ }
+ }
+}
+
+// Verifies that the first frame of a newly enabled upper layer reports
+// inter_pic_predicted == false when inter-layer prediction is kOn (it is
+// bootstrapped from the layer below, not from a previous picture), while all
+// other non-key frames keep inter-picture references.
+TEST_F(TestVp9Impl,
+ EnablingUpperLayerUnsetsInterPicPredictedInInterlayerPredModeOn) {
+ const size_t num_spatial_layers = 3;
+ const size_t num_frames_to_encode = 2;
+
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.VP9()->flexibleMode = false;
+
+ const std::vector<InterLayerPredMode> inter_layer_pred_modes = {
+ InterLayerPredMode::kOff, InterLayerPredMode::kOn,
+ InterLayerPredMode::kOnKeyPic};
+
+ for (const InterLayerPredMode inter_layer_pred : inter_layer_pred_modes) {
+ codec_settings_.VP9()->interLayerPred = inter_layer_pred;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoBitrateAllocation bitrate_allocation;
+ // Activate layers one at a time, bottom-up.
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0,
+ codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode;
+ ++frame_num) {
+ SetWaitForEncodedFramesThreshold(sl_idx + 1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+
+ ASSERT_EQ(codec_specific_info.size(), sl_idx + 1);
+
+ for (size_t i = 0; i <= sl_idx; ++i) {
+ const bool is_keyframe =
+ encoded_frame[0]._frameType == VideoFrameType::kVideoFrameKey;
+ const bool is_first_upper_layer_frame =
+ (i == sl_idx && frame_num == 0);
+ // Interframe references are there, unless it's a keyframe,
+ // or it's a first activated frame in a upper layer
+ const bool expect_no_references =
+ is_keyframe || (is_first_upper_layer_frame &&
+ inter_layer_pred == InterLayerPredMode::kOn);
+ EXPECT_EQ(
+ codec_specific_info[i].codecSpecific.VP9.inter_pic_predicted,
+ !expect_no_references);
+ }
+ }
+ }
+ }
+}
+
+// Disables and then re-enables the upper spatial layer (SL1) within the same
+// group-of-frames (GoF). Expects the re-enabled layer frame to still carry
+// inter-picture references (inter_pic_predicted == true for both layers).
+TEST_F(TestVp9Impl, EnablingDisablingUpperLayerInTheSameGof) {
+ const size_t num_spatial_layers = 2;
+ const size_t num_temporal_layers = 2;
+
+ ConfigureSvc(codec_settings_, num_spatial_layers, num_temporal_layers);
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.VP9()->flexibleMode = false;
+
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoBitrateAllocation bitrate_allocation;
+
+ // Enable both spatial and both temporal layers.
+ bitrate_allocation.SetBitrate(
+ 0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 0, 1, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+
+ // Encode 3 frames.
+ for (int i = 0; i < 3; ++i) {
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 2u);
+ }
+
+ // Disable SL1 layer.
+ bitrate_allocation.SetBitrate(1, 0, 0);
+ bitrate_allocation.SetBitrate(1, 1, 0);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Encode 1 frame. Only the base layer is produced; it is a TL1 delta frame
+ // with inter-picture prediction intact.
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 1u);
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 1);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted, true);
+
+ // Enable SL1 layer.
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Encode 1 frame. Re-enabled within the same GoF: both layers are expected
+ // to keep inter-picture references.
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 2u);
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted, true);
+ EXPECT_EQ(codec_specific_info[1].codecSpecific.VP9.inter_pic_predicted, true);
+}
+
+// Disables SL1 for longer than one GoF, then re-enables it. Unlike the
+// same-GoF case above, the re-enabled upper layer frame is expected to have
+// inter_pic_predicted == false (no usable temporal reference survives).
+// NOTE(review): "Accross" in the test name is a typo for "Across"; left as-is
+// to avoid renaming an upstream test.
+TEST_F(TestVp9Impl, EnablingDisablingUpperLayerAccrossGof) {
+ const size_t num_spatial_layers = 2;
+ const size_t num_temporal_layers = 2;
+
+ ConfigureSvc(codec_settings_, num_spatial_layers, num_temporal_layers);
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.VP9()->flexibleMode = false;
+
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoBitrateAllocation bitrate_allocation;
+
+ // Enable both spatial and both temporal layers.
+ bitrate_allocation.SetBitrate(
+ 0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 0, 1, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ std::vector<EncodedImage> encoded_frame;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+
+ // Encode 3 frames.
+ for (int i = 0; i < 3; ++i) {
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 2u);
+ }
+
+ // Disable SL1 layer.
+ bitrate_allocation.SetBitrate(1, 0, 0);
+ bitrate_allocation.SetBitrate(1, 1, 0);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Encode 11 frames. More than Gof length 2, and odd to end at TL1 frame.
+ for (int i = 0; i < 11; ++i) {
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 1u);
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 1 - i % 2);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted,
+ true);
+ }
+
+ // Enable SL1 layer.
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ bitrate_allocation.SetBitrate(
+ 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Encode 1 frame. SL1 comes back after a full GoF has elapsed, so its first
+ // frame must not claim inter-picture references.
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+ ASSERT_EQ(codec_specific_info.size(), 2u);
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted, true);
+ EXPECT_EQ(codec_specific_info[1].codecSpecific.VP9.inter_pic_predicted,
+ false);
+}
+
+// In screenshare mode, enabling a previously disabled top layer must force all
+// spatial layers to be encoded on the next frame (overriding per-layer
+// framerate-cap drops) and must trigger the scalability structure (SS).
+TEST_F(TestVp9Impl, EnablingNewLayerInScreenshareForcesAllLayersWithSS) {
+ const size_t num_spatial_layers = 3;
+ // Chosen by hand, the 2nd frame is dropped with configured per-layer max
+ // framerate.
+ const size_t num_frames_to_encode_before_drop = 1;
+
+ codec_settings_.maxFramerate = 30;
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+ codec_settings_.spatialLayers[0].maxFramerate = 5.0;
+ // use 30 for the SL 1 instead of 10, so even if SL 0 frame is dropped due to
+ // framerate capping we would still get back at least a middle layer. It
+ // simplifies the test.
+ codec_settings_.spatialLayers[1].maxFramerate = 30.0;
+ codec_settings_.spatialLayers[2].maxFramerate = 30.0;
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.mode = VideoCodecMode::kScreensharing;
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ codec_settings_.VP9()->flexibleMode = true;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // Enable all but the last layer.
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) {
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Encode enough frames to force drop due to framerate capping.
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode_before_drop;
+ ++frame_num) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ }
+
+ // Enable the last layer.
+ bitrate_allocation.SetBitrate(
+ num_spatial_layers - 1, 0,
+ codec_settings_.spatialLayers[num_spatial_layers - 1].targetBitrate *
+ 1000);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // All layers are encoded, even though frame dropping should happen.
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ // Now all 3 layers should be encoded.
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ EXPECT_EQ(encoded_frames.size(), 3u);
+ // Scalability structure has to be triggered.
+ EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
+}
+
+// Verifies rate-control frame dropping in screenshare mode: SL0 is starved so
+// it gets dropped, enabling a new layer temporarily suppresses dropping for
+// one frame, and dropping resumes afterwards.
+TEST_F(TestVp9Impl, ScreenshareFrameDropping) {
+ const int num_spatial_layers = 3;
+ const int num_frames_to_detect_drops = 2;
+
+ codec_settings_.maxFramerate = 30;
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+ // Use 30 fps for all spatial layers because it simplifies the test.
+ codec_settings_.spatialLayers[0].maxFramerate = 30.0;
+ codec_settings_.spatialLayers[1].maxFramerate = 30.0;
+ codec_settings_.spatialLayers[2].maxFramerate = 30.0;
+ codec_settings_.SetFrameDropEnabled(true);
+ codec_settings_.mode = VideoCodecMode::kScreensharing;
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ codec_settings_.VP9()->flexibleMode = true;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // Enable all but the last layer.
+ VideoBitrateAllocation bitrate_allocation;
+ // Very low bitrate for the lowest spatial layer to ensure rate-control drops.
+ bitrate_allocation.SetBitrate(0, 0, 1000);
+ bitrate_allocation.SetBitrate(
+ 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000);
+ // Disable highest layer.
+ bitrate_allocation.SetBitrate(2, 0, 0);
+
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ bool frame_dropped = false;
+ // Encode enough frames to force drop due to rate-control.
+ // NOTE(review): size_t loop index compared against an int constant; harmless
+ // here since the constant is a small positive value.
+ for (size_t frame_num = 0; frame_num < num_frames_to_detect_drops;
+ ++frame_num) {
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ EXPECT_LE(encoded_frames.size(), 2u);
+ EXPECT_GE(encoded_frames.size(), 1u);
+ if (encoded_frames.size() == 1) {
+ frame_dropped = true;
+ // Dropped frame is on the SL0: the only remaining frame is SL1.
+ EXPECT_EQ(encoded_frames[0].SpatialIndex(), 1);
+ }
+ }
+ EXPECT_TRUE(frame_dropped);
+
+ // Enable the last layer.
+ bitrate_allocation.SetBitrate(
+ 2, 0, codec_settings_.spatialLayers[2].targetBitrate * 1000);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ // No drop allowed.
+ EXPECT_EQ(encoded_frames.size(), 3u);
+
+ // Verify that frame-dropping is re-enabled back.
+ frame_dropped = false;
+ // Encode enough frames to force drop due to rate-control.
+ for (size_t frame_num = 0; frame_num < num_frames_to_detect_drops;
+ ++frame_num) {
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ EXPECT_LE(encoded_frames.size(), 3u);
+ EXPECT_GE(encoded_frames.size(), 2u);
+ if (encoded_frames.size() == 2) {
+ frame_dropped = true;
+ // Dropped frame is on the SL0: remaining frames are SL1 and SL2.
+ EXPECT_EQ(encoded_frames[0].SpatialIndex(), 1);
+ EXPECT_EQ(encoded_frames[1].SpatialIndex(), 2);
+ }
+ }
+ EXPECT_TRUE(frame_dropped);
+}
+
+// Verifies that in screenshare mode disabling the top spatial layer takes
+// effect immediately, and that the scalability structure (SS) is emitted on
+// the next frame that includes the base layer (not on upper-layer-only
+// frames).
+TEST_F(TestVp9Impl, RemovingLayerIsNotDelayedInScreenshareAndAddsSsInfo) {
+ const size_t num_spatial_layers = 3;
+ // Chosen by hand, the 2nd frame is dropped with configured per-layer max
+ // framerate.
+ const size_t num_frames_to_encode_before_drop = 1;
+ // Chosen by hand, exactly 5 frames are dropped for input fps=30 and max
+ // framerate = 5.
+ const size_t num_dropped_frames = 5;
+
+ codec_settings_.maxFramerate = 30;
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+ codec_settings_.spatialLayers[0].maxFramerate = 5.0;
+ // use 30 for the SL 1 instead of 5, so even if SL 0 frame is dropped due to
+ // framerate capping we would still get back at least a middle layer. It
+ // simplifies the test.
+ codec_settings_.spatialLayers[1].maxFramerate = 30.0;
+ codec_settings_.spatialLayers[2].maxFramerate = 30.0;
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.mode = VideoCodecMode::kScreensharing;
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ codec_settings_.VP9()->flexibleMode = true;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // All layers are enabled from the start.
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Encode enough frames to force drop due to framerate capping.
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode_before_drop;
+ ++frame_num) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ }
+
+ // Now the first layer should not have frames in it.
+ for (size_t frame_num = 0; frame_num < num_dropped_frames - 2; ++frame_num) {
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ // First layer is dropped due to frame rate cap. The last layer should not
+ // be enabled yet.
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ // First layer is skipped.
+ EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 1);
+ }
+
+ // Disable the last layer.
+ bitrate_allocation.SetBitrate(num_spatial_layers - 1, 0, 0);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Still expected to drop the first layer. The last layer has to be disabled
+ // immediately as well.
+ for (size_t frame_num = num_dropped_frames - 2;
+ frame_num < num_dropped_frames; ++frame_num) {
+ // Expect back one frame.
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ // First layer is dropped due to frame rate cap. The last layer should not
+ // be enabled yet.
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ // First layer is skipped.
+ EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 1);
+ // No SS data on non-base spatial layer.
+ EXPECT_FALSE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
+ }
+
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ // First layer is not skipped now.
+ EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 0);
+ // SS data should be present.
+ EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
+}
+
+// In realtime-video mode with temporal layers, disabling the top spatial
+// layer takes effect immediately, but the updated scalability structure (SS)
+// is only sent on the next TL0 frame, not on an intervening TL1 frame.
+TEST_F(TestVp9Impl, DisableNewLayerInVideoDelaysSsInfoTillTL0) {
+ const size_t num_spatial_layers = 3;
+ const size_t num_temporal_layers = 2;
+ // Chosen by hand, the 2nd frame is dropped with configured per-layer max
+ // framerate.
+ ConfigureSvc(codec_settings_, num_spatial_layers, num_temporal_layers);
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.mode = VideoCodecMode::kRealtimeVideo;
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOnKeyPic;
+ codec_settings_.VP9()->flexibleMode = false;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // Enable all the layers.
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ bitrate_allocation.SetBitrate(
+ sl_idx, tl_idx,
+ codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 /
+ num_temporal_layers);
+ }
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific_info;
+
+ // Encode one TL0 frame
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0u);
+
+ // Disable the last layer.
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+ bitrate_allocation.SetBitrate(num_spatial_layers - 1, tl_idx, 0);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ // Next is TL1 frame. The last layer is disabled immediately, but SS structure
+ // is not provided here.
+ SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 1u);
+ EXPECT_FALSE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
+
+ // Next is TL0 frame, which should have delayed SS structure.
+ SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0u);
+ EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
+ EXPECT_TRUE(codec_specific_info[0]
+ .codecSpecific.VP9.spatial_layer_resolution_present);
+ EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.num_spatial_layers,
+ num_spatial_layers - 1);
+}
+
+// With inter-layer prediction enabled and only the base layer active, the
+// base layer must still be marked as a reference for inter-layer prediction
+// (non_ref_for_inter_layer_pred == false), since upper layers may be enabled
+// later.
+TEST_F(TestVp9Impl,
+ LowLayerMarkedAsRefIfHighLayerNotEncodedAndInterLayerPredIsEnabled) {
+ ConfigureSvc(codec_settings_, 3);
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // Only the base spatial layer gets a non-zero bitrate.
+ VideoBitrateAllocation bitrate_allocation;
+ bitrate_allocation.SetBitrate(
+ 0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ EncodedImage encoded_frame;
+ CodecSpecificInfo codec_info;
+ ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_info));
+ EXPECT_TRUE(codec_info.codecSpecific.VP9.ss_data_available);
+ EXPECT_FALSE(codec_info.codecSpecific.VP9.non_ref_for_inter_layer_pred);
+}
+
+// The scalability structure (SS) must be reported for the first encoded frame
+// even when flexible mode is enabled.
+TEST_F(TestVp9Impl, ScalabilityStructureIsAvailableInFlexibleMode) {
+ codec_settings_.VP9()->flexibleMode = true;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ EncodedImage encoded_frame;
+ CodecSpecificInfo codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+ EXPECT_TRUE(codec_specific_info.codecSpecific.VP9.ss_data_available);
+}
+
+// Profile-0 encoder must advertise NV12 and I420 as preferred input pixel
+// formats (order not significant).
+TEST_F(TestVp9Impl, Profile0PreferredPixelFormats) {
+ EXPECT_THAT(encoder_->GetEncoderInfo().preferred_pixel_formats,
+ testing::UnorderedElementsAre(VideoFrameBuffer::Type::kNV12,
+ VideoFrameBuffer::Type::kI420));
+}
+
+// Without the field trial override, no per-resolution bitrate limits are
+// exposed in EncoderInfo.
+TEST_F(TestVp9Impl, EncoderInfoWithoutResolutionBitrateLimits) {
+ EXPECT_TRUE(encoder_->GetEncoderInfo().resolution_bitrate_limits.empty());
+}
+
+// The WebRTC-VP9-GetEncoderInfoOverride field trial populates
+// resolution_bitrate_limits; each '|'-separated column forms one
+// {pixels, min_start, min, max} entry, in order.
+TEST_F(TestVp9Impl, EncoderInfoWithBitrateLimitsFromFieldTrial) {
+ test::ScopedFieldTrials field_trials(
+ "WebRTC-VP9-GetEncoderInfoOverride/"
+ "frame_size_pixels:123|456|789,"
+ "min_start_bitrate_bps:11000|22000|33000,"
+ "min_bitrate_bps:44000|55000|66000,"
+ "max_bitrate_bps:77000|88000|99000/");
+ // Re-create the encoder so it picks up the scoped field trial.
+ SetUp();
+
+ EXPECT_THAT(
+ encoder_->GetEncoderInfo().resolution_bitrate_limits,
+ ::testing::ElementsAre(
+ VideoEncoder::ResolutionBitrateLimits{123, 11000, 44000, 77000},
+ VideoEncoder::ResolutionBitrateLimits{456, 22000, 55000, 88000},
+ VideoEncoder::ResolutionBitrateLimits{789, 33000, 66000, 99000}));
+}
+
+// With 3 temporal layers and equal per-layer max framerates, each spatial
+// layer reports the standard 1/4, 1/2, 1 framerate fractions.
+TEST_F(TestVp9Impl, EncoderInfoFpsAllocation) {
+ const uint8_t kNumSpatialLayers = 3;
+ const uint8_t kNumTemporalLayers = 3;
+
+ codec_settings_.maxFramerate = 30;
+ codec_settings_.VP9()->numberOfSpatialLayers = kNumSpatialLayers;
+ codec_settings_.VP9()->numberOfTemporalLayers = kNumTemporalLayers;
+
+ // All spatial layers share the same resolution, bitrate and framerate.
+ for (uint8_t sl_idx = 0; sl_idx < kNumSpatialLayers; ++sl_idx) {
+ codec_settings_.spatialLayers[sl_idx].width = codec_settings_.width;
+ codec_settings_.spatialLayers[sl_idx].height = codec_settings_.height;
+ codec_settings_.spatialLayers[sl_idx].minBitrate =
+ codec_settings_.startBitrate;
+ codec_settings_.spatialLayers[sl_idx].maxBitrate =
+ codec_settings_.startBitrate;
+ codec_settings_.spatialLayers[sl_idx].targetBitrate =
+ codec_settings_.startBitrate;
+ codec_settings_.spatialLayers[sl_idx].active = true;
+ codec_settings_.spatialLayers[sl_idx].maxFramerate =
+ codec_settings_.maxFramerate;
+ }
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ FramerateFractions expected_fps_allocation[kMaxSpatialLayers];
+ expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction / 4);
+ expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction / 2);
+ expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction);
+ expected_fps_allocation[1] = expected_fps_allocation[0];
+ expected_fps_allocation[2] = expected_fps_allocation[0];
+ EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation,
+ ElementsAreArray(expected_fps_allocation));
+}
+
+// In flexible mode with differing per-spatial-layer max framerates, fps
+// allocation reports one fraction per spatial layer (no temporal layering),
+// and SetRates must not change the reported allocation.
+TEST_F(TestVp9Impl, EncoderInfoFpsAllocationFlexibleMode) {
+ const uint8_t kNumSpatialLayers = 3;
+
+ codec_settings_.maxFramerate = 30;
+ codec_settings_.VP9()->numberOfSpatialLayers = kNumSpatialLayers;
+ codec_settings_.VP9()->numberOfTemporalLayers = 1;
+ codec_settings_.VP9()->flexibleMode = true;
+
+ VideoEncoder::RateControlParameters rate_params;
+ for (uint8_t sl_idx = 0; sl_idx < kNumSpatialLayers; ++sl_idx) {
+ codec_settings_.spatialLayers[sl_idx].width = codec_settings_.width;
+ codec_settings_.spatialLayers[sl_idx].height = codec_settings_.height;
+ codec_settings_.spatialLayers[sl_idx].minBitrate =
+ codec_settings_.startBitrate;
+ codec_settings_.spatialLayers[sl_idx].maxBitrate =
+ codec_settings_.startBitrate;
+ codec_settings_.spatialLayers[sl_idx].targetBitrate =
+ codec_settings_.startBitrate;
+ codec_settings_.spatialLayers[sl_idx].active = true;
+ // Force different frame rates for different layers, to verify that total
+ // fraction is correct.
+ codec_settings_.spatialLayers[sl_idx].maxFramerate =
+ codec_settings_.maxFramerate / (kNumSpatialLayers - sl_idx);
+ rate_params.bitrate.SetBitrate(sl_idx, 0,
+ codec_settings_.startBitrate * 1000);
+ }
+ rate_params.bandwidth_allocation =
+ DataRate::BitsPerSec(rate_params.bitrate.get_sum_bps());
+ rate_params.framerate_fps = codec_settings_.maxFramerate;
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // No temporal layers allowed when spatial layers have different fps targets.
+ FramerateFractions expected_fps_allocation[kMaxSpatialLayers];
+ expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction / 3);
+ expected_fps_allocation[1].push_back(EncoderInfo::kMaxFramerateFraction / 2);
+ expected_fps_allocation[2].push_back(EncoderInfo::kMaxFramerateFraction);
+ EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation,
+ ::testing::ElementsAreArray(expected_fps_allocation));
+
+ // SetRates with current fps does not alter outcome.
+ encoder_->SetRates(rate_params);
+ EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation,
+ ::testing::ElementsAreArray(expected_fps_allocation));
+
+ // Higher fps than the codec wants, should still not affect outcome.
+ rate_params.framerate_fps *= 2;
+ encoder_->SetRates(rate_params);
+ EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation,
+ ::testing::ElementsAreArray(expected_fps_allocation));
+}
+
+// Parameterized fixture over (num_spatial_layers, num_temporal_layers,
+// use_external_ref_ctrl). The third parameter toggles the
+// WebRTC-Vp9ExternalRefCtrl field trial for the lifetime of the test.
+class Vp9ImplWithLayeringTest
+ : public ::testing::TestWithParam<std::tuple<int, int, bool>> {
+ protected:
+ Vp9ImplWithLayeringTest()
+ : num_spatial_layers_(std::get<0>(GetParam())),
+ num_temporal_layers_(std::get<1>(GetParam())),
+ override_field_trials_(std::get<2>(GetParam())
+ ? "WebRTC-Vp9ExternalRefCtrl/Enabled/"
+ : "") {}
+
+ // Layer counts under test (narrowed from int test parameters).
+ const uint8_t num_spatial_layers_;
+ const uint8_t num_temporal_layers_;
+ // Keeps the field-trial override active for the whole test body.
+ const test::ScopedFieldTrials override_field_trials_;
+};
+
+TEST_P(Vp9ImplWithLayeringTest, FlexibleMode) {
+ // In flexible mode encoder wrapper obtains actual list of references from
+ // encoder and writes it into RTP payload descriptor. Check that reference
+ // list in payload descriptor matches the predefined one, which is used
+ // in non-flexible mode.
+ std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create();
+ VideoCodec codec_settings = DefaultCodecSettings();
+ codec_settings.VP9()->flexibleMode = true;
+ codec_settings.SetFrameDropEnabled(false);
+ codec_settings.VP9()->numberOfSpatialLayers = num_spatial_layers_;
+ codec_settings.VP9()->numberOfTemporalLayers = num_temporal_layers_;
+ EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings),
+ WEBRTC_VIDEO_CODEC_OK);
+
+ // Build the expected (non-flexible) GoF structure matching the configured
+ // number of temporal layers.
+ GofInfoVP9 gof;
+ if (num_temporal_layers_ == 1) {
+ gof.SetGofInfoVP9(kTemporalStructureMode1);
+ } else if (num_temporal_layers_ == 2) {
+ gof.SetGofInfoVP9(kTemporalStructureMode2);
+ } else if (num_temporal_layers_ == 3) {
+ gof.SetGofInfoVP9(kTemporalStructureMode3);
+ }
+
+ // Encode at least (num_frames_in_gof + 1) frames to verify references
+ // of non-key frame with gof_idx = 0.
+ int num_input_frames = gof.num_frames_in_gof + 1;
+ std::vector<EncodedVideoFrameProducer::EncodedFrame> frames =
+ EncodedVideoFrameProducer(*encoder)
+ .SetNumInputFrames(num_input_frames)
+ .SetResolution({kWidth, kHeight})
+ .Encode();
+ ASSERT_THAT(frames, SizeIs(num_input_frames * num_spatial_layers_));
+
+ // Frames arrive interleaved per spatial layer: i % num_spatial_layers_ is
+ // the spatial index, i / num_spatial_layers_ the picture index.
+ for (size_t i = 0; i < frames.size(); ++i) {
+ const EncodedVideoFrameProducer::EncodedFrame& frame = frames[i];
+ const size_t picture_idx = i / num_spatial_layers_;
+ const size_t gof_idx = picture_idx % gof.num_frames_in_gof;
+
+ const CodecSpecificInfoVP9& vp9 =
+ frame.codec_specific_info.codecSpecific.VP9;
+ EXPECT_EQ(frame.encoded_image.SpatialIndex(),
+ num_spatial_layers_ == 1
+ ? absl::nullopt
+ : absl::optional<int>(i % num_spatial_layers_))
+ << "Frame " << i;
+ EXPECT_EQ(vp9.temporal_idx, num_temporal_layers_ == 1
+ ? kNoTemporalIdx
+ : gof.temporal_idx[gof_idx])
+ << "Frame " << i;
+ EXPECT_EQ(vp9.temporal_up_switch, gof.temporal_up_switch[gof_idx])
+ << "Frame " << i;
+ if (picture_idx == 0) {
+ // Key picture: no inter-picture references expected.
+ EXPECT_EQ(vp9.num_ref_pics, 0) << "Frame " << i;
+ } else {
+ EXPECT_THAT(rtc::MakeArrayView(vp9.p_diff, vp9.num_ref_pics),
+ UnorderedElementsAreArray(gof.pid_diff[gof_idx],
+ gof.num_ref_pics[gof_idx]))
+ << "Frame " << i;
+ }
+ }
+}
+
+// Runs FlexibleMode over all combinations of 1-3 spatial layers, 1-3 temporal
+// layers, and external reference control on/off.
+INSTANTIATE_TEST_SUITE_P(All,
+ Vp9ImplWithLayeringTest,
+ ::testing::Combine(::testing::Values(1, 2, 3),
+ ::testing::Values(1, 2, 3),
+ ::testing::Bool()));
+
+// Fixture for frame-dropping tests: screenshare mode at a tiny resolution so
+// the many encodes these tests need stay fast.
+class TestVp9ImplFrameDropping : public TestVp9Impl {
+ protected:
+ void ModifyCodecSettings(VideoCodec* codec_settings) override {
+ webrtc::test::CodecSettings(kVideoCodecVP9, codec_settings);
+ // We need to encode quite a lot of frames in this test. Use low resolution
+ // to reduce execution time.
+ codec_settings->width = 64;
+ codec_settings->height = 64;
+ codec_settings->mode = VideoCodecMode::kScreensharing;
+ }
+};
+
+// Feeds 30 fps input to an encoder capped at 5 fps and checks the encoder
+// drops frames before encoding so that the output rate lands within 10% of
+// the cap.
+TEST_F(TestVp9ImplFrameDropping, PreEncodeFrameDropping) {
+ const size_t num_frames_to_encode = 100;
+ const float input_framerate_fps = 30.0;
+ const float video_duration_secs = num_frames_to_encode / input_framerate_fps;
+ const float expected_framerate_fps = 5.0f;
+ const float max_abs_framerate_error_fps = expected_framerate_fps * 0.1f;
+
+ codec_settings_.maxFramerate = static_cast<uint32_t>(expected_framerate_fps);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ // Reuse one input frame; only its RTP timestamp advances (90 kHz clock).
+ VideoFrame input_frame = NextInputFrame();
+ for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr));
+ const size_t timestamp = input_frame.timestamp() +
+ kVideoPayloadTypeFrequency / input_framerate_fps;
+ input_frame.set_timestamp(static_cast<uint32_t>(timestamp));
+ }
+
+ const size_t num_encoded_frames = GetNumEncodedFrames();
+ const float encoded_framerate_fps = num_encoded_frames / video_duration_secs;
+ EXPECT_NEAR(encoded_framerate_fps, expected_framerate_fps,
+ max_abs_framerate_error_fps);
+}
+
+TEST_F(TestVp9ImplFrameDropping, DifferentFrameratePerSpatialLayer) {
+ // Assign different frame rate to spatial layers and check that result frame
+ // rate is close to the assigned one.
+ const uint8_t num_spatial_layers = 3;
+ const float input_framerate_fps = 30.0;
+ const size_t video_duration_secs = 3;
+ const size_t num_input_frames = video_duration_secs * input_framerate_fps;
+
+ codec_settings_.VP9()->numberOfSpatialLayers = num_spatial_layers;
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.VP9()->flexibleMode = true;
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (uint8_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ // Frame rate increases from low to high layer.
+ const uint32_t framerate_fps = 10 * (sl_idx + 1);
+
+ codec_settings_.spatialLayers[sl_idx].width = codec_settings_.width;
+ codec_settings_.spatialLayers[sl_idx].height = codec_settings_.height;
+ codec_settings_.spatialLayers[sl_idx].maxFramerate = framerate_fps;
+ codec_settings_.spatialLayers[sl_idx].minBitrate =
+ codec_settings_.startBitrate;
+ codec_settings_.spatialLayers[sl_idx].maxBitrate =
+ codec_settings_.startBitrate;
+ codec_settings_.spatialLayers[sl_idx].targetBitrate =
+ codec_settings_.startBitrate;
+ codec_settings_.spatialLayers[sl_idx].active = true;
+
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
+ }
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ VideoFrame input_frame = NextInputFrame();
+ for (size_t frame_num = 0; frame_num < num_input_frames; ++frame_num) {
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr));
+ const size_t timestamp = input_frame.timestamp() +
+ kVideoPayloadTypeFrequency / input_framerate_fps;
+ input_frame.set_timestamp(static_cast<uint32_t>(timestamp));
+ }
+
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_infos;
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_infos));
+
+ std::vector<size_t> num_encoded_frames(num_spatial_layers, 0);
+ for (EncodedImage& encoded_frame : encoded_frames) {
+ ++num_encoded_frames[encoded_frame.SpatialIndex().value_or(0)];
+ }
+
+ for (uint8_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ const float layer_target_framerate_fps =
+ codec_settings_.spatialLayers[sl_idx].maxFramerate;
+ const float layer_output_framerate_fps =
+ static_cast<float>(num_encoded_frames[sl_idx]) / video_duration_secs;
+ const float max_framerate_error_fps = layer_target_framerate_fps * 0.1f;
+ EXPECT_NEAR(layer_output_framerate_fps, layer_target_framerate_fps,
+ max_framerate_error_fps);
+ }
+}
+
+class TestVp9ImplProfile2 : public TestVp9Impl {
+ protected:
+ void SetUp() override {
+ // Profile 2 might not be available on some platforms until
+ // https://bugs.chromium.org/p/webm/issues/detail?id=1544 is solved.
+ bool profile_2_is_supported = false;
+ for (const auto& codec : SupportedVP9Codecs()) {
+ if (ParseSdpForVP9Profile(codec.parameters)
+ .value_or(VP9Profile::kProfile0) == VP9Profile::kProfile2) {
+ profile_2_is_supported = true;
+ }
+ }
+ if (!profile_2_is_supported)
+ return;
+
+ TestVp9Impl::SetUp();
+ input_frame_generator_ = test::CreateSquareFrameGenerator(
+ codec_settings_.width, codec_settings_.height,
+ test::FrameGeneratorInterface::OutputType::kI010,
+ absl::optional<int>());
+ }
+
+ std::unique_ptr<VideoEncoder> CreateEncoder() override {
+ cricket::VideoCodec profile2_codec;
+ profile2_codec.SetParam(kVP9FmtpProfileId,
+ VP9ProfileToString(VP9Profile::kProfile2));
+ return VP9Encoder::Create(profile2_codec);
+ }
+
+ std::unique_ptr<VideoDecoder> CreateDecoder() override {
+ return VP9Decoder::Create();
+ }
+};
+
+TEST_F(TestVp9ImplProfile2, EncodeDecode) {
+ if (!encoder_)
+ return;
+
+ VideoFrame input_frame = NextInputFrame();
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr));
+ EncodedImage encoded_frame;
+ CodecSpecificInfo codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+  // The first frame is a key frame; mark it as such for the decoder.
+  encoded_frame._frameType = VideoFrameType::kVideoFrameKey;
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0));
+ std::unique_ptr<VideoFrame> decoded_frame;
+ absl::optional<uint8_t> decoded_qp;
+ ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
+ ASSERT_TRUE(decoded_frame);
+
+ // TODO(emircan): Add PSNR for different color depths.
+ EXPECT_GT(I420PSNR(*input_frame.video_frame_buffer()->ToI420(),
+ *decoded_frame->video_frame_buffer()->ToI420()),
+ 31);
+}
+
+TEST_F(TestVp9Impl, EncodeWithDynamicRate) {
+ // Configured dynamic rate field trial and re-create the encoder.
+ test::ScopedFieldTrials field_trials(
+ "WebRTC-VideoRateControl/vp9_dynamic_rate:true/");
+ SetUp();
+
+ // Set 300kbps target with 100% headroom.
+ VideoEncoder::RateControlParameters params;
+ params.bandwidth_allocation = DataRate::BitsPerSec(300000);
+ params.bitrate.SetBitrate(0, 0, params.bandwidth_allocation.bps());
+ params.framerate_fps = 30.0;
+
+ encoder_->SetRates(params);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ EncodedImage encoded_frame;
+ CodecSpecificInfo codec_specific_info;
+ ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+
+ // Set no headroom and encode again.
+ params.bandwidth_allocation = DataRate::Zero();
+ encoder_->SetRates(params);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+}
+
+TEST_F(TestVp9Impl, ReenablingUpperLayerAfterKFWithInterlayerPredIsEnabled) {
+ const size_t num_spatial_layers = 2;
+ const int num_frames_to_encode = 10;
+ codec_settings_.VP9()->flexibleMode = true;
+ codec_settings_.SetFrameDropEnabled(false);
+ codec_settings_.VP9()->numberOfSpatialLayers = num_spatial_layers;
+ codec_settings_.VP9()->numberOfTemporalLayers = 1;
+ codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+ // Force low frame-rate, so all layers are present for all frames.
+ codec_settings_.maxFramerate = 5;
+
+ ConfigureSvc(codec_settings_, num_spatial_layers);
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, kSettings));
+
+ VideoBitrateAllocation bitrate_allocation;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
+ }
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ std::vector<EncodedImage> encoded_frames;
+ std::vector<CodecSpecificInfo> codec_specific;
+
+ for (int i = 0; i < num_frames_to_encode; ++i) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific));
+ EXPECT_EQ(encoded_frames.size(), num_spatial_layers);
+ }
+
+ // Disable the last layer.
+ bitrate_allocation.SetBitrate(num_spatial_layers - 1, 0, 0);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ for (int i = 0; i < num_frames_to_encode; ++i) {
+ SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific));
+ EXPECT_EQ(encoded_frames.size(), num_spatial_layers - 1);
+ }
+
+ std::vector<VideoFrameType> frame_types = {VideoFrameType::kVideoFrameKey};
+
+ // Force a key-frame with the last layer still disabled.
+ SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(NextInputFrame(), &frame_types));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific));
+ EXPECT_EQ(encoded_frames.size(), num_spatial_layers - 1);
+ ASSERT_EQ(encoded_frames[0]._frameType, VideoFrameType::kVideoFrameKey);
+
+ // Re-enable the last layer.
+ bitrate_allocation.SetBitrate(
+ num_spatial_layers - 1, 0,
+ codec_settings_.spatialLayers[num_spatial_layers - 1].targetBitrate *
+ 1000);
+ encoder_->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings_.maxFramerate));
+
+ SetWaitForEncodedFramesThreshold(num_spatial_layers);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
+ ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific));
+ EXPECT_EQ(encoded_frames.size(), num_spatial_layers);
+ EXPECT_EQ(encoded_frames[0]._frameType, VideoFrameType::kVideoFrameDelta);
+}
+
+TEST_F(TestVp9Impl, HandlesEmptyDecoderConfigure) {
+ std::unique_ptr<VideoDecoder> decoder = CreateDecoder();
+ // Check that default settings are ok for decoder.
+ EXPECT_TRUE(decoder->Configure({}));
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder->Release());
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ TestVp9ImplForPixelFormat,
+ TestVp9ImplForPixelFormat,
+ ::testing::Values(test::FrameGeneratorInterface::OutputType::kI420,
+ test::FrameGeneratorInterface::OutputType::kNV12),
+ [](const auto& info) {
+ return test::FrameGeneratorInterface::OutputTypeToString(info.param);
+ });
+
+// Helper function to populate a vpx_image_t instance with dimensions and
+// potential image data.
+std::function<vpx_image_t*(vpx_image_t*,
+ vpx_img_fmt_t,
+ unsigned int,
+ unsigned int,
+ unsigned int,
+ unsigned char* img_data)>
+GetWrapImageFunction(vpx_image_t* img) {
+ return [img](vpx_image_t* /*img*/, vpx_img_fmt_t fmt, unsigned int d_w,
+ unsigned int d_h, unsigned int /*stride_align*/,
+ unsigned char* img_data) {
+ img->fmt = fmt;
+ img->d_w = d_w;
+ img->d_h = d_h;
+ img->img_data = img_data;
+ return img;
+ };
+}
+
+TEST(Vp9SpeedSettingsTrialsTest, NoSvcUsesGlobalSpeedFromTl0InLayerConfig) {
+  // TL0 speed 8 at >= 480x270 (129600 pixels), speed 4 below that.
+ test::ExplicitKeyValueConfig trials(
+ "WebRTC-VP9-PerformanceFlags/"
+ "use_per_layer_speed,"
+ "min_pixel_count:0|129600,"
+ "base_layer_speed:4|8,"
+ "high_layer_speed:5|9,"
+ "deblock_mode:1|0/");
+
+ // Keep a raw pointer for EXPECT calls and the like. Ownership is otherwise
+ // passed on to LibvpxVp9Encoder.
+ auto* const vpx = new NiceMock<MockLibvpxInterface>();
+ LibvpxVp9Encoder encoder(cricket::VideoCodec(),
+ absl::WrapUnique<LibvpxInterface>(vpx), trials);
+
+ VideoCodec settings = DefaultCodecSettings();
+ settings.width = 480;
+ settings.height = 270;
+ vpx_image_t img;
+
+ ON_CALL(*vpx, img_wrap).WillByDefault(GetWrapImageFunction(&img));
+ ON_CALL(*vpx, codec_enc_config_default)
+ .WillByDefault(DoAll(WithArg<1>([](vpx_codec_enc_cfg_t* cfg) {
+ memset(cfg, 0, sizeof(vpx_codec_enc_cfg_t));
+ }),
+ Return(VPX_CODEC_OK)));
+ EXPECT_CALL(*vpx, codec_control(_, _, An<int>())).Times(AnyNumber());
+
+ EXPECT_CALL(*vpx, codec_control(_, VP9E_SET_SVC_PARAMETERS,
+ A<vpx_svc_extra_cfg_t*>()))
+ .Times(0);
+
+ EXPECT_CALL(*vpx, codec_control(_, VP8E_SET_CPUUSED, TypedEq<int>(8)));
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder.InitEncode(&settings, kSettings));
+
+ encoder.Release();
+ settings.width = 352;
+ settings.height = 216;
+
+ EXPECT_CALL(*vpx, codec_control(_, VP8E_SET_CPUUSED, TypedEq<int>(4)));
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder.InitEncode(&settings, kSettings));
+}
+
+TEST(Vp9SpeedSettingsTrialsTest,
+ NoPerLayerFlagUsesGlobalSpeedFromTopLayerInConfig) {
+  // TL0 speed 8 at >= 480x270 (129600 pixels), speed 4 below that.
+ test::ExplicitKeyValueConfig trials(
+ "WebRTC-VP9-PerformanceFlags/"
+ "min_pixel_count:0|129600,"
+ "base_layer_speed:4|8,"
+ "high_layer_speed:5|9,"
+ "deblock_mode:1|0/");
+
+ // Keep a raw pointer for EXPECT calls and the like. Ownership is otherwise
+ // passed on to LibvpxVp9Encoder.
+ auto* const vpx = new NiceMock<MockLibvpxInterface>();
+ LibvpxVp9Encoder encoder(cricket::VideoCodec(),
+ absl::WrapUnique<LibvpxInterface>(vpx), trials);
+
+ VideoCodec settings = DefaultCodecSettings();
+ settings.width = 480;
+ settings.height = 270;
+ ConfigureSvc(settings, 2, 3);
+ vpx_image_t img;
+
+ ON_CALL(*vpx, img_wrap).WillByDefault(GetWrapImageFunction(&img));
+ ON_CALL(*vpx, codec_enc_config_default)
+ .WillByDefault(DoAll(WithArg<1>([](vpx_codec_enc_cfg_t* cfg) {
+ memset(cfg, 0, sizeof(vpx_codec_enc_cfg_t));
+ }),
+ Return(VPX_CODEC_OK)));
+ EXPECT_CALL(*vpx, codec_control(_, _, An<int>())).Times(AnyNumber());
+
+ // Speed settings not populated when 'use_per_layer_speed' flag is absent.
+ EXPECT_CALL(*vpx,
+ codec_control(
+ _, VP9E_SET_SVC_PARAMETERS,
+ SafeMatcherCast<vpx_svc_extra_cfg_t*>(AllOf(
+ Field(&vpx_svc_extra_cfg_t::speed_per_layer, Each(0)),
+ Field(&vpx_svc_extra_cfg_t::loopfilter_ctrl, Each(0))))))
+ .Times(2);
+
+ EXPECT_CALL(*vpx, codec_control(_, VP8E_SET_CPUUSED, TypedEq<int>(8)));
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder.InitEncode(&settings, kSettings));
+
+ encoder.Release();
+ settings.width = 476;
+ settings.height = 268;
+ settings.spatialLayers[0].width = settings.width / 2;
+ settings.spatialLayers[0].height = settings.height / 2;
+ settings.spatialLayers[1].width = settings.width;
+ settings.spatialLayers[1].height = settings.height;
+
+ EXPECT_CALL(*vpx, codec_control(_, VP8E_SET_CPUUSED, TypedEq<int>(4)));
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder.InitEncode(&settings, kSettings));
+}
+
+TEST(Vp9SpeedSettingsTrialsTest, DefaultPerLayerFlagsWithSvc) {
+ // Per-temporal and spatial layer speed settings:
+ // SL0: TL0 = speed 5, TL1/TL2 = speed 8.
+ // SL1/2: TL0 = speed 7, TL1/TL2 = speed 8.
+ // Deblocking-mode per spatial layer:
+ // SL0: mode 1, SL1/2: mode 0.
+ test::ExplicitKeyValueConfig trials(
+ "WebRTC-VP9-PerformanceFlags/"
+ "use_per_layer_speed,"
+ "min_pixel_count:0|129600,"
+ "base_layer_speed:5|7,"
+ "high_layer_speed:8|8,"
+ "deblock_mode:1|0/");
+
+ // Keep a raw pointer for EXPECT calls and the like. Ownership is otherwise
+ // passed on to LibvpxVp9Encoder.
+ auto* const vpx = new NiceMock<MockLibvpxInterface>();
+ LibvpxVp9Encoder encoder(cricket::VideoCodec(),
+ absl::WrapUnique<LibvpxInterface>(vpx), trials);
+
+ VideoCodec settings = DefaultCodecSettings();
+ constexpr int kNumSpatialLayers = 3;
+ constexpr int kNumTemporalLayers = 3;
+ ConfigureSvc(settings, kNumSpatialLayers, kNumTemporalLayers);
+ VideoBitrateAllocation bitrate_allocation;
+ for (int si = 0; si < kNumSpatialLayers; ++si) {
+ for (int ti = 0; ti < kNumTemporalLayers; ++ti) {
+ uint32_t bitrate_bps =
+ settings.spatialLayers[si].targetBitrate * 1'000 / kNumTemporalLayers;
+ bitrate_allocation.SetBitrate(si, ti, bitrate_bps);
+ }
+ }
+ vpx_image_t img;
+
+ // Speed settings per spatial layer, for TL0.
+ const int kBaseTlSpeed[VPX_MAX_LAYERS] = {5, 7, 7};
+ // Speed settings per spatial layer, for TL1, TL2.
+ const int kHighTlSpeed[VPX_MAX_LAYERS] = {8, 8, 8};
+ // Loopfilter settings are handled within libvpx, so this array is valid for
+ // both TL0 and higher.
+ const int kLoopFilter[VPX_MAX_LAYERS] = {1, 0, 0};
+
+ ON_CALL(*vpx, img_wrap).WillByDefault(GetWrapImageFunction(&img));
+ ON_CALL(*vpx, codec_enc_init)
+ .WillByDefault(WithArg<0>([](vpx_codec_ctx_t* ctx) {
+ memset(ctx, 0, sizeof(*ctx));
+ return VPX_CODEC_OK;
+ }));
+ ON_CALL(*vpx, codec_enc_config_default)
+ .WillByDefault(DoAll(WithArg<1>([](vpx_codec_enc_cfg_t* cfg) {
+ memset(cfg, 0, sizeof(vpx_codec_enc_cfg_t));
+ }),
+ Return(VPX_CODEC_OK)));
+ EXPECT_CALL(
+ *vpx, codec_control(_, VP9E_SET_SVC_PARAMETERS,
+ SafeMatcherCast<vpx_svc_extra_cfg_t*>(
+ AllOf(Field(&vpx_svc_extra_cfg_t::speed_per_layer,
+ ElementsAreArray(kBaseTlSpeed)),
+ Field(&vpx_svc_extra_cfg_t::loopfilter_ctrl,
+ ElementsAreArray(kLoopFilter))))));
+
+ // Capture the callback into the vp9 wrapper.
+ vpx_codec_priv_output_cx_pkt_cb_pair_t callback_pointer = {};
+ EXPECT_CALL(*vpx, codec_control(_, VP9E_REGISTER_CX_CALLBACK, A<void*>()))
+ .WillOnce(WithArg<2>([&](void* cbp) {
+ callback_pointer =
+ *reinterpret_cast<vpx_codec_priv_output_cx_pkt_cb_pair_t*>(cbp);
+ return VPX_CODEC_OK;
+ }));
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder.InitEncode(&settings, kSettings));
+
+ encoder.SetRates(VideoEncoder::RateControlParameters(bitrate_allocation,
+ settings.maxFramerate));
+
+ MockEncodedImageCallback callback;
+ encoder.RegisterEncodeCompleteCallback(&callback);
+ auto frame_generator = test::CreateSquareFrameGenerator(
+ kWidth, kHeight, test::FrameGeneratorInterface::OutputType::kI420, 10);
+ Mock::VerifyAndClearExpectations(vpx);
+
+ uint8_t data[1] = {0};
+ vpx_codec_cx_pkt encoded_data = {};
+ encoded_data.data.frame.buf = &data;
+ encoded_data.data.frame.sz = 1;
+
+ const auto kImageOk =
+ EncodedImageCallback::Result(EncodedImageCallback::Result::OK);
+
+ int spatial_id = 0;
+ int temporal_id = 0;
+ EXPECT_CALL(*vpx,
+ codec_control(_, VP9E_SET_SVC_LAYER_ID, A<vpx_svc_layer_id_t*>()))
+ .Times(AnyNumber());
+ EXPECT_CALL(*vpx,
+ codec_control(_, VP9E_GET_SVC_LAYER_ID, A<vpx_svc_layer_id_t*>()))
+ .WillRepeatedly(WithArg<2>([&](vpx_svc_layer_id_t* layer_id) {
+ layer_id->spatial_layer_id = spatial_id;
+ layer_id->temporal_layer_id = temporal_id;
+ return VPX_CODEC_OK;
+ }));
+ vpx_svc_ref_frame_config_t stored_refs = {};
+ ON_CALL(*vpx, codec_control(_, VP9E_SET_SVC_REF_FRAME_CONFIG,
+ A<vpx_svc_ref_frame_config_t*>()))
+ .WillByDefault(
+ DoAll(SaveArgPointee<2>(&stored_refs), Return(VPX_CODEC_OK)));
+ ON_CALL(*vpx, codec_control(_, VP9E_GET_SVC_REF_FRAME_CONFIG,
+ A<vpx_svc_ref_frame_config_t*>()))
+ .WillByDefault(
+ DoAll(SetArgPointee<2>(ByRef(stored_refs)), Return(VPX_CODEC_OK)));
+
+ // First frame is keyframe.
+ encoded_data.data.frame.flags = VPX_FRAME_IS_KEY;
+
+ // Default 3-layer temporal pattern: 0-2-1-2, then repeat and do two more.
+ for (int ti : {0, 2, 1, 2, 0, 2}) {
+ EXPECT_CALL(*vpx, codec_encode).WillOnce(Return(VPX_CODEC_OK));
+    // No update expected if the flags haven't changed; they change only when
+    // we move between the base temporal layer and a non-base temporal layer.
+ if ((ti > 0) != (temporal_id > 0)) {
+ EXPECT_CALL(*vpx, codec_control(
+ _, VP9E_SET_SVC_PARAMETERS,
+ SafeMatcherCast<vpx_svc_extra_cfg_t*>(AllOf(
+ Field(&vpx_svc_extra_cfg_t::speed_per_layer,
+ ElementsAreArray(ti == 0 ? kBaseTlSpeed
+ : kHighTlSpeed)),
+ Field(&vpx_svc_extra_cfg_t::loopfilter_ctrl,
+ ElementsAreArray(kLoopFilter))))));
+ } else {
+ EXPECT_CALL(*vpx, codec_control(_, VP9E_SET_SVC_PARAMETERS,
+ A<vpx_svc_extra_cfg_t*>()))
+ .Times(0);
+ }
+
+ VideoFrame frame =
+ VideoFrame::Builder()
+ .set_video_frame_buffer(frame_generator->NextFrame().buffer)
+ .build();
+ encoder.Encode(frame, nullptr);
+
+ temporal_id = ti;
+ for (int si = 0; si < kNumSpatialLayers; ++si) {
+ spatial_id = si;
+
+ EXPECT_CALL(callback, OnEncodedImage).WillOnce(Return(kImageOk));
+ callback_pointer.output_cx_pkt(&encoded_data, callback_pointer.user_priv);
+ }
+
+ encoded_data.data.frame.flags = 0; // Following frames are delta frames.
+ }
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9.cc
new file mode 100644
index 0000000000..222e57b6ba
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9.cc
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/codecs/vp9/include/vp9.h"
+
+#include <memory>
+
+#include "absl/container/inlined_vector.h"
+#include "api/transport/field_trial_based_config.h"
+#include "api/video_codecs/scalability_mode.h"
+#include "api/video_codecs/sdp_video_format.h"
+#include "api/video_codecs/vp9_profile.h"
+#include "modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h"
+#include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h"
+#include "modules/video_coding/svc/create_scalability_structure.h"
+#include "rtc_base/checks.h"
+#include "vpx/vp8cx.h"
+#include "vpx/vp8dx.h"
+#include "vpx/vpx_codec.h"
+
+namespace webrtc {
+
+std::vector<SdpVideoFormat> SupportedVP9Codecs(bool add_scalability_modes) {
+#ifdef RTC_ENABLE_VP9
+ // Profile 2 might not be available on some platforms until
+ // https://bugs.chromium.org/p/webm/issues/detail?id=1544 is solved.
+ static bool vpx_supports_high_bit_depth =
+ (vpx_codec_get_caps(vpx_codec_vp9_cx()) & VPX_CODEC_CAP_HIGHBITDEPTH) !=
+ 0 &&
+ (vpx_codec_get_caps(vpx_codec_vp9_dx()) & VPX_CODEC_CAP_HIGHBITDEPTH) !=
+ 0;
+
+ absl::InlinedVector<ScalabilityMode, kScalabilityModeCount> scalability_modes;
+ if (add_scalability_modes) {
+ for (const auto scalability_mode : kAllScalabilityModes) {
+ if (ScalabilityStructureConfig(scalability_mode).has_value()) {
+ scalability_modes.push_back(scalability_mode);
+ }
+ }
+ }
+ std::vector<SdpVideoFormat> supported_formats{SdpVideoFormat(
+ cricket::kVp9CodecName,
+ {{kVP9FmtpProfileId, VP9ProfileToString(VP9Profile::kProfile0)}},
+ scalability_modes)};
+ if (vpx_supports_high_bit_depth) {
+ supported_formats.push_back(SdpVideoFormat(
+ cricket::kVp9CodecName,
+ {{kVP9FmtpProfileId, VP9ProfileToString(VP9Profile::kProfile2)}},
+ scalability_modes));
+ }
+
+ return supported_formats;
+#else
+ return std::vector<SdpVideoFormat>();
+#endif
+}
+
+std::vector<SdpVideoFormat> SupportedVP9DecoderCodecs() {
+#ifdef RTC_ENABLE_VP9
+ std::vector<SdpVideoFormat> supported_formats = SupportedVP9Codecs();
+ // The WebRTC internal decoder supports VP9 profile 1 and 3. However, there's
+ // currently no way of sending VP9 profile 1 or 3 using the internal encoder.
+ // It would require extended support for I444, I422, and I440 buffers.
+ supported_formats.push_back(SdpVideoFormat(
+ cricket::kVp9CodecName,
+ {{kVP9FmtpProfileId, VP9ProfileToString(VP9Profile::kProfile1)}}));
+ supported_formats.push_back(SdpVideoFormat(
+ cricket::kVp9CodecName,
+ {{kVP9FmtpProfileId, VP9ProfileToString(VP9Profile::kProfile3)}}));
+ return supported_formats;
+#else
+ return std::vector<SdpVideoFormat>();
+#endif
+}
+
+std::unique_ptr<VP9Encoder> VP9Encoder::Create() {
+#ifdef RTC_ENABLE_VP9
+ return std::make_unique<LibvpxVp9Encoder>(cricket::VideoCodec(),
+ LibvpxInterface::Create(),
+ FieldTrialBasedConfig());
+#else
+ RTC_DCHECK_NOTREACHED();
+ return nullptr;
+#endif
+}
+
+std::unique_ptr<VP9Encoder> VP9Encoder::Create(
+ const cricket::VideoCodec& codec) {
+#ifdef RTC_ENABLE_VP9
+ return std::make_unique<LibvpxVp9Encoder>(codec, LibvpxInterface::Create(),
+ FieldTrialBasedConfig());
+#else
+ RTC_DCHECK_NOTREACHED();
+ return nullptr;
+#endif
+}
+
+bool VP9Encoder::SupportsScalabilityMode(ScalabilityMode scalability_mode) {
+ return ScalabilityStructureConfig(scalability_mode).has_value();
+}
+
+std::unique_ptr<VP9Decoder> VP9Decoder::Create() {
+#ifdef RTC_ENABLE_VP9
+ return std::make_unique<LibvpxVp9Decoder>();
+#else
+ RTC_DCHECK_NOTREACHED();
+ return nullptr;
+#endif
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.cc
new file mode 100644
index 0000000000..181550ce91
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.cc
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#ifdef RTC_ENABLE_VP9
+
+#include "modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h"
+
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "vpx/vpx_codec.h"
+#include "vpx/vpx_decoder.h"
+#include "vpx/vpx_frame_buffer.h"
+
+namespace webrtc {
+
+uint8_t* Vp9FrameBufferPool::Vp9FrameBuffer::GetData() {
+ return data_.data<uint8_t>();
+}
+
+size_t Vp9FrameBufferPool::Vp9FrameBuffer::GetDataSize() const {
+ return data_.size();
+}
+
+void Vp9FrameBufferPool::Vp9FrameBuffer::SetSize(size_t size) {
+ data_.SetSize(size);
+}
+
+bool Vp9FrameBufferPool::InitializeVpxUsePool(
+ vpx_codec_ctx* vpx_codec_context) {
+ RTC_DCHECK(vpx_codec_context);
+ // Tell libvpx to use this pool.
+ if (vpx_codec_set_frame_buffer_functions(
+ // In which context to use these callback functions.
+ vpx_codec_context,
+ // Called by libvpx when it needs another frame buffer.
+ &Vp9FrameBufferPool::VpxGetFrameBuffer,
+ // Called by libvpx when it no longer uses a frame buffer.
+ &Vp9FrameBufferPool::VpxReleaseFrameBuffer,
+ // `this` will be passed as `user_priv` to VpxGetFrameBuffer.
+ this)) {
+ // Failed to configure libvpx to use Vp9FrameBufferPool.
+ return false;
+ }
+ return true;
+}
+
+rtc::scoped_refptr<Vp9FrameBufferPool::Vp9FrameBuffer>
+Vp9FrameBufferPool::GetFrameBuffer(size_t min_size) {
+ RTC_DCHECK_GT(min_size, 0);
+ rtc::scoped_refptr<Vp9FrameBuffer> available_buffer = nullptr;
+ {
+ MutexLock lock(&buffers_lock_);
+ // Do we have a buffer we can recycle?
+ for (const auto& buffer : allocated_buffers_) {
+ if (buffer->HasOneRef()) {
+ available_buffer = buffer;
+ break;
+ }
+ }
+ // Otherwise create one.
+ if (available_buffer == nullptr) {
+ available_buffer = new Vp9FrameBuffer();
+ allocated_buffers_.push_back(available_buffer);
+ if (allocated_buffers_.size() > max_num_buffers_) {
+ RTC_LOG(LS_WARNING)
+ << allocated_buffers_.size()
+ << " Vp9FrameBuffers have been "
+ "allocated by a Vp9FrameBufferPool (exceeding what is "
+ "considered reasonable, "
+ << max_num_buffers_ << ").";
+
+ // TODO(phoglund): this limit is being hit in tests since Oct 5 2016.
+ // See https://bugs.chromium.org/p/webrtc/issues/detail?id=6484.
+ // RTC_DCHECK_NOTREACHED();
+ }
+ }
+ }
+
+ available_buffer->SetSize(min_size);
+ return available_buffer;
+}
+
+int Vp9FrameBufferPool::GetNumBuffersInUse() const {
+ int num_buffers_in_use = 0;
+ MutexLock lock(&buffers_lock_);
+ for (const auto& buffer : allocated_buffers_) {
+ if (!buffer->HasOneRef())
+ ++num_buffers_in_use;
+ }
+ return num_buffers_in_use;
+}
+
+bool Vp9FrameBufferPool::Resize(size_t max_number_of_buffers) {
+ MutexLock lock(&buffers_lock_);
+ size_t used_buffers_count = 0;
+ for (const auto& buffer : allocated_buffers_) {
+ // If the buffer is in use, the ref count will be >= 2, one from the list we
+ // are looping over and one from the application. If the ref count is 1,
+ // then the list we are looping over holds the only reference and it's safe
+ // to reuse.
+ if (!buffer->HasOneRef()) {
+ used_buffers_count++;
+ }
+ }
+ if (used_buffers_count > max_number_of_buffers) {
+ return false;
+ }
+ max_num_buffers_ = max_number_of_buffers;
+
+ size_t buffers_to_purge = allocated_buffers_.size() - max_num_buffers_;
+ auto iter = allocated_buffers_.begin();
+ while (iter != allocated_buffers_.end() && buffers_to_purge > 0) {
+ if ((*iter)->HasOneRef()) {
+ iter = allocated_buffers_.erase(iter);
+ buffers_to_purge--;
+ } else {
+ ++iter;
+ }
+ }
+ return true;
+}
+
+void Vp9FrameBufferPool::ClearPool() {
+ MutexLock lock(&buffers_lock_);
+ allocated_buffers_.clear();
+}
+
+// static
+int32_t Vp9FrameBufferPool::VpxGetFrameBuffer(void* user_priv,
+ size_t min_size,
+ vpx_codec_frame_buffer* fb) {
+ RTC_DCHECK(user_priv);
+ RTC_DCHECK(fb);
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  // Cap at the size of an 8K (7680x4320) YUV 4:2:0 frame at 2 bytes/sample.
+ size_t size_limit = 7680 * 4320 * 3 / 2 * 2;
+ if (min_size > size_limit)
+ return -1;
+#endif
+
+ Vp9FrameBufferPool* pool = static_cast<Vp9FrameBufferPool*>(user_priv);
+
+ rtc::scoped_refptr<Vp9FrameBuffer> buffer = pool->GetFrameBuffer(min_size);
+ fb->data = buffer->GetData();
+ fb->size = buffer->GetDataSize();
+ // Store Vp9FrameBuffer* in `priv` for use in VpxReleaseFrameBuffer.
+ // This also makes vpx_codec_get_frame return images with their `fb_priv` set
+ // to `buffer` which is important for external reference counting.
+ // Release from refptr so that the buffer's `ref_count_` remains 1 when
+ // `buffer` goes out of scope.
+ fb->priv = static_cast<void*>(buffer.release());
+ return 0;
+}
+
+// static
+int32_t Vp9FrameBufferPool::VpxReleaseFrameBuffer(void* user_priv,
+ vpx_codec_frame_buffer* fb) {
+ RTC_DCHECK(user_priv);
+ RTC_DCHECK(fb);
+ Vp9FrameBuffer* buffer = static_cast<Vp9FrameBuffer*>(fb->priv);
+ if (buffer != nullptr) {
+ buffer->Release();
+ // When libvpx fails to decode and you continue to try to decode (and fail)
+ // libvpx can for some reason try to release the same buffer multiple times.
+ // Setting `priv` to null protects against trying to Release multiple times.
+ fb->priv = nullptr;
+ }
+ return 0;
+}
+
+} // namespace webrtc
+
+#endif // RTC_ENABLE_VP9
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h
new file mode 100644
index 0000000000..f46f1b7ea2
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#ifndef MODULES_VIDEO_CODING_CODECS_VP9_VP9_FRAME_BUFFER_POOL_H_
+#define MODULES_VIDEO_CODING_CODECS_VP9_VP9_FRAME_BUFFER_POOL_H_
+
+#ifdef RTC_ENABLE_VP9
+
+#include <vector>
+
+#include "api/ref_counted_base.h"
+#include "api/scoped_refptr.h"
+#include "rtc_base/buffer.h"
+#include "rtc_base/synchronization/mutex.h"
+
+struct vpx_codec_ctx;
+struct vpx_codec_frame_buffer;
+
+namespace webrtc {
+
// Default cap on pool size (the initial value of
// Vp9FrameBufferPool::max_num_buffers_; adjustable via Resize()).
// If more buffers than this are allocated we print warnings and crash if in
// debug mode. VP9 is defined to have 8 reference buffers, of which 3 can be
// referenced by any frame, see
// https://tools.ietf.org/html/draft-grange-vp9-bitstream-00#section-2.2.2.
// Assuming VP9 holds on to at most 8 buffers, any more buffers than that
// would have to be by application code. Decoded frames should not be
// referenced for longer than necessary. If we allow ~60 additional buffers
// then the application has ~1 second to e.g. render each frame of a 60 fps
// video.
constexpr size_t kDefaultMaxNumBuffers = 68;
+
+// This memory pool is used to serve buffers to libvpx for decoding purposes in
+// VP9, which is set up in InitializeVPXUsePool. After the initialization any
+// time libvpx wants to decode a frame it will use buffers provided and released
+// through VpxGetFrameBuffer and VpxReleaseFrameBuffer.
+// The benefit of owning the pool that libvpx relies on for decoding is that the
+// decoded frames returned by libvpx (from vpx_codec_get_frame) use parts of our
+// buffers for the decoded image data. By retaining ownership of this buffer
+// using scoped_refptr, the image buffer can be reused by VideoFrames and no
+// frame copy has to occur during decoding and frame delivery.
+//
+// Pseudo example usage case:
+// Vp9FrameBufferPool pool;
+// pool.InitializeVpxUsePool(decoder_ctx);
+// ...
+//
+// // During decoding, libvpx will get and release buffers from the pool.
+// vpx_codec_decode(decoder_ctx, ...);
+//
+// vpx_image_t* img = vpx_codec_get_frame(decoder_ctx, &iter);
+// // Important to use scoped_refptr to protect it against being recycled by
+// // the pool.
+// scoped_refptr<Vp9FrameBuffer> img_buffer = (Vp9FrameBuffer*)img->fb_priv;
+// ...
+//
+// // Destroying the codec will make libvpx release any buffers it was using.
+// vpx_codec_destroy(decoder_ctx);
class Vp9FrameBufferPool {
 public:
  // A single reusable decode buffer. Reference-counted so that the pool, the
  // libvpx decoder and delivered video frames can all hold on to it; the pool
  // recycles a buffer once HasOneRef() shows the pool holds the only
  // reference.
  class Vp9FrameBuffer final
      : public rtc::RefCountedNonVirtual<Vp9FrameBuffer> {
   public:
    // Pointer to the start of the buffer's storage.
    uint8_t* GetData();
    // Current size of the storage in bytes.
    size_t GetDataSize() const;
    // Resizes the underlying storage to `size` bytes.
    void SetSize(size_t size);

    // Re-exported so the pool can test whether it holds the only reference
    // (i.e. whether this buffer is free to be recycled).
    using rtc::RefCountedNonVirtual<Vp9FrameBuffer>::HasOneRef;

   private:
    // Data as an easily resizable buffer.
    rtc::Buffer data_;
  };

  // Configures libvpx to, in the specified context, use this memory pool for
  // buffers used to decompress frames. This is only supported for VP9.
  bool InitializeVpxUsePool(vpx_codec_ctx* vpx_codec_context);

  // Gets a frame buffer of at least `min_size`, recycling an available one or
  // creating a new one. When no longer referenced from the outside the buffer
  // becomes recyclable.
  rtc::scoped_refptr<Vp9FrameBuffer> GetFrameBuffer(size_t min_size);
  // Gets the number of buffers currently in use (not ready to be recycled).
  int GetNumBuffersInUse() const;
  // Changes the max amount of buffers in the pool to the new value.
  // Returns true if change was successful and false if the amount of already
  // allocated buffers is bigger than new value.
  bool Resize(size_t max_number_of_buffers);
  // Releases allocated buffers, deleting available buffers. Buffers in use are
  // not deleted until they are no longer referenced.
  void ClearPool();

  // InitializeVpxUsePool configures libvpx to call this function when it needs
  // a new frame buffer. Parameters:
  // `user_priv` Private data passed to libvpx, InitializeVpxUsePool sets it up
  //             to be a pointer to the pool.
  // `min_size`  Minimum size needed by libvpx (to decompress a frame).
  // `fb`        Pointer to the libvpx frame buffer object, this is updated to
  //             use the pool's buffer.
  // Returns 0 on success. Returns < 0 on failure.
  static int32_t VpxGetFrameBuffer(void* user_priv,
                                   size_t min_size,
                                   vpx_codec_frame_buffer* fb);

  // InitializeVpxUsePool configures libvpx to call this function when it has
  // finished using one of the pool's frame buffer. Parameters:
  // `user_priv` Private data passed to libvpx, InitializeVpxUsePool sets it up
  //             to be a pointer to the pool.
  // `fb`        Pointer to the libvpx frame buffer object, its `priv` will be
  //             a pointer to one of the pool's Vp9FrameBuffer.
  static int32_t VpxReleaseFrameBuffer(void* user_priv,
                                       vpx_codec_frame_buffer* fb);

 private:
  // Protects `allocated_buffers_`.
  mutable Mutex buffers_lock_;
  // All buffers, in use or ready to be recycled.
  std::vector<rtc::scoped_refptr<Vp9FrameBuffer>> allocated_buffers_
      RTC_GUARDED_BY(buffers_lock_);
  // Upper bound on `allocated_buffers_.size()`; starts at the default and can
  // be changed with Resize().
  size_t max_num_buffers_ = kDefaultMaxNumBuffers;
};
+
+} // namespace webrtc
+
+#endif // RTC_ENABLE_VP9
+
+#endif // MODULES_VIDEO_CODING_CODECS_VP9_VP9_FRAME_BUFFER_POOL_H_