diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /third_party/libwebrtc/modules/video_coding/codecs | |
parent | Initial commit. (diff) | |
download | firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip |
Adding upstream version 110.0.1. (refs: upstream/110.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/modules/video_coding/codecs')
104 files changed, 26316 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/BUILD.gn b/third_party/libwebrtc/modules/video_coding/codecs/av1/BUILD.gn new file mode 100644 index 0000000000..648778d969 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/BUILD.gn @@ -0,0 +1,153 @@ +# Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("//third_party/libaom/options.gni") +import("../../../../webrtc.gni") + +rtc_library("av1_svc_config") { + sources = [ + "av1_svc_config.cc", + "av1_svc_config.h", + ] + deps = [ + "../../../../api/video_codecs:video_codecs_api", + "../../../../rtc_base:checks", + "../../../../rtc_base:logging", + "../../../../rtc_base:stringutils", + "../../svc:scalability_mode_util", + "../../svc:scalability_structures", + "../../svc:scalable_video_controller", + ] + + absl_deps = [ "//third_party/abseil-cpp/absl/container:inlined_vector" ] +} + +rtc_library("dav1d_decoder") { + visibility = [ "*" ] + poisonous = [ "software_video_codecs" ] + public = [ "dav1d_decoder.h" ] + sources = [ "dav1d_decoder.cc" ] + + deps = [ + "../..:video_codec_interface", + "../../../../api:scoped_refptr", + "../../../../api/video:encoded_image", + "../../../../api/video:video_frame", + "../../../../api/video_codecs:video_codecs_api", + "../../../../common_video", + "../../../../rtc_base:logging", + "//third_party/dav1d", + "//third_party/libyuv", + ] + if (build_with_mozilla) { + deps -= [ + "//third_party/dav1d", + "//third_party/libyuv", + ] + include_dirs = [ + "/media/libdav1d/", + "/media/libyuv", + "/media/libyuv/libyuv/include", + "/third_party/dav1d/include/dav1d", + ] + } + absl_deps = [ 
"//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("libaom_av1_decoder") { + visibility = [ "*" ] + poisonous = [ "software_video_codecs" ] + public = [ "libaom_av1_decoder.h" ] + deps = [ "../../../../api/video_codecs:video_codecs_api" ] + absl_deps = [ "//third_party/abseil-cpp/absl/base:core_headers" ] + + if (enable_libaom) { + sources = [ "libaom_av1_decoder.cc" ] + deps += [ + "../..:video_codec_interface", + "../../../../api:scoped_refptr", + "../../../../api/video:encoded_image", + "../../../../api/video:video_frame", + "../../../../common_video", + "../../../../rtc_base:logging", + "//third_party/libaom", + "//third_party/libyuv", + ] + if (build_with_mozilla) { + deps -= [ + "//third_party/libaom", + "//third_party/libyuv", + ] + } + absl_deps += [ "//third_party/abseil-cpp/absl/types:optional" ] + } else { + sources = [ "libaom_av1_decoder_absent.cc" ] + } +} + +rtc_library("libaom_av1_encoder") { + visibility = [ "*" ] + poisonous = [ "software_video_codecs" ] + public = [ "libaom_av1_encoder.h" ] + sources = [ "libaom_av1_encoder.cc" ] + deps = [ + "../..:video_codec_interface", + "../../../../api:scoped_refptr", + "../../../../api/video:encoded_image", + "../../../../api/video:video_frame", + "../../../../api/video_codecs:video_codecs_api", + "../../../../common_video", + "../../../../rtc_base:checks", + "../../../../rtc_base:logging", + "../../svc:scalability_structures", + "../../svc:scalable_video_controller", + ] + if (enable_libaom) { + deps += [ "//third_party/libaom" ] + } + absl_deps = [ + "//third_party/abseil-cpp/absl/algorithm:container", + "//third_party/abseil-cpp/absl/base:core_headers", + "//third_party/abseil-cpp/absl/strings:strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +if (rtc_include_tests) { + rtc_library("video_coding_codecs_av1_tests") { + testonly = true + + sources = [ "av1_svc_config_unittest.cc" ] + deps = [ + ":av1_svc_config", + "../../../../api/video_codecs:video_codecs_api", + 
"../../../../test:test_support", + ] + + if (enable_libaom) { + sources += [ + "libaom_av1_encoder_unittest.cc", + "libaom_av1_unittest.cc", + ] + deps += [ + ":libaom_av1_decoder", + ":libaom_av1_encoder", + "../..:encoded_video_frame_producer", + "../..:video_codec_interface", + "../../../../api:mock_video_encoder", + "../../../../api/units:data_size", + "../../../../api/units:time_delta", + "../../../../api/video:video_frame", + "../../svc:scalability_mode_util", + "../../svc:scalability_structures", + "../../svc:scalable_video_controller", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] + } + } +} diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/DEPS b/third_party/libwebrtc/modules/video_coding/codecs/av1/DEPS new file mode 100644 index 0000000000..bfb1c733d4 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/DEPS @@ -0,0 +1,4 @@ +include_rules = [ + "+third_party/libaom", + "+third_party/dav1d", +] diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config.cc b/third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config.cc new file mode 100644 index 0000000000..43dcf96ab7 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config.cc @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/av1/av1_svc_config.h" + +#include <algorithm> +#include <cmath> +#include <memory> + +#include "modules/video_coding/svc/create_scalability_structure.h" +#include "modules/video_coding/svc/scalability_mode_util.h" +#include "modules/video_coding/svc/scalable_video_controller.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/strings/string_builder.h" + +namespace webrtc { +namespace { +absl::optional<ScalabilityMode> BuildScalabilityMode(int num_temporal_layers, + int num_spatial_layers) { + char name[20]; + rtc::SimpleStringBuilder ss(name); + ss << "L" << num_spatial_layers << "T" << num_temporal_layers; + if (num_spatial_layers > 1) { + ss << "_KEY"; + } + + return ScalabilityModeFromString(name); +} +} // namespace + +absl::InlinedVector<ScalabilityMode, kScalabilityModeCount> +LibaomAv1EncoderSupportedScalabilityModes() { + absl::InlinedVector<ScalabilityMode, kScalabilityModeCount> scalability_modes; + for (ScalabilityMode scalability_mode : kAllScalabilityModes) { + if (ScalabilityStructureConfig(scalability_mode) != absl::nullopt) { + scalability_modes.push_back(scalability_mode); + } + } + return scalability_modes; +} + +bool LibaomAv1EncoderSupportsScalabilityMode(ScalabilityMode scalability_mode) { + // For libaom AV1, the scalability mode is supported if we can create the + // scalability structure. 
+ return ScalabilityStructureConfig(scalability_mode) != absl::nullopt; +} + +bool SetAv1SvcConfig(VideoCodec& video_codec, + int num_temporal_layers, + int num_spatial_layers) { + RTC_DCHECK_EQ(video_codec.codecType, kVideoCodecAV1); + + absl::optional<ScalabilityMode> scalability_mode = + video_codec.GetScalabilityMode(); + if (!scalability_mode.has_value()) { + scalability_mode = + BuildScalabilityMode(num_temporal_layers, num_spatial_layers); + if (!scalability_mode) { + RTC_LOG(LS_WARNING) << "Scalability mode is not set, using 'L1T1'."; + scalability_mode = ScalabilityMode::kL1T1; + } + } + + std::unique_ptr<ScalableVideoController> structure = + CreateScalabilityStructure(*scalability_mode); + if (structure == nullptr) { + RTC_LOG(LS_WARNING) << "Failed to create structure " + << static_cast<int>(*scalability_mode); + return false; + } + + video_codec.SetScalabilityMode(*scalability_mode); + + ScalableVideoController::StreamLayersConfig info = structure->StreamConfig(); + for (int sl_idx = 0; sl_idx < info.num_spatial_layers; ++sl_idx) { + SpatialLayer& spatial_layer = video_codec.spatialLayers[sl_idx]; + spatial_layer.width = video_codec.width * info.scaling_factor_num[sl_idx] / + info.scaling_factor_den[sl_idx]; + spatial_layer.height = video_codec.height * + info.scaling_factor_num[sl_idx] / + info.scaling_factor_den[sl_idx]; + spatial_layer.maxFramerate = video_codec.maxFramerate; + spatial_layer.numberOfTemporalLayers = info.num_temporal_layers; + spatial_layer.active = true; + } + + if (info.num_spatial_layers == 1) { + SpatialLayer& spatial_layer = video_codec.spatialLayers[0]; + spatial_layer.minBitrate = video_codec.minBitrate; + spatial_layer.maxBitrate = video_codec.maxBitrate; + spatial_layer.targetBitrate = + (video_codec.minBitrate + video_codec.maxBitrate) / 2; + return true; + } + + for (int sl_idx = 0; sl_idx < info.num_spatial_layers; ++sl_idx) { + SpatialLayer& spatial_layer = video_codec.spatialLayers[sl_idx]; + // minBitrate and 
maxBitrate formulas are copied from vp9 settings and + // are not yet tuned for av1. + const int num_pixels = spatial_layer.width * spatial_layer.height; + int min_bitrate_kbps = (600.0 * std::sqrt(num_pixels) - 95'000.0) / 1000.0; + spatial_layer.minBitrate = std::max(min_bitrate_kbps, 20); + spatial_layer.maxBitrate = 50 + static_cast<int>(1.6 * num_pixels / 1000.0); + spatial_layer.targetBitrate = + (spatial_layer.minBitrate + spatial_layer.maxBitrate) / 2; + } + return true; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config.h b/third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config.h new file mode 100644 index 0000000000..05b886b9f4 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_AV1_AV1_SVC_CONFIG_H_ +#define MODULES_VIDEO_CODING_CODECS_AV1_AV1_SVC_CONFIG_H_ + +#include <vector> + +#include "absl/container/inlined_vector.h" +#include "api/video_codecs/video_codec.h" + +namespace webrtc { + +absl::InlinedVector<ScalabilityMode, kScalabilityModeCount> +LibaomAv1EncoderSupportedScalabilityModes(); + +bool LibaomAv1EncoderSupportsScalabilityMode(ScalabilityMode scalability_mode); + +// Fills `video_codec.spatialLayers` using other members. 
+bool SetAv1SvcConfig(VideoCodec& video_codec, + int num_temporal_layers, + int num_spatial_layers); + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_AV1_AV1_SVC_CONFIG_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config_gn/moz.build b/third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config_gn/moz.build new file mode 100644 index 0000000000..f3bef360d8 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + 
DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + 
DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] 
= True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("av1_svc_config_gn") diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config_unittest.cc new file mode 100644 index 0000000000..9f1da9865c --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/av1_svc_config_unittest.cc @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/codecs/av1/av1_svc_config.h" + +#include "api/video_codecs/video_codec.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { +constexpr int kDontCare = 0; + +TEST(Av1SvcConfigTest, TreatsEmptyAsL1T1) { + VideoCodec video_codec; + video_codec.codecType = kVideoCodecAV1; + + EXPECT_TRUE(SetAv1SvcConfig(video_codec, /*num_temporal_layers=*/kDontCare, + /*num_spatial_layers=*/kDontCare)); + + EXPECT_TRUE(video_codec.spatialLayers[0].active); + EXPECT_EQ(video_codec.spatialLayers[0].numberOfTemporalLayers, 1); + EXPECT_FALSE(video_codec.spatialLayers[1].active); +} + +TEST(Av1SvcConfigTest, ScalabilityModeFromNumberOfTemporalLayers) { + VideoCodec video_codec; + video_codec.codecType = kVideoCodecAV1; + + EXPECT_TRUE(SetAv1SvcConfig(video_codec, /*num_temporal_layers=*/3, + /*num_spatial_layers=*/1)); + EXPECT_EQ(video_codec.spatialLayers[0].numberOfTemporalLayers, 3); +} + +TEST(Av1SvcConfigTest, ScalabilityModeFromNumberOfSpatialLayers) { + VideoCodec video_codec; + 
video_codec.codecType = kVideoCodecAV1; + + EXPECT_TRUE(SetAv1SvcConfig(video_codec, /*num_temporal_layers=*/3, + /*num_spatial_layers=*/2)); + EXPECT_EQ(video_codec.spatialLayers[0].numberOfTemporalLayers, 3); + EXPECT_TRUE(video_codec.spatialLayers[0].active); + EXPECT_TRUE(video_codec.spatialLayers[1].active); + EXPECT_FALSE(video_codec.spatialLayers[2].active); +} + +TEST(Av1SvcConfigTest, SetsActiveSpatialLayersFromScalabilityMode) { + VideoCodec video_codec; + video_codec.codecType = kVideoCodecAV1; + video_codec.SetScalabilityMode(ScalabilityMode::kL2T1); + + EXPECT_TRUE(SetAv1SvcConfig(video_codec, /*num_temporal_layers=*/kDontCare, + /*num_spatial_layers=*/kDontCare)); + + EXPECT_TRUE(video_codec.spatialLayers[0].active); + EXPECT_TRUE(video_codec.spatialLayers[1].active); + EXPECT_FALSE(video_codec.spatialLayers[2].active); +} + +TEST(Av1SvcConfigTest, ConfiguresDobuleResolutionRatioFromScalabilityMode) { + VideoCodec video_codec; + video_codec.codecType = kVideoCodecAV1; + video_codec.SetScalabilityMode(ScalabilityMode::kL2T1); + video_codec.width = 1200; + video_codec.height = 800; + + EXPECT_TRUE(SetAv1SvcConfig(video_codec, /*num_temporal_layers=*/kDontCare, + /*num_spatial_layers=*/kDontCare)); + + EXPECT_EQ(video_codec.spatialLayers[0].width, 600); + EXPECT_EQ(video_codec.spatialLayers[0].height, 400); + EXPECT_EQ(video_codec.spatialLayers[1].width, 1200); + EXPECT_EQ(video_codec.spatialLayers[1].height, 800); +} + +TEST(Av1SvcConfigTest, ConfiguresSmallResolutionRatioFromScalabilityMode) { + VideoCodec video_codec; + video_codec.codecType = kVideoCodecAV1; + // h mode uses 1.5:1 ratio + video_codec.SetScalabilityMode(ScalabilityMode::kL2T1h); + video_codec.width = 1500; + video_codec.height = 900; + + EXPECT_TRUE(SetAv1SvcConfig(video_codec, /*num_temporal_layers=*/kDontCare, + /*num_spatial_layers=*/kDontCare)); + + EXPECT_EQ(video_codec.spatialLayers[0].width, 1000); + EXPECT_EQ(video_codec.spatialLayers[0].height, 600); + 
EXPECT_EQ(video_codec.spatialLayers[1].width, 1500); + EXPECT_EQ(video_codec.spatialLayers[1].height, 900); +} + +TEST(Av1SvcConfigTest, CopiesFramrate) { + VideoCodec video_codec; + video_codec.codecType = kVideoCodecAV1; + // h mode uses 1.5:1 ratio + video_codec.SetScalabilityMode(ScalabilityMode::kL2T1); + video_codec.maxFramerate = 27; + + EXPECT_TRUE(SetAv1SvcConfig(video_codec, /*num_temporal_layers=*/kDontCare, + /*num_spatial_layers=*/kDontCare)); + + EXPECT_EQ(video_codec.spatialLayers[0].maxFramerate, 27); + EXPECT_EQ(video_codec.spatialLayers[1].maxFramerate, 27); +} + +TEST(Av1SvcConfigTest, SetsNumberOfTemporalLayers) { + VideoCodec video_codec; + video_codec.codecType = kVideoCodecAV1; + video_codec.SetScalabilityMode(ScalabilityMode::kL1T3); + + EXPECT_TRUE(SetAv1SvcConfig(video_codec, /*num_temporal_layers=*/kDontCare, + /*num_spatial_layers=*/kDontCare)); + + EXPECT_EQ(video_codec.spatialLayers[0].numberOfTemporalLayers, 3); +} + +TEST(Av1SvcConfigTest, CopiesMinMaxBitrateForSingleSpatialLayer) { + VideoCodec video_codec; + video_codec.codecType = kVideoCodecAV1; + video_codec.SetScalabilityMode(ScalabilityMode::kL1T3); + video_codec.minBitrate = 100; + video_codec.maxBitrate = 500; + + EXPECT_TRUE(SetAv1SvcConfig(video_codec, /*num_temporal_layers=*/kDontCare, + /*num_spatial_layers=*/kDontCare)); + + EXPECT_EQ(video_codec.spatialLayers[0].minBitrate, 100u); + EXPECT_EQ(video_codec.spatialLayers[0].maxBitrate, 500u); + EXPECT_LE(video_codec.spatialLayers[0].minBitrate, + video_codec.spatialLayers[0].targetBitrate); + EXPECT_LE(video_codec.spatialLayers[0].targetBitrate, + video_codec.spatialLayers[0].maxBitrate); +} + +TEST(Av1SvcConfigTest, SetsBitratesForMultipleSpatialLayers) { + VideoCodec video_codec; + video_codec.codecType = kVideoCodecAV1; + video_codec.SetScalabilityMode(ScalabilityMode::kL3T3); + + EXPECT_TRUE(SetAv1SvcConfig(video_codec, /*num_temporal_layers=*/kDontCare, + /*num_spatial_layers=*/kDontCare)); + + 
EXPECT_GT(video_codec.spatialLayers[0].minBitrate, 0u); + EXPECT_LE(video_codec.spatialLayers[0].minBitrate, + video_codec.spatialLayers[0].targetBitrate); + EXPECT_LE(video_codec.spatialLayers[0].targetBitrate, + video_codec.spatialLayers[0].maxBitrate); + + EXPECT_GT(video_codec.spatialLayers[1].minBitrate, 0u); + EXPECT_LE(video_codec.spatialLayers[1].minBitrate, + video_codec.spatialLayers[1].targetBitrate); + EXPECT_LE(video_codec.spatialLayers[1].targetBitrate, + video_codec.spatialLayers[1].maxBitrate); + + EXPECT_GT(video_codec.spatialLayers[2].minBitrate, 0u); + EXPECT_LE(video_codec.spatialLayers[2].minBitrate, + video_codec.spatialLayers[2].targetBitrate); + EXPECT_LE(video_codec.spatialLayers[2].targetBitrate, + video_codec.spatialLayers[2].maxBitrate); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/dav1d_decoder.cc b/third_party/libwebrtc/modules/video_coding/codecs/av1/dav1d_decoder.cc new file mode 100644 index 0000000000..5551666811 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/dav1d_decoder.cc @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/av1/dav1d_decoder.h" + +#include <algorithm> + +#include "api/scoped_refptr.h" +#include "api/video/encoded_image.h" +#include "api/video/i420_buffer.h" +#include "common_video/include/video_frame_buffer_pool.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "rtc_base/logging.h" +#if defined(WEBRTC_MOZILLA_BUILD) +#include "dav1d/dav1d.h" +#include "libyuv/include/libyuv/convert.h" +#else +#include "third_party/dav1d/libdav1d/include/dav1d/dav1d.h" +#include "third_party/libyuv/include/libyuv/convert.h" +#endif + +namespace webrtc { +namespace { + +class Dav1dDecoder : public VideoDecoder { + public: + Dav1dDecoder(); + Dav1dDecoder(const Dav1dDecoder&) = delete; + Dav1dDecoder& operator=(const Dav1dDecoder&) = delete; + + ~Dav1dDecoder() override; + + bool Configure(const Settings& settings) override; + int32_t Decode(const EncodedImage& encoded_image, + bool missing_frames, + int64_t render_time_ms) override; + int32_t RegisterDecodeCompleteCallback( + DecodedImageCallback* callback) override; + int32_t Release() override; + DecoderInfo GetDecoderInfo() const override; + const char* ImplementationName() const override; + + private: + VideoFrameBufferPool buffer_pool_; + Dav1dContext* context_ = nullptr; + DecodedImageCallback* decode_complete_callback_ = nullptr; +}; + +class ScopedDav1dData { + public: + ~ScopedDav1dData() { dav1d_data_unref(&data_); } + + Dav1dData& Data() { return data_; } + + private: + Dav1dData data_ = {}; +}; + +class ScopedDav1dPicture { + public: + ~ScopedDav1dPicture() { dav1d_picture_unref(&picture_); } + + Dav1dPicture& Picture() { return picture_; } + + private: + Dav1dPicture picture_ = {}; +}; + +constexpr char kDav1dName[] = "dav1d"; + +// Calling `dav1d_data_wrap` requires a `free_callback` to be registered. 
+void NullFreeCallback(const uint8_t* buffer, void* opaque) {} + +Dav1dDecoder::Dav1dDecoder() + : buffer_pool_(/*zero_initialize=*/false, /*max_number_of_buffers=*/150) {} + +Dav1dDecoder::~Dav1dDecoder() { + Release(); +} + +bool Dav1dDecoder::Configure(const Settings& settings) { + Dav1dSettings s; + dav1d_default_settings(&s); + + s.n_threads = std::max(2, settings.number_of_cores()); + s.max_frame_delay = 1; // For low latency decoding. + s.all_layers = 0; // Don't output a frame for every spatial layer. + s.operating_point = 31; // Decode all operating points. + + return dav1d_open(&context_, &s) == 0; +} + +int32_t Dav1dDecoder::RegisterDecodeCompleteCallback( + DecodedImageCallback* decode_complete_callback) { + decode_complete_callback_ = decode_complete_callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t Dav1dDecoder::Release() { + dav1d_close(&context_); + if (context_ != nullptr) { + return WEBRTC_VIDEO_CODEC_MEMORY; + } + buffer_pool_.Release(); + return WEBRTC_VIDEO_CODEC_OK; +} + +VideoDecoder::DecoderInfo Dav1dDecoder::GetDecoderInfo() const { + DecoderInfo info; + info.implementation_name = kDav1dName; + info.is_hardware_accelerated = false; + return info; +} + +const char* Dav1dDecoder::ImplementationName() const { + return kDav1dName; +} + +int32_t Dav1dDecoder::Decode(const EncodedImage& encoded_image, + bool /*missing_frames*/, + int64_t /*render_time_ms*/) { + if (!context_ || decode_complete_callback_ == nullptr) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + ScopedDav1dData scoped_dav1d_data; + Dav1dData& dav1d_data = scoped_dav1d_data.Data(); + dav1d_data_wrap(&dav1d_data, encoded_image.data(), encoded_image.size(), + /*free_callback=*/&NullFreeCallback, + /*user_data=*/nullptr); + + if (int decode_res = dav1d_send_data(context_, &dav1d_data)) { + RTC_LOG(LS_WARNING) + << "Dav1dDecoder::Decode decoding failed with error code " + << decode_res; + return WEBRTC_VIDEO_CODEC_ERROR; + } + + ScopedDav1dPicture scoped_dav1d_picture; + 
Dav1dPicture& dav1d_picture = scoped_dav1d_picture.Picture(); + if (int get_picture_res = dav1d_get_picture(context_, &dav1d_picture)) { + RTC_LOG(LS_WARNING) + << "Dav1dDecoder::Decode getting picture failed with error code " + << get_picture_res; + return WEBRTC_VIDEO_CODEC_ERROR; + } + + // Only accept I420 pixel format and 8 bit depth. + if (dav1d_picture.p.layout != DAV1D_PIXEL_LAYOUT_I420 || + dav1d_picture.p.bpc != 8) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + + rtc::scoped_refptr<I420Buffer> buffer = + buffer_pool_.CreateI420Buffer(dav1d_picture.p.w, dav1d_picture.p.h); + if (!buffer.get()) { + RTC_LOG(LS_WARNING) + << "Dav1dDecoder::Decode failed to get frame from the buffer pool."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + + uint8_t* y_data = static_cast<uint8_t*>(dav1d_picture.data[0]); + uint8_t* u_data = static_cast<uint8_t*>(dav1d_picture.data[1]); + uint8_t* v_data = static_cast<uint8_t*>(dav1d_picture.data[2]); + int y_stride = dav1d_picture.stride[0]; + int uv_stride = dav1d_picture.stride[1]; + libyuv::I420Copy(y_data, y_stride, // + u_data, uv_stride, // + v_data, uv_stride, // + buffer->MutableDataY(), buffer->StrideY(), // + buffer->MutableDataU(), buffer->StrideU(), // + buffer->MutableDataV(), buffer->StrideV(), // + dav1d_picture.p.w, // + dav1d_picture.p.h); // + + VideoFrame decoded_frame = VideoFrame::Builder() + .set_video_frame_buffer(buffer) + .set_timestamp_rtp(encoded_image.Timestamp()) + .set_ntp_time_ms(encoded_image.ntp_time_ms_) + .set_color_space(encoded_image.ColorSpace()) + .build(); + + decode_complete_callback_->Decoded(decoded_frame, absl::nullopt, + absl::nullopt); + + return WEBRTC_VIDEO_CODEC_OK; +} + +} // namespace + +std::unique_ptr<VideoDecoder> CreateDav1dDecoder() { + return std::make_unique<Dav1dDecoder>(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/dav1d_decoder.h b/third_party/libwebrtc/modules/video_coding/codecs/av1/dav1d_decoder.h new file mode 100644 index 
0000000000..c9396d1e03 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/dav1d_decoder.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef MODULES_VIDEO_CODING_CODECS_AV1_DAV1D_DECODER_H_ +#define MODULES_VIDEO_CODING_CODECS_AV1_DAV1D_DECODER_H_ + +#include <memory> + +#include "api/video_codecs/video_decoder.h" + +namespace webrtc { + +std::unique_ptr<VideoDecoder> CreateDav1dDecoder(); + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_AV1_DAV1D_DECODER_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/dav1d_decoder_gn/moz.build b/third_party/libwebrtc/modules/video_coding/codecs/av1/dav1d_decoder_gn/moz.build new file mode 100644 index 0000000000..c21b84284b --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/dav1d_decoder_gn/moz.build @@ -0,0 +1,218 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/media/libdav1d/", + "/media/libyuv/", + "/media/libyuv/libyuv/include/", + "/third_party/dav1d/include/dav1d/", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/video_coding/codecs/av1/dav1d_decoder.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "GLESv2", + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + 
DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "dl", + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + 
DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("dav1d_decoder_gn") diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_decoder.cc b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_decoder.cc new file mode 100644 index 0000000000..b05a1f7539 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_decoder.cc @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/video_coding/codecs/av1/libaom_av1_decoder.h" + +#include <stdint.h> + +#include <memory> + +#include "absl/types/optional.h" +#include "api/scoped_refptr.h" +#include "api/video/encoded_image.h" +#include "api/video/i420_buffer.h" +#include "api/video_codecs/video_decoder.h" +#include "common_video/include/video_frame_buffer_pool.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "rtc_base/logging.h" +#include "third_party/libaom/source/libaom/aom/aom_decoder.h" +#include "third_party/libaom/source/libaom/aom/aomdx.h" +#include "third_party/libyuv/include/libyuv/convert.h" + +namespace webrtc { +namespace { + +constexpr int kConfigLowBitDepth = 1; // 8-bits per luma/chroma sample. +constexpr int kDecFlags = 0; // 0 signals no post processing. + +class LibaomAv1Decoder final : public VideoDecoder { + public: + LibaomAv1Decoder(); + LibaomAv1Decoder(const LibaomAv1Decoder&) = delete; + LibaomAv1Decoder& operator=(const LibaomAv1Decoder&) = delete; + ~LibaomAv1Decoder(); + + // Implements VideoDecoder. + bool Configure(const Settings& settings) override; + + // Decode an encoded video frame. + int32_t Decode(const EncodedImage& encoded_image, + bool missing_frames, + int64_t render_time_ms) override; + + int32_t RegisterDecodeCompleteCallback( + DecodedImageCallback* callback) override; + + int32_t Release() override; + + DecoderInfo GetDecoderInfo() const override; + const char* ImplementationName() const override; + + private: + aom_codec_ctx_t context_; + bool inited_; + // Pool of memory buffers to store decoded image data for application access. 
+ VideoFrameBufferPool buffer_pool_; + DecodedImageCallback* decode_complete_callback_; +}; + +LibaomAv1Decoder::LibaomAv1Decoder() + : context_(), // Force value initialization instead of default one. + inited_(false), + buffer_pool_(false, /*max_number_of_buffers=*/150), + decode_complete_callback_(nullptr) {} + +LibaomAv1Decoder::~LibaomAv1Decoder() { + Release(); +} + +bool LibaomAv1Decoder::Configure(const Settings& settings) { + aom_codec_dec_cfg_t config = {}; + config.threads = static_cast<unsigned int>(settings.number_of_cores()); + config.allow_lowbitdepth = kConfigLowBitDepth; + + aom_codec_err_t ret = + aom_codec_dec_init(&context_, aom_codec_av1_dx(), &config, kDecFlags); + if (ret != AOM_CODEC_OK) { + RTC_LOG(LS_WARNING) << "LibaomAv1Decoder::Configure returned " << ret + << " on aom_codec_dec_init."; + return false; + } + inited_ = true; + return true; +} + +int32_t LibaomAv1Decoder::Decode(const EncodedImage& encoded_image, + bool missing_frames, + int64_t /*render_time_ms*/) { + if (!inited_) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (decode_complete_callback_ == nullptr) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + // Decode one video frame. + aom_codec_err_t ret = + aom_codec_decode(&context_, encoded_image.data(), encoded_image.size(), + /*user_priv=*/nullptr); + if (ret != AOM_CODEC_OK) { + RTC_LOG(LS_WARNING) << "LibaomAv1Decoder::Decode returned " << ret + << " on aom_codec_decode."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + + // Get decoded frame data. + int corrupted_frame = 0; + aom_codec_iter_t iter = nullptr; + while (aom_image_t* decoded_image = aom_codec_get_frame(&context_, &iter)) { + if (aom_codec_control(&context_, AOMD_GET_FRAME_CORRUPTED, + &corrupted_frame)) { + RTC_LOG(LS_WARNING) << "LibaomAv1Decoder::Decode " + "AOM_GET_FRAME_CORRUPTED."; + } + // Check that decoded image format is I420 and has 8-bit depth. 
+ if (decoded_image->fmt != AOM_IMG_FMT_I420) { + RTC_LOG(LS_WARNING) << "LibaomAv1Decoder::Decode invalid image format"; + return WEBRTC_VIDEO_CODEC_ERROR; + } + + // Return decoded frame data. + int qp; + ret = aom_codec_control(&context_, AOMD_GET_LAST_QUANTIZER, &qp); + if (ret != AOM_CODEC_OK) { + RTC_LOG(LS_WARNING) << "LibaomAv1Decoder::Decode returned " << ret + << " on control AOME_GET_LAST_QUANTIZER."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + + // Allocate memory for decoded frame. + rtc::scoped_refptr<I420Buffer> buffer = + buffer_pool_.CreateI420Buffer(decoded_image->d_w, decoded_image->d_h); + if (!buffer.get()) { + // Pool has too many pending frames. + RTC_LOG(LS_WARNING) << "LibaomAv1Decoder::Decode returned due to lack of" + " space in decoded frame buffer pool."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + + // Copy decoded_image to decoded_frame. + libyuv::I420Copy( + decoded_image->planes[AOM_PLANE_Y], decoded_image->stride[AOM_PLANE_Y], + decoded_image->planes[AOM_PLANE_U], decoded_image->stride[AOM_PLANE_U], + decoded_image->planes[AOM_PLANE_V], decoded_image->stride[AOM_PLANE_V], + buffer->MutableDataY(), buffer->StrideY(), buffer->MutableDataU(), + buffer->StrideU(), buffer->MutableDataV(), buffer->StrideV(), + decoded_image->d_w, decoded_image->d_h); + VideoFrame decoded_frame = VideoFrame::Builder() + .set_video_frame_buffer(buffer) + .set_timestamp_rtp(encoded_image.Timestamp()) + .set_ntp_time_ms(encoded_image.ntp_time_ms_) + .set_color_space(encoded_image.ColorSpace()) + .build(); + + decode_complete_callback_->Decoded(decoded_frame, absl::nullopt, + absl::nullopt); + } + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t LibaomAv1Decoder::RegisterDecodeCompleteCallback( + DecodedImageCallback* decode_complete_callback) { + decode_complete_callback_ = decode_complete_callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t LibaomAv1Decoder::Release() { + if (aom_codec_destroy(&context_) != AOM_CODEC_OK) { + return WEBRTC_VIDEO_CODEC_MEMORY; + } + 
buffer_pool_.Release(); + inited_ = false; + return WEBRTC_VIDEO_CODEC_OK; +} + +VideoDecoder::DecoderInfo LibaomAv1Decoder::GetDecoderInfo() const { + DecoderInfo info; + info.implementation_name = "libaom"; + info.is_hardware_accelerated = false; + return info; +} + +const char* LibaomAv1Decoder::ImplementationName() const { + return "libaom"; +} + +} // namespace + +ABSL_CONST_INIT const bool kIsLibaomAv1DecoderSupported = true; + +std::unique_ptr<VideoDecoder> CreateLibaomAv1Decoder() { + return std::make_unique<LibaomAv1Decoder>(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_decoder.h b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_decoder.h new file mode 100644 index 0000000000..9b01285c73 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_decoder.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#ifndef MODULES_VIDEO_CODING_CODECS_AV1_LIBAOM_AV1_DECODER_H_ +#define MODULES_VIDEO_CODING_CODECS_AV1_LIBAOM_AV1_DECODER_H_ + +#include <memory> + +#include "absl/base/attributes.h" +#include "api/video_codecs/video_decoder.h" + +namespace webrtc { + +ABSL_CONST_INIT extern const bool kIsLibaomAv1DecoderSupported; + +std::unique_ptr<VideoDecoder> CreateLibaomAv1Decoder(); + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_AV1_LIBAOM_AV1_DECODER_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_decoder_absent.cc b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_decoder_absent.cc new file mode 100644 index 0000000000..1b387d17ed --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_decoder_absent.cc @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/video_coding/codecs/av1/libaom_av1_decoder.h" + +#include <memory> + +#include "api/video_codecs/video_decoder.h" + +namespace webrtc { + +ABSL_CONST_INIT const bool kIsLibaomAv1DecoderSupported = false; + +std::unique_ptr<VideoDecoder> CreateLibaomAv1Decoder() { + return nullptr; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_decoder_gn/moz.build b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_decoder_gn/moz.build new file mode 100644 index 0000000000..15ec27b21d --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_decoder_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_decoder_absent.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + 
DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = 
True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("aom_av1_decoder_gn") diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_encoder.cc new file mode 100644 index 0000000000..807513bc7b --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_encoder.cc @@ -0,0 +1,819 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/video_coding/codecs/av1/libaom_av1_encoder.h" + +#include <stddef.h> +#include <stdint.h> + +#include <memory> +#include <utility> +#include <vector> + +#include "absl/algorithm/container.h" +#include "absl/base/macros.h" +#include "absl/types/optional.h" +#include "api/scoped_refptr.h" +#include "api/video/encoded_image.h" +#include "api/video/i420_buffer.h" +#include "api/video/video_frame.h" +#include "api/video_codecs/video_codec.h" +#include "api/video_codecs/video_encoder.h" +#include "modules/video_coding/include/video_codec_interface.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "modules/video_coding/svc/create_scalability_structure.h" +#include "modules/video_coding/svc/scalable_video_controller.h" +#include "modules/video_coding/svc/scalable_video_controller_no_layering.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "third_party/libaom/source/libaom/aom/aom_codec.h" +#include "third_party/libaom/source/libaom/aom/aom_encoder.h" +#include "third_party/libaom/source/libaom/aom/aomcx.h" + +#define SET_ENCODER_PARAM_OR_RETURN_ERROR(param_id, param_value) \ + do { \ + if (!SetEncoderControlParameters(param_id, param_value)) { \ + return WEBRTC_VIDEO_CODEC_ERROR; \ + } \ + } while (0) + +namespace webrtc { +namespace { + +// Encoder configuration parameters +constexpr int kQpMin = 10; +constexpr int kUsageProfile = AOM_USAGE_REALTIME; +constexpr int kMinQindex = 145; // Min qindex threshold for QP scaling. +constexpr int kMaxQindex = 205; // Max qindex threshold for QP scaling. +constexpr int kBitDepth = 8; +constexpr int kLagInFrames = 0; // No look ahead. 
+constexpr int kRtpTicksPerSecond = 90000; +constexpr float kMinimumFrameRate = 1.0; + +aom_superblock_size_t GetSuperblockSize(int width, int height, int threads) { + int resolution = width * height; + if (threads >= 4 && resolution >= 960 * 540 && resolution < 1920 * 1080) + return AOM_SUPERBLOCK_SIZE_64X64; + else + return AOM_SUPERBLOCK_SIZE_DYNAMIC; +} + +class LibaomAv1Encoder final : public VideoEncoder { + public: + explicit LibaomAv1Encoder( + const absl::optional<LibaomAv1EncoderAuxConfig>& aux_config); + ~LibaomAv1Encoder(); + + int InitEncode(const VideoCodec* codec_settings, + const Settings& settings) override; + + int32_t RegisterEncodeCompleteCallback( + EncodedImageCallback* encoded_image_callback) override; + + int32_t Release() override; + + int32_t Encode(const VideoFrame& frame, + const std::vector<VideoFrameType>* frame_types) override; + + void SetRates(const RateControlParameters& parameters) override; + + EncoderInfo GetEncoderInfo() const override; + + private: + template <typename P> + bool SetEncoderControlParameters(int param_id, P param_value); + + // Get value to be used for encoder cpu_speed setting + int GetCpuSpeed(int width, int height); + + // Determine number of encoder threads to use. + int NumberOfThreads(int width, int height, int number_of_cores); + + bool SvcEnabled() const { return svc_params_.has_value(); } + // Fills svc_params_ memeber value. Returns false on error. + bool SetSvcParams(ScalableVideoController::StreamLayersConfig svc_config); + // Configures the encoder with layer for the next frame. + void SetSvcLayerId( + const ScalableVideoController::LayerFrameConfig& layer_frame); + // Configures the encoder which buffers next frame updates and can reference. + void SetSvcRefFrameConfig( + const ScalableVideoController::LayerFrameConfig& layer_frame); + // If pixel format doesn't match, then reallocate. 
+ void MaybeRewrapImgWithFormat(const aom_img_fmt_t fmt); + + std::unique_ptr<ScalableVideoController> svc_controller_; + bool inited_; + bool rates_configured_; + absl::optional<aom_svc_params_t> svc_params_; + VideoCodec encoder_settings_; + absl::optional<LibaomAv1EncoderAuxConfig> aux_config_; + aom_image_t* frame_for_encode_; + aom_codec_ctx_t ctx_; + aom_codec_enc_cfg_t cfg_; + EncodedImageCallback* encoded_image_callback_; +}; + +int32_t VerifyCodecSettings(const VideoCodec& codec_settings) { + if (codec_settings.width < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (codec_settings.height < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + // maxBitrate == 0 represents an unspecified maxBitRate. + if (codec_settings.maxBitrate > 0 && + codec_settings.minBitrate > codec_settings.maxBitrate) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (codec_settings.maxBitrate > 0 && + codec_settings.startBitrate > codec_settings.maxBitrate) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (codec_settings.startBitrate < codec_settings.minBitrate) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (codec_settings.maxFramerate < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + return WEBRTC_VIDEO_CODEC_OK; +} + +LibaomAv1Encoder::LibaomAv1Encoder( + const absl::optional<LibaomAv1EncoderAuxConfig>& aux_config) + : inited_(false), + rates_configured_(false), + aux_config_(aux_config), + frame_for_encode_(nullptr), + encoded_image_callback_(nullptr) {} + +LibaomAv1Encoder::~LibaomAv1Encoder() { + Release(); +} + +int LibaomAv1Encoder::InitEncode(const VideoCodec* codec_settings, + const Settings& settings) { + if (codec_settings == nullptr) { + RTC_LOG(LS_WARNING) << "No codec settings provided to " + "LibaomAv1Encoder."; + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (settings.number_of_cores < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inited_) { + RTC_LOG(LS_WARNING) << "Initing LibaomAv1Encoder without first 
releasing."; + Release(); + } + encoder_settings_ = *codec_settings; + + // Sanity checks for encoder configuration. + const int32_t result = VerifyCodecSettings(encoder_settings_); + if (result < 0) { + RTC_LOG(LS_WARNING) << "Incorrect codec settings provided to " + "LibaomAv1Encoder."; + return result; + } + if (encoder_settings_.numberOfSimulcastStreams > 1) { + RTC_LOG(LS_WARNING) << "Simulcast is not implemented by LibaomAv1Encoder."; + return result; + } + absl::optional<ScalabilityMode> scalability_mode = + encoder_settings_.GetScalabilityMode(); + if (!scalability_mode.has_value()) { + RTC_LOG(LS_WARNING) << "Scalability mode is not set, using 'L1T1'."; + scalability_mode = ScalabilityMode::kL1T1; + } + svc_controller_ = CreateScalabilityStructure(*scalability_mode); + if (svc_controller_ == nullptr) { + RTC_LOG(LS_WARNING) << "Failed to set scalability mode " + << static_cast<int>(*scalability_mode); + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + if (!SetSvcParams(svc_controller_->StreamConfig())) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + + // Initialize encoder configuration structure with default values + aom_codec_err_t ret = + aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg_, kUsageProfile); + if (ret != AOM_CODEC_OK) { + RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::EncodeInit returned " << ret + << " on aom_codec_enc_config_default."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + + // Overwrite default config with input encoder settings & RTC-relevant values. + cfg_.g_w = encoder_settings_.width; + cfg_.g_h = encoder_settings_.height; + cfg_.g_threads = + NumberOfThreads(cfg_.g_w, cfg_.g_h, settings.number_of_cores); + cfg_.g_timebase.num = 1; + cfg_.g_timebase.den = kRtpTicksPerSecond; + cfg_.rc_target_bitrate = encoder_settings_.maxBitrate; // kilobits/sec. 
+ cfg_.g_input_bit_depth = kBitDepth; + cfg_.kf_mode = AOM_KF_DISABLED; + cfg_.rc_min_quantizer = kQpMin; + cfg_.rc_max_quantizer = encoder_settings_.qpMax; + cfg_.rc_undershoot_pct = 50; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_buf_initial_sz = 600; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_buf_sz = 1000; + cfg_.g_usage = kUsageProfile; + cfg_.g_error_resilient = 0; + // Low-latency settings. + cfg_.rc_end_usage = AOM_CBR; // Constant Bit Rate (CBR) mode + cfg_.g_pass = AOM_RC_ONE_PASS; // One-pass rate control + cfg_.g_lag_in_frames = kLagInFrames; // No look ahead when lag equals 0. + + if (frame_for_encode_ != nullptr) { + aom_img_free(frame_for_encode_); + frame_for_encode_ = nullptr; + } + + // Flag options: AOM_CODEC_USE_PSNR and AOM_CODEC_USE_HIGHBITDEPTH + aom_codec_flags_t flags = 0; + + // Initialize an encoder instance. + ret = aom_codec_enc_init(&ctx_, aom_codec_av1_cx(), &cfg_, flags); + if (ret != AOM_CODEC_OK) { + RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::EncodeInit returned " << ret + << " on aom_codec_enc_init."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + inited_ = true; + + // Set control parameters + SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_CPUUSED, + GetCpuSpeed(cfg_.g_w, cfg_.g_h)); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_CDEF, 1); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_TPL_MODEL, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_DELTAQ_MODE, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_ORDER_HINT, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_AQ_MODE, 3); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_MAX_INTRA_BITRATE_PCT, 300); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_COEFF_COST_UPD_FREQ, 3); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MODE_COST_UPD_FREQ, 3); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MV_COST_UPD_FREQ, 3); + + if (codec_settings->mode == VideoCodecMode::kScreensharing) { + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TUNE_CONTENT, + AOM_CONTENT_SCREEN); + 
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PALETTE, 1); + } else { + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PALETTE, 0); + } + + if (cfg_.g_threads == 4 && cfg_.g_w == 640 && + (cfg_.g_h == 360 || cfg_.g_h == 480)) { + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_ROWS, + static_cast<int>(log2(cfg_.g_threads))); + } else { + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_COLUMNS, + static_cast<int>(log2(cfg_.g_threads))); + } + + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ROW_MT, 1); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_OBMC, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_NOISE_SENSITIVITY, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_WARPED_MOTION, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_GLOBAL_MOTION, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_REF_FRAME_MVS, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR( + AV1E_SET_SUPERBLOCK_SIZE, + GetSuperblockSize(cfg_.g_w, cfg_.g_h, cfg_.g_threads)); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_CFL_INTRA, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_SMOOTH_INTRA, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_ANGLE_DELTA, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_FILTER_INTRA, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_DISABLE_TRELLIS_QUANT, 1); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DIST_WTD_COMP, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DIFF_WTD_COMP, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DUAL_FILTER, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTERINTRA_COMP, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTERINTRA_WEDGE, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTRA_EDGE_FILTER, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTRABC, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_MASKED_COMP, 0); + 
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PAETH_INTRA, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_QM, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_RECT_PARTITIONS, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_RESTORATION, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_SMOOTH_INTERINTRA, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_TX64, 0); + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MAX_REFERENCE_FRAMES, 3); + + return WEBRTC_VIDEO_CODEC_OK; +} + +template <typename P> +bool LibaomAv1Encoder::SetEncoderControlParameters(int param_id, + P param_value) { + aom_codec_err_t error_code = aom_codec_control(&ctx_, param_id, param_value); + if (error_code != AOM_CODEC_OK) { + RTC_LOG(LS_WARNING) + << "LibaomAv1Encoder::SetEncoderControlParameters returned " + << error_code << " on id: " << param_id << "."; + } + return error_code == AOM_CODEC_OK; +} + +// Only positive speeds, range for real-time coding currently is: 6 - 8. +// Lower means slower/better quality, higher means fastest/lower quality. +int LibaomAv1Encoder::GetCpuSpeed(int width, int height) { + if (aux_config_) { + if (auto it = aux_config_->max_pixel_count_to_cpu_speed.lower_bound(width * + height); + it != aux_config_->max_pixel_count_to_cpu_speed.end()) { + return it->second; + } + + return 10; + } else { + // For smaller resolutions, use lower speed setting (get some coding gain at + // the cost of increased encoding complexity). 
+ switch (encoder_settings_.GetVideoEncoderComplexity()) { + case VideoCodecComplexity::kComplexityHigh: + if (width * height <= 320 * 180) + return 8; + else if (width * height <= 640 * 360) + return 9; + else + return 10; + case VideoCodecComplexity::kComplexityHigher: + if (width * height <= 320 * 180) + return 7; + else if (width * height <= 640 * 360) + return 8; + else if (width * height <= 1280 * 720) + return 9; + else + return 10; + case VideoCodecComplexity::kComplexityMax: + if (width * height <= 320 * 180) + return 6; + else if (width * height <= 640 * 360) + return 7; + else if (width * height <= 1280 * 720) + return 8; + else + return 9; + default: + return 10; + } + } +} + +int LibaomAv1Encoder::NumberOfThreads(int width, + int height, + int number_of_cores) { + // Keep the number of encoder threads equal to the possible number of + // column/row tiles, which is (1, 2, 4, 8). See comments below for + // AV1E_SET_TILE_COLUMNS/ROWS. + if (width * height >= 640 * 360 && number_of_cores > 4) { + return 4; + } else if (width * height >= 320 * 180 && number_of_cores > 2) { + return 2; + } else { +// Use 2 threads for low res on ARM. +// NOTE(review): the check below repeats the preceding else-if condition, so it +// can never be true inside this else branch; the ARM threshold was presumably +// meant to differ - confirm intent. +#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || \ + defined(WEBRTC_ANDROID) + if (width * height >= 320 * 180 && number_of_cores > 2) { + return 2; + } +#endif + // 1 thread less than VGA. + return 1; + } +} + +bool LibaomAv1Encoder::SetSvcParams( + ScalableVideoController::StreamLayersConfig svc_config) { + bool svc_enabled = + svc_config.num_spatial_layers > 1 || svc_config.num_temporal_layers > 1; + if (!svc_enabled) { + svc_params_ = absl::nullopt; + return true; + } + if (svc_config.num_spatial_layers < 1 || svc_config.num_spatial_layers > 4) { + RTC_LOG(LS_WARNING) << "Av1 supports up to 4 spatial layers. 
" + << svc_config.num_spatial_layers << " configured."; + return false; + } + if (svc_config.num_temporal_layers < 1 || + svc_config.num_temporal_layers > 8) { + RTC_LOG(LS_WARNING) << "Av1 supports up to 8 temporal layers. " + << svc_config.num_temporal_layers << " configured."; + return false; + } + aom_svc_params_t& svc_params = svc_params_.emplace(); + svc_params.number_spatial_layers = svc_config.num_spatial_layers; + svc_params.number_temporal_layers = svc_config.num_temporal_layers; + + int num_layers = + svc_config.num_spatial_layers * svc_config.num_temporal_layers; + for (int i = 0; i < num_layers; ++i) { + svc_params.min_quantizers[i] = kQpMin; + svc_params.max_quantizers[i] = encoder_settings_.qpMax; + } + + // Assume each temporal layer doubles framerate. + for (int tid = 0; tid < svc_config.num_temporal_layers; ++tid) { + svc_params.framerate_factor[tid] = + 1 << (svc_config.num_temporal_layers - tid - 1); + } + + for (int sid = 0; sid < svc_config.num_spatial_layers; ++sid) { + svc_params.scaling_factor_num[sid] = svc_config.scaling_factor_num[sid]; + svc_params.scaling_factor_den[sid] = svc_config.scaling_factor_den[sid]; + } + + return true; +} + +void LibaomAv1Encoder::SetSvcLayerId( + const ScalableVideoController::LayerFrameConfig& layer_frame) { + aom_svc_layer_id_t layer_id = {}; + layer_id.spatial_layer_id = layer_frame.SpatialId(); + layer_id.temporal_layer_id = layer_frame.TemporalId(); + SetEncoderControlParameters(AV1E_SET_SVC_LAYER_ID, &layer_id); +} + +void LibaomAv1Encoder::SetSvcRefFrameConfig( + const ScalableVideoController::LayerFrameConfig& layer_frame) { + // Buffer name to use for each layer_frame.buffers position. In particular, + // when 2 buffers are referenced, prefer naming them last and golden, + // because av1 bitstream format has dedicated fields for these two names. 
+ // See last_frame_idx and golden_frame_idx in the av1 spec + // https://aomediacodec.github.io/av1-spec/av1-spec.pdf + static constexpr int kPreferedSlotName[] = {0, // Last + 3, // Golden + 1, 2, 4, 5, 6}; + static constexpr int kAv1NumBuffers = 8; + + aom_svc_ref_frame_config_t ref_frame_config = {}; + RTC_CHECK_LE(layer_frame.Buffers().size(), ABSL_ARRAYSIZE(kPreferedSlotName)); + for (size_t i = 0; i < layer_frame.Buffers().size(); ++i) { + const CodecBufferUsage& buffer = layer_frame.Buffers()[i]; + int slot_name = kPreferedSlotName[i]; + RTC_CHECK_GE(buffer.id, 0); + RTC_CHECK_LT(buffer.id, kAv1NumBuffers); + ref_frame_config.ref_idx[slot_name] = buffer.id; + if (buffer.referenced) { + ref_frame_config.reference[slot_name] = 1; + } + if (buffer.updated) { + ref_frame_config.refresh[buffer.id] = 1; + } + } + + SetEncoderControlParameters(AV1E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config); +} + +int32_t LibaomAv1Encoder::RegisterEncodeCompleteCallback( + EncodedImageCallback* encoded_image_callback) { + encoded_image_callback_ = encoded_image_callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t LibaomAv1Encoder::Release() { + if (frame_for_encode_ != nullptr) { + aom_img_free(frame_for_encode_); + frame_for_encode_ = nullptr; + } + if (inited_) { + if (aom_codec_destroy(&ctx_)) { + return WEBRTC_VIDEO_CODEC_MEMORY; + } + inited_ = false; + } + rates_configured_ = false; + return WEBRTC_VIDEO_CODEC_OK; +} + +void LibaomAv1Encoder::MaybeRewrapImgWithFormat(const aom_img_fmt_t fmt) { + if (!frame_for_encode_) { + frame_for_encode_ = + aom_img_wrap(nullptr, fmt, cfg_.g_w, cfg_.g_h, 1, nullptr); + + } else if (frame_for_encode_->fmt != fmt) { + RTC_LOG(LS_INFO) << "Switching AV1 encoder pixel format to " + << (fmt == AOM_IMG_FMT_NV12 ? "NV12" : "I420"); + aom_img_free(frame_for_encode_); + frame_for_encode_ = + aom_img_wrap(nullptr, fmt, cfg_.g_w, cfg_.g_h, 1, nullptr); + } + // else no-op since the image is already in the right format. 
+} + +int32_t LibaomAv1Encoder::Encode( + const VideoFrame& frame, + const std::vector<VideoFrameType>* frame_types) { + if (!inited_ || encoded_image_callback_ == nullptr || !rates_configured_) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + bool keyframe_required = + frame_types != nullptr && + absl::c_linear_search(*frame_types, VideoFrameType::kVideoFrameKey); + + std::vector<ScalableVideoController::LayerFrameConfig> layer_frames = + svc_controller_->NextFrameConfig(keyframe_required); + + if (layer_frames.empty()) { + RTC_LOG(LS_ERROR) << "SVCController returned no configuration for a frame."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + + rtc::scoped_refptr<VideoFrameBuffer> buffer = frame.video_frame_buffer(); + absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats> + supported_formats = {VideoFrameBuffer::Type::kI420, + VideoFrameBuffer::Type::kNV12}; + rtc::scoped_refptr<VideoFrameBuffer> mapped_buffer; + if (buffer->type() != VideoFrameBuffer::Type::kNative) { + // `buffer` is already mapped. + mapped_buffer = buffer; + } else { + // Attempt to map to one of the supported formats. + mapped_buffer = buffer->GetMappedFrameBuffer(supported_formats); + } + + // Convert input frame to I420, if needed. + if (!mapped_buffer || + (absl::c_find(supported_formats, mapped_buffer->type()) == + supported_formats.end() && + mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) { + rtc::scoped_refptr<I420BufferInterface> converted_buffer(buffer->ToI420()); + if (!converted_buffer) { + RTC_LOG(LS_ERROR) << "Failed to convert " + << VideoFrameBufferTypeToString( + frame.video_frame_buffer()->type()) + << " image to I420. 
Can't encode frame."; + return WEBRTC_VIDEO_CODEC_ENCODER_FAILURE; + } + RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 || + converted_buffer->type() == VideoFrameBuffer::Type::kI420A); + + mapped_buffer = converted_buffer; + } + + switch (mapped_buffer->type()) { + case VideoFrameBuffer::Type::kI420: + case VideoFrameBuffer::Type::kI420A: { + // Set frame_for_encode_ data pointers and strides. + MaybeRewrapImgWithFormat(AOM_IMG_FMT_I420); + auto i420_buffer = mapped_buffer->GetI420(); + RTC_DCHECK(i420_buffer); + frame_for_encode_->planes[AOM_PLANE_Y] = + const_cast<unsigned char*>(i420_buffer->DataY()); + frame_for_encode_->planes[AOM_PLANE_U] = + const_cast<unsigned char*>(i420_buffer->DataU()); + frame_for_encode_->planes[AOM_PLANE_V] = + const_cast<unsigned char*>(i420_buffer->DataV()); + frame_for_encode_->stride[AOM_PLANE_Y] = i420_buffer->StrideY(); + frame_for_encode_->stride[AOM_PLANE_U] = i420_buffer->StrideU(); + frame_for_encode_->stride[AOM_PLANE_V] = i420_buffer->StrideV(); + break; + } + case VideoFrameBuffer::Type::kNV12: { + MaybeRewrapImgWithFormat(AOM_IMG_FMT_NV12); + const NV12BufferInterface* nv12_buffer = mapped_buffer->GetNV12(); + RTC_DCHECK(nv12_buffer); + frame_for_encode_->planes[AOM_PLANE_Y] = + const_cast<unsigned char*>(nv12_buffer->DataY()); + frame_for_encode_->planes[AOM_PLANE_U] = + const_cast<unsigned char*>(nv12_buffer->DataUV()); + frame_for_encode_->planes[AOM_PLANE_V] = nullptr; + frame_for_encode_->stride[AOM_PLANE_Y] = nv12_buffer->StrideY(); + frame_for_encode_->stride[AOM_PLANE_U] = nv12_buffer->StrideUV(); + frame_for_encode_->stride[AOM_PLANE_V] = 0; + break; + } + default: + return WEBRTC_VIDEO_CODEC_ENCODER_FAILURE; + } + + const uint32_t duration = + kRtpTicksPerSecond / static_cast<float>(encoder_settings_.maxFramerate); + + const size_t num_spatial_layers = + svc_params_ ? 
svc_params_->number_spatial_layers : 1; + auto next_layer_frame = layer_frames.begin(); + for (size_t i = 0; i < num_spatial_layers; ++i) { + // The libaom AV1 encoder requires that `aom_codec_encode` is called for + // every spatial layer, even if the configured bitrate for that layer is + // zero. For zero bitrate spatial layers no frames will be produced. + absl::optional<ScalableVideoController::LayerFrameConfig> + non_encoded_layer_frame; + ScalableVideoController::LayerFrameConfig* layer_frame; + if (next_layer_frame != layer_frames.end() && + next_layer_frame->SpatialId() == static_cast<int>(i)) { + layer_frame = &*next_layer_frame; + ++next_layer_frame; + } else { + // For layers that are not encoded only the spatial id matters. + non_encoded_layer_frame.emplace().S(i); + layer_frame = &*non_encoded_layer_frame; + } + const bool end_of_picture = (next_layer_frame == layer_frames.end()); + + aom_enc_frame_flags_t flags = + layer_frame->IsKeyframe() ? AOM_EFLAG_FORCE_KF : 0; + + if (SvcEnabled()) { + SetSvcLayerId(*layer_frame); + SetSvcRefFrameConfig(*layer_frame); + + SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ERROR_RESILIENT_MODE, + layer_frame->TemporalId() > 0 ? 1 : 0); + } + + // Encode a frame. + aom_codec_err_t ret = aom_codec_encode(&ctx_, frame_for_encode_, + frame.timestamp(), duration, flags); + if (ret != AOM_CODEC_OK) { + RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret + << " on aom_codec_encode."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + + if (non_encoded_layer_frame) { + continue; + } + + // Get encoded image data. 
+ EncodedImage encoded_image; + aom_codec_iter_t iter = nullptr; + int data_pkt_count = 0; + while (const aom_codec_cx_pkt_t* pkt = + aom_codec_get_cx_data(&ctx_, &iter)) { + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) { + if (data_pkt_count > 0) { + RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than " + "one data packet for an input video frame."; + Release(); + // Release() destroys ctx_, which produced pkt; stop here rather than + // keep reading the packet and querying the destroyed context. + return WEBRTC_VIDEO_CODEC_ERROR; + } + encoded_image.SetEncodedData(EncodedImageBuffer::Create( + /*data=*/static_cast<const uint8_t*>(pkt->data.frame.buf), + /*size=*/pkt->data.frame.sz)); + + // NOTE(review): AOM_EFLAG_FORCE_KF is an encode-request flag; libaom's + // packet flag for key frames is presumably AOM_FRAME_IS_KEY - confirm. + if ((pkt->data.frame.flags & AOM_EFLAG_FORCE_KF) != 0) { + layer_frame->Keyframe(); + } + + encoded_image._frameType = layer_frame->IsKeyframe() + ? VideoFrameType::kVideoFrameKey + : VideoFrameType::kVideoFrameDelta; + encoded_image.SetTimestamp(frame.timestamp()); + encoded_image.capture_time_ms_ = frame.render_time_ms(); + encoded_image.rotation_ = frame.rotation(); + encoded_image.content_type_ = VideoContentType::UNSPECIFIED; + // If encoded image width/height info are added to aom_codec_cx_pkt_t, + // use those values in lieu of the values in frame. + if (svc_params_) { + int n = svc_params_->scaling_factor_num[layer_frame->SpatialId()]; + int d = svc_params_->scaling_factor_den[layer_frame->SpatialId()]; + encoded_image._encodedWidth = cfg_.g_w * n / d; + encoded_image._encodedHeight = cfg_.g_h * n / d; + encoded_image.SetSpatialIndex(layer_frame->SpatialId()); + encoded_image.SetTemporalIndex(layer_frame->TemporalId()); + } else { + encoded_image._encodedWidth = cfg_.g_w; + encoded_image._encodedHeight = cfg_.g_h; + } + encoded_image.timing_.flags = VideoSendTiming::kInvalid; + + int qp = -1; + SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_GET_LAST_QUANTIZER, &qp); + encoded_image.qp_ = qp; + + encoded_image.SetColorSpace(frame.color_space()); + ++data_pkt_count; + } + } + + // Deliver encoded image data. 
+ if (encoded_image.size() > 0) { + CodecSpecificInfo codec_specific_info; + codec_specific_info.codecType = kVideoCodecAV1; + codec_specific_info.end_of_picture = end_of_picture; + bool is_keyframe = layer_frame->IsKeyframe(); + codec_specific_info.generic_frame_info = + svc_controller_->OnEncodeDone(*layer_frame); + if (is_keyframe && codec_specific_info.generic_frame_info) { + codec_specific_info.template_structure = + svc_controller_->DependencyStructure(); + auto& resolutions = codec_specific_info.template_structure->resolutions; + if (SvcEnabled()) { + resolutions.resize(svc_params_->number_spatial_layers); + for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) { + int n = svc_params_->scaling_factor_num[sid]; + int d = svc_params_->scaling_factor_den[sid]; + resolutions[sid] = + RenderResolution(cfg_.g_w * n / d, cfg_.g_h * n / d); + } + } else { + resolutions = {RenderResolution(cfg_.g_w, cfg_.g_h)}; + } + } + encoded_image_callback_->OnEncodedImage(encoded_image, + &codec_specific_info); + } + } + + return WEBRTC_VIDEO_CODEC_OK; +} + +void LibaomAv1Encoder::SetRates(const RateControlParameters& parameters) { + if (!inited_) { + RTC_LOG(LS_WARNING) << "SetRates() while encoder is not initialized"; + return; + } + if (parameters.framerate_fps < kMinimumFrameRate) { + RTC_LOG(LS_WARNING) << "Unsupported framerate (must be >= " + << kMinimumFrameRate + << " ): " << parameters.framerate_fps; + return; + } + if (parameters.bitrate.get_sum_bps() == 0) { + RTC_LOG(LS_WARNING) << "Attempt to set target bit rate to zero"; + return; + } + + // The bitrates calculated internally in libaom when `AV1E_SET_SVC_PARAMS` is + // called depend on the currently configured `rc_target_bitrate`. If the + // total target bitrate is not updated first, a division by zero could happen. 
+ svc_controller_->OnRatesUpdated(parameters.bitrate); + cfg_.rc_target_bitrate = parameters.bitrate.get_sum_kbps(); + aom_codec_err_t error_code = aom_codec_enc_config_set(&ctx_, &cfg_); + if (error_code != AOM_CODEC_OK) { + RTC_LOG(LS_WARNING) << "Error configuring encoder, error code: " + << error_code; + } + + if (SvcEnabled()) { + for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) { + // libaom bitrate for spatial id S and temporal id T means bitrate + // of frames with spatial_id=S and temporal_id<=T + // while `parameters.bitrate` provides bitrate of frames with + // spatial_id=S and temporal_id=T + int accumulated_bitrate_bps = 0; + for (int tid = 0; tid < svc_params_->number_temporal_layers; ++tid) { + int layer_index = sid * svc_params_->number_temporal_layers + tid; + accumulated_bitrate_bps += parameters.bitrate.GetBitrate(sid, tid); + // `svc_params.layer_target_bitrate` expects bitrate in kbps. + svc_params_->layer_target_bitrate[layer_index] = + accumulated_bitrate_bps / 1000; + } + } + SetEncoderControlParameters(AV1E_SET_SVC_PARAMS, &*svc_params_); + } + + rates_configured_ = true; + + // Set frame rate to closest integer value. 
+ encoder_settings_.maxFramerate = + static_cast<uint32_t>(parameters.framerate_fps + 0.5); +} + +VideoEncoder::EncoderInfo LibaomAv1Encoder::GetEncoderInfo() const { + EncoderInfo info; + info.supports_native_handle = false; + info.implementation_name = "libaom"; + info.has_trusted_rate_controller = true; + info.is_hardware_accelerated = false; + info.scaling_settings = VideoEncoder::ScalingSettings(kMinQindex, kMaxQindex); + info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420, + VideoFrameBuffer::Type::kNV12}; + if (SvcEnabled()) { + for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) { + info.fps_allocation[sid].resize(svc_params_->number_temporal_layers); + for (int tid = 0; tid < svc_params_->number_temporal_layers; ++tid) { + info.fps_allocation[sid][tid] = + encoder_settings_.maxFramerate / svc_params_->framerate_factor[tid]; + } + } + } + return info; +} + +} // namespace + +std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder() { + return std::make_unique<LibaomAv1Encoder>(absl::nullopt); +} + +std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder( + const LibaomAv1EncoderAuxConfig& aux_config) { + return std::make_unique<LibaomAv1Encoder>(aux_config); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_encoder.h b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_encoder.h new file mode 100644 index 0000000000..2fd1d5a754 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_encoder.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#ifndef MODULES_VIDEO_CODING_CODECS_AV1_LIBAOM_AV1_ENCODER_H_ +#define MODULES_VIDEO_CODING_CODECS_AV1_LIBAOM_AV1_ENCODER_H_ + +#include <map> +#include <memory> + +#include "absl/strings/string_view.h" +#include "api/video_codecs/video_encoder.h" + +namespace webrtc { +struct LibaomAv1EncoderAuxConfig { + // A map of max pixel count --> cpu speed. + std::map<int, int> max_pixel_count_to_cpu_speed; +}; + +std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder(); +std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder( + const LibaomAv1EncoderAuxConfig& aux_config); + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_AV1_LIBAOM_AV1_ENCODER_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc new file mode 100644 index 0000000000..5243edc1e4 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/av1/libaom_av1_encoder.h" + +#include <memory> +#include <vector> + +#include "absl/types/optional.h" +#include "api/video_codecs/video_codec.h" +#include "api/video_codecs/video_encoder.h" +#include "modules/video_coding/codecs/test/encoded_video_frame_producer.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::Field; +using ::testing::IsEmpty; +using ::testing::SizeIs; + +VideoCodec DefaultCodecSettings() { + VideoCodec codec_settings; + codec_settings.width = 320; + codec_settings.height = 180; + codec_settings.maxFramerate = 30; + codec_settings.maxBitrate = 1000; + codec_settings.qpMax = 63; + return codec_settings; +} + +VideoEncoder::Settings DefaultEncoderSettings() { + return VideoEncoder::Settings( + VideoEncoder::Capabilities(/*loss_notification=*/false), + /*number_of_cores=*/1, /*max_payload_size=*/1200); +} + +TEST(LibaomAv1EncoderTest, CanCreate) { + std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder(); + EXPECT_TRUE(encoder); +} + +TEST(LibaomAv1EncoderTest, InitAndRelease) { + std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder(); + ASSERT_TRUE(encoder); + VideoCodec codec_settings = DefaultCodecSettings(); + EXPECT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()), + WEBRTC_VIDEO_CODEC_OK); + EXPECT_EQ(encoder->Release(), WEBRTC_VIDEO_CODEC_OK); +} + +TEST(LibaomAv1EncoderTest, NoBitrateOnTopLayerRefecltedInActiveDecodeTargets) { + // Configure encoder with 2 temporal layers. 
+ std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder(); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.SetScalabilityMode(ScalabilityMode::kL1T2); + ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()), + WEBRTC_VIDEO_CODEC_OK); + + VideoEncoder::RateControlParameters rate_parameters; + rate_parameters.framerate_fps = 30; + rate_parameters.bitrate.SetBitrate(0, /*temporal_index=*/0, 300'000); + rate_parameters.bitrate.SetBitrate(0, /*temporal_index=*/1, 0); + encoder->SetRates(rate_parameters); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> encoded_frames = + EncodedVideoFrameProducer(*encoder).SetNumInputFrames(1).Encode(); + ASSERT_THAT(encoded_frames, SizeIs(1)); + ASSERT_NE(encoded_frames[0].codec_specific_info.generic_frame_info, + absl::nullopt); + // Assuming L1T2 structure uses 1st decode target for T0 and 2nd decode target + // for T0+T1 frames, expect only 1st decode target is active. + EXPECT_EQ(encoded_frames[0] + .codec_specific_info.generic_frame_info->active_decode_targets, + 0b01); +} + +TEST(LibaomAv1EncoderTest, + SpatialScalabilityInTemporalUnitReportedAsDeltaFrame) { + std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder(); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.SetScalabilityMode(ScalabilityMode::kL2T1); + ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()), + WEBRTC_VIDEO_CODEC_OK); + + VideoEncoder::RateControlParameters rate_parameters; + rate_parameters.framerate_fps = 30; + rate_parameters.bitrate.SetBitrate(/*spatial_index=*/0, 0, 300'000); + rate_parameters.bitrate.SetBitrate(/*spatial_index=*/1, 0, 300'000); + encoder->SetRates(rate_parameters); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> encoded_frames = + EncodedVideoFrameProducer(*encoder).SetNumInputFrames(1).Encode(); + ASSERT_THAT(encoded_frames, SizeIs(2)); + EXPECT_THAT(encoded_frames[0].encoded_image._frameType, + 
Eq(VideoFrameType::kVideoFrameKey)); + EXPECT_THAT(encoded_frames[1].encoded_image._frameType, + Eq(VideoFrameType::kVideoFrameDelta)); +} + +TEST(LibaomAv1EncoderTest, NoBitrateOnTopSpatialLayerProduceDeltaFrames) { + std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder(); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.SetScalabilityMode(ScalabilityMode::kL2T1); + ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()), + WEBRTC_VIDEO_CODEC_OK); + + VideoEncoder::RateControlParameters rate_parameters; + rate_parameters.framerate_fps = 30; + rate_parameters.bitrate.SetBitrate(/*spatial_index=*/0, 0, 300'000); + rate_parameters.bitrate.SetBitrate(/*spatial_index=*/1, 0, 0); + encoder->SetRates(rate_parameters); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> encoded_frames = + EncodedVideoFrameProducer(*encoder).SetNumInputFrames(2).Encode(); + ASSERT_THAT(encoded_frames, SizeIs(2)); + EXPECT_THAT(encoded_frames[0].encoded_image._frameType, + Eq(VideoFrameType::kVideoFrameKey)); + EXPECT_THAT(encoded_frames[1].encoded_image._frameType, + Eq(VideoFrameType::kVideoFrameDelta)); +} + +TEST(LibaomAv1EncoderTest, SetsEndOfPictureForLastFrameInTemporalUnit) { + VideoBitrateAllocation allocation; + allocation.SetBitrate(0, 0, 30000); + allocation.SetBitrate(1, 0, 40000); + allocation.SetBitrate(2, 0, 30000); + + std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder(); + VideoCodec codec_settings = DefaultCodecSettings(); + // Configure encoder with 3 spatial layers. 
+ codec_settings.SetScalabilityMode(ScalabilityMode::kL3T1); + codec_settings.maxBitrate = allocation.get_sum_kbps(); + ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()), + WEBRTC_VIDEO_CODEC_OK); + + encoder->SetRates(VideoEncoder::RateControlParameters( + allocation, codec_settings.maxFramerate)); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> encoded_frames = + EncodedVideoFrameProducer(*encoder).SetNumInputFrames(2).Encode(); + ASSERT_THAT(encoded_frames, SizeIs(6)); + EXPECT_FALSE(encoded_frames[0].codec_specific_info.end_of_picture); + EXPECT_FALSE(encoded_frames[1].codec_specific_info.end_of_picture); + EXPECT_TRUE(encoded_frames[2].codec_specific_info.end_of_picture); + EXPECT_FALSE(encoded_frames[3].codec_specific_info.end_of_picture); + EXPECT_FALSE(encoded_frames[4].codec_specific_info.end_of_picture); + EXPECT_TRUE(encoded_frames[5].codec_specific_info.end_of_picture); +} + +TEST(LibaomAv1EncoderTest, CheckOddDimensionsWithSpatialLayers) { + VideoBitrateAllocation allocation; + allocation.SetBitrate(0, 0, 30000); + allocation.SetBitrate(1, 0, 40000); + allocation.SetBitrate(2, 0, 30000); + std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder(); + VideoCodec codec_settings = DefaultCodecSettings(); + // Configure encoder with 3 spatial layers. + codec_settings.SetScalabilityMode(ScalabilityMode::kL3T1); + // Odd width and height values should not make encoder crash. 
+ codec_settings.width = 623; + codec_settings.height = 405; + codec_settings.maxBitrate = allocation.get_sum_kbps(); + ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()), + WEBRTC_VIDEO_CODEC_OK); + encoder->SetRates(VideoEncoder::RateControlParameters( + allocation, codec_settings.maxFramerate)); + EncodedVideoFrameProducer evfp(*encoder); + evfp.SetResolution(RenderResolution{623, 405}); + std::vector<EncodedVideoFrameProducer::EncodedFrame> encoded_frames = + evfp.SetNumInputFrames(2).Encode(); + ASSERT_THAT(encoded_frames, SizeIs(6)); +} + +TEST(LibaomAv1EncoderTest, EncoderInfoProvidesFpsAllocation) { + std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder(); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.SetScalabilityMode(ScalabilityMode::kL3T3); + codec_settings.maxFramerate = 60; + ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()), + WEBRTC_VIDEO_CODEC_OK); + + const auto& encoder_info = encoder->GetEncoderInfo(); + EXPECT_THAT(encoder_info.fps_allocation[0], ElementsAre(15, 30, 60)); + EXPECT_THAT(encoder_info.fps_allocation[1], ElementsAre(15, 30, 60)); + EXPECT_THAT(encoder_info.fps_allocation[2], ElementsAre(15, 30, 60)); + EXPECT_THAT(encoder_info.fps_allocation[3], IsEmpty()); +} + +TEST(LibaomAv1EncoderTest, PopulatesEncodedFrameSize) { + VideoBitrateAllocation allocation; + allocation.SetBitrate(0, 0, 30000); + allocation.SetBitrate(1, 0, 40000); + allocation.SetBitrate(2, 0, 30000); + std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder(); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.maxBitrate = allocation.get_sum_kbps(); + ASSERT_GT(codec_settings.width, 4); + // Configure encoder with 3 spatial layers. 
+ codec_settings.SetScalabilityMode(ScalabilityMode::kL3T1); + ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()), + WEBRTC_VIDEO_CODEC_OK); + encoder->SetRates(VideoEncoder::RateControlParameters( + allocation, codec_settings.maxFramerate)); + using Frame = EncodedVideoFrameProducer::EncodedFrame; + std::vector<Frame> encoded_frames = + EncodedVideoFrameProducer(*encoder).SetNumInputFrames(1).Encode(); + EXPECT_THAT( + encoded_frames, + ElementsAre( + Field(&Frame::encoded_image, + AllOf(Field(&EncodedImage::_encodedWidth, + codec_settings.width / 4), + Field(&EncodedImage::_encodedHeight, + codec_settings.height / 4))), + Field(&Frame::encoded_image, + AllOf(Field(&EncodedImage::_encodedWidth, + codec_settings.width / 2), + Field(&EncodedImage::_encodedHeight, + codec_settings.height / 2))), + Field(&Frame::encoded_image, + AllOf(Field(&EncodedImage::_encodedWidth, codec_settings.width), + Field(&EncodedImage::_encodedHeight, + codec_settings.height))))); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_unittest.cc new file mode 100644 index 0000000000..5d9c251bc7 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/av1/libaom_av1_unittest.cc @@ -0,0 +1,365 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <stddef.h> +#include <stdint.h> + +#include <map> +#include <memory> +#include <ostream> +#include <tuple> +#include <vector> + +#include "absl/types/optional.h" +#include "api/units/data_size.h" +#include "api/units/time_delta.h" +#include "api/video_codecs/video_codec.h" +#include "api/video_codecs/video_encoder.h" +#include "modules/video_coding/codecs/av1/libaom_av1_decoder.h" +#include "modules/video_coding/codecs/av1/libaom_av1_encoder.h" +#include "modules/video_coding/codecs/test/encoded_video_frame_producer.h" +#include "modules/video_coding/include/video_codec_interface.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "modules/video_coding/svc/create_scalability_structure.h" +#include "modules/video_coding/svc/scalability_mode_util.h" +#include "modules/video_coding/svc/scalable_video_controller.h" +#include "modules/video_coding/svc/scalable_video_controller_no_layering.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::ContainerEq; +using ::testing::Each; +using ::testing::ElementsAreArray; +using ::testing::Ge; +using ::testing::IsEmpty; +using ::testing::Not; +using ::testing::NotNull; +using ::testing::Pointwise; +using ::testing::SizeIs; +using ::testing::Truly; +using ::testing::Values; + +// Use small resolution for this test to make it faster. 
+constexpr int kWidth = 320; +constexpr int kHeight = 180; +constexpr int kFramerate = 30; + +VideoCodec DefaultCodecSettings() { + VideoCodec codec_settings; + codec_settings.SetScalabilityMode(ScalabilityMode::kL1T1); + codec_settings.width = kWidth; + codec_settings.height = kHeight; + codec_settings.maxFramerate = kFramerate; + codec_settings.maxBitrate = 1000; + codec_settings.qpMax = 63; + return codec_settings; +} +VideoEncoder::Settings DefaultEncoderSettings() { + return VideoEncoder::Settings( + VideoEncoder::Capabilities(/*loss_notification=*/false), + /*number_of_cores=*/1, /*max_payload_size=*/1200); +} + +class TestAv1Decoder { + public: + explicit TestAv1Decoder(int decoder_id) + : decoder_id_(decoder_id), decoder_(CreateLibaomAv1Decoder()) { + if (decoder_ == nullptr) { + ADD_FAILURE() << "Failed to create a decoder#" << decoder_id_; + return; + } + EXPECT_TRUE(decoder_->Configure({})); + EXPECT_EQ(decoder_->RegisterDecodeCompleteCallback(&callback_), + WEBRTC_VIDEO_CODEC_OK); + } + // This class requires pointer stability and thus not copyable nor movable. + TestAv1Decoder(const TestAv1Decoder&) = delete; + TestAv1Decoder& operator=(const TestAv1Decoder&) = delete; + + void Decode(int64_t frame_id, const EncodedImage& image) { + ASSERT_THAT(decoder_, NotNull()); + int32_t error = decoder_->Decode(image, /*missing_frames=*/false, + /*render_time_ms=*/image.capture_time_ms_); + if (error != WEBRTC_VIDEO_CODEC_OK) { + ADD_FAILURE() << "Failed to decode frame id " << frame_id + << " with error code " << error << " by decoder#" + << decoder_id_; + return; + } + decoded_ids_.push_back(frame_id); + } + + const std::vector<int64_t>& decoded_frame_ids() const { return decoded_ids_; } + size_t num_output_frames() const { return callback_.num_called(); } + + private: + // Decoder callback that only counts how many times it was called. 
+ // While it is tempting to replace it with a simple mock, that one requires + // to set expectation on number of calls in advance. Tests below unsure about + // expected number of calls until after calls are done. + class DecoderCallback : public DecodedImageCallback { + public: + size_t num_called() const { return num_called_; } + + private: + int32_t Decoded(VideoFrame& /*decoded_image*/) override { + ++num_called_; + return 0; + } + void Decoded(VideoFrame& /*decoded_image*/, + absl::optional<int32_t> /*decode_time_ms*/, + absl::optional<uint8_t> /*qp*/) override { + ++num_called_; + } + + int num_called_ = 0; + }; + + const int decoder_id_; + std::vector<int64_t> decoded_ids_; + DecoderCallback callback_; + const std::unique_ptr<VideoDecoder> decoder_; +}; + +TEST(LibaomAv1Test, EncodeDecode) { + TestAv1Decoder decoder(0); + std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder(); + VideoCodec codec_settings = DefaultCodecSettings(); + ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()), + WEBRTC_VIDEO_CODEC_OK); + + VideoBitrateAllocation allocation; + allocation.SetBitrate(0, 0, 300000); + encoder->SetRates(VideoEncoder::RateControlParameters( + allocation, codec_settings.maxFramerate)); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> encoded_frames = + EncodedVideoFrameProducer(*encoder).SetNumInputFrames(4).Encode(); + for (size_t frame_id = 0; frame_id < encoded_frames.size(); ++frame_id) { + decoder.Decode(static_cast<int64_t>(frame_id), + encoded_frames[frame_id].encoded_image); + } + + // Check encoder produced some frames for decoder to decode. + ASSERT_THAT(encoded_frames, Not(IsEmpty())); + // Check decoder found all of them valid. + EXPECT_THAT(decoder.decoded_frame_ids(), SizeIs(encoded_frames.size())); + // Check each of them produced an output frame. 
+ EXPECT_EQ(decoder.num_output_frames(), decoder.decoded_frame_ids().size()); +} + +struct LayerId { + friend bool operator==(const LayerId& lhs, const LayerId& rhs) { + return std::tie(lhs.spatial_id, lhs.temporal_id) == + std::tie(rhs.spatial_id, rhs.temporal_id); + } + friend bool operator<(const LayerId& lhs, const LayerId& rhs) { + return std::tie(lhs.spatial_id, lhs.temporal_id) < + std::tie(rhs.spatial_id, rhs.temporal_id); + } + friend std::ostream& operator<<(std::ostream& s, const LayerId& layer) { + return s << "S" << layer.spatial_id << "T" << layer.temporal_id; + } + + int spatial_id = 0; + int temporal_id = 0; +}; + +struct SvcTestParam { + ScalabilityMode GetScalabilityMode() const { + absl::optional<ScalabilityMode> scalability_mode = + ScalabilityModeFromString(name); + RTC_CHECK(scalability_mode.has_value()); + return *scalability_mode; + } + + std::string name; + int num_frames_to_generate; + std::map<LayerId, DataRate> configured_bitrates; +}; + +class LibaomAv1SvcTest : public ::testing::TestWithParam<SvcTestParam> {}; + +TEST_P(LibaomAv1SvcTest, EncodeAndDecodeAllDecodeTargets) { + const SvcTestParam param = GetParam(); + std::unique_ptr<ScalableVideoController> svc_controller = + CreateScalabilityStructure(param.GetScalabilityMode()); + ASSERT_TRUE(svc_controller); + VideoBitrateAllocation allocation; + if (param.configured_bitrates.empty()) { + ScalableVideoController::StreamLayersConfig config = + svc_controller->StreamConfig(); + for (int sid = 0; sid < config.num_spatial_layers; ++sid) { + for (int tid = 0; tid < config.num_temporal_layers; ++tid) { + allocation.SetBitrate(sid, tid, 100'000); + } + } + } else { + for (const auto& kv : param.configured_bitrates) { + allocation.SetBitrate(kv.first.spatial_id, kv.first.temporal_id, + kv.second.bps()); + } + } + + size_t num_decode_targets = + svc_controller->DependencyStructure().num_decode_targets; + + std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder(); + VideoCodec 
codec_settings = DefaultCodecSettings(); + codec_settings.SetScalabilityMode(GetParam().GetScalabilityMode()); + ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()), + WEBRTC_VIDEO_CODEC_OK); + encoder->SetRates(VideoEncoder::RateControlParameters( + allocation, codec_settings.maxFramerate)); + std::vector<EncodedVideoFrameProducer::EncodedFrame> encoded_frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(GetParam().num_frames_to_generate) + .SetResolution({kWidth, kHeight}) + .Encode(); + + ASSERT_THAT( + encoded_frames, + Each(Truly([&](const EncodedVideoFrameProducer::EncodedFrame& frame) { + return frame.codec_specific_info.generic_frame_info && + frame.codec_specific_info.generic_frame_info + ->decode_target_indications.size() == num_decode_targets; + }))); + + for (size_t dt = 0; dt < num_decode_targets; ++dt) { + TestAv1Decoder decoder(dt); + std::vector<int64_t> requested_ids; + for (int64_t frame_id = 0; + frame_id < static_cast<int64_t>(encoded_frames.size()); ++frame_id) { + const EncodedVideoFrameProducer::EncodedFrame& frame = + encoded_frames[frame_id]; + if (frame.codec_specific_info.generic_frame_info + ->decode_target_indications[dt] != + DecodeTargetIndication::kNotPresent) { + requested_ids.push_back(frame_id); + decoder.Decode(frame_id, frame.encoded_image); + } + } + + ASSERT_THAT(requested_ids, SizeIs(Ge(2u))); + // Check decoder found all of them valid. + EXPECT_THAT(decoder.decoded_frame_ids(), ContainerEq(requested_ids)) + << "Decoder#" << dt; + // Check each of them produced an output frame. + EXPECT_EQ(decoder.num_output_frames(), decoder.decoded_frame_ids().size()) + << "Decoder#" << dt; + } +} + +MATCHER(SameLayerIdAndBitrateIsNear, "") { + // First check if layer id is the same. + return std::get<0>(arg).first == std::get<1>(arg).first && + // check measured bitrate is not much lower than requested. 
+ std::get<0>(arg).second >= std::get<1>(arg).second * 0.8 && + // check measured bitrate is not much larger than requested. + std::get<0>(arg).second <= std::get<1>(arg).second * 1.1; +} + +TEST_P(LibaomAv1SvcTest, SetRatesMatchMeasuredBitrate) { + const SvcTestParam param = GetParam(); + if (param.configured_bitrates.empty()) { + // Rates are not configured for this particular structure, skip the test. + return; + } + constexpr TimeDelta kDuration = TimeDelta::Seconds(5); + + VideoBitrateAllocation allocation; + for (const auto& kv : param.configured_bitrates) { + allocation.SetBitrate(kv.first.spatial_id, kv.first.temporal_id, + kv.second.bps()); + } + + std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder(); + ASSERT_TRUE(encoder); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.SetScalabilityMode(param.GetScalabilityMode()); + codec_settings.maxBitrate = allocation.get_sum_kbps(); + codec_settings.maxFramerate = 30; + ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()), + WEBRTC_VIDEO_CODEC_OK); + + encoder->SetRates(VideoEncoder::RateControlParameters( + allocation, codec_settings.maxFramerate)); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> encoded_frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(codec_settings.maxFramerate * kDuration.seconds()) + .SetResolution({codec_settings.width, codec_settings.height}) + .SetFramerateFps(codec_settings.maxFramerate) + .Encode(); + + // Calculate size of each layer. + std::map<LayerId, DataSize> layer_size; + for (const auto& frame : encoded_frames) { + ASSERT_TRUE(frame.codec_specific_info.generic_frame_info); + const auto& layer = *frame.codec_specific_info.generic_frame_info; + LayerId layer_id = {layer.spatial_id, layer.temporal_id}; + // This is almost same as + // layer_size[layer_id] += DataSize::Bytes(frame.encoded_image.size()); + // but avoids calling deleted default constructor for DataSize. 
+ layer_size.emplace(layer_id, DataSize::Zero()).first->second += + DataSize::Bytes(frame.encoded_image.size()); + } + // Convert size of the layer into bitrate of that layer. + std::vector<std::pair<LayerId, DataRate>> measured_bitrates; + for (const auto& kv : layer_size) { + measured_bitrates.emplace_back(kv.first, kv.second / kDuration); + } + EXPECT_THAT(measured_bitrates, Pointwise(SameLayerIdAndBitrateIsNear(), + param.configured_bitrates)); +} + +INSTANTIATE_TEST_SUITE_P( + Svc, + LibaomAv1SvcTest, + Values(SvcTestParam{"L1T1", /*num_frames_to_generate=*/4}, + SvcTestParam{"L1T2", + /*num_frames_to_generate=*/4, + /*configured_bitrates=*/ + {{{0, 0}, DataRate::KilobitsPerSec(60)}, + {{0, 1}, DataRate::KilobitsPerSec(40)}}}, + SvcTestParam{"L1T3", /*num_frames_to_generate=*/8}, + SvcTestParam{"L2T1", + /*num_frames_to_generate=*/3, + /*configured_bitrates=*/ + {{{0, 0}, DataRate::KilobitsPerSec(30)}, + {{1, 0}, DataRate::KilobitsPerSec(70)}}}, + SvcTestParam{"L2T1h", + /*num_frames_to_generate=*/3, + /*configured_bitrates=*/ + {{{0, 0}, DataRate::KilobitsPerSec(30)}, + {{1, 0}, DataRate::KilobitsPerSec(70)}}}, + SvcTestParam{"L2T1_KEY", /*num_frames_to_generate=*/3}, + SvcTestParam{"L3T1", /*num_frames_to_generate=*/3}, + SvcTestParam{"L3T3", /*num_frames_to_generate=*/8}, + SvcTestParam{"S2T1", /*num_frames_to_generate=*/3}, + SvcTestParam{"S3T3", /*num_frames_to_generate=*/8}, + SvcTestParam{"L2T2", /*num_frames_to_generate=*/4}, + SvcTestParam{"L2T2_KEY", /*num_frames_to_generate=*/4}, + SvcTestParam{"L2T2_KEY_SHIFT", + /*num_frames_to_generate=*/4, + /*configured_bitrates=*/ + {{{0, 0}, DataRate::KilobitsPerSec(70)}, + {{0, 1}, DataRate::KilobitsPerSec(30)}, + {{1, 0}, DataRate::KilobitsPerSec(110)}, + {{1, 1}, DataRate::KilobitsPerSec(80)}}}), + [](const testing::TestParamInfo<SvcTestParam>& info) { + return info.param.name; + }); + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/h264/DEPS 
b/third_party/libwebrtc/modules/video_coding/codecs/h264/DEPS new file mode 100644 index 0000000000..4e110917d8 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/h264/DEPS @@ -0,0 +1,5 @@ +include_rules = [ + "+third_party/ffmpeg", + "+third_party/openh264", + "+media/base", +] diff --git a/third_party/libwebrtc/modules/video_coding/codecs/h264/OWNERS b/third_party/libwebrtc/modules/video_coding/codecs/h264/OWNERS new file mode 100644 index 0000000000..4b06c4e32b --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/h264/OWNERS @@ -0,0 +1,2 @@ +sprang@webrtc.org +ssilkin@webrtc.org diff --git a/third_party/libwebrtc/modules/video_coding/codecs/h264/h264.cc b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264.cc new file mode 100644 index 0000000000..23580d7a4a --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264.cc @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ * + */ + +#include "modules/video_coding/codecs/h264/include/h264.h" + +#include <memory> +#include <string> + +#include "absl/container/inlined_vector.h" +#include "absl/types/optional.h" +#include "api/video_codecs/sdp_video_format.h" +#include "media/base/media_constants.h" +#include "rtc_base/trace_event.h" + +#if defined(WEBRTC_USE_H264) +#include "modules/video_coding/codecs/h264/h264_decoder_impl.h" +#include "modules/video_coding/codecs/h264/h264_encoder_impl.h" +#endif + +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +namespace { + +#if defined(WEBRTC_USE_H264) +bool g_rtc_use_h264 = true; +#endif + +// If H.264 OpenH264/FFmpeg codec is supported. +bool IsH264CodecSupported() { +#if defined(WEBRTC_USE_H264) + return g_rtc_use_h264; +#else + return false; +#endif +} + +constexpr ScalabilityMode kSupportedScalabilityModes[] = { + ScalabilityMode::kL1T1, ScalabilityMode::kL1T2, ScalabilityMode::kL1T3}; + +} // namespace + +SdpVideoFormat CreateH264Format(H264Profile profile, + H264Level level, + const std::string& packetization_mode, + bool add_scalability_modes) { + const absl::optional<std::string> profile_string = + H264ProfileLevelIdToString(H264ProfileLevelId(profile, level)); + RTC_CHECK(profile_string); + absl::InlinedVector<ScalabilityMode, kScalabilityModeCount> scalability_modes; + if (add_scalability_modes) { + for (const auto scalability_mode : kSupportedScalabilityModes) { + scalability_modes.push_back(scalability_mode); + } + } + return SdpVideoFormat( + cricket::kH264CodecName, + {{cricket::kH264FmtpProfileLevelId, *profile_string}, + {cricket::kH264FmtpLevelAsymmetryAllowed, "1"}, + {cricket::kH264FmtpPacketizationMode, packetization_mode}}, + scalability_modes); +} + +void DisableRtcUseH264() { +#if defined(WEBRTC_USE_H264) + g_rtc_use_h264 = false; +#endif +} + +std::vector<SdpVideoFormat> SupportedH264Codecs(bool add_scalability_modes) { + TRACE_EVENT0("webrtc", __func__); + if 
(!IsH264CodecSupported()) + return std::vector<SdpVideoFormat>(); + // We only support encoding Constrained Baseline Profile (CBP), but the + // decoder supports more profiles. We can list all profiles here that are + // supported by the decoder and that are also supersets of CBP, i.e. the + // decoder for that profile is required to be able to decode CBP. This means + // we can encode and send CBP even though we negotiated a potentially + // higher profile. See the H264 spec for more information. + // + // We support both packetization modes 0 (mandatory) and 1 (optional, + // preferred). + return {CreateH264Format(H264Profile::kProfileBaseline, H264Level::kLevel3_1, + "1", add_scalability_modes), + CreateH264Format(H264Profile::kProfileBaseline, H264Level::kLevel3_1, + "0", add_scalability_modes), + CreateH264Format(H264Profile::kProfileConstrainedBaseline, + H264Level::kLevel3_1, "1", add_scalability_modes), + CreateH264Format(H264Profile::kProfileConstrainedBaseline, + H264Level::kLevel3_1, "0", add_scalability_modes), + CreateH264Format(H264Profile::kProfileMain, H264Level::kLevel3_1, "1", + add_scalability_modes), + CreateH264Format(H264Profile::kProfileMain, H264Level::kLevel3_1, "0", + add_scalability_modes)}; +} + +std::vector<SdpVideoFormat> SupportedH264DecoderCodecs() { + TRACE_EVENT0("webrtc", __func__); + if (!IsH264CodecSupported()) + return std::vector<SdpVideoFormat>(); + + std::vector<SdpVideoFormat> supportedCodecs = SupportedH264Codecs(); + + // OpenH264 doesn't yet support High Predictive 4:4:4 encoding but it does + // support decoding. 
+ supportedCodecs.push_back(CreateH264Format( + H264Profile::kProfilePredictiveHigh444, H264Level::kLevel3_1, "1")); + supportedCodecs.push_back(CreateH264Format( + H264Profile::kProfilePredictiveHigh444, H264Level::kLevel3_1, "0")); + + return supportedCodecs; +} + +std::unique_ptr<H264Encoder> H264Encoder::Create( + const cricket::VideoCodec& codec) { + RTC_DCHECK(H264Encoder::IsSupported()); +#if defined(WEBRTC_USE_H264) + RTC_CHECK(g_rtc_use_h264); + RTC_LOG(LS_INFO) << "Creating H264EncoderImpl."; + return std::make_unique<H264EncoderImpl>(codec); +#else + RTC_DCHECK_NOTREACHED(); + return nullptr; +#endif +} + +bool H264Encoder::IsSupported() { + return IsH264CodecSupported(); +} + +bool H264Encoder::SupportsScalabilityMode(ScalabilityMode scalability_mode) { + for (const auto& entry : kSupportedScalabilityModes) { + if (entry == scalability_mode) { + return true; + } + } + return false; +} + +std::unique_ptr<H264Decoder> H264Decoder::Create() { + RTC_DCHECK(H264Decoder::IsSupported()); +#if defined(WEBRTC_USE_H264) + RTC_CHECK(g_rtc_use_h264); + RTC_LOG(LS_INFO) << "Creating H264DecoderImpl."; + return std::make_unique<H264DecoderImpl>(); +#else + RTC_DCHECK_NOTREACHED(); + return nullptr; +#endif +} + +bool H264Decoder::IsSupported() { + return IsH264CodecSupported(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_color_space.cc b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_color_space.cc new file mode 100644 index 0000000000..59921263e3 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_color_space.cc @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Everything declared/defined in this header is only required when WebRTC is +// build with H264 support, please do not move anything out of the +// #ifdef unless needed and tested. +#ifdef WEBRTC_USE_H264 + +#include "modules/video_coding/codecs/h264/h264_color_space.h" + +namespace webrtc { + +ColorSpace ExtractH264ColorSpace(AVCodecContext* codec) { + ColorSpace::PrimaryID primaries = ColorSpace::PrimaryID::kUnspecified; + switch (codec->color_primaries) { + case AVCOL_PRI_BT709: + primaries = ColorSpace::PrimaryID::kBT709; + break; + case AVCOL_PRI_BT470M: + primaries = ColorSpace::PrimaryID::kBT470M; + break; + case AVCOL_PRI_BT470BG: + primaries = ColorSpace::PrimaryID::kBT470BG; + break; + case AVCOL_PRI_SMPTE170M: + primaries = ColorSpace::PrimaryID::kSMPTE170M; + break; + case AVCOL_PRI_SMPTE240M: + primaries = ColorSpace::PrimaryID::kSMPTE240M; + break; + case AVCOL_PRI_FILM: + primaries = ColorSpace::PrimaryID::kFILM; + break; + case AVCOL_PRI_BT2020: + primaries = ColorSpace::PrimaryID::kBT2020; + break; + case AVCOL_PRI_SMPTE428: + primaries = ColorSpace::PrimaryID::kSMPTEST428; + break; + case AVCOL_PRI_SMPTE431: + primaries = ColorSpace::PrimaryID::kSMPTEST431; + break; + case AVCOL_PRI_SMPTE432: + primaries = ColorSpace::PrimaryID::kSMPTEST432; + break; + case AVCOL_PRI_JEDEC_P22: + primaries = ColorSpace::PrimaryID::kJEDECP22; + break; + case AVCOL_PRI_RESERVED0: + case AVCOL_PRI_UNSPECIFIED: + case AVCOL_PRI_RESERVED: + default: + break; + } + + ColorSpace::TransferID transfer = ColorSpace::TransferID::kUnspecified; + switch (codec->color_trc) { + case AVCOL_TRC_BT709: + transfer = ColorSpace::TransferID::kBT709; + break; + case AVCOL_TRC_GAMMA22: + transfer = ColorSpace::TransferID::kGAMMA22; + break; + case AVCOL_TRC_GAMMA28: + transfer = ColorSpace::TransferID::kGAMMA28; + break; + case AVCOL_TRC_SMPTE170M: + transfer = 
ColorSpace::TransferID::kSMPTE170M; + break; + case AVCOL_TRC_SMPTE240M: + transfer = ColorSpace::TransferID::kSMPTE240M; + break; + case AVCOL_TRC_LINEAR: + transfer = ColorSpace::TransferID::kLINEAR; + break; + case AVCOL_TRC_LOG: + transfer = ColorSpace::TransferID::kLOG; + break; + case AVCOL_TRC_LOG_SQRT: + transfer = ColorSpace::TransferID::kLOG_SQRT; + break; + case AVCOL_TRC_IEC61966_2_4: + transfer = ColorSpace::TransferID::kIEC61966_2_4; + break; + case AVCOL_TRC_BT1361_ECG: + transfer = ColorSpace::TransferID::kBT1361_ECG; + break; + case AVCOL_TRC_IEC61966_2_1: + transfer = ColorSpace::TransferID::kIEC61966_2_1; + break; + case AVCOL_TRC_BT2020_10: + transfer = ColorSpace::TransferID::kBT2020_10; + break; + case AVCOL_TRC_BT2020_12: + transfer = ColorSpace::TransferID::kBT2020_12; + break; + case AVCOL_TRC_SMPTE2084: + transfer = ColorSpace::TransferID::kSMPTEST2084; + break; + case AVCOL_TRC_SMPTE428: + transfer = ColorSpace::TransferID::kSMPTEST428; + break; + case AVCOL_TRC_ARIB_STD_B67: + transfer = ColorSpace::TransferID::kARIB_STD_B67; + break; + case AVCOL_TRC_RESERVED0: + case AVCOL_TRC_UNSPECIFIED: + case AVCOL_TRC_RESERVED: + default: + break; + } + + ColorSpace::MatrixID matrix = ColorSpace::MatrixID::kUnspecified; + switch (codec->colorspace) { + case AVCOL_SPC_RGB: + matrix = ColorSpace::MatrixID::kRGB; + break; + case AVCOL_SPC_BT709: + matrix = ColorSpace::MatrixID::kBT709; + break; + case AVCOL_SPC_FCC: + matrix = ColorSpace::MatrixID::kFCC; + break; + case AVCOL_SPC_BT470BG: + matrix = ColorSpace::MatrixID::kBT470BG; + break; + case AVCOL_SPC_SMPTE170M: + matrix = ColorSpace::MatrixID::kSMPTE170M; + break; + case AVCOL_SPC_SMPTE240M: + matrix = ColorSpace::MatrixID::kSMPTE240M; + break; + case AVCOL_SPC_YCGCO: + matrix = ColorSpace::MatrixID::kYCOCG; + break; + case AVCOL_SPC_BT2020_NCL: + matrix = ColorSpace::MatrixID::kBT2020_NCL; + break; + case AVCOL_SPC_BT2020_CL: + matrix = ColorSpace::MatrixID::kBT2020_CL; + break; + case 
AVCOL_SPC_SMPTE2085: + matrix = ColorSpace::MatrixID::kSMPTE2085; + break; + case AVCOL_SPC_CHROMA_DERIVED_NCL: + case AVCOL_SPC_CHROMA_DERIVED_CL: + case AVCOL_SPC_ICTCP: + case AVCOL_SPC_UNSPECIFIED: + case AVCOL_SPC_RESERVED: + default: + break; + } + + ColorSpace::RangeID range = ColorSpace::RangeID::kInvalid; + switch (codec->color_range) { + case AVCOL_RANGE_MPEG: + range = ColorSpace::RangeID::kLimited; + break; + case AVCOL_RANGE_JPEG: + range = ColorSpace::RangeID::kFull; + break; + case AVCOL_RANGE_UNSPECIFIED: + default: + break; + } + return ColorSpace(primaries, transfer, matrix, range); +} + +} // namespace webrtc + +#endif // WEBRTC_USE_H264 diff --git a/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_color_space.h b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_color_space.h new file mode 100644 index 0000000000..392ccaf563 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_color_space.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_H264_H264_COLOR_SPACE_H_ +#define MODULES_VIDEO_CODING_CODECS_H264_H264_COLOR_SPACE_H_ + +// Everything declared in this header is only required when WebRTC is +// build with H264 support, please do not move anything out of the +// #ifdef unless needed and tested. +#ifdef WEBRTC_USE_H264 + +#if defined(WEBRTC_WIN) && !defined(__clang__) +#error "See: bugs.webrtc.org/9213#c13." 
+#endif + +#include "api/video/color_space.h" + +extern "C" { +#include "third_party/ffmpeg/libavcodec/avcodec.h" +} // extern "C" + +namespace webrtc { + +// Helper function that builds a ColorSpace from an FFmpeg codec context. +ColorSpace ExtractH264ColorSpace(AVCodecContext* codec); + +} // namespace webrtc + +#endif // WEBRTC_USE_H264 + +#endif // MODULES_VIDEO_CODING_CODECS_H264_H264_COLOR_SPACE_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc new file mode 100644 index 0000000000..e654e1835b --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc @@ -0,0 +1,648 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +// Everything declared/defined in this file is only required when WebRTC is +// built with H264 support. Please do not move anything out of the +// #ifdef unless needed and tested. 
+#ifdef WEBRTC_USE_H264 + +#include "modules/video_coding/codecs/h264/h264_decoder_impl.h" + +#include <algorithm> +#include <limits> +#include <memory> + +extern "C" { +#include "third_party/ffmpeg/libavcodec/avcodec.h" +#include "third_party/ffmpeg/libavformat/avformat.h" +#include "third_party/ffmpeg/libavutil/imgutils.h" +} // extern "C" + +#include "api/video/color_space.h" +#include "api/video/i010_buffer.h" +#include "api/video/i420_buffer.h" +#include "common_video/include/video_frame_buffer.h" +#include "modules/video_coding/codecs/h264/h264_color_space.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/field_trial.h" +#include "system_wrappers/include/metrics.h" +#include "third_party/libyuv/include/libyuv/convert.h" + +namespace webrtc { + +namespace { + +constexpr std::array<AVPixelFormat, 8> kPixelFormatsSupported = { + AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, + AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P, + AV_PIX_FMT_YUV420P10LE, AV_PIX_FMT_YUV422P10LE}; +const size_t kYPlaneIndex = 0; +const size_t kUPlaneIndex = 1; +const size_t kVPlaneIndex = 2; + +// Used by histograms. Values of entries should not be changed. +enum H264DecoderImplEvent { + kH264DecoderEventInit = 0, + kH264DecoderEventError = 1, + kH264DecoderEventMax = 16, +}; + +struct ScopedPtrAVFreePacket { + void operator()(AVPacket* packet) { av_packet_free(&packet); } +}; +typedef std::unique_ptr<AVPacket, ScopedPtrAVFreePacket> ScopedAVPacket; + +ScopedAVPacket MakeScopedAVPacket() { + ScopedAVPacket packet(av_packet_alloc()); + return packet; +} + +} // namespace + +int H264DecoderImpl::AVGetBuffer2(AVCodecContext* context, + AVFrame* av_frame, + int flags) { + // Set in `Configure`. + H264DecoderImpl* decoder = static_cast<H264DecoderImpl*>(context->opaque); + // DCHECK values set in `Configure`. + RTC_DCHECK(decoder); + // Necessary capability to be allowed to provide our own buffers. 
+ // Test the DR1 bit with `&`: with `|` the expression is non-zero for any + // codec, so the check could never fail. + RTC_DCHECK(context->codec->capabilities & AV_CODEC_CAP_DR1); + + auto pixelFormatSupported = std::find_if( + kPixelFormatsSupported.begin(), kPixelFormatsSupported.end(), + [context](AVPixelFormat format) { return context->pix_fmt == format; }); + + RTC_CHECK(pixelFormatSupported != kPixelFormatsSupported.end()); + + // `av_frame->width` and `av_frame->height` are set by FFmpeg. These are the + // actual image's dimensions and may be different from `context->width` and + // `context->coded_width` due to reordering. + int width = av_frame->width; + int height = av_frame->height; + // See `lowres`, if used the decoder scales the image by 1/2^(lowres). This + // has implications on which resolutions are valid, but we don't use it. + RTC_CHECK_EQ(context->lowres, 0); + // Adjust the `width` and `height` to values acceptable by the decoder. + // Without this, FFmpeg may overflow the buffer. If modified, `width` and/or + // `height` are larger than the actual image and the image has to be cropped + // (top-left corner) after decoding to avoid visible borders to the right and + // bottom of the actual image. + avcodec_align_dimensions(context, &width, &height); + + RTC_CHECK_GE(width, 0); + RTC_CHECK_GE(height, 0); + int ret = av_image_check_size(static_cast<unsigned int>(width), + static_cast<unsigned int>(height), 0, nullptr); + if (ret < 0) { + RTC_LOG(LS_ERROR) << "Invalid picture size " << width << "x" << height; + decoder->ReportError(); + return ret; + } + + // The video frame is stored in `frame_buffer`. `av_frame` is FFmpeg's version + // of a video frame and will be set up to reference `frame_buffer`'s data. + + // FFmpeg expects the initial allocation to be zero-initialized according to + // http://crbug.com/390941. Our pool is set up to zero-initialize new buffers. + // TODO(https://crbug.com/390941): Delete that feature from the video pool, + // instead add an explicit call to InitializeData here. 
+ rtc::scoped_refptr<PlanarYuvBuffer> frame_buffer; + rtc::scoped_refptr<I444Buffer> i444_buffer; + rtc::scoped_refptr<I420Buffer> i420_buffer; + rtc::scoped_refptr<I422Buffer> i422_buffer; + rtc::scoped_refptr<I010Buffer> i010_buffer; + rtc::scoped_refptr<I210Buffer> i210_buffer; + int bytes_per_pixel = 1; + switch (context->pix_fmt) { + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUVJ420P: + i420_buffer = + decoder->ffmpeg_buffer_pool_.CreateI420Buffer(width, height); + // Set `av_frame` members as required by FFmpeg. + av_frame->data[kYPlaneIndex] = i420_buffer->MutableDataY(); + av_frame->linesize[kYPlaneIndex] = i420_buffer->StrideY(); + av_frame->data[kUPlaneIndex] = i420_buffer->MutableDataU(); + av_frame->linesize[kUPlaneIndex] = i420_buffer->StrideU(); + av_frame->data[kVPlaneIndex] = i420_buffer->MutableDataV(); + av_frame->linesize[kVPlaneIndex] = i420_buffer->StrideV(); + RTC_DCHECK_EQ(av_frame->extended_data, av_frame->data); + frame_buffer = i420_buffer; + break; + case AV_PIX_FMT_YUV444P: + case AV_PIX_FMT_YUVJ444P: + i444_buffer = + decoder->ffmpeg_buffer_pool_.CreateI444Buffer(width, height); + // Set `av_frame` members as required by FFmpeg. + av_frame->data[kYPlaneIndex] = i444_buffer->MutableDataY(); + av_frame->linesize[kYPlaneIndex] = i444_buffer->StrideY(); + av_frame->data[kUPlaneIndex] = i444_buffer->MutableDataU(); + av_frame->linesize[kUPlaneIndex] = i444_buffer->StrideU(); + av_frame->data[kVPlaneIndex] = i444_buffer->MutableDataV(); + av_frame->linesize[kVPlaneIndex] = i444_buffer->StrideV(); + frame_buffer = i444_buffer; + break; + case AV_PIX_FMT_YUV422P: + case AV_PIX_FMT_YUVJ422P: + i422_buffer = + decoder->ffmpeg_buffer_pool_.CreateI422Buffer(width, height); + // Set `av_frame` members as required by FFmpeg. 
+ av_frame->data[kYPlaneIndex] = i422_buffer->MutableDataY(); + av_frame->linesize[kYPlaneIndex] = i422_buffer->StrideY(); + av_frame->data[kUPlaneIndex] = i422_buffer->MutableDataU(); + av_frame->linesize[kUPlaneIndex] = i422_buffer->StrideU(); + av_frame->data[kVPlaneIndex] = i422_buffer->MutableDataV(); + av_frame->linesize[kVPlaneIndex] = i422_buffer->StrideV(); + frame_buffer = i422_buffer; + break; + case AV_PIX_FMT_YUV420P10LE: + i010_buffer = + decoder->ffmpeg_buffer_pool_.CreateI010Buffer(width, height); + // Set `av_frame` members as required by FFmpeg. + av_frame->data[kYPlaneIndex] = + reinterpret_cast<uint8_t*>(i010_buffer->MutableDataY()); + av_frame->linesize[kYPlaneIndex] = i010_buffer->StrideY() * 2; + av_frame->data[kUPlaneIndex] = + reinterpret_cast<uint8_t*>(i010_buffer->MutableDataU()); + av_frame->linesize[kUPlaneIndex] = i010_buffer->StrideU() * 2; + av_frame->data[kVPlaneIndex] = + reinterpret_cast<uint8_t*>(i010_buffer->MutableDataV()); + av_frame->linesize[kVPlaneIndex] = i010_buffer->StrideV() * 2; + frame_buffer = i010_buffer; + bytes_per_pixel = 2; + break; + case AV_PIX_FMT_YUV422P10LE: + i210_buffer = + decoder->ffmpeg_buffer_pool_.CreateI210Buffer(width, height); + // Set `av_frame` members as required by FFmpeg. + av_frame->data[kYPlaneIndex] = + reinterpret_cast<uint8_t*>(i210_buffer->MutableDataY()); + av_frame->linesize[kYPlaneIndex] = i210_buffer->StrideY() * 2; + av_frame->data[kUPlaneIndex] = + reinterpret_cast<uint8_t*>(i210_buffer->MutableDataU()); + av_frame->linesize[kUPlaneIndex] = i210_buffer->StrideU() * 2; + av_frame->data[kVPlaneIndex] = + reinterpret_cast<uint8_t*>(i210_buffer->MutableDataV()); + av_frame->linesize[kVPlaneIndex] = i210_buffer->StrideV() * 2; + frame_buffer = i210_buffer; + bytes_per_pixel = 2; + break; + default: + RTC_LOG(LS_ERROR) << "Unsupported buffer type " << context->pix_fmt + << ". 
Check supported supported pixel formats!"; + decoder->ReportError(); + return -1; + } + + int y_size = width * height * bytes_per_pixel; + int uv_size = frame_buffer->ChromaWidth() * frame_buffer->ChromaHeight() * + bytes_per_pixel; + // DCHECK that we have a continuous buffer as is required. + RTC_DCHECK_EQ(av_frame->data[kUPlaneIndex], + av_frame->data[kYPlaneIndex] + y_size); + RTC_DCHECK_EQ(av_frame->data[kVPlaneIndex], + av_frame->data[kUPlaneIndex] + uv_size); + int total_size = y_size + 2 * uv_size; + + av_frame->format = context->pix_fmt; + av_frame->reordered_opaque = context->reordered_opaque; + + // Create a VideoFrame object, to keep a reference to the buffer. + // TODO(nisse): The VideoFrame's timestamp and rotation info is not used. + // Refactor to do not use a VideoFrame object at all. + av_frame->buf[0] = av_buffer_create( + av_frame->data[kYPlaneIndex], total_size, AVFreeBuffer2, + static_cast<void*>( + std::make_unique<VideoFrame>(VideoFrame::Builder() + .set_video_frame_buffer(frame_buffer) + .set_rotation(kVideoRotation_0) + .set_timestamp_us(0) + .build()) + .release()), + 0); + RTC_CHECK(av_frame->buf[0]); + return 0; +} + +void H264DecoderImpl::AVFreeBuffer2(void* opaque, uint8_t* data) { + // The buffer pool recycles the buffer used by `video_frame` when there are no + // more references to it. `video_frame` is a thin buffer holder and is not + // recycled. + VideoFrame* video_frame = static_cast<VideoFrame*>(opaque); + delete video_frame; +} + +H264DecoderImpl::H264DecoderImpl() + : ffmpeg_buffer_pool_(true), + decoded_image_callback_(nullptr), + has_reported_init_(false), + has_reported_error_(false), + preferred_output_format_(field_trial::IsEnabled("WebRTC-NV12Decode") + ? 
VideoFrameBuffer::Type::kNV12 + : VideoFrameBuffer::Type::kI420) {} + +H264DecoderImpl::~H264DecoderImpl() { + Release(); +} + +bool H264DecoderImpl::Configure(const Settings& settings) { + ReportInit(); + if (settings.codec_type() != kVideoCodecH264) { + ReportError(); + return false; + } + + // Release necessary in case of re-initializing. + int32_t ret = Release(); + if (ret != WEBRTC_VIDEO_CODEC_OK) { + ReportError(); + return false; + } + RTC_DCHECK(!av_context_); + + // Initialize AVCodecContext. + av_context_.reset(avcodec_alloc_context3(nullptr)); + + av_context_->codec_type = AVMEDIA_TYPE_VIDEO; + av_context_->codec_id = AV_CODEC_ID_H264; + const RenderResolution& resolution = settings.max_render_resolution(); + if (resolution.Valid()) { + av_context_->coded_width = resolution.Width(); + av_context_->coded_height = resolution.Height(); + } + av_context_->extradata = nullptr; + av_context_->extradata_size = 0; + + // If this is ever increased, look at `av_context_->thread_safe_callbacks` and + // make it possible to disable the thread checker in the frame buffer pool. + av_context_->thread_count = 1; + av_context_->thread_type = FF_THREAD_SLICE; + + // Function used by FFmpeg to get buffers to store decoded frames in. + av_context_->get_buffer2 = AVGetBuffer2; + // `get_buffer2` is called with the context, there `opaque` can be used to get + // a pointer `this`. + av_context_->opaque = this; + + const AVCodec* codec = avcodec_find_decoder(av_context_->codec_id); + if (!codec) { + // This is an indication that FFmpeg has not been initialized or it has not + // been compiled/initialized with the correct set of codecs. 
+ RTC_LOG(LS_ERROR) << "FFmpeg H.264 decoder not found."; + Release(); + ReportError(); + return false; + } + int res = avcodec_open2(av_context_.get(), codec, nullptr); + if (res < 0) { + RTC_LOG(LS_ERROR) << "avcodec_open2 error: " << res; + Release(); + ReportError(); + return false; + } + + av_frame_.reset(av_frame_alloc()); + + if (absl::optional<int> buffer_pool_size = settings.buffer_pool_size()) { + if (!ffmpeg_buffer_pool_.Resize(*buffer_pool_size) || + !output_buffer_pool_.Resize(*buffer_pool_size)) { + return false; + } + } + return true; +} + +int32_t H264DecoderImpl::Release() { + av_context_.reset(); + av_frame_.reset(); + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t H264DecoderImpl::RegisterDecodeCompleteCallback( + DecodedImageCallback* callback) { + decoded_image_callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t H264DecoderImpl::Decode(const EncodedImage& input_image, + bool /*missing_frames*/, + int64_t /*render_time_ms*/) { + if (!IsInitialized()) { + ReportError(); + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (!decoded_image_callback_) { + RTC_LOG(LS_WARNING) + << "Configure() has been called, but a callback function " + "has not been set with RegisterDecodeCompleteCallback()"; + ReportError(); + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (!input_image.data() || !input_image.size()) { + ReportError(); + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + ScopedAVPacket packet = MakeScopedAVPacket(); + if (!packet) { + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + // packet.data has a non-const type, but isn't modified by + // avcodec_send_packet. 
+ packet->data = const_cast<uint8_t*>(input_image.data()); + if (input_image.size() > + static_cast<size_t>(std::numeric_limits<int>::max())) { + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + packet->size = static_cast<int>(input_image.size()); + int64_t frame_timestamp_us = input_image.ntp_time_ms_ * 1000; // ms -> μs + av_context_->reordered_opaque = frame_timestamp_us; + + int result = avcodec_send_packet(av_context_.get(), packet.get()); + + if (result < 0) { + RTC_LOG(LS_ERROR) << "avcodec_send_packet error: " << result; + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + + result = avcodec_receive_frame(av_context_.get(), av_frame_.get()); + if (result < 0) { + RTC_LOG(LS_ERROR) << "avcodec_receive_frame error: " << result; + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + + // We don't expect reordering. Decoded frame timestamp should match + // the input one. + RTC_DCHECK_EQ(av_frame_->reordered_opaque, frame_timestamp_us); + + // TODO(sakal): Maybe it is possible to get QP directly from FFmpeg. + h264_bitstream_parser_.ParseBitstream(input_image); + absl::optional<int> qp = h264_bitstream_parser_.GetLastSliceQp(); + + // Obtain the `video_frame` containing the decoded image. 
+ VideoFrame* input_frame = + static_cast<VideoFrame*>(av_buffer_get_opaque(av_frame_->buf[0])); + RTC_DCHECK(input_frame); + rtc::scoped_refptr<VideoFrameBuffer> frame_buffer = + input_frame->video_frame_buffer(); + + // Instantiate Planar YUV buffer according to video frame buffer type + const webrtc::PlanarYuvBuffer* planar_yuv_buffer = nullptr; + const webrtc::PlanarYuv8Buffer* planar_yuv8_buffer = nullptr; + const webrtc::PlanarYuv16BBuffer* planar_yuv16_buffer = nullptr; + VideoFrameBuffer::Type video_frame_buffer_type = frame_buffer->type(); + switch (video_frame_buffer_type) { + case VideoFrameBuffer::Type::kI420: + planar_yuv_buffer = frame_buffer->GetI420(); + planar_yuv8_buffer = + reinterpret_cast<const webrtc::PlanarYuv8Buffer*>(planar_yuv_buffer); + break; + case VideoFrameBuffer::Type::kI444: + planar_yuv_buffer = frame_buffer->GetI444(); + planar_yuv8_buffer = + reinterpret_cast<const webrtc::PlanarYuv8Buffer*>(planar_yuv_buffer); + break; + case VideoFrameBuffer::Type::kI422: + planar_yuv_buffer = frame_buffer->GetI422(); + planar_yuv8_buffer = + reinterpret_cast<const webrtc::PlanarYuv8Buffer*>(planar_yuv_buffer); + break; + case VideoFrameBuffer::Type::kI010: + planar_yuv_buffer = frame_buffer->GetI010(); + planar_yuv16_buffer = reinterpret_cast<const webrtc::PlanarYuv16BBuffer*>( + planar_yuv_buffer); + break; + case VideoFrameBuffer::Type::kI210: + planar_yuv_buffer = frame_buffer->GetI210(); + planar_yuv16_buffer = reinterpret_cast<const webrtc::PlanarYuv16BBuffer*>( + planar_yuv_buffer); + break; + default: + // If this code is changed to allow other video frame buffer type, + // make sure that the code below which wraps I420/I422/I444 buffer and + // code which converts to NV12 is changed + // to work with new video frame buffer type + + RTC_LOG(LS_ERROR) << "frame_buffer type: " + << static_cast<int32_t>(video_frame_buffer_type) + << " is not supported!"; + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + + // When needed, FFmpeg 
applies cropping by moving plane pointers and adjusting + // frame width/height. Ensure that cropped buffers lie within the allocated + // memory. + RTC_DCHECK_LE(av_frame_->width, planar_yuv_buffer->width()); + RTC_DCHECK_LE(av_frame_->height, planar_yuv_buffer->height()); + switch (video_frame_buffer_type) { + case VideoFrameBuffer::Type::kI420: + case VideoFrameBuffer::Type::kI444: + case VideoFrameBuffer::Type::kI422: { + RTC_DCHECK_GE(av_frame_->data[kYPlaneIndex], planar_yuv8_buffer->DataY()); + RTC_DCHECK_LE( + av_frame_->data[kYPlaneIndex] + + av_frame_->linesize[kYPlaneIndex] * av_frame_->height, + planar_yuv8_buffer->DataY() + + planar_yuv8_buffer->StrideY() * planar_yuv8_buffer->height()); + RTC_DCHECK_GE(av_frame_->data[kUPlaneIndex], planar_yuv8_buffer->DataU()); + RTC_DCHECK_LE( + av_frame_->data[kUPlaneIndex] + + av_frame_->linesize[kUPlaneIndex] * + planar_yuv8_buffer->ChromaHeight(), + planar_yuv8_buffer->DataU() + planar_yuv8_buffer->StrideU() * + planar_yuv8_buffer->ChromaHeight()); + RTC_DCHECK_GE(av_frame_->data[kVPlaneIndex], planar_yuv8_buffer->DataV()); + RTC_DCHECK_LE( + av_frame_->data[kVPlaneIndex] + + av_frame_->linesize[kVPlaneIndex] * + planar_yuv8_buffer->ChromaHeight(), + planar_yuv8_buffer->DataV() + planar_yuv8_buffer->StrideV() * + planar_yuv8_buffer->ChromaHeight()); + break; + } + case VideoFrameBuffer::Type::kI010: + case VideoFrameBuffer::Type::kI210: { + RTC_DCHECK_GE( + av_frame_->data[kYPlaneIndex], + reinterpret_cast<const uint8_t*>(planar_yuv16_buffer->DataY())); + RTC_DCHECK_LE( + av_frame_->data[kYPlaneIndex] + + av_frame_->linesize[kYPlaneIndex] * av_frame_->height, + reinterpret_cast<const uint8_t*>(planar_yuv16_buffer->DataY()) + + planar_yuv16_buffer->StrideY() * 2 * + planar_yuv16_buffer->height()); + RTC_DCHECK_GE( + av_frame_->data[kUPlaneIndex], + reinterpret_cast<const uint8_t*>(planar_yuv16_buffer->DataU())); + RTC_DCHECK_LE( + av_frame_->data[kUPlaneIndex] + + av_frame_->linesize[kUPlaneIndex] * + 
planar_yuv16_buffer->ChromaHeight(), + reinterpret_cast<const uint8_t*>(planar_yuv16_buffer->DataU()) + + planar_yuv16_buffer->StrideU() * 2 * + planar_yuv16_buffer->ChromaHeight()); + RTC_DCHECK_GE( + av_frame_->data[kVPlaneIndex], + reinterpret_cast<const uint8_t*>(planar_yuv16_buffer->DataV())); + RTC_DCHECK_LE( + av_frame_->data[kVPlaneIndex] + + av_frame_->linesize[kVPlaneIndex] * + planar_yuv16_buffer->ChromaHeight(), + reinterpret_cast<const uint8_t*>(planar_yuv16_buffer->DataV()) + + planar_yuv16_buffer->StrideV() * 2 * + planar_yuv16_buffer->ChromaHeight()); + break; + } + default: + RTC_LOG(LS_ERROR) << "frame_buffer type: " + << static_cast<int32_t>(video_frame_buffer_type) + << " is not supported!"; + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + + rtc::scoped_refptr<webrtc::VideoFrameBuffer> cropped_buffer; + switch (video_frame_buffer_type) { + case VideoFrameBuffer::Type::kI420: + cropped_buffer = WrapI420Buffer( + av_frame_->width, av_frame_->height, av_frame_->data[kYPlaneIndex], + av_frame_->linesize[kYPlaneIndex], av_frame_->data[kUPlaneIndex], + av_frame_->linesize[kUPlaneIndex], av_frame_->data[kVPlaneIndex], + av_frame_->linesize[kVPlaneIndex], + // To keep reference alive. + [frame_buffer] {}); + break; + case VideoFrameBuffer::Type::kI444: + cropped_buffer = WrapI444Buffer( + av_frame_->width, av_frame_->height, av_frame_->data[kYPlaneIndex], + av_frame_->linesize[kYPlaneIndex], av_frame_->data[kUPlaneIndex], + av_frame_->linesize[kUPlaneIndex], av_frame_->data[kVPlaneIndex], + av_frame_->linesize[kVPlaneIndex], + // To keep reference alive. + [frame_buffer] {}); + break; + case VideoFrameBuffer::Type::kI422: + cropped_buffer = WrapI422Buffer( + av_frame_->width, av_frame_->height, av_frame_->data[kYPlaneIndex], + av_frame_->linesize[kYPlaneIndex], av_frame_->data[kUPlaneIndex], + av_frame_->linesize[kUPlaneIndex], av_frame_->data[kVPlaneIndex], + av_frame_->linesize[kVPlaneIndex], + // To keep reference alive. 
+ [frame_buffer] {}); + break; + case VideoFrameBuffer::Type::kI010: + cropped_buffer = WrapI010Buffer( + av_frame_->width, av_frame_->height, + reinterpret_cast<const uint16_t*>(av_frame_->data[kYPlaneIndex]), + av_frame_->linesize[kYPlaneIndex] / 2, + reinterpret_cast<const uint16_t*>(av_frame_->data[kUPlaneIndex]), + av_frame_->linesize[kUPlaneIndex] / 2, + reinterpret_cast<const uint16_t*>(av_frame_->data[kVPlaneIndex]), + av_frame_->linesize[kVPlaneIndex] / 2, + // To keep reference alive. + [frame_buffer] {}); + break; + case VideoFrameBuffer::Type::kI210: + cropped_buffer = WrapI210Buffer( + av_frame_->width, av_frame_->height, + reinterpret_cast<const uint16_t*>(av_frame_->data[kYPlaneIndex]), + av_frame_->linesize[kYPlaneIndex] / 2, + reinterpret_cast<const uint16_t*>(av_frame_->data[kUPlaneIndex]), + av_frame_->linesize[kUPlaneIndex] / 2, + reinterpret_cast<const uint16_t*>(av_frame_->data[kVPlaneIndex]), + av_frame_->linesize[kVPlaneIndex] / 2, + // To keep reference alive. + [frame_buffer] {}); + break; + default: + RTC_LOG(LS_ERROR) << "frame_buffer type: " + << static_cast<int32_t>(video_frame_buffer_type) + << " is not supported!"; + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + + // Preference for NV12 output format is ignored if actual format isn't + // trivially convertible to it. 
+ if (preferred_output_format_ == VideoFrameBuffer::Type::kNV12 && + video_frame_buffer_type == VideoFrameBuffer::Type::kI420) { + auto nv12_buffer = output_buffer_pool_.CreateNV12Buffer( + cropped_buffer->width(), cropped_buffer->height()); + const PlanarYuv8Buffer* cropped_planar_yuv_buffer = + cropped_buffer->GetI420(); + libyuv::I420ToNV12(cropped_planar_yuv_buffer->DataY(), + cropped_planar_yuv_buffer->StrideY(), + cropped_planar_yuv_buffer->DataU(), + cropped_planar_yuv_buffer->StrideU(), + cropped_planar_yuv_buffer->DataV(), + cropped_planar_yuv_buffer->StrideV(), + nv12_buffer->MutableDataY(), nv12_buffer->StrideY(), + nv12_buffer->MutableDataUV(), nv12_buffer->StrideUV(), + planar_yuv_buffer->width(), planar_yuv_buffer->height()); + cropped_buffer = nv12_buffer; + } + + // Pass on color space from input frame if explicitly specified. + const ColorSpace& color_space = + input_image.ColorSpace() ? *input_image.ColorSpace() + : ExtractH264ColorSpace(av_context_.get()); + + VideoFrame decoded_frame = VideoFrame::Builder() + .set_video_frame_buffer(cropped_buffer) + .set_timestamp_rtp(input_image.Timestamp()) + .set_color_space(color_space) + .build(); + + // Return decoded frame. + // TODO(nisse): Timestamp and rotation are all zero here. Change decoder + // interface to pass a VideoFrameBuffer instead of a VideoFrame? + decoded_image_callback_->Decoded(decoded_frame, absl::nullopt, qp); + + // Stop referencing it, possibly freeing `input_frame`. 
+ av_frame_unref(av_frame_.get()); + input_frame = nullptr; + + return WEBRTC_VIDEO_CODEC_OK; +} + +const char* H264DecoderImpl::ImplementationName() const { + return "FFmpeg"; +} + +bool H264DecoderImpl::IsInitialized() const { + return av_context_ != nullptr; +} + +void H264DecoderImpl::ReportInit() { + if (has_reported_init_) + return; + RTC_HISTOGRAM_ENUMERATION("WebRTC.Video.H264DecoderImpl.Event", + kH264DecoderEventInit, kH264DecoderEventMax); + has_reported_init_ = true; +} + +void H264DecoderImpl::ReportError() { + if (has_reported_error_) + return; + RTC_HISTOGRAM_ENUMERATION("WebRTC.Video.H264DecoderImpl.Event", + kH264DecoderEventError, kH264DecoderEventMax); + has_reported_error_ = true; +} + +} // namespace webrtc + +#endif // WEBRTC_USE_H264 diff --git a/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_decoder_impl.h b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_decoder_impl.h new file mode 100644 index 0000000000..e5d9fd3871 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_decoder_impl.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_H264_H264_DECODER_IMPL_H_ +#define MODULES_VIDEO_CODING_CODECS_H264_H264_DECODER_IMPL_H_ + +// Everything declared in this header is only required when WebRTC is +// built with H264 support, please do not move anything out of the +// #ifdef unless needed and tested. +#ifdef WEBRTC_USE_H264 + +#if defined(WEBRTC_WIN) && !defined(__clang__) +#error "See: bugs.webrtc.org/9213#c13." 
+#endif + +#include <memory> + +#include "modules/video_coding/codecs/h264/include/h264.h" + +// CAVEAT: According to ffmpeg docs for avcodec_send_packet, ffmpeg requires a +// few extra padding bytes after the end of input. And in addition, docs for +// AV_INPUT_BUFFER_PADDING_SIZE say "If the first 23 bits of the additional +// bytes are not 0, then damaged MPEG bitstreams could cause overread and +// segfault." +// +// WebRTC doesn't ensure any such padding, and REQUIRES ffmpeg to be compiled +// with CONFIG_SAFE_BITSTREAM_READER, which is intended to eliminate +// out-of-bounds reads. ffmpeg docs don't say explicitly what effects this +// flag has on the h.264 decoder or avcodec_send_packet, though, so this is in +// some way depending on undocumented behavior. If any problems turn up, we may +// have to add an extra copy operation, to enforce padding before buffers are +// passed to ffmpeg. + +extern "C" { +#include "third_party/ffmpeg/libavcodec/avcodec.h" +} // extern "C" + +#include "common_video/h264/h264_bitstream_parser.h" +#include "common_video/include/video_frame_buffer_pool.h" + +namespace webrtc { + +struct AVCodecContextDeleter { + void operator()(AVCodecContext* ptr) const { avcodec_free_context(&ptr); } +}; +struct AVFrameDeleter { + void operator()(AVFrame* ptr) const { av_frame_free(&ptr); } +}; + +class H264DecoderImpl : public H264Decoder { + public: + H264DecoderImpl(); + ~H264DecoderImpl() override; + + bool Configure(const Settings& settings) override; + int32_t Release() override; + + int32_t RegisterDecodeCompleteCallback( + DecodedImageCallback* callback) override; + + // `missing_frames` and `render_time_ms` are ignored. + int32_t Decode(const EncodedImage& input_image, + bool /*missing_frames*/, + int64_t render_time_ms = -1) override; + + const char* ImplementationName() const override; + + private: + // Called by FFmpeg when it needs a frame buffer to store decoded frames in. 
+ // The `VideoFrame`s returned by FFmpeg at `Decode` originate from here. Their + // buffers are reference counted and freed by FFmpeg using `AVFreeBuffer2`. + static int AVGetBuffer2(AVCodecContext* context, + AVFrame* av_frame, + int flags); + // Called by FFmpeg when it is done with a video frame, see `AVGetBuffer2`. + static void AVFreeBuffer2(void* opaque, uint8_t* data); + + bool IsInitialized() const; + + // Reports statistics with histograms. + void ReportInit(); + void ReportError(); + + // Used by ffmpeg via `AVGetBuffer2()` to allocate decoded frame buffers + // (I420, I444, I422, I010 or I210). + VideoFrameBufferPool ffmpeg_buffer_pool_; + // Used to allocate NV12 images if NV12 output is preferred. + VideoFrameBufferPool output_buffer_pool_; + std::unique_ptr<AVCodecContext, AVCodecContextDeleter> av_context_; + std::unique_ptr<AVFrame, AVFrameDeleter> av_frame_; + + DecodedImageCallback* decoded_image_callback_; + + bool has_reported_init_; + bool has_reported_error_; + + webrtc::H264BitstreamParser h264_bitstream_parser_; + + // Decoder should produce this format if possible. + const VideoFrameBuffer::Type preferred_output_format_; +}; + +} // namespace webrtc + +#endif // WEBRTC_USE_H264 + +#endif // MODULES_VIDEO_CODING_CODECS_H264_H264_DECODER_IMPL_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_encoder_impl.cc b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_encoder_impl.cc new file mode 100644 index 0000000000..f6d52c6539 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_encoder_impl.cc @@ -0,0 +1,653 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ * + */ + +// Everything declared/defined in this file is only required when WebRTC is +// built with H264 support, please do not move anything out of the +// #ifdef unless needed and tested. +#ifdef WEBRTC_USE_H264 + +#include "modules/video_coding/codecs/h264/h264_encoder_impl.h" + +#include <algorithm> +#include <limits> +#include <string> + +#include "absl/strings/match.h" +#include "common_video/libyuv/include/webrtc_libyuv.h" +#include "modules/video_coding/utility/simulcast_rate_allocator.h" +#include "modules/video_coding/utility/simulcast_utility.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/time_utils.h" +#include "system_wrappers/include/metrics.h" +#include "third_party/libyuv/include/libyuv/convert.h" +#include "third_party/libyuv/include/libyuv/scale.h" +#include "third_party/openh264/src/codec/api/svc/codec_api.h" +#include "third_party/openh264/src/codec/api/svc/codec_app_def.h" +#include "third_party/openh264/src/codec/api/svc/codec_def.h" +#include "third_party/openh264/src/codec/api/svc/codec_ver.h" + +namespace webrtc { + +namespace { + +const bool kOpenH264EncoderDetailedLogging = false; + +// QP scaling thresholds. +static const int kLowH264QpThreshold = 24; +static const int kHighH264QpThreshold = 37; + +// Used by histograms. Values of entries should not be changed. +enum H264EncoderImplEvent { + kH264EncoderEventInit = 0, + kH264EncoderEventError = 1, + kH264EncoderEventMax = 16, +}; + +int NumberOfThreads(int width, int height, int number_of_cores) { + // TODO(hbos): In Chromium, multiple threads do not work with sandbox on Mac, + // see crbug.com/583348. Until further investigated, only use one thread. + // if (width * height >= 1920 * 1080 && number_of_cores > 8) { + // return 8; // 8 threads for 1080p on high perf machines. + // } else if (width * height > 1280 * 960 && number_of_cores >= 6) { + // return 3; // 3 threads for 1080p. 
+ // } else if (width * height > 640 * 480 && number_of_cores >= 3) { + // return 2; // 2 threads for qHD/HD. + // } else { + // return 1; // 1 thread for VGA or less. + // } + // TODO(sprang): Also check sSliceArgument.uiSliceNum in CreateEncoderParams(), + // before enabling multithreading here. + return 1; +} + +VideoFrameType ConvertToVideoFrameType(EVideoFrameType type) { + switch (type) { + case videoFrameTypeIDR: + return VideoFrameType::kVideoFrameKey; + case videoFrameTypeSkip: + case videoFrameTypeI: + case videoFrameTypeP: + case videoFrameTypeIPMixed: + return VideoFrameType::kVideoFrameDelta; + case videoFrameTypeInvalid: + break; + } + RTC_DCHECK_NOTREACHED() << "Unexpected/invalid frame type: " << type; + return VideoFrameType::kEmptyFrame; +} + +} // namespace + +// Helper method used by H264EncoderImpl::Encode. +// Copies the encoded bytes from `info` to `encoded_image`. The +// `encoded_image->_buffer` may be deleted and reallocated if a bigger buffer is +// required. +// +// After OpenH264 encoding, the encoded bytes are stored in `info` spread out +// over a number of layers and "NAL units". Each NAL unit is a fragment starting +// with the four-byte start code {0,0,0,1}. All of this data (including the +// start codes) is copied to the `encoded_image->_buffer`. +static void RtpFragmentize(EncodedImage* encoded_image, SFrameBSInfo* info) { + // Calculate minimum buffer size required to hold encoded data. + size_t required_capacity = 0; + size_t fragments_count = 0; + for (int layer = 0; layer < info->iLayerNum; ++layer) { + const SLayerBSInfo& layerInfo = info->sLayerInfo[layer]; + for (int nal = 0; nal < layerInfo.iNalCount; ++nal, ++fragments_count) { + RTC_CHECK_GE(layerInfo.pNalLengthInByte[nal], 0); + // Ensure `required_capacity` will not overflow. 
+ RTC_CHECK_LE(layerInfo.pNalLengthInByte[nal], + std::numeric_limits<size_t>::max() - required_capacity); + required_capacity += layerInfo.pNalLengthInByte[nal]; + } + } + auto buffer = EncodedImageBuffer::Create(required_capacity); + encoded_image->SetEncodedData(buffer); + + // Iterate layers and NAL units, note each NAL unit as a fragment and copy + // the data to `encoded_image->_buffer`. + const uint8_t start_code[4] = {0, 0, 0, 1}; + size_t frag = 0; + encoded_image->set_size(0); + for (int layer = 0; layer < info->iLayerNum; ++layer) { + const SLayerBSInfo& layerInfo = info->sLayerInfo[layer]; + // Iterate NAL units making up this layer, noting fragments. + size_t layer_len = 0; + for (int nal = 0; nal < layerInfo.iNalCount; ++nal, ++frag) { + // Because the sum of all layer lengths, `required_capacity`, fits in a + // `size_t`, we know that any indices in-between will not overflow. + RTC_DCHECK_GE(layerInfo.pNalLengthInByte[nal], 4); + RTC_DCHECK_EQ(layerInfo.pBsBuf[layer_len + 0], start_code[0]); + RTC_DCHECK_EQ(layerInfo.pBsBuf[layer_len + 1], start_code[1]); + RTC_DCHECK_EQ(layerInfo.pBsBuf[layer_len + 2], start_code[2]); + RTC_DCHECK_EQ(layerInfo.pBsBuf[layer_len + 3], start_code[3]); + layer_len += layerInfo.pNalLengthInByte[nal]; + } + // Copy the entire layer's data (including start codes). 
+ memcpy(buffer->data() + encoded_image->size(), layerInfo.pBsBuf, layer_len); + encoded_image->set_size(encoded_image->size() + layer_len); + } +} + +H264EncoderImpl::H264EncoderImpl(const cricket::VideoCodec& codec) + : packetization_mode_(H264PacketizationMode::SingleNalUnit), + max_payload_size_(0), + number_of_cores_(0), + encoded_image_callback_(nullptr), + has_reported_init_(false), + has_reported_error_(false) { + RTC_CHECK(absl::EqualsIgnoreCase(codec.name, cricket::kH264CodecName)); + std::string packetization_mode_string; + if (codec.GetParam(cricket::kH264FmtpPacketizationMode, + &packetization_mode_string) && + packetization_mode_string == "1") { + packetization_mode_ = H264PacketizationMode::NonInterleaved; + } + downscaled_buffers_.reserve(kMaxSimulcastStreams - 1); + encoded_images_.reserve(kMaxSimulcastStreams); + encoders_.reserve(kMaxSimulcastStreams); + configurations_.reserve(kMaxSimulcastStreams); + tl0sync_limit_.reserve(kMaxSimulcastStreams); +} + +H264EncoderImpl::~H264EncoderImpl() { + Release(); +} + +int32_t H264EncoderImpl::InitEncode(const VideoCodec* inst, + const VideoEncoder::Settings& settings) { + ReportInit(); + if (!inst || inst->codecType != kVideoCodecH264) { + ReportError(); + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->maxFramerate == 0) { + ReportError(); + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->width < 1 || inst->height < 1) { + ReportError(); + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + int32_t release_ret = Release(); + if (release_ret != WEBRTC_VIDEO_CODEC_OK) { + ReportError(); + return release_ret; + } + + int number_of_streams = SimulcastUtility::NumberOfSimulcastStreams(*inst); + bool doing_simulcast = (number_of_streams > 1); + + if (doing_simulcast && + !SimulcastUtility::ValidSimulcastParameters(*inst, number_of_streams)) { + return WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED; + } + downscaled_buffers_.resize(number_of_streams - 1); + 
encoded_images_.resize(number_of_streams); + encoders_.resize(number_of_streams); + pictures_.resize(number_of_streams); + configurations_.resize(number_of_streams); + tl0sync_limit_.resize(number_of_streams); + + number_of_cores_ = settings.number_of_cores; + max_payload_size_ = settings.max_payload_size; + codec_ = *inst; + + // Code expects simulcastStream resolutions to be correct, make sure they are + // filled even when there are no simulcast layers. + if (codec_.numberOfSimulcastStreams == 0) { + codec_.simulcastStream[0].width = codec_.width; + codec_.simulcastStream[0].height = codec_.height; + } + + for (int i = 0, idx = number_of_streams - 1; i < number_of_streams; + ++i, --idx) { + ISVCEncoder* openh264_encoder; + // Create encoder. + if (WelsCreateSVCEncoder(&openh264_encoder) != 0) { + // Failed to create encoder. + RTC_LOG(LS_ERROR) << "Failed to create OpenH264 encoder"; + RTC_DCHECK(!openh264_encoder); + Release(); + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + RTC_DCHECK(openh264_encoder); + if (kOpenH264EncoderDetailedLogging) { + int trace_level = WELS_LOG_DETAIL; + openh264_encoder->SetOption(ENCODER_OPTION_TRACE_LEVEL, &trace_level); + } + // else WELS_LOG_DEFAULT is used by default. + + // Store h264 encoder. + encoders_[i] = openh264_encoder; + + // Set internal settings from codec_settings + configurations_[i].simulcast_idx = idx; + configurations_[i].sending = false; + configurations_[i].width = codec_.simulcastStream[idx].width; + configurations_[i].height = codec_.simulcastStream[idx].height; + configurations_[i].max_frame_rate = static_cast<float>(codec_.maxFramerate); + configurations_[i].frame_dropping_on = codec_.GetFrameDropEnabled(); + configurations_[i].key_frame_interval = codec_.H264()->keyFrameInterval; + configurations_[i].num_temporal_layers = + std::max(codec_.H264()->numberOfTemporalLayers, + codec_.simulcastStream[idx].numberOfTemporalLayers); + + // Create downscaled image buffers. 
+ if (i > 0) { + downscaled_buffers_[i - 1] = I420Buffer::Create( + configurations_[i].width, configurations_[i].height, + configurations_[i].width, configurations_[i].width / 2, + configurations_[i].width / 2); + } + + // Codec_settings uses kbits/second; encoder uses bits/second. + configurations_[i].max_bps = codec_.maxBitrate * 1000; + configurations_[i].target_bps = codec_.startBitrate * 1000; + + // Create encoder parameters based on the layer configuration. + SEncParamExt encoder_params = CreateEncoderParams(i); + + // Initialize. + if (openh264_encoder->InitializeExt(&encoder_params) != 0) { + RTC_LOG(LS_ERROR) << "Failed to initialize OpenH264 encoder"; + Release(); + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + // TODO(pbos): Base init params on these values before submitting. + int video_format = EVideoFormatType::videoFormatI420; + openh264_encoder->SetOption(ENCODER_OPTION_DATAFORMAT, &video_format); + + // Initialize encoded image. Default buffer size: size of unencoded data. 
+ + const size_t new_capacity = + CalcBufferSize(VideoType::kI420, codec_.simulcastStream[idx].width, + codec_.simulcastStream[idx].height); + encoded_images_[i].SetEncodedData(EncodedImageBuffer::Create(new_capacity)); + encoded_images_[i]._encodedWidth = codec_.simulcastStream[idx].width; + encoded_images_[i]._encodedHeight = codec_.simulcastStream[idx].height; + encoded_images_[i].set_size(0); + + tl0sync_limit_[i] = configurations_[i].num_temporal_layers; + } + + SimulcastRateAllocator init_allocator(codec_); + VideoBitrateAllocation allocation = + init_allocator.Allocate(VideoBitrateAllocationParameters( + DataRate::KilobitsPerSec(codec_.startBitrate), codec_.maxFramerate)); + SetRates(RateControlParameters(allocation, codec_.maxFramerate)); + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t H264EncoderImpl::Release() { + while (!encoders_.empty()) { + ISVCEncoder* openh264_encoder = encoders_.back(); + if (openh264_encoder) { + RTC_CHECK_EQ(0, openh264_encoder->Uninitialize()); + WelsDestroySVCEncoder(openh264_encoder); + } + encoders_.pop_back(); + } + downscaled_buffers_.clear(); + configurations_.clear(); + encoded_images_.clear(); + pictures_.clear(); + tl0sync_limit_.clear(); + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t H264EncoderImpl::RegisterEncodeCompleteCallback( + EncodedImageCallback* callback) { + encoded_image_callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +void H264EncoderImpl::SetRates(const RateControlParameters& parameters) { + if (encoders_.empty()) { + RTC_LOG(LS_WARNING) << "SetRates() while uninitialized."; + return; + } + + if (parameters.framerate_fps < 1.0) { + RTC_LOG(LS_WARNING) << "Invalid frame rate: " << parameters.framerate_fps; + return; + } + + if (parameters.bitrate.get_sum_bps() == 0) { + // Encoder paused, turn off all encoding. 
+ for (size_t i = 0; i < configurations_.size(); ++i) { + configurations_[i].SetStreamState(false); + } + return; + } + + codec_.maxFramerate = static_cast<uint32_t>(parameters.framerate_fps); + + size_t stream_idx = encoders_.size() - 1; + for (size_t i = 0; i < encoders_.size(); ++i, --stream_idx) { + // Update layer config. + configurations_[i].target_bps = + parameters.bitrate.GetSpatialLayerSum(stream_idx); + configurations_[i].max_frame_rate = parameters.framerate_fps; + + if (configurations_[i].target_bps) { + configurations_[i].SetStreamState(true); + + // Update h264 encoder. + SBitrateInfo target_bitrate; + memset(&target_bitrate, 0, sizeof(SBitrateInfo)); + target_bitrate.iLayer = SPATIAL_LAYER_ALL, + target_bitrate.iBitrate = configurations_[i].target_bps; + encoders_[i]->SetOption(ENCODER_OPTION_BITRATE, &target_bitrate); + encoders_[i]->SetOption(ENCODER_OPTION_FRAME_RATE, + &configurations_[i].max_frame_rate); + } else { + configurations_[i].SetStreamState(false); + } + } +} + +int32_t H264EncoderImpl::Encode( + const VideoFrame& input_frame, + const std::vector<VideoFrameType>* frame_types) { + if (encoders_.empty()) { + ReportError(); + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (!encoded_image_callback_) { + RTC_LOG(LS_WARNING) + << "InitEncode() has been called, but a callback function " + "has not been set with RegisterEncodeCompleteCallback()"; + ReportError(); + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + rtc::scoped_refptr<I420BufferInterface> frame_buffer = + input_frame.video_frame_buffer()->ToI420(); + if (!frame_buffer) { + RTC_LOG(LS_ERROR) << "Failed to convert " + << VideoFrameBufferTypeToString( + input_frame.video_frame_buffer()->type()) + << " image to I420. 
Can't encode frame."; + return WEBRTC_VIDEO_CODEC_ENCODER_FAILURE; + } + RTC_CHECK(frame_buffer->type() == VideoFrameBuffer::Type::kI420 || + frame_buffer->type() == VideoFrameBuffer::Type::kI420A); + + bool send_key_frame = false; + for (size_t i = 0; i < configurations_.size(); ++i) { + if (configurations_[i].key_frame_request && configurations_[i].sending) { + send_key_frame = true; + break; + } + } + + if (!send_key_frame && frame_types) { + for (size_t i = 0; i < configurations_.size(); ++i) { + const size_t simulcast_idx = + static_cast<size_t>(configurations_[i].simulcast_idx); + if (configurations_[i].sending && simulcast_idx < frame_types->size() && + (*frame_types)[simulcast_idx] == VideoFrameType::kVideoFrameKey) { + send_key_frame = true; + break; + } + } + } + + RTC_DCHECK_EQ(configurations_[0].width, frame_buffer->width()); + RTC_DCHECK_EQ(configurations_[0].height, frame_buffer->height()); + + // Encode image for each layer. + for (size_t i = 0; i < encoders_.size(); ++i) { + // EncodeFrame input. + pictures_[i] = {0}; + pictures_[i].iPicWidth = configurations_[i].width; + pictures_[i].iPicHeight = configurations_[i].height; + pictures_[i].iColorFormat = EVideoFormatType::videoFormatI420; + pictures_[i].uiTimeStamp = input_frame.ntp_time_ms(); + // Downscale images on second and ongoing layers. 
+ if (i == 0) { + pictures_[i].iStride[0] = frame_buffer->StrideY(); + pictures_[i].iStride[1] = frame_buffer->StrideU(); + pictures_[i].iStride[2] = frame_buffer->StrideV(); + pictures_[i].pData[0] = const_cast<uint8_t*>(frame_buffer->DataY()); + pictures_[i].pData[1] = const_cast<uint8_t*>(frame_buffer->DataU()); + pictures_[i].pData[2] = const_cast<uint8_t*>(frame_buffer->DataV()); + } else { + pictures_[i].iStride[0] = downscaled_buffers_[i - 1]->StrideY(); + pictures_[i].iStride[1] = downscaled_buffers_[i - 1]->StrideU(); + pictures_[i].iStride[2] = downscaled_buffers_[i - 1]->StrideV(); + pictures_[i].pData[0] = + const_cast<uint8_t*>(downscaled_buffers_[i - 1]->DataY()); + pictures_[i].pData[1] = + const_cast<uint8_t*>(downscaled_buffers_[i - 1]->DataU()); + pictures_[i].pData[2] = + const_cast<uint8_t*>(downscaled_buffers_[i - 1]->DataV()); + // Scale the image down a number of times by downsampling factor. + libyuv::I420Scale(pictures_[i - 1].pData[0], pictures_[i - 1].iStride[0], + pictures_[i - 1].pData[1], pictures_[i - 1].iStride[1], + pictures_[i - 1].pData[2], pictures_[i - 1].iStride[2], + configurations_[i - 1].width, + configurations_[i - 1].height, pictures_[i].pData[0], + pictures_[i].iStride[0], pictures_[i].pData[1], + pictures_[i].iStride[1], pictures_[i].pData[2], + pictures_[i].iStride[2], configurations_[i].width, + configurations_[i].height, libyuv::kFilterBox); + } + + if (!configurations_[i].sending) { + continue; + } + if (frame_types != nullptr) { + // Skip frame? + if ((*frame_types)[i] == VideoFrameType::kEmptyFrame) { + continue; + } + } + if (send_key_frame) { + // API doc says ForceIntraFrame(false) does nothing, but calling this + // function forces a key frame regardless of the `bIDR` argument's value. + // (If every frame is a key frame we get lag/delays.) + encoders_[i]->ForceIntraFrame(true); + configurations_[i].key_frame_request = false; + } + // EncodeFrame output. 
+ SFrameBSInfo info; + memset(&info, 0, sizeof(SFrameBSInfo)); + + // Encode! + int enc_ret = encoders_[i]->EncodeFrame(&pictures_[i], &info); + if (enc_ret != 0) { + RTC_LOG(LS_ERROR) + << "OpenH264 frame encoding failed, EncodeFrame returned " << enc_ret + << "."; + ReportError(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + + encoded_images_[i]._encodedWidth = configurations_[i].width; + encoded_images_[i]._encodedHeight = configurations_[i].height; + encoded_images_[i].SetTimestamp(input_frame.timestamp()); + encoded_images_[i].SetColorSpace(input_frame.color_space()); + encoded_images_[i]._frameType = ConvertToVideoFrameType(info.eFrameType); + encoded_images_[i].SetSpatialIndex(configurations_[i].simulcast_idx); + + // Split encoded image up into fragments. This also updates + // `encoded_image_`. + RtpFragmentize(&encoded_images_[i], &info); + + // Encoder can skip frames to save bandwidth in which case + // `encoded_images_[i]._length` == 0. + if (encoded_images_[i].size() > 0) { + // Parse QP. + h264_bitstream_parser_.ParseBitstream(encoded_images_[i]); + encoded_images_[i].qp_ = + h264_bitstream_parser_.GetLastSliceQp().value_or(-1); + + // Deliver encoded image. 
+ CodecSpecificInfo codec_specific; + codec_specific.codecType = kVideoCodecH264; + codec_specific.codecSpecific.H264.packetization_mode = + packetization_mode_; + codec_specific.codecSpecific.H264.temporal_idx = kNoTemporalIdx; + codec_specific.codecSpecific.H264.idr_frame = + info.eFrameType == videoFrameTypeIDR; + codec_specific.codecSpecific.H264.base_layer_sync = false; + if (configurations_[i].num_temporal_layers > 1) { + const uint8_t tid = info.sLayerInfo[0].uiTemporalId; + codec_specific.codecSpecific.H264.temporal_idx = tid; + codec_specific.codecSpecific.H264.base_layer_sync = + tid > 0 && tid < tl0sync_limit_[i]; + if (codec_specific.codecSpecific.H264.base_layer_sync) { + tl0sync_limit_[i] = tid; + } + if (tid == 0) { + tl0sync_limit_[i] = configurations_[i].num_temporal_layers; + } + } + encoded_image_callback_->OnEncodedImage(encoded_images_[i], + &codec_specific); + } + } + return WEBRTC_VIDEO_CODEC_OK; +} + +// Initialization parameters. +// There are two ways to initialize. There is SEncParamBase (cleared with +// memset(&p, 0, sizeof(SEncParamBase)) used in Initialize, and SEncParamExt +// which is a superset of SEncParamBase (cleared with GetDefaultParams) used +// in InitializeExt. +SEncParamExt H264EncoderImpl::CreateEncoderParams(size_t i) const { + SEncParamExt encoder_params; + encoders_[i]->GetDefaultParams(&encoder_params); + if (codec_.mode == VideoCodecMode::kRealtimeVideo) { + encoder_params.iUsageType = CAMERA_VIDEO_REAL_TIME; + } else if (codec_.mode == VideoCodecMode::kScreensharing) { + encoder_params.iUsageType = SCREEN_CONTENT_REAL_TIME; + } else { + RTC_DCHECK_NOTREACHED(); + } + encoder_params.iPicWidth = configurations_[i].width; + encoder_params.iPicHeight = configurations_[i].height; + encoder_params.iTargetBitrate = configurations_[i].target_bps; + // Keep unspecified. WebRTC's max codec bitrate is not the same setting + // as OpenH264's iMaxBitrate. 
More details in https://crbug.com/webrtc/11543 + encoder_params.iMaxBitrate = UNSPECIFIED_BIT_RATE; + // Rate Control mode + encoder_params.iRCMode = RC_BITRATE_MODE; + encoder_params.fMaxFrameRate = configurations_[i].max_frame_rate; + + // The following parameters are extension parameters (they're in SEncParamExt, + // not in SEncParamBase). + encoder_params.bEnableFrameSkip = configurations_[i].frame_dropping_on; + // `uiIntraPeriod` - multiple of GOP size + // `keyFrameInterval` - number of frames + encoder_params.uiIntraPeriod = configurations_[i].key_frame_interval; + // Reuse SPS id if possible. This helps to avoid reset of chromium HW decoder + // on each key-frame. + // Note that WebRTC resets encoder on resolution change which makes all + // EParameterSetStrategy modes except INCREASING_ID (default) essentially + // equivalent to CONSTANT_ID. + encoder_params.eSpsPpsIdStrategy = SPS_LISTING; + encoder_params.uiMaxNalSize = 0; + // Threading model: use auto. + // 0: auto (dynamic imp. internal encoder) + // 1: single thread (default value) + // >1: number of threads + encoder_params.iMultipleThreadIdc = NumberOfThreads( + encoder_params.iPicWidth, encoder_params.iPicHeight, number_of_cores_); + // The base spatial layer 0 is the only one we use. + encoder_params.sSpatialLayers[0].iVideoWidth = encoder_params.iPicWidth; + encoder_params.sSpatialLayers[0].iVideoHeight = encoder_params.iPicHeight; + encoder_params.sSpatialLayers[0].fFrameRate = encoder_params.fMaxFrameRate; + encoder_params.sSpatialLayers[0].iSpatialBitrate = + encoder_params.iTargetBitrate; + encoder_params.sSpatialLayers[0].iMaxSpatialBitrate = + encoder_params.iMaxBitrate; + encoder_params.iTemporalLayerNum = configurations_[i].num_temporal_layers; + if (encoder_params.iTemporalLayerNum > 1) { + // iNumRefFrame specifies total number of reference buffers to allocate. 
+ // For N temporal layers we need at least (N - 1) buffers to store last + // encoded frames of all reference temporal layers. + // Note that there is no API in OpenH264 encoder to specify exact set of + // references to be used for prediction of a given frame. Encoder can + // theoretically use all available reference buffers. + encoder_params.iNumRefFrame = encoder_params.iTemporalLayerNum - 1; + } + RTC_LOG(LS_INFO) << "OpenH264 version is " << OPENH264_MAJOR << "." + << OPENH264_MINOR; + switch (packetization_mode_) { + case H264PacketizationMode::SingleNalUnit: + // Limit the size of the packets produced. + encoder_params.sSpatialLayers[0].sSliceArgument.uiSliceNum = 1; + encoder_params.sSpatialLayers[0].sSliceArgument.uiSliceMode = + SM_SIZELIMITED_SLICE; + encoder_params.sSpatialLayers[0].sSliceArgument.uiSliceSizeConstraint = + static_cast<unsigned int>(max_payload_size_); + RTC_LOG(LS_INFO) << "Encoder is configured with NALU constraint: " + << max_payload_size_ << " bytes"; + break; + case H264PacketizationMode::NonInterleaved: + // When uiSliceMode = SM_FIXEDSLCNUM_SLICE, uiSliceNum = 0 means auto + // design it with cpu core number. + // TODO(sprang): Set to 0 when we understand why the rate controller borks + // when uiSliceNum > 1. 
+ encoder_params.sSpatialLayers[0].sSliceArgument.uiSliceNum = 1; + encoder_params.sSpatialLayers[0].sSliceArgument.uiSliceMode = + SM_FIXEDSLCNUM_SLICE; + break; + } + return encoder_params; +} + +void H264EncoderImpl::ReportInit() { + if (has_reported_init_) + return; + RTC_HISTOGRAM_ENUMERATION("WebRTC.Video.H264EncoderImpl.Event", + kH264EncoderEventInit, kH264EncoderEventMax); + has_reported_init_ = true; +} + +void H264EncoderImpl::ReportError() { + if (has_reported_error_) + return; + RTC_HISTOGRAM_ENUMERATION("WebRTC.Video.H264EncoderImpl.Event", + kH264EncoderEventError, kH264EncoderEventMax); + has_reported_error_ = true; +} + +VideoEncoder::EncoderInfo H264EncoderImpl::GetEncoderInfo() const { + EncoderInfo info; + info.supports_native_handle = false; + info.implementation_name = "OpenH264"; + info.scaling_settings = + VideoEncoder::ScalingSettings(kLowH264QpThreshold, kHighH264QpThreshold); + info.is_hardware_accelerated = false; + info.supports_simulcast = true; + info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420}; + return info; +} + +void H264EncoderImpl::LayerConfig::SetStreamState(bool send_stream) { + if (send_stream && !sending) { + // Need a key frame if we have not sent this stream before. + key_frame_request = true; + } + sending = send_stream; +} + +} // namespace webrtc + +#endif // WEBRTC_USE_H264 diff --git a/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_encoder_impl.h b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_encoder_impl.h new file mode 100644 index 0000000000..1163464421 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_encoder_impl.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_H264_H264_ENCODER_IMPL_H_ +#define MODULES_VIDEO_CODING_CODECS_H264_H264_ENCODER_IMPL_H_ + +// Everything declared in this header is only required when WebRTC is +// built with H264 support, please do not move anything out of the +// #ifdef unless needed and tested. +#ifdef WEBRTC_USE_H264 + +#if defined(WEBRTC_WIN) && !defined(__clang__) +#error "See: bugs.webrtc.org/9213#c13." +#endif + +#include <memory> +#include <vector> + +#include "api/video/i420_buffer.h" +#include "api/video_codecs/video_encoder.h" +#include "common_video/h264/h264_bitstream_parser.h" +#include "modules/video_coding/codecs/h264/include/h264.h" +#include "modules/video_coding/utility/quality_scaler.h" +#include "third_party/openh264/src/codec/api/svc/codec_app_def.h" + +class ISVCEncoder; + +namespace webrtc { + +class H264EncoderImpl : public H264Encoder { + public: + struct LayerConfig { + int simulcast_idx = 0; + int width = -1; + int height = -1; + bool sending = true; + bool key_frame_request = false; + float max_frame_rate = 0; + uint32_t target_bps = 0; + uint32_t max_bps = 0; + bool frame_dropping_on = false; + int key_frame_interval = 0; + int num_temporal_layers = 1; + + void SetStreamState(bool send_stream); + }; + + public: + explicit H264EncoderImpl(const cricket::VideoCodec& codec); + ~H264EncoderImpl() override; + + // `settings.max_payload_size` is ignored. + // The following members of `codec_settings` are used. The rest are ignored. 
+ // - codecType (must be kVideoCodecH264) + // - targetBitrate + // - maxFramerate + // - width + // - height + int32_t InitEncode(const VideoCodec* codec_settings, + const VideoEncoder::Settings& settings) override; + int32_t Release() override; + + int32_t RegisterEncodeCompleteCallback( + EncodedImageCallback* callback) override; + void SetRates(const RateControlParameters& parameters) override; + + // The result of encoding - an EncodedImage and CodecSpecificInfo - are + // passed to the encode complete callback. + int32_t Encode(const VideoFrame& frame, + const std::vector<VideoFrameType>* frame_types) override; + + EncoderInfo GetEncoderInfo() const override; + + // Exposed for testing. + H264PacketizationMode PacketizationModeForTesting() const { + return packetization_mode_; + } + + private: + SEncParamExt CreateEncoderParams(size_t i) const; + + webrtc::H264BitstreamParser h264_bitstream_parser_; + // Reports statistics with histograms. + void ReportInit(); + void ReportError(); + + std::vector<ISVCEncoder*> encoders_; + std::vector<SSourcePicture> pictures_; + std::vector<rtc::scoped_refptr<I420Buffer>> downscaled_buffers_; + std::vector<LayerConfig> configurations_; + std::vector<EncodedImage> encoded_images_; + + VideoCodec codec_; + H264PacketizationMode packetization_mode_; + size_t max_payload_size_; + int32_t number_of_cores_; + EncodedImageCallback* encoded_image_callback_; + + bool has_reported_init_; + bool has_reported_error_; + + std::vector<uint8_t> tl0sync_limit_; +}; + +} // namespace webrtc + +#endif // WEBRTC_USE_H264 + +#endif // MODULES_VIDEO_CODING_CODECS_H264_H264_ENCODER_IMPL_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_encoder_impl_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_encoder_impl_unittest.cc new file mode 100644 index 0000000000..52d26955ab --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_encoder_impl_unittest.cc @@ -0,0 +1,89 @@ +/* 
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#include "modules/video_coding/codecs/h264/h264_encoder_impl.h" + +#include "api/video_codecs/video_encoder.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +const int kMaxPayloadSize = 1024; +const int kNumCores = 1; + +const VideoEncoder::Capabilities kCapabilities(false); +const VideoEncoder::Settings kSettings(kCapabilities, + kNumCores, + kMaxPayloadSize); + +void SetDefaultSettings(VideoCodec* codec_settings) { + codec_settings->codecType = kVideoCodecH264; + codec_settings->maxFramerate = 60; + codec_settings->width = 640; + codec_settings->height = 480; + // If frame dropping is false, we get a warning that bitrate can't + // be controlled for RC_QUALITY_MODE; RC_BITRATE_MODE and RC_TIMESTAMP_MODE + codec_settings->SetFrameDropEnabled(true); + codec_settings->startBitrate = 2000; + codec_settings->maxBitrate = 4000; +} + +TEST(H264EncoderImplTest, CanInitializeWithDefaultParameters) { + H264EncoderImpl encoder(cricket::VideoCodec("H264")); + VideoCodec codec_settings; + SetDefaultSettings(&codec_settings); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder.InitEncode(&codec_settings, kSettings)); + EXPECT_EQ(H264PacketizationMode::NonInterleaved, + encoder.PacketizationModeForTesting()); +} + +TEST(H264EncoderImplTest, CanInitializeWithNonInterleavedModeExplicitly) { + cricket::VideoCodec codec("H264"); + codec.SetParam(cricket::kH264FmtpPacketizationMode, "1"); + H264EncoderImpl encoder(codec); + VideoCodec codec_settings; + SetDefaultSettings(&codec_settings); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder.InitEncode(&codec_settings, 
kSettings)); + EXPECT_EQ(H264PacketizationMode::NonInterleaved, + encoder.PacketizationModeForTesting()); +} + +TEST(H264EncoderImplTest, CanInitializeWithSingleNalUnitModeExplicitly) { + cricket::VideoCodec codec("H264"); + codec.SetParam(cricket::kH264FmtpPacketizationMode, "0"); + H264EncoderImpl encoder(codec); + VideoCodec codec_settings; + SetDefaultSettings(&codec_settings); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder.InitEncode(&codec_settings, kSettings)); + EXPECT_EQ(H264PacketizationMode::SingleNalUnit, + encoder.PacketizationModeForTesting()); +} + +TEST(H264EncoderImplTest, CanInitializeWithRemovedParameter) { + cricket::VideoCodec codec("H264"); + codec.RemoveParam(cricket::kH264FmtpPacketizationMode); + H264EncoderImpl encoder(codec); + VideoCodec codec_settings; + SetDefaultSettings(&codec_settings); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder.InitEncode(&codec_settings, kSettings)); + EXPECT_EQ(H264PacketizationMode::SingleNalUnit, + encoder.PacketizationModeForTesting()); +} + +} // anonymous namespace + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_simulcast_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_simulcast_unittest.cc new file mode 100644 index 0000000000..12b5da1404 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/h264/h264_simulcast_unittest.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <memory> + +#include "api/test/create_simulcast_test_fixture.h" +#include "api/test/simulcast_test_fixture.h" +#include "api/test/video/function_video_decoder_factory.h" +#include "api/test/video/function_video_encoder_factory.h" +#include "modules/video_coding/codecs/h264/include/h264.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { + +namespace { +std::unique_ptr<SimulcastTestFixture> CreateSpecificSimulcastTestFixture() { + std::unique_ptr<VideoEncoderFactory> encoder_factory = + std::make_unique<FunctionVideoEncoderFactory>( + []() { return H264Encoder::Create(cricket::VideoCodec("H264")); }); + std::unique_ptr<VideoDecoderFactory> decoder_factory = + std::make_unique<FunctionVideoDecoderFactory>( + []() { return H264Decoder::Create(); }); + return CreateSimulcastTestFixture(std::move(encoder_factory), + std::move(decoder_factory), + SdpVideoFormat("H264")); +} +} // namespace + +TEST(TestH264Simulcast, TestKeyFrameRequestsOnAllStreams) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestKeyFrameRequestsOnAllStreams(); +} + +TEST(TestH264Simulcast, TestPaddingAllStreams) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestPaddingAllStreams(); +} + +TEST(TestH264Simulcast, TestPaddingTwoStreams) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestPaddingTwoStreams(); +} + +TEST(TestH264Simulcast, TestPaddingTwoStreamsOneMaxedOut) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestPaddingTwoStreamsOneMaxedOut(); +} + +TEST(TestH264Simulcast, TestPaddingOneStream) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestPaddingOneStream(); +} + +TEST(TestH264Simulcast, TestPaddingOneStreamTwoMaxedOut) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestPaddingOneStreamTwoMaxedOut(); +} + +TEST(TestH264Simulcast, TestSendAllStreams) { + auto fixture = CreateSpecificSimulcastTestFixture(); + 
fixture->TestSendAllStreams(); +} + +TEST(TestH264Simulcast, TestDisablingStreams) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestDisablingStreams(); +} + +TEST(TestH264Simulcast, TestActiveStreams) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestActiveStreams(); +} + +TEST(TestH264Simulcast, TestSwitchingToOneStream) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestSwitchingToOneStream(); +} + +TEST(TestH264Simulcast, TestSwitchingToOneOddStream) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestSwitchingToOneOddStream(); +} + +TEST(TestH264Simulcast, TestStrideEncodeDecode) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestStrideEncodeDecode(); +} + +TEST(TestH264Simulcast, TestSpatioTemporalLayers333PatternEncoder) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestSpatioTemporalLayers333PatternEncoder(); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/h264/include/h264.h b/third_party/libwebrtc/modules/video_coding/codecs/h264/include/h264.h new file mode 100644 index 0000000000..2635b53842 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/h264/include/h264.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ * + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_H264_INCLUDE_H264_H_ +#define MODULES_VIDEO_CODING_CODECS_H264_INCLUDE_H264_H_ + +#include <memory> +#include <string> +#include <vector> + +#include "api/video_codecs/h264_profile_level_id.h" +#include "api/video_codecs/scalability_mode.h" +#include "media/base/codec.h" +#include "modules/video_coding/include/video_codec_interface.h" +#include "rtc_base/system/rtc_export.h" + +namespace webrtc { + +struct SdpVideoFormat; + +// Creates an H264 SdpVideoFormat entry with specified parameters. +RTC_EXPORT SdpVideoFormat +CreateH264Format(H264Profile profile, + H264Level level, + const std::string& packetization_mode, + bool add_scalability_modes = false); + +// Set to disable the H.264 encoder/decoder implementations that are provided if +// `rtc_use_h264` build flag is true (if false, this function does nothing). +// This function should only be called before or during WebRTC initialization +// and is not thread-safe. +RTC_EXPORT void DisableRtcUseH264(); + +// Returns a vector with all supported internal H264 encode profiles that we can +// negotiate in SDP, in order of preference. +std::vector<SdpVideoFormat> SupportedH264Codecs( + bool add_scalability_modes = false); + +// Returns a vector with all supported internal H264 decode profiles that we can +// negotiate in SDP, in order of preference. This will be available for receive +// only connections. +std::vector<SdpVideoFormat> SupportedH264DecoderCodecs(); + +class RTC_EXPORT H264Encoder : public VideoEncoder { + public: + static std::unique_ptr<H264Encoder> Create(const cricket::VideoCodec& codec); + // If H.264 is supported (any implementation). 
+ static bool IsSupported(); + static bool SupportsScalabilityMode(ScalabilityMode scalability_mode); + + ~H264Encoder() override {} +}; + +class RTC_EXPORT H264Decoder : public VideoDecoder { + public: + static std::unique_ptr<H264Decoder> Create(); + static bool IsSupported(); + + ~H264Decoder() override {} +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_H264_INCLUDE_H264_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/h264/include/h264_globals.h b/third_party/libwebrtc/modules/video_coding/codecs/h264/include/h264_globals.h new file mode 100644 index 0000000000..b61dc8c507 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/h264/include/h264_globals.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This file contains codec dependent definitions that are needed in +// order to compile the WebRTC codebase, even if this codec is not used. + +#ifndef MODULES_VIDEO_CODING_CODECS_H264_INCLUDE_H264_GLOBALS_H_ +#define MODULES_VIDEO_CODING_CODECS_H264_INCLUDE_H264_GLOBALS_H_ + +#include <string> + +#include "modules/video_coding/codecs/interface/common_constants.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +// The packetization types that we support: single, aggregated, and fragmented. +enum H264PacketizationTypes { + kH264SingleNalu, // This packet contains a single NAL unit. + kH264StapA, // This packet contains STAP-A (single time + // aggregation) packets. If this packet has an + // associated NAL unit type, it'll be for the + // first such aggregated packet. 
+ kH264FuA, // This packet contains a FU-A (fragmentation + // unit) packet, meaning it is a part of a frame + // that was too large to fit into a single packet. +}; + +// Packetization modes are defined in RFC 6184 section 6 +// Due to the structure containing this being initialized with zeroes +// in some places, and mode 1 being default, mode 1 needs to have the value +// zero. https://crbug.com/webrtc/6803 +enum class H264PacketizationMode { + NonInterleaved = 0, // Mode 1 - STAP-A, FU-A is allowed + SingleNalUnit // Mode 0 - only single NALU allowed +}; + +// This function is declared inline because it is not clear which +// .cc file it should belong to. +// TODO(hta): Refactor. https://bugs.webrtc.org/6842 +// TODO(jonasolsson): Use absl::string_view instead when that's available. +inline std::string ToString(H264PacketizationMode mode) { + if (mode == H264PacketizationMode::NonInterleaved) { + return "NonInterleaved"; + } else if (mode == H264PacketizationMode::SingleNalUnit) { + return "SingleNalUnit"; + } + RTC_DCHECK_NOTREACHED(); + return ""; +} + +struct NaluInfo { + uint8_t type; + int sps_id; + int pps_id; +}; + +const size_t kMaxNalusPerPacket = 10; + +struct RTPVideoHeaderH264 { + // The NAL unit type. If this is a header for a + // fragmented packet, it's the NAL unit type of + // the original data. If this is the header for an + // aggregated packet, it's the NAL unit type of + // the first NAL unit in the packet. + uint8_t nalu_type; + // The packetization type of this buffer - single, aggregated or fragmented. + H264PacketizationTypes packetization_type; + NaluInfo nalus[kMaxNalusPerPacket]; + size_t nalus_length; + // The packetization mode of this transport. Packetization mode + // determines which packetization types are allowed when packetizing. 
+ H264PacketizationMode packetization_mode; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_H264_INCLUDE_H264_GLOBALS_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/h264/test/h264_impl_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/h264/test/h264_impl_unittest.cc new file mode 100644 index 0000000000..595e627bcc --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/h264/test/h264_impl_unittest.cc @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdint.h> + +#include <memory> + +#include "absl/types/optional.h" +#include "api/video/color_space.h" +#include "api/video/encoded_image.h" +#include "api/video/video_frame.h" +#include "api/video_codecs/video_codec.h" +#include "api/video_codecs/video_decoder.h" +#include "api/video_codecs/video_encoder.h" +#include "common_video/libyuv/include/webrtc_libyuv.h" +#include "media/base/codec.h" +#include "media/base/media_constants.h" +#include "modules/video_coding/codecs/h264/include/h264.h" +#include "modules/video_coding/codecs/test/video_codec_unittest.h" +#include "modules/video_coding/include/video_codec_interface.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "test/gtest.h" +#include "test/video_codec_settings.h" + +namespace webrtc { + +class TestH264Impl : public VideoCodecUnitTest { + protected: + std::unique_ptr<VideoEncoder> CreateEncoder() override { + return H264Encoder::Create(cricket::VideoCodec(cricket::kH264CodecName)); + } + + std::unique_ptr<VideoDecoder> CreateDecoder() override { + return H264Decoder::Create(); + } 
+ + void ModifyCodecSettings(VideoCodec* codec_settings) override { + webrtc::test::CodecSettings(kVideoCodecH264, codec_settings); + } +}; + +#ifdef WEBRTC_USE_H264 +#define MAYBE_EncodeDecode EncodeDecode +#define MAYBE_DecodedQpEqualsEncodedQp DecodedQpEqualsEncodedQp +#else +#define MAYBE_EncodeDecode DISABLED_EncodeDecode +#define MAYBE_DecodedQpEqualsEncodedQp DISABLED_DecodedQpEqualsEncodedQp +#endif + +TEST_F(TestH264Impl, MAYBE_EncodeDecode) { + VideoFrame input_frame = NextInputFrame(); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + // First frame should be a key frame. + encoded_frame._frameType = VideoFrameType::kVideoFrameKey; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0)); + std::unique_ptr<VideoFrame> decoded_frame; + absl::optional<uint8_t> decoded_qp; + ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp)); + ASSERT_TRUE(decoded_frame); + EXPECT_GT(I420PSNR(&input_frame, decoded_frame.get()), 36); + + const ColorSpace color_space = *decoded_frame->color_space(); + EXPECT_EQ(ColorSpace::PrimaryID::kUnspecified, color_space.primaries()); + EXPECT_EQ(ColorSpace::TransferID::kUnspecified, color_space.transfer()); + EXPECT_EQ(ColorSpace::MatrixID::kUnspecified, color_space.matrix()); + EXPECT_EQ(ColorSpace::RangeID::kInvalid, color_space.range()); + EXPECT_EQ(ColorSpace::ChromaSiting::kUnspecified, + color_space.chroma_siting_horizontal()); + EXPECT_EQ(ColorSpace::ChromaSiting::kUnspecified, + color_space.chroma_siting_vertical()); +} + +TEST_F(TestH264Impl, MAYBE_DecodedQpEqualsEncodedQp) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + // First frame should be a 
key frame. + encoded_frame._frameType = VideoFrameType::kVideoFrameKey; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0)); + std::unique_ptr<VideoFrame> decoded_frame; + absl::optional<uint8_t> decoded_qp; + ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp)); + ASSERT_TRUE(decoded_frame); + ASSERT_TRUE(decoded_qp); + EXPECT_EQ(encoded_frame.qp_, *decoded_qp); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/interface/common_constants.h b/third_party/libwebrtc/modules/video_coding/codecs/interface/common_constants.h new file mode 100644 index 0000000000..a8fc6290b9 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/interface/common_constants.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +// This file contains constants that are used by multiple global +// codec definitions (modules/video_coding/codecs/*/include/*_globals.h) + +#ifndef MODULES_VIDEO_CODING_CODECS_INTERFACE_COMMON_CONSTANTS_H_ +#define MODULES_VIDEO_CODING_CODECS_INTERFACE_COMMON_CONSTANTS_H_ + +#include <stdint.h> + +namespace webrtc { + +const int16_t kNoPictureId = -1; +const int16_t kNoTl0PicIdx = -1; +const uint8_t kNoTemporalIdx = 0xFF; +const int kNoKeyIdx = -1; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_INTERFACE_COMMON_CONSTANTS_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/interface/libvpx_interface.cc b/third_party/libwebrtc/modules/video_coding/codecs/interface/libvpx_interface.cc new file mode 100644 index 0000000000..4f33bef2ba --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/interface/libvpx_interface.cc @@ -0,0 +1,373 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/interface/libvpx_interface.h" + +#include <memory> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { +class LibvpxFacade : public LibvpxInterface { + public: + LibvpxFacade() = default; + ~LibvpxFacade() override = default; + + vpx_image_t* img_alloc(vpx_image_t* img, + vpx_img_fmt_t fmt, + unsigned int d_w, + unsigned int d_h, + unsigned int align) const override { + return ::vpx_img_alloc(img, fmt, d_w, d_h, align); + } + + vpx_image_t* img_wrap(vpx_image_t* img, + vpx_img_fmt_t fmt, + unsigned int d_w, + unsigned int d_h, + unsigned int stride_align, + unsigned char* img_data) const override { + return ::vpx_img_wrap(img, fmt, d_w, d_h, stride_align, img_data); + } + + void img_free(vpx_image_t* img) const override { ::vpx_img_free(img); } + + vpx_codec_err_t codec_enc_config_set( + vpx_codec_ctx_t* ctx, + const vpx_codec_enc_cfg_t* cfg) const override { + return ::vpx_codec_enc_config_set(ctx, cfg); + } + + vpx_codec_err_t codec_enc_config_default(vpx_codec_iface_t* iface, + vpx_codec_enc_cfg_t* cfg, + unsigned int usage) const override { + return ::vpx_codec_enc_config_default(iface, cfg, usage); + } + + vpx_codec_err_t codec_enc_init(vpx_codec_ctx_t* ctx, + vpx_codec_iface_t* iface, + const vpx_codec_enc_cfg_t* cfg, + vpx_codec_flags_t flags) const override { + return ::vpx_codec_enc_init(ctx, iface, cfg, flags); + } + + vpx_codec_err_t codec_enc_init_multi(vpx_codec_ctx_t* ctx, + vpx_codec_iface_t* iface, + vpx_codec_enc_cfg_t* cfg, + int num_enc, + vpx_codec_flags_t flags, + vpx_rational_t* dsf) const override { + return ::vpx_codec_enc_init_multi(ctx, iface, cfg, num_enc, flags, dsf); + } + + vpx_codec_err_t codec_destroy(vpx_codec_ctx_t* ctx) const override { + return ::vpx_codec_destroy(ctx); + } + + // For types related to these parameters, see section + // "VP8 encoder control function parameter type" in vpx/vp8cx.h. 
+ + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + uint32_t param) const override { + // We need an explicit call for each type since vpx_codec_control is a + // macro that gets expanded into another call based on the parameter name. + switch (ctrl_id) { + case VP8E_SET_ENABLEAUTOALTREF: + return vpx_codec_control(ctx, VP8E_SET_ENABLEAUTOALTREF, param); + case VP8E_SET_NOISE_SENSITIVITY: + return vpx_codec_control(ctx, VP8E_SET_NOISE_SENSITIVITY, param); + case VP8E_SET_SHARPNESS: + return vpx_codec_control(ctx, VP8E_SET_SHARPNESS, param); + case VP8E_SET_STATIC_THRESHOLD: + return vpx_codec_control(ctx, VP8E_SET_STATIC_THRESHOLD, param); + case VP8E_SET_ARNR_MAXFRAMES: + return vpx_codec_control(ctx, VP8E_SET_ARNR_MAXFRAMES, param); + case VP8E_SET_ARNR_STRENGTH: + return vpx_codec_control(ctx, VP8E_SET_ARNR_STRENGTH, param); + case VP8E_SET_CQ_LEVEL: + return vpx_codec_control(ctx, VP8E_SET_CQ_LEVEL, param); + case VP8E_SET_MAX_INTRA_BITRATE_PCT: + return vpx_codec_control(ctx, VP8E_SET_MAX_INTRA_BITRATE_PCT, param); + case VP9E_SET_MAX_INTER_BITRATE_PCT: + return vpx_codec_control(ctx, VP9E_SET_MAX_INTER_BITRATE_PCT, param); + case VP8E_SET_GF_CBR_BOOST_PCT: + return vpx_codec_control(ctx, VP8E_SET_GF_CBR_BOOST_PCT, param); + case VP8E_SET_SCREEN_CONTENT_MODE: + return vpx_codec_control(ctx, VP8E_SET_SCREEN_CONTENT_MODE, param); + case VP9E_SET_GF_CBR_BOOST_PCT: + return vpx_codec_control(ctx, VP9E_SET_GF_CBR_BOOST_PCT, param); + case VP9E_SET_LOSSLESS: + return vpx_codec_control(ctx, VP9E_SET_LOSSLESS, param); + case VP9E_SET_FRAME_PARALLEL_DECODING: + return vpx_codec_control(ctx, VP9E_SET_FRAME_PARALLEL_DECODING, param); + case VP9E_SET_AQ_MODE: + return vpx_codec_control(ctx, VP9E_SET_AQ_MODE, param); + case VP9E_SET_FRAME_PERIODIC_BOOST: + return vpx_codec_control(ctx, VP9E_SET_FRAME_PERIODIC_BOOST, param); + case VP9E_SET_NOISE_SENSITIVITY: + return vpx_codec_control(ctx, VP9E_SET_NOISE_SENSITIVITY, param); + case 
VP9E_SET_MIN_GF_INTERVAL: + return vpx_codec_control(ctx, VP9E_SET_MIN_GF_INTERVAL, param); + case VP9E_SET_MAX_GF_INTERVAL: + return vpx_codec_control(ctx, VP9E_SET_MAX_GF_INTERVAL, param); + case VP9E_SET_TARGET_LEVEL: + return vpx_codec_control(ctx, VP9E_SET_TARGET_LEVEL, param); + case VP9E_SET_ROW_MT: + return vpx_codec_control(ctx, VP9E_SET_ROW_MT, param); + case VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST: + return vpx_codec_control(ctx, VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, + param); + case VP9E_SET_SVC_INTER_LAYER_PRED: + return vpx_codec_control(ctx, VP9E_SET_SVC_INTER_LAYER_PRED, param); + case VP9E_SET_SVC_GF_TEMPORAL_REF: + return vpx_codec_control(ctx, VP9E_SET_SVC_GF_TEMPORAL_REF, param); + case VP9E_SET_POSTENCODE_DROP: + return vpx_codec_control(ctx, VP9E_SET_POSTENCODE_DROP, param); + default: + RTC_DCHECK_NOTREACHED() << "Unsupported libvpx ctrl_id: " << ctrl_id; + } + return VPX_CODEC_ERROR; + } + + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + int param) const override { + switch (ctrl_id) { + case VP8E_SET_FRAME_FLAGS: + return vpx_codec_control(ctx, VP8E_SET_FRAME_FLAGS, param); + case VP8E_SET_TEMPORAL_LAYER_ID: + return vpx_codec_control(ctx, VP8E_SET_TEMPORAL_LAYER_ID, param); + case VP9E_SET_SVC: + return vpx_codec_control(ctx, VP9E_SET_SVC, param); + case VP8E_SET_CPUUSED: + return vpx_codec_control(ctx, VP8E_SET_CPUUSED, param); + case VP8E_SET_TOKEN_PARTITIONS: + return vpx_codec_control(ctx, VP8E_SET_TOKEN_PARTITIONS, param); + case VP8E_SET_TUNING: + return vpx_codec_control(ctx, VP8E_SET_TUNING, param); + case VP9E_SET_TILE_COLUMNS: + return vpx_codec_control(ctx, VP9E_SET_TILE_COLUMNS, param); + case VP9E_SET_TILE_ROWS: + return vpx_codec_control(ctx, VP9E_SET_TILE_ROWS, param); + case VP9E_SET_TPL: + return vpx_codec_control(ctx, VP9E_SET_TPL, param); + case VP9E_SET_ALT_REF_AQ: + return vpx_codec_control(ctx, VP9E_SET_ALT_REF_AQ, param); + case VP9E_SET_TUNE_CONTENT: + return vpx_codec_control(ctx, 
VP9E_SET_TUNE_CONTENT, param); + case VP9E_SET_COLOR_SPACE: + return vpx_codec_control(ctx, VP9E_SET_COLOR_SPACE, param); + case VP9E_SET_COLOR_RANGE: + return vpx_codec_control(ctx, VP9E_SET_COLOR_RANGE, param); + case VP9E_SET_DELTA_Q_UV: + return vpx_codec_control(ctx, VP9E_SET_DELTA_Q_UV, param); + case VP9E_SET_DISABLE_OVERSHOOT_MAXQ_CBR: + return vpx_codec_control(ctx, VP9E_SET_DISABLE_OVERSHOOT_MAXQ_CBR, + param); + case VP9E_SET_DISABLE_LOOPFILTER: + return vpx_codec_control(ctx, VP9E_SET_DISABLE_LOOPFILTER, param); + + default: + if (param >= 0) { + // Might be intended for uint32_t but int literal used, try fallback. + return codec_control(ctx, ctrl_id, static_cast<uint32_t>(param)); + } + RTC_DCHECK_NOTREACHED() << "Unsupported libvpx ctrl_id: " << ctrl_id; + } + return VPX_CODEC_ERROR; + } + + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + int* param) const override { + switch (ctrl_id) { + case VP8E_GET_LAST_QUANTIZER: + return vpx_codec_control(ctx, VP8E_GET_LAST_QUANTIZER, param); + case VP8E_GET_LAST_QUANTIZER_64: + return vpx_codec_control(ctx, VP8E_GET_LAST_QUANTIZER_64, param); + case VP9E_SET_RENDER_SIZE: + return vpx_codec_control(ctx, VP9E_SET_RENDER_SIZE, param); + case VP9E_GET_LEVEL: + return vpx_codec_control(ctx, VP9E_GET_LEVEL, param); + default: + RTC_DCHECK_NOTREACHED() << "Unsupported libvpx ctrl_id: " << ctrl_id; + } + return VPX_CODEC_ERROR; + } + + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_roi_map* param) const override { + switch (ctrl_id) { + case VP8E_SET_ROI_MAP: + return vpx_codec_control(ctx, VP8E_SET_ROI_MAP, param); + case VP9E_SET_ROI_MAP: + return vpx_codec_control(ctx, VP9E_SET_ROI_MAP, param); + default: + RTC_DCHECK_NOTREACHED() << "Unsupported libvpx ctrl_id: " << ctrl_id; + } + return VPX_CODEC_ERROR; + } + + // Active-map controls. Each supported ctrl_id is forwarded to libvpx under + // that same id; any other id hits the DCHECK and yields VPX_CODEC_ERROR. + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_active_map* param) const override { + 
switch (ctrl_id) { + case VP8E_SET_ACTIVEMAP: + return vpx_codec_control(ctx, VP8E_SET_ACTIVEMAP, param); + case VP9E_GET_ACTIVEMAP: + // Dispatch with the GET id itself. Forwarding VP8E_SET_ACTIVEMAP here + // would issue a SET with the caller's buffer instead of the + // requested GET. + return vpx_codec_control(ctx, VP9E_GET_ACTIVEMAP, param); + default: + RTC_DCHECK_NOTREACHED() << "Unsupported libvpx ctrl_id: " << ctrl_id; + } + return VPX_CODEC_ERROR; + } + + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_scaling_mode* param) const override { + switch (ctrl_id) { + case VP8E_SET_SCALEMODE: + return vpx_codec_control(ctx, VP8E_SET_SCALEMODE, param); + default: + RTC_DCHECK_NOTREACHED() << "Unsupported libvpx ctrl_id: " << ctrl_id; + } + return VPX_CODEC_ERROR; + } + + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_svc_extra_cfg_t* param) const override { + switch (ctrl_id) { + case VP9E_SET_SVC_PARAMETERS: + return vpx_codec_control_(ctx, VP9E_SET_SVC_PARAMETERS, param); + default: + RTC_DCHECK_NOTREACHED() << "Unsupported libvpx ctrl_id: " << ctrl_id; + } + return VPX_CODEC_ERROR; + } + + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_svc_frame_drop_t* param) const override { + switch (ctrl_id) { + case VP9E_SET_SVC_FRAME_DROP_LAYER: + return vpx_codec_control_(ctx, VP9E_SET_SVC_FRAME_DROP_LAYER, param); + default: + RTC_DCHECK_NOTREACHED() << "Unsupported libvpx ctrl_id: " << ctrl_id; + } + return VPX_CODEC_ERROR; + } + + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + void* param) const override { + switch (ctrl_id) { + case VP9E_SET_SVC_PARAMETERS: + return vpx_codec_control_(ctx, VP9E_SET_SVC_PARAMETERS, param); + case VP9E_REGISTER_CX_CALLBACK: + return vpx_codec_control_(ctx, VP9E_REGISTER_CX_CALLBACK, param); + default: + RTC_DCHECK_NOTREACHED() << "Unsupported libvpx ctrl_id: " << ctrl_id; + } + return VPX_CODEC_ERROR; + } + + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_svc_layer_id_t* param) const 
override { + switch (ctrl_id) { + case VP9E_SET_SVC_LAYER_ID: + return vpx_codec_control_(ctx, VP9E_SET_SVC_LAYER_ID, param); + case VP9E_GET_SVC_LAYER_ID: + return vpx_codec_control_(ctx, VP9E_GET_SVC_LAYER_ID, param); + default: + RTC_DCHECK_NOTREACHED() << "Unsupported libvpx ctrl_id: " << ctrl_id; + } + return VPX_CODEC_ERROR; + } + + vpx_codec_err_t codec_control( + vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_svc_ref_frame_config_t* param) const override { + switch (ctrl_id) { + case VP9E_SET_SVC_REF_FRAME_CONFIG: + return vpx_codec_control_(ctx, VP9E_SET_SVC_REF_FRAME_CONFIG, param); + case VP9E_GET_SVC_REF_FRAME_CONFIG: + return vpx_codec_control_(ctx, VP9E_GET_SVC_REF_FRAME_CONFIG, param); + default: + RTC_DCHECK_NOTREACHED() << "Unsupported libvpx ctrl_id: " << ctrl_id; + } + return VPX_CODEC_ERROR; + } + + vpx_codec_err_t codec_control( + vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_svc_spatial_layer_sync_t* param) const override { + switch (ctrl_id) { + case VP9E_SET_SVC_SPATIAL_LAYER_SYNC: + return vpx_codec_control_(ctx, VP9E_SET_SVC_SPATIAL_LAYER_SYNC, param); + default: + RTC_DCHECK_NOTREACHED() << "Unsupported libvpx ctrl_id: " << ctrl_id; + } + return VPX_CODEC_ERROR; + } + + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_rc_funcs_t* param) const override { + switch (ctrl_id) { + case VP9E_SET_EXTERNAL_RATE_CONTROL: + return vpx_codec_control_(ctx, VP9E_SET_EXTERNAL_RATE_CONTROL, param); + default: + RTC_DCHECK_NOTREACHED() << "Unsupported libvpx ctrl_id: " << ctrl_id; + } + return VPX_CODEC_ERROR; + } + + vpx_codec_err_t codec_encode(vpx_codec_ctx_t* ctx, + const vpx_image_t* img, + vpx_codec_pts_t pts, + uint64_t duration, + vpx_enc_frame_flags_t flags, + uint64_t deadline) const override { + return ::vpx_codec_encode(ctx, img, pts, duration, flags, deadline); + } + + const vpx_codec_cx_pkt_t* codec_get_cx_data( + vpx_codec_ctx_t* ctx, + vpx_codec_iter_t* iter) const override 
{ + return ::vpx_codec_get_cx_data(ctx, iter); + } + + const char* codec_error_detail(vpx_codec_ctx_t* ctx) const override { + return ::vpx_codec_error_detail(ctx); + } + + const char* codec_error(vpx_codec_ctx_t* ctx) const override { + return ::vpx_codec_error(ctx); + } + + const char* codec_err_to_string(vpx_codec_err_t err) const override { + return ::vpx_codec_err_to_string(err); + } +}; + +} // namespace + +std::unique_ptr<LibvpxInterface> LibvpxInterface::Create() { + return std::make_unique<LibvpxFacade>(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/interface/libvpx_interface.h b/third_party/libwebrtc/modules/video_coding/codecs/interface/libvpx_interface.h new file mode 100644 index 0000000000..3dea24dd6d --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/interface/libvpx_interface.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_INTERFACE_LIBVPX_INTERFACE_H_ +#define MODULES_VIDEO_CODING_CODECS_INTERFACE_LIBVPX_INTERFACE_H_ + +#include <stdint.h> + +#include <memory> + +#include "vpx/vp8cx.h" +#include "vpx/vpx_codec.h" +#include "vpx/vpx_encoder.h" +#include "vpx/vpx_image.h" + +namespace webrtc { + +// This interface is a proxy to the static libvpx functions, so that they +// can be mocked for testing. Currently supports VP8 encoder functions. +// TODO(sprang): Extend this to VP8 decoder and VP9 encoder/decoder too. 
+class LibvpxInterface { + public: + LibvpxInterface() = default; + virtual ~LibvpxInterface() = default; + + virtual vpx_image_t* img_alloc(vpx_image_t* img, + vpx_img_fmt_t fmt, + unsigned int d_w, + unsigned int d_h, + unsigned int align) const = 0; + virtual vpx_image_t* img_wrap(vpx_image_t* img, + vpx_img_fmt_t fmt, + unsigned int d_w, + unsigned int d_h, + unsigned int stride_align, + unsigned char* img_data) const = 0; + virtual void img_free(vpx_image_t* img) const = 0; + + virtual vpx_codec_err_t codec_enc_config_set( + vpx_codec_ctx_t* ctx, + const vpx_codec_enc_cfg_t* cfg) const = 0; + virtual vpx_codec_err_t codec_enc_config_default( + vpx_codec_iface_t* iface, + vpx_codec_enc_cfg_t* cfg, + unsigned int usage) const = 0; + + virtual vpx_codec_err_t codec_enc_init(vpx_codec_ctx_t* ctx, + vpx_codec_iface_t* iface, + const vpx_codec_enc_cfg_t* cfg, + vpx_codec_flags_t flags) const = 0; + virtual vpx_codec_err_t codec_enc_init_multi(vpx_codec_ctx_t* ctx, + vpx_codec_iface_t* iface, + vpx_codec_enc_cfg_t* cfg, + int num_enc, + vpx_codec_flags_t flags, + vpx_rational_t* dsf) const = 0; + virtual vpx_codec_err_t codec_destroy(vpx_codec_ctx_t* ctx) const = 0; + + virtual vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + uint32_t param) const = 0; + virtual vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + int param) const = 0; + virtual vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + int* param) const = 0; + virtual vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_roi_map* param) const = 0; + virtual vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_active_map* param) const = 0; + virtual vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_scaling_mode* param) const = 0; + virtual vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + 
vp8e_enc_control_id ctrl_id, + vpx_svc_extra_cfg_t* param) const = 0; + virtual vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_svc_frame_drop_t* param) const = 0; + virtual vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + void* param) const = 0; + virtual vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_svc_layer_id_t* param) const = 0; + virtual vpx_codec_err_t codec_control( + vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_svc_ref_frame_config_t* param) const = 0; + virtual vpx_codec_err_t codec_control( + vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_svc_spatial_layer_sync_t* param) const = 0; + virtual vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_rc_funcs_t* param) const = 0; + virtual vpx_codec_err_t codec_encode(vpx_codec_ctx_t* ctx, + const vpx_image_t* img, + vpx_codec_pts_t pts, + uint64_t duration, + vpx_enc_frame_flags_t flags, + uint64_t deadline) const = 0; + + virtual const vpx_codec_cx_pkt_t* codec_get_cx_data( + vpx_codec_ctx_t* ctx, + vpx_codec_iter_t* iter) const = 0; + + virtual const char* codec_error_detail(vpx_codec_ctx_t* ctx) const = 0; + virtual const char* codec_error(vpx_codec_ctx_t* ctx) const = 0; + virtual const char* codec_err_to_string(vpx_codec_err_t err) const = 0; + + // Returns interface wrapping the actual libvpx functions. 
+ static std::unique_ptr<LibvpxInterface> Create(); +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_INTERFACE_LIBVPX_INTERFACE_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/interface/mock_libvpx_interface.h b/third_party/libwebrtc/modules/video_coding/codecs/interface/mock_libvpx_interface.h new file mode 100644 index 0000000000..6dfe733dd0 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/interface/mock_libvpx_interface.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_INTERFACE_MOCK_LIBVPX_INTERFACE_H_ +#define MODULES_VIDEO_CODING_CODECS_INTERFACE_MOCK_LIBVPX_INTERFACE_H_ + +#include "modules/video_coding/codecs/interface/libvpx_interface.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { + +class MockLibvpxInterface : public LibvpxInterface { + public: + MOCK_METHOD( + vpx_image_t*, + img_alloc, + (vpx_image_t*, vpx_img_fmt_t, unsigned int, unsigned int, unsigned int), + (const, override)); + MOCK_METHOD(vpx_image_t*, + img_wrap, + (vpx_image_t*, + vpx_img_fmt_t, + unsigned int, + unsigned int, + unsigned int, + unsigned char*), + (const, override)); + MOCK_METHOD(void, img_free, (vpx_image_t * img), (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_enc_config_set, + (vpx_codec_ctx_t*, const vpx_codec_enc_cfg_t*), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_enc_config_default, + (vpx_codec_iface_t*, vpx_codec_enc_cfg_t*, unsigned int), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_enc_init, + (vpx_codec_ctx_t*, + 
vpx_codec_iface_t*, + const vpx_codec_enc_cfg_t*, + vpx_codec_flags_t), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_enc_init_multi, + (vpx_codec_ctx_t*, + vpx_codec_iface_t*, + vpx_codec_enc_cfg_t*, + int, + vpx_codec_flags_t, + vpx_rational_t*), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_destroy, + (vpx_codec_ctx_t*), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_control, + (vpx_codec_ctx_t*, vp8e_enc_control_id, uint32_t), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_control, + (vpx_codec_ctx_t*, vp8e_enc_control_id, int), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_control, + (vpx_codec_ctx_t*, vp8e_enc_control_id, int*), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_control, + (vpx_codec_ctx_t*, vp8e_enc_control_id, vpx_roi_map*), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_control, + (vpx_codec_ctx_t*, vp8e_enc_control_id, vpx_active_map*), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_control, + (vpx_codec_ctx_t*, vp8e_enc_control_id, vpx_scaling_mode*), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_control, + (vpx_codec_ctx_t*, vp8e_enc_control_id, vpx_svc_extra_cfg_t*), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_control, + (vpx_codec_ctx_t*, vp8e_enc_control_id, vpx_svc_frame_drop_t*), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_control, + (vpx_codec_ctx_t*, vp8e_enc_control_id, void*), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_control, + (vpx_codec_ctx_t*, vp8e_enc_control_id, vpx_svc_layer_id_t*), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_control, + (vpx_codec_ctx_t*, + vp8e_enc_control_id, + vpx_svc_ref_frame_config_t*), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_control, + (vpx_codec_ctx_t*, + vp8e_enc_control_id, + vpx_svc_spatial_layer_sync_t*), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_control, + 
(vpx_codec_ctx_t*, vp8e_enc_control_id, vpx_rc_funcs_t*), + (const, override)); + MOCK_METHOD(vpx_codec_err_t, + codec_encode, + (vpx_codec_ctx_t*, + const vpx_image_t*, + vpx_codec_pts_t, + uint64_t, + vpx_enc_frame_flags_t, + uint64_t), + (const, override)); + MOCK_METHOD(const vpx_codec_cx_pkt_t*, + codec_get_cx_data, + (vpx_codec_ctx_t*, vpx_codec_iter_t*), + (const, override)); + MOCK_METHOD(const char*, + codec_error_detail, + (vpx_codec_ctx_t*), + (const, override)); + MOCK_METHOD(const char*, codec_error, (vpx_codec_ctx_t*), (const, override)); + MOCK_METHOD(const char*, + codec_err_to_string, + (vpx_codec_err_t), + (const, override)); +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_INTERFACE_MOCK_LIBVPX_INTERFACE_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/multiplex/augmented_video_frame_buffer.cc b/third_party/libwebrtc/modules/video_coding/codecs/multiplex/augmented_video_frame_buffer.cc new file mode 100644 index 0000000000..8740884f5b --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/multiplex/augmented_video_frame_buffer.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h" + +#include <stdint.h> + +#include <utility> + +#include "api/video/video_frame_buffer.h" + +namespace webrtc { + +AugmentedVideoFrameBuffer::AugmentedVideoFrameBuffer( + const rtc::scoped_refptr<VideoFrameBuffer>& video_frame_buffer, + std::unique_ptr<uint8_t[]> augmenting_data, + uint16_t augmenting_data_size) + : augmenting_data_size_(augmenting_data_size), + augmenting_data_(std::move(augmenting_data)), + video_frame_buffer_(video_frame_buffer) {} + +rtc::scoped_refptr<VideoFrameBuffer> +AugmentedVideoFrameBuffer::GetVideoFrameBuffer() const { + return video_frame_buffer_; +} + +uint8_t* AugmentedVideoFrameBuffer::GetAugmentingData() const { + return augmenting_data_.get(); +} + +uint16_t AugmentedVideoFrameBuffer::GetAugmentingDataSize() const { + return augmenting_data_size_; +} + +VideoFrameBuffer::Type AugmentedVideoFrameBuffer::type() const { + return video_frame_buffer_->type(); +} + +int AugmentedVideoFrameBuffer::width() const { + return video_frame_buffer_->width(); +} + +int AugmentedVideoFrameBuffer::height() const { + return video_frame_buffer_->height(); +} + +rtc::scoped_refptr<I420BufferInterface> AugmentedVideoFrameBuffer::ToI420() { + return video_frame_buffer_->ToI420(); +} + +const I420BufferInterface* AugmentedVideoFrameBuffer::GetI420() const { + // TODO(https://crbug.com/webrtc/12021): When AugmentedVideoFrameBuffer is + // updated to implement the buffer interfaces of relevant + // VideoFrameBuffer::Types, stop overriding GetI420() as a workaround to + // AugmentedVideoFrameBuffer not being the type that is returned by type(). 
+ return video_frame_buffer_->GetI420(); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h b/third_party/libwebrtc/modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h new file mode 100644 index 0000000000..d711cd07da --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_MULTIPLEX_INCLUDE_AUGMENTED_VIDEO_FRAME_BUFFER_H_ +#define MODULES_VIDEO_CODING_CODECS_MULTIPLEX_INCLUDE_AUGMENTED_VIDEO_FRAME_BUFFER_H_ + +#include <cstdint> +#include <memory> + +#include "api/scoped_refptr.h" +#include "api/video/video_frame_buffer.h" + +namespace webrtc { +class AugmentedVideoFrameBuffer : public VideoFrameBuffer { + public: + AugmentedVideoFrameBuffer( + const rtc::scoped_refptr<VideoFrameBuffer>& video_frame_buffer, + std::unique_ptr<uint8_t[]> augmenting_data, + uint16_t augmenting_data_size); + + // Retrieves the underlying VideoFrameBuffer without the augmented data + rtc::scoped_refptr<VideoFrameBuffer> GetVideoFrameBuffer() const; + + // Returns a raw pointer to the augmenting data. Ownership is NOT + // transferred: the data stays owned by this object (it is held in a + // std::unique_ptr member), so the caller must not free it. + uint8_t* GetAugmentingData() const; + + // Get the size of the augmenting data + uint16_t GetAugmentingDataSize() const; + + // Returns the type of the underlying VideoFrameBuffer + Type type() const final; + + // Returns the width of the underlying VideoFrameBuffer + int width() const final; + + // Returns the height of the 
underlying VideoFrameBuffer + int height() const final; + + // Returns ToI420() of the underlying VideoFrameBuffer. + rtc::scoped_refptr<I420BufferInterface> ToI420() final; + // Returns GetI420() of the underlying VideoFrameBuffer. + // TODO(hbos): AugmentedVideoFrameBuffer should not return a type (such as + // kI420) without also implementing that type's interface (i.e. + // I420BufferInterface). Either implement all possible Type's interfaces or + // return kNative. + const I420BufferInterface* GetI420() const final; + + private: + uint16_t augmenting_data_size_; + std::unique_ptr<uint8_t[]> augmenting_data_; + rtc::scoped_refptr<webrtc::VideoFrameBuffer> video_frame_buffer_; +}; +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_MULTIPLEX_INCLUDE_AUGMENTED_VIDEO_FRAME_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/multiplex/include/multiplex_decoder_adapter.h b/third_party/libwebrtc/modules/video_coding/codecs/multiplex/include/multiplex_decoder_adapter.h new file mode 100644 index 0000000000..e73f7d0e9f --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/multiplex/include/multiplex_decoder_adapter.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_VIDEO_CODING_CODECS_MULTIPLEX_INCLUDE_MULTIPLEX_DECODER_ADAPTER_H_ +#define MODULES_VIDEO_CODING_CODECS_MULTIPLEX_INCLUDE_MULTIPLEX_DECODER_ADAPTER_H_ + +#include <map> +#include <memory> +#include <vector> + +#include "api/video_codecs/sdp_video_format.h" +#include "api/video_codecs/video_decoder.h" +#include "api/video_codecs/video_decoder_factory.h" +#include "modules/video_coding/codecs/multiplex/include/multiplex_encoder_adapter.h" + +namespace webrtc { + +class MultiplexDecoderAdapter : public VideoDecoder { + public: + // `factory` is not owned and expected to outlive this class. + MultiplexDecoderAdapter(VideoDecoderFactory* factory, + const SdpVideoFormat& associated_format, + bool supports_augmenting_data = false); + virtual ~MultiplexDecoderAdapter(); + + // Implements VideoDecoder + bool Configure(const Settings& settings) override; + int32_t Decode(const EncodedImage& input_image, + bool missing_frames, + int64_t render_time_ms) override; + int32_t RegisterDecodeCompleteCallback( + DecodedImageCallback* callback) override; + int32_t Release() override; + + void Decoded(AlphaCodecStream stream_idx, + VideoFrame* decoded_image, + absl::optional<int32_t> decode_time_ms, + absl::optional<uint8_t> qp); + + private: + // Wrapper class that redirects Decoded() calls. + class AdapterDecodedImageCallback; + + // Holds the decoded image output of a frame. 
+ struct DecodedImageData; + + // Holds the augmenting data of an image + struct AugmentingData; + + void MergeAlphaImages(VideoFrame* decoded_image, + const absl::optional<int32_t>& decode_time_ms, + const absl::optional<uint8_t>& qp, + VideoFrame* multiplex_decoded_image, + const absl::optional<int32_t>& multiplex_decode_time_ms, + const absl::optional<uint8_t>& multiplex_qp, + std::unique_ptr<uint8_t[]> augmenting_data, + uint16_t augmenting_data_length); + + VideoDecoderFactory* const factory_; + const SdpVideoFormat associated_format_; + std::vector<std::unique_ptr<VideoDecoder>> decoders_; + std::vector<std::unique_ptr<AdapterDecodedImageCallback>> adapter_callbacks_; + DecodedImageCallback* decoded_complete_callback_; + + // Holds YUV or AXX decode output of a frame that is identified by timestamp. + std::map<uint32_t /* timestamp */, DecodedImageData> decoded_data_; + std::map<uint32_t /* timestamp */, AugmentingData> decoded_augmenting_data_; + const bool supports_augmenting_data_; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_MULTIPLEX_INCLUDE_MULTIPLEX_DECODER_ADAPTER_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/multiplex/include/multiplex_encoder_adapter.h b/third_party/libwebrtc/modules/video_coding/codecs/multiplex/include/multiplex_encoder_adapter.h new file mode 100644 index 0000000000..2e5aad8a5b --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/multiplex/include/multiplex_encoder_adapter.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
// Encoder for the "multiplex" codec. It owns one sub-encoder per
// AlphaCodecStream, created from `factory` in InitEncode(). The YUV part of a
// frame is always encoded on the kYUVStream encoder; the encoded images
// reported by the sub-encoders come back through OnEncodedImage().
class MultiplexEncoderAdapter : public VideoEncoder {
 public:
  // `factory` is not owned and expected to outlive this class.
  // `associated_format` is the format used to create the sub-encoders.
  // `supports_augmenting_data` makes Encode() treat the input buffer as an
  // AugmentedVideoFrameBuffer and copy its augmenting data.
  MultiplexEncoderAdapter(VideoEncoderFactory* factory,
                          const SdpVideoFormat& associated_format,
                          bool supports_augmenting_data = false);
  virtual ~MultiplexEncoderAdapter();

  // Implements VideoEncoder
  void SetFecControllerOverride(
      FecControllerOverride* fec_controller_override) override;
  int InitEncode(const VideoCodec* inst,
                 const VideoEncoder::Settings& settings) override;
  int Encode(const VideoFrame& input_image,
             const std::vector<VideoFrameType>* frame_types) override;
  int RegisterEncodeCompleteCallback(EncodedImageCallback* callback) override;
  void SetRates(const RateControlParameters& parameters) override;
  void OnPacketLossRateUpdate(float packet_loss_rate) override;
  void OnRttUpdate(int64_t rtt_ms) override;
  void OnLossNotification(const LossNotification& loss_notification) override;
  int Release() override;
  EncoderInfo GetEncoderInfo() const override;

  // Called by the per-stream callback wrappers with the index of the
  // sub-encoder stream that produced `encodedImage`.
  EncodedImageCallback::Result OnEncodedImage(
      AlphaCodecStream stream_idx,
      const EncodedImage& encodedImage,
      const CodecSpecificInfo* codecSpecificInfo);

 private:
  // Wrapper class that redirects OnEncodedImage() calls.
  class AdapterEncodedImageCallback;

  VideoEncoderFactory* const factory_;
  const SdpVideoFormat associated_format_;
  // One sub-encoder and one callback wrapper per AlphaCodecStream; both
  // vectors are filled in InitEncode().
  std::vector<std::unique_ptr<VideoEncoder>> encoders_;
  std::vector<std::unique_ptr<AdapterEncodedImageCallback>> adapter_callbacks_;
  EncodedImageCallback* encoded_complete_callback_;

  // Per-picture bundles created in Encode(), keyed by the frame's RTP
  // timestamp.
  std::map<uint32_t /* timestamp */, MultiplexImage> stashed_images_
      RTC_GUARDED_BY(mutex_);

  // Incremented once per Encode() call; also drives the key frame cadence.
  uint16_t picture_index_ = 0;
  // I420-sized buffer filled with 0x80 in InitEncode().
  std::vector<uint8_t> multiplex_dummy_planes_;

  // Key frame interval taken over from the sub-codec settings in
  // InitEncode(), so that key frames stay in sync across sub-encoders.
  int key_frame_interval_;
  // NOTE(review): presumably the most recently packed image; its use is not
  // visible in this part of the file - confirm.
  EncodedImage combined_image_;

  Mutex mutex_;

  const bool supports_augmented_data_;
  // Size of the augmenting data copied by the latest Encode() call.
  int augmenting_data_size_ = 0;

  // Aggregated from the sub-encoders' EncoderInfo in InitEncode().
  EncoderInfo encoder_info_;
};
+ */ + +#include "modules/video_coding/codecs/multiplex/include/multiplex_decoder_adapter.h" + +#include "api/video/encoded_image.h" +#include "api/video/i420_buffer.h" +#include "api/video/video_frame_buffer.h" +#include "common_video/include/video_frame_buffer.h" +#include "common_video/libyuv/include/webrtc_libyuv.h" +#include "modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h" +#include "modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +class MultiplexDecoderAdapter::AdapterDecodedImageCallback + : public webrtc::DecodedImageCallback { + public: + AdapterDecodedImageCallback(webrtc::MultiplexDecoderAdapter* adapter, + AlphaCodecStream stream_idx) + : adapter_(adapter), stream_idx_(stream_idx) {} + + void Decoded(VideoFrame& decoded_image, + absl::optional<int32_t> decode_time_ms, + absl::optional<uint8_t> qp) override { + if (!adapter_) + return; + adapter_->Decoded(stream_idx_, &decoded_image, decode_time_ms, qp); + } + int32_t Decoded(VideoFrame& decoded_image) override { + RTC_DCHECK_NOTREACHED(); + return WEBRTC_VIDEO_CODEC_OK; + } + int32_t Decoded(VideoFrame& decoded_image, int64_t decode_time_ms) override { + RTC_DCHECK_NOTREACHED(); + return WEBRTC_VIDEO_CODEC_OK; + } + + private: + MultiplexDecoderAdapter* adapter_; + const AlphaCodecStream stream_idx_; +}; + +struct MultiplexDecoderAdapter::DecodedImageData { + explicit DecodedImageData(AlphaCodecStream stream_idx) + : stream_idx_(stream_idx), + decoded_image_( + VideoFrame::Builder() + .set_video_frame_buffer( + I420Buffer::Create(1 /* width */, 1 /* height */)) + .set_timestamp_rtp(0) + .set_timestamp_us(0) + .set_rotation(kVideoRotation_0) + .build()) { + RTC_DCHECK_EQ(kAXXStream, stream_idx); + } + DecodedImageData(AlphaCodecStream stream_idx, + const VideoFrame& decoded_image, + const absl::optional<int32_t>& decode_time_ms, + const absl::optional<uint8_t>& qp) + : stream_idx_(stream_idx), + 
decoded_image_(decoded_image), + decode_time_ms_(decode_time_ms), + qp_(qp) {} + + DecodedImageData() = delete; + DecodedImageData(const DecodedImageData&) = delete; + DecodedImageData& operator=(const DecodedImageData&) = delete; + + const AlphaCodecStream stream_idx_; + VideoFrame decoded_image_; + const absl::optional<int32_t> decode_time_ms_; + const absl::optional<uint8_t> qp_; +}; + +struct MultiplexDecoderAdapter::AugmentingData { + AugmentingData(std::unique_ptr<uint8_t[]> augmenting_data, uint16_t data_size) + : data_(std::move(augmenting_data)), size_(data_size) {} + AugmentingData() = delete; + AugmentingData(const AugmentingData&) = delete; + AugmentingData& operator=(const AugmentingData&) = delete; + + std::unique_ptr<uint8_t[]> data_; + const uint16_t size_; +}; + +MultiplexDecoderAdapter::MultiplexDecoderAdapter( + VideoDecoderFactory* factory, + const SdpVideoFormat& associated_format, + bool supports_augmenting_data) + : factory_(factory), + associated_format_(associated_format), + supports_augmenting_data_(supports_augmenting_data) {} + +MultiplexDecoderAdapter::~MultiplexDecoderAdapter() { + Release(); +} + +bool MultiplexDecoderAdapter::Configure(const Settings& settings) { + RTC_DCHECK_EQ(settings.codec_type(), kVideoCodecMultiplex); + Settings associated_settings = settings; + associated_settings.set_codec_type( + PayloadStringToCodecType(associated_format_.name)); + for (size_t i = 0; i < kAlphaCodecStreams; ++i) { + std::unique_ptr<VideoDecoder> decoder = + factory_->CreateVideoDecoder(associated_format_); + if (!decoder->Configure(associated_settings)) { + return false; + } + adapter_callbacks_.emplace_back( + new MultiplexDecoderAdapter::AdapterDecodedImageCallback( + this, static_cast<AlphaCodecStream>(i))); + decoder->RegisterDecodeCompleteCallback(adapter_callbacks_.back().get()); + decoders_.emplace_back(std::move(decoder)); + } + return true; +} + +int32_t MultiplexDecoderAdapter::Decode(const EncodedImage& input_image, + bool 
missing_frames, + int64_t render_time_ms) { + MultiplexImage image = MultiplexEncodedImagePacker::Unpack(input_image); + + if (supports_augmenting_data_) { + RTC_DCHECK(decoded_augmenting_data_.find(input_image.Timestamp()) == + decoded_augmenting_data_.end()); + decoded_augmenting_data_.emplace( + std::piecewise_construct, + std::forward_as_tuple(input_image.Timestamp()), + std::forward_as_tuple(std::move(image.augmenting_data), + image.augmenting_data_size)); + } + + if (image.component_count == 1) { + RTC_DCHECK(decoded_data_.find(input_image.Timestamp()) == + decoded_data_.end()); + decoded_data_.emplace(std::piecewise_construct, + std::forward_as_tuple(input_image.Timestamp()), + std::forward_as_tuple(kAXXStream)); + } + int32_t rv = 0; + for (size_t i = 0; i < image.image_components.size(); i++) { + rv = decoders_[image.image_components[i].component_index]->Decode( + image.image_components[i].encoded_image, missing_frames, + render_time_ms); + if (rv != WEBRTC_VIDEO_CODEC_OK) + return rv; + } + return rv; +} + +int32_t MultiplexDecoderAdapter::RegisterDecodeCompleteCallback( + DecodedImageCallback* callback) { + decoded_complete_callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t MultiplexDecoderAdapter::Release() { + for (auto& decoder : decoders_) { + const int32_t rv = decoder->Release(); + if (rv) + return rv; + } + decoders_.clear(); + adapter_callbacks_.clear(); + return WEBRTC_VIDEO_CODEC_OK; +} + +void MultiplexDecoderAdapter::Decoded(AlphaCodecStream stream_idx, + VideoFrame* decoded_image, + absl::optional<int32_t> decode_time_ms, + absl::optional<uint8_t> qp) { + const auto& other_decoded_data_it = + decoded_data_.find(decoded_image->timestamp()); + const auto& augmenting_data_it = + decoded_augmenting_data_.find(decoded_image->timestamp()); + const bool has_augmenting_data = + augmenting_data_it != decoded_augmenting_data_.end(); + if (other_decoded_data_it != decoded_data_.end()) { + uint16_t augmenting_data_size = + 
has_augmenting_data ? augmenting_data_it->second.size_ : 0; + std::unique_ptr<uint8_t[]> augmenting_data = + has_augmenting_data ? std::move(augmenting_data_it->second.data_) + : nullptr; + auto& other_image_data = other_decoded_data_it->second; + if (stream_idx == kYUVStream) { + RTC_DCHECK_EQ(kAXXStream, other_image_data.stream_idx_); + MergeAlphaImages(decoded_image, decode_time_ms, qp, + &other_image_data.decoded_image_, + other_image_data.decode_time_ms_, other_image_data.qp_, + std::move(augmenting_data), augmenting_data_size); + } else { + RTC_DCHECK_EQ(kYUVStream, other_image_data.stream_idx_); + RTC_DCHECK_EQ(kAXXStream, stream_idx); + MergeAlphaImages(&other_image_data.decoded_image_, + other_image_data.decode_time_ms_, other_image_data.qp_, + decoded_image, decode_time_ms, qp, + std::move(augmenting_data), augmenting_data_size); + } + decoded_data_.erase(decoded_data_.begin(), other_decoded_data_it); + if (has_augmenting_data) { + decoded_augmenting_data_.erase(decoded_augmenting_data_.begin(), + augmenting_data_it); + } + return; + } + RTC_DCHECK(decoded_data_.find(decoded_image->timestamp()) == + decoded_data_.end()); + decoded_data_.emplace( + std::piecewise_construct, + std::forward_as_tuple(decoded_image->timestamp()), + std::forward_as_tuple(stream_idx, *decoded_image, decode_time_ms, qp)); +} + +void MultiplexDecoderAdapter::MergeAlphaImages( + VideoFrame* decoded_image, + const absl::optional<int32_t>& decode_time_ms, + const absl::optional<uint8_t>& qp, + VideoFrame* alpha_decoded_image, + const absl::optional<int32_t>& alpha_decode_time_ms, + const absl::optional<uint8_t>& alpha_qp, + std::unique_ptr<uint8_t[]> augmenting_data, + uint16_t augmenting_data_length) { + rtc::scoped_refptr<VideoFrameBuffer> merged_buffer; + if (!alpha_decoded_image->timestamp()) { + merged_buffer = decoded_image->video_frame_buffer(); + } else { + rtc::scoped_refptr<webrtc::I420BufferInterface> yuv_buffer = + decoded_image->video_frame_buffer()->ToI420(); + 
rtc::scoped_refptr<webrtc::I420BufferInterface> alpha_buffer = + alpha_decoded_image->video_frame_buffer()->ToI420(); + RTC_DCHECK_EQ(yuv_buffer->width(), alpha_buffer->width()); + RTC_DCHECK_EQ(yuv_buffer->height(), alpha_buffer->height()); + merged_buffer = WrapI420ABuffer( + yuv_buffer->width(), yuv_buffer->height(), yuv_buffer->DataY(), + yuv_buffer->StrideY(), yuv_buffer->DataU(), yuv_buffer->StrideU(), + yuv_buffer->DataV(), yuv_buffer->StrideV(), alpha_buffer->DataY(), + alpha_buffer->StrideY(), + // To keep references alive. + [yuv_buffer, alpha_buffer] {}); + } + if (supports_augmenting_data_) { + merged_buffer = rtc::make_ref_counted<AugmentedVideoFrameBuffer>( + merged_buffer, std::move(augmenting_data), augmenting_data_length); + } + + VideoFrame merged_image = VideoFrame::Builder() + .set_video_frame_buffer(merged_buffer) + .set_timestamp_rtp(decoded_image->timestamp()) + .set_timestamp_us(0) + .set_rotation(decoded_image->rotation()) + .set_id(decoded_image->id()) + .set_packet_infos(decoded_image->packet_infos()) + .build(); + decoded_complete_callback_->Decoded(merged_image, decode_time_ms, qp); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.cc b/third_party/libwebrtc/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.cc new file mode 100644 index 0000000000..0f05d1a89c --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.cc @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.h" + +#include <cstring> +#include <utility> + +#include "modules/rtp_rtcp/source/byte_io.h" +#include "rtc_base/checks.h" + +namespace webrtc { +int PackHeader(uint8_t* buffer, MultiplexImageHeader header) { + int offset = 0; + ByteWriter<uint8_t>::WriteBigEndian(buffer + offset, header.component_count); + offset += sizeof(uint8_t); + + ByteWriter<uint16_t>::WriteBigEndian(buffer + offset, header.image_index); + offset += sizeof(uint16_t); + + ByteWriter<uint16_t>::WriteBigEndian(buffer + offset, + header.augmenting_data_size); + offset += sizeof(uint16_t); + + ByteWriter<uint32_t>::WriteBigEndian(buffer + offset, + header.augmenting_data_offset); + offset += sizeof(uint32_t); + + ByteWriter<uint32_t>::WriteBigEndian(buffer + offset, + header.first_component_header_offset); + offset += sizeof(uint32_t); + + RTC_DCHECK_EQ(offset, kMultiplexImageHeaderSize); + return offset; +} + +MultiplexImageHeader UnpackHeader(const uint8_t* buffer) { + MultiplexImageHeader header; + int offset = 0; + header.component_count = ByteReader<uint8_t>::ReadBigEndian(buffer + offset); + offset += sizeof(uint8_t); + + header.image_index = ByteReader<uint16_t>::ReadBigEndian(buffer + offset); + offset += sizeof(uint16_t); + + header.augmenting_data_size = + ByteReader<uint16_t>::ReadBigEndian(buffer + offset); + offset += sizeof(uint16_t); + + header.augmenting_data_offset = + ByteReader<uint32_t>::ReadBigEndian(buffer + offset); + offset += sizeof(uint32_t); + + header.first_component_header_offset = + ByteReader<uint32_t>::ReadBigEndian(buffer + offset); + offset += sizeof(uint32_t); + + RTC_DCHECK_EQ(offset, kMultiplexImageHeaderSize); + return header; +} + +int PackFrameHeader(uint8_t* buffer, + MultiplexImageComponentHeader frame_header) { + int offset = 0; + ByteWriter<uint32_t>::WriteBigEndian( + buffer + offset, frame_header.next_component_header_offset); + offset += sizeof(uint32_t); + + 
ByteWriter<uint8_t>::WriteBigEndian(buffer + offset, + frame_header.component_index); + offset += sizeof(uint8_t); + + ByteWriter<uint32_t>::WriteBigEndian(buffer + offset, + frame_header.bitstream_offset); + offset += sizeof(uint32_t); + + ByteWriter<uint32_t>::WriteBigEndian(buffer + offset, + frame_header.bitstream_length); + offset += sizeof(uint32_t); + + ByteWriter<uint8_t>::WriteBigEndian(buffer + offset, frame_header.codec_type); + offset += sizeof(uint8_t); + + ByteWriter<uint8_t>::WriteBigEndian( + buffer + offset, static_cast<uint8_t>(frame_header.frame_type)); + offset += sizeof(uint8_t); + + RTC_DCHECK_EQ(offset, kMultiplexImageComponentHeaderSize); + return offset; +} + +MultiplexImageComponentHeader UnpackFrameHeader(const uint8_t* buffer) { + MultiplexImageComponentHeader frame_header; + int offset = 0; + + frame_header.next_component_header_offset = + ByteReader<uint32_t>::ReadBigEndian(buffer + offset); + offset += sizeof(uint32_t); + + frame_header.component_index = + ByteReader<uint8_t>::ReadBigEndian(buffer + offset); + offset += sizeof(uint8_t); + + frame_header.bitstream_offset = + ByteReader<uint32_t>::ReadBigEndian(buffer + offset); + offset += sizeof(uint32_t); + + frame_header.bitstream_length = + ByteReader<uint32_t>::ReadBigEndian(buffer + offset); + offset += sizeof(uint32_t); + + // This makes the wire format depend on the numeric values of the + // VideoCodecType and VideoFrameType enum constants. 
+ frame_header.codec_type = static_cast<VideoCodecType>( + ByteReader<uint8_t>::ReadBigEndian(buffer + offset)); + offset += sizeof(uint8_t); + + frame_header.frame_type = static_cast<VideoFrameType>( + ByteReader<uint8_t>::ReadBigEndian(buffer + offset)); + offset += sizeof(uint8_t); + + RTC_DCHECK_EQ(offset, kMultiplexImageComponentHeaderSize); + return frame_header; +} + +void PackBitstream(uint8_t* buffer, MultiplexImageComponent image) { + memcpy(buffer, image.encoded_image.data(), image.encoded_image.size()); +} + +MultiplexImage::MultiplexImage(uint16_t picture_index, + uint8_t frame_count, + std::unique_ptr<uint8_t[]> augmenting_data, + uint16_t augmenting_data_size) + : image_index(picture_index), + component_count(frame_count), + augmenting_data_size(augmenting_data_size), + augmenting_data(std::move(augmenting_data)) {} + +EncodedImage MultiplexEncodedImagePacker::PackAndRelease( + const MultiplexImage& multiplex_image) { + MultiplexImageHeader header; + std::vector<MultiplexImageComponentHeader> frame_headers; + + header.component_count = multiplex_image.component_count; + header.image_index = multiplex_image.image_index; + int header_offset = kMultiplexImageHeaderSize; + header.first_component_header_offset = header_offset; + header.augmenting_data_offset = + header_offset + + kMultiplexImageComponentHeaderSize * header.component_count; + header.augmenting_data_size = multiplex_image.augmenting_data_size; + int bitstream_offset = + header.augmenting_data_offset + header.augmenting_data_size; + + const std::vector<MultiplexImageComponent>& images = + multiplex_image.image_components; + EncodedImage combined_image = images[0].encoded_image; + for (size_t i = 0; i < images.size(); i++) { + MultiplexImageComponentHeader frame_header; + header_offset += kMultiplexImageComponentHeaderSize; + frame_header.next_component_header_offset = + (i == images.size() - 1) ? 
0 : header_offset; + frame_header.component_index = images[i].component_index; + + frame_header.bitstream_offset = bitstream_offset; + frame_header.bitstream_length = + static_cast<uint32_t>(images[i].encoded_image.size()); + bitstream_offset += frame_header.bitstream_length; + + frame_header.codec_type = images[i].codec_type; + frame_header.frame_type = images[i].encoded_image._frameType; + + // As long as one component is delta frame, we have to mark the combined + // frame as delta frame, because it is necessary for all components to be + // key frame so as to decode the whole image without previous frame data. + // Thus only when all components are key frames, we can mark the combined + // frame as key frame. + if (frame_header.frame_type == VideoFrameType::kVideoFrameDelta) { + combined_image._frameType = VideoFrameType::kVideoFrameDelta; + } + + frame_headers.push_back(frame_header); + } + + auto buffer = EncodedImageBuffer::Create(bitstream_offset); + combined_image.SetEncodedData(buffer); + + // header + header_offset = PackHeader(buffer->data(), header); + RTC_DCHECK_EQ(header.first_component_header_offset, + kMultiplexImageHeaderSize); + + // Frame Header + for (size_t i = 0; i < images.size(); i++) { + int relative_offset = + PackFrameHeader(buffer->data() + header_offset, frame_headers[i]); + RTC_DCHECK_EQ(relative_offset, kMultiplexImageComponentHeaderSize); + + header_offset = frame_headers[i].next_component_header_offset; + RTC_DCHECK_EQ(header_offset, + (i == images.size() - 1) + ? 
0 + : (kMultiplexImageHeaderSize + + kMultiplexImageComponentHeaderSize * (i + 1))); + } + + // Augmenting Data + if (multiplex_image.augmenting_data_size != 0) { + memcpy(buffer->data() + header.augmenting_data_offset, + multiplex_image.augmenting_data.get(), + multiplex_image.augmenting_data_size); + } + + // Bitstreams + for (size_t i = 0; i < images.size(); i++) { + PackBitstream(buffer->data() + frame_headers[i].bitstream_offset, + images[i]); + } + + return combined_image; +} + +MultiplexImage MultiplexEncodedImagePacker::Unpack( + const EncodedImage& combined_image) { + const MultiplexImageHeader& header = UnpackHeader(combined_image.data()); + + std::vector<MultiplexImageComponentHeader> frame_headers; + int header_offset = header.first_component_header_offset; + + while (header_offset > 0) { + frame_headers.push_back( + UnpackFrameHeader(combined_image.data() + header_offset)); + header_offset = frame_headers.back().next_component_header_offset; + } + + RTC_DCHECK_LE(frame_headers.size(), header.component_count); + std::unique_ptr<uint8_t[]> augmenting_data = nullptr; + if (header.augmenting_data_size != 0) { + augmenting_data = + std::unique_ptr<uint8_t[]>(new uint8_t[header.augmenting_data_size]); + memcpy(augmenting_data.get(), + combined_image.data() + header.augmenting_data_offset, + header.augmenting_data_size); + } + + MultiplexImage multiplex_image(header.image_index, header.component_count, + std::move(augmenting_data), + header.augmenting_data_size); + + for (size_t i = 0; i < frame_headers.size(); i++) { + MultiplexImageComponent image_component; + image_component.component_index = frame_headers[i].component_index; + image_component.codec_type = frame_headers[i].codec_type; + + EncodedImage encoded_image = combined_image; + encoded_image.SetTimestamp(combined_image.Timestamp()); + encoded_image._frameType = frame_headers[i].frame_type; + encoded_image.SetEncodedData(EncodedImageBuffer::Create( + combined_image.data() + 
frame_headers[i].bitstream_offset, + frame_headers[i].bitstream_length)); + + image_component.encoded_image = encoded_image; + + multiplex_image.image_components.push_back(image_component); + } + + return multiplex_image; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.h b/third_party/libwebrtc/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.h new file mode 100644 index 0000000000..299a0159d5 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_MULTIPLEX_MULTIPLEX_ENCODED_IMAGE_PACKER_H_ +#define MODULES_VIDEO_CODING_CODECS_MULTIPLEX_MULTIPLEX_ENCODED_IMAGE_PACKER_H_ + +#include <cstdint> +#include <memory> +#include <vector> + +#include "api/video/encoded_image.h" +#include "api/video_codecs/video_codec.h" + +namespace webrtc { + +// Struct describing the whole bundle of multiple frames of an image. +// This struct is expected to be the set in the beginning of a picture's +// bitstream data. +struct MultiplexImageHeader { + // The number of frame components making up the complete picture data. + // For example, `frame_count` = 2 for the case of YUV frame with Alpha frame. + uint8_t component_count; + + // The increasing image ID given by the encoder. For different components + // of a single picture, they have the same `picture_index`. 
// Struct describing the content of one individual image component.
struct MultiplexImageComponentHeader {
  // The location of the next MultiplexImageComponentHeader in the bitstream,
  // in bytes from the beginning of the bitstream. The packer writes 0 for the
  // last component.
  uint32_t next_component_header_offset;

  // Identifies which component this frame represents, i.e. YUV frame vs Alpha
  // frame.
  uint8_t component_index;

  // The location of the real encoded image data of the frame in the bitstream,
  // in bytes from the beginning of the bitstream.
  uint32_t bitstream_offset;

  // Indicates the number of bytes of the encoded image data.
  uint32_t bitstream_length;

  // Indicates the underlying VideoCodecType of the frame, i.e. VP9 or VP8 etc.
  VideoCodecType codec_type;

  // Indicates whether the underlying frame is a key frame or a delta frame.
  VideoFrameType frame_type;
};
// Serialized size of a MultiplexImageComponentHeader: the sum of the field
// sizes on the wire, where the two enums take one byte each.
const int kMultiplexImageComponentHeaderSize =
    sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint32_t) + sizeof(uint32_t) +
    sizeof(uint8_t) + sizeof(uint8_t);

// Struct holding the encoded image for one component.
struct MultiplexImageComponent {
  // Indicates the underlying VideoCodecType of the frame, i.e. VP9 or VP8 etc.
  VideoCodecType codec_type;

  // Identifies which component this frame represents, i.e. YUV frame vs Alpha
  // frame.
  uint8_t component_index;

  // Stores the actual frame data of the encoded image.
  EncodedImage encoded_image;
};

// Struct holding the whole frame bundle of components of an image.
struct MultiplexImage {
  // Picture ID shared by all components of the same picture.
  uint16_t image_index;
  // Number of components the complete picture consists of.
  uint8_t component_count;
  // Size in bytes of `augmenting_data`; 0 when there is none.
  uint16_t augmenting_data_size;
  std::unique_ptr<uint8_t[]> augmenting_data;
  std::vector<MultiplexImageComponent> image_components;

  MultiplexImage(uint16_t picture_index,
                 uint8_t component_count,
                 std::unique_ptr<uint8_t[]> augmenting_data,
                 uint16_t augmenting_data_size);
};

// A utility class providing conversion between two representations of a
// multiplex image frame:
// 1. Packed version is just one encoded image, we pack all necessary metadata
//    in the bitstream as headers.
// 2. Unpacked version is essentially a list of encoded images, one per
//    component.
class MultiplexEncodedImagePacker {
 public:
  // Note: It is the caller's responsibility to release the buffer of the
  // result.
  // NOTE(review): the implementation attaches an EncodedImageBuffer via
  // SetEncodedData(); confirm whether any manual release is still needed.
  static EncodedImage PackAndRelease(const MultiplexImage& image);

  // Note: The image components just share the memory with `combined_image`.
  // NOTE(review): the implementation creates a separate EncodedImageBuffer
  // for every component; confirm whether memory is actually shared.
  static MultiplexImage Unpack(const EncodedImage& combined_image);
};
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/codecs/multiplex/include/multiplex_encoder_adapter.h" + +#include <cstring> + +#include "api/video/encoded_image.h" +#include "api/video_codecs/video_encoder.h" +#include "common_video/include/video_frame_buffer.h" +#include "common_video/libyuv/include/webrtc_libyuv.h" +#include "media/base/video_common.h" +#include "modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +// Callback wrapper that helps distinguish returned results from `encoders_` +// instances. +class MultiplexEncoderAdapter::AdapterEncodedImageCallback + : public webrtc::EncodedImageCallback { + public: + AdapterEncodedImageCallback(webrtc::MultiplexEncoderAdapter* adapter, + AlphaCodecStream stream_idx) + : adapter_(adapter), stream_idx_(stream_idx) {} + + EncodedImageCallback::Result OnEncodedImage( + const EncodedImage& encoded_image, + const CodecSpecificInfo* codec_specific_info) override { + if (!adapter_) + return Result(Result::OK); + return adapter_->OnEncodedImage(stream_idx_, encoded_image, + codec_specific_info); + } + + private: + MultiplexEncoderAdapter* adapter_; + const AlphaCodecStream stream_idx_; +}; + +MultiplexEncoderAdapter::MultiplexEncoderAdapter( + VideoEncoderFactory* factory, + const SdpVideoFormat& associated_format, + bool supports_augmented_data) + : factory_(factory), + associated_format_(associated_format), + encoded_complete_callback_(nullptr), + key_frame_interval_(0), + supports_augmented_data_(supports_augmented_data) {} + +MultiplexEncoderAdapter::~MultiplexEncoderAdapter() { + Release(); +} + +void MultiplexEncoderAdapter::SetFecControllerOverride( + FecControllerOverride* fec_controller_override) { + // Ignored. 
+} + +int MultiplexEncoderAdapter::InitEncode( + const VideoCodec* inst, + const VideoEncoder::Settings& settings) { + const size_t buffer_size = + CalcBufferSize(VideoType::kI420, inst->width, inst->height); + multiplex_dummy_planes_.resize(buffer_size); + // It is more expensive to encode 0x00, so use 0x80 instead. + std::fill(multiplex_dummy_planes_.begin(), multiplex_dummy_planes_.end(), + 0x80); + + RTC_DCHECK_EQ(kVideoCodecMultiplex, inst->codecType); + VideoCodec video_codec = *inst; + video_codec.codecType = PayloadStringToCodecType(associated_format_.name); + + // Take over the key frame interval at adapter level, because we have to + // sync the key frames for both sub-encoders. + switch (video_codec.codecType) { + case kVideoCodecVP8: + key_frame_interval_ = video_codec.VP8()->keyFrameInterval; + video_codec.VP8()->keyFrameInterval = 0; + break; + case kVideoCodecVP9: + key_frame_interval_ = video_codec.VP9()->keyFrameInterval; + video_codec.VP9()->keyFrameInterval = 0; + break; + case kVideoCodecH264: + key_frame_interval_ = video_codec.H264()->keyFrameInterval; + video_codec.H264()->keyFrameInterval = 0; + break; + default: + break; + } + + encoder_info_ = EncoderInfo(); + encoder_info_.implementation_name = "MultiplexEncoderAdapter ("; + encoder_info_.requested_resolution_alignment = 1; + encoder_info_.apply_alignment_to_all_simulcast_layers = false; + // This needs to be false so that we can do the split in Encode(). 
+ encoder_info_.supports_native_handle = false; + + for (size_t i = 0; i < kAlphaCodecStreams; ++i) { + std::unique_ptr<VideoEncoder> encoder = + factory_->CreateVideoEncoder(associated_format_); + const int rv = encoder->InitEncode(&video_codec, settings); + if (rv) { + RTC_LOG(LS_ERROR) << "Failed to create multiplex codec index " << i; + return rv; + } + adapter_callbacks_.emplace_back(new AdapterEncodedImageCallback( + this, static_cast<AlphaCodecStream>(i))); + encoder->RegisterEncodeCompleteCallback(adapter_callbacks_.back().get()); + + const EncoderInfo& encoder_impl_info = encoder->GetEncoderInfo(); + encoder_info_.implementation_name += encoder_impl_info.implementation_name; + if (i != kAlphaCodecStreams - 1) { + encoder_info_.implementation_name += ", "; + } + // Uses hardware support if any of the encoders uses it. + // For example, if we are having issues with down-scaling due to + // pipelining delay in HW encoders we need higher encoder usage + // thresholds in CPU adaptation. 
+ if (i == 0) { + encoder_info_.is_hardware_accelerated = + encoder_impl_info.is_hardware_accelerated; + } else { + encoder_info_.is_hardware_accelerated |= + encoder_impl_info.is_hardware_accelerated; + } + + encoder_info_.requested_resolution_alignment = cricket::LeastCommonMultiple( + encoder_info_.requested_resolution_alignment, + encoder_impl_info.requested_resolution_alignment); + + if (encoder_impl_info.apply_alignment_to_all_simulcast_layers) { + encoder_info_.apply_alignment_to_all_simulcast_layers = true; + } + + encoders_.emplace_back(std::move(encoder)); + } + encoder_info_.implementation_name += ")"; + + return WEBRTC_VIDEO_CODEC_OK; +} + +int MultiplexEncoderAdapter::Encode( + const VideoFrame& input_image, + const std::vector<VideoFrameType>* frame_types) { + if (!encoded_complete_callback_) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + // The input image is forwarded as-is, unless it is a native buffer and + // `supports_augmented_data_` is true in which case we need to map it in order + // to access the underlying AugmentedVideoFrameBuffer. + VideoFrame forwarded_image = input_image; + if (supports_augmented_data_ && + forwarded_image.video_frame_buffer()->type() == + VideoFrameBuffer::Type::kNative) { + auto info = GetEncoderInfo(); + rtc::scoped_refptr<VideoFrameBuffer> mapped_buffer = + forwarded_image.video_frame_buffer()->GetMappedFrameBuffer( + info.preferred_pixel_formats); + if (!mapped_buffer) { + // Unable to map the buffer. 
+ return WEBRTC_VIDEO_CODEC_ERROR; + } + forwarded_image.set_video_frame_buffer(std::move(mapped_buffer)); + } + + std::vector<VideoFrameType> adjusted_frame_types; + if (key_frame_interval_ > 0 && picture_index_ % key_frame_interval_ == 0) { + adjusted_frame_types.push_back(VideoFrameType::kVideoFrameKey); + } else { + adjusted_frame_types.push_back(VideoFrameType::kVideoFrameDelta); + } + const bool has_alpha = forwarded_image.video_frame_buffer()->type() == + VideoFrameBuffer::Type::kI420A; + std::unique_ptr<uint8_t[]> augmenting_data = nullptr; + uint16_t augmenting_data_length = 0; + AugmentedVideoFrameBuffer* augmented_video_frame_buffer = nullptr; + if (supports_augmented_data_) { + augmented_video_frame_buffer = static_cast<AugmentedVideoFrameBuffer*>( + forwarded_image.video_frame_buffer().get()); + augmenting_data_length = + augmented_video_frame_buffer->GetAugmentingDataSize(); + augmenting_data = + std::unique_ptr<uint8_t[]>(new uint8_t[augmenting_data_length]); + memcpy(augmenting_data.get(), + augmented_video_frame_buffer->GetAugmentingData(), + augmenting_data_length); + augmenting_data_size_ = augmenting_data_length; + } + + { + MutexLock lock(&mutex_); + stashed_images_.emplace( + std::piecewise_construct, + std::forward_as_tuple(forwarded_image.timestamp()), + std::forward_as_tuple( + picture_index_, has_alpha ? kAlphaCodecStreams : 1, + std::move(augmenting_data), augmenting_data_length)); + } + + ++picture_index_; + + // Encode YUV + int rv = + encoders_[kYUVStream]->Encode(forwarded_image, &adjusted_frame_types); + + // If we do not receive an alpha frame, we send a single frame for this + // `picture_index_`. The receiver will receive `frame_count` as 1 which + // specifies this case. + if (rv || !has_alpha) + return rv; + + // Encode AXX + rtc::scoped_refptr<VideoFrameBuffer> frame_buffer = + supports_augmented_data_ + ? 
augmented_video_frame_buffer->GetVideoFrameBuffer() + : forwarded_image.video_frame_buffer(); + const I420ABufferInterface* yuva_buffer = frame_buffer->GetI420A(); + rtc::scoped_refptr<I420BufferInterface> alpha_buffer = + WrapI420Buffer(forwarded_image.width(), forwarded_image.height(), + yuva_buffer->DataA(), yuva_buffer->StrideA(), + multiplex_dummy_planes_.data(), yuva_buffer->StrideU(), + multiplex_dummy_planes_.data(), yuva_buffer->StrideV(), + // To keep reference alive. + [frame_buffer] {}); + VideoFrame alpha_image = + VideoFrame::Builder() + .set_video_frame_buffer(alpha_buffer) + .set_timestamp_rtp(forwarded_image.timestamp()) + .set_timestamp_ms(forwarded_image.render_time_ms()) + .set_rotation(forwarded_image.rotation()) + .set_id(forwarded_image.id()) + .set_packet_infos(forwarded_image.packet_infos()) + .build(); + rv = encoders_[kAXXStream]->Encode(alpha_image, &adjusted_frame_types); + return rv; +} + +int MultiplexEncoderAdapter::RegisterEncodeCompleteCallback( + EncodedImageCallback* callback) { + encoded_complete_callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +void MultiplexEncoderAdapter::SetRates( + const RateControlParameters& parameters) { + VideoBitrateAllocation bitrate_allocation(parameters.bitrate); + bitrate_allocation.SetBitrate( + 0, 0, parameters.bitrate.GetBitrate(0, 0) - augmenting_data_size_); + for (auto& encoder : encoders_) { + // TODO(emircan): `framerate` is used to calculate duration in encoder + // instances. We report the total frame rate to keep real time for now. + // Remove this after refactoring duration logic. 
+ encoder->SetRates(RateControlParameters( + bitrate_allocation, + static_cast<uint32_t>(encoders_.size() * parameters.framerate_fps), + parameters.bandwidth_allocation - + DataRate::BitsPerSec(augmenting_data_size_))); + } +} + +void MultiplexEncoderAdapter::OnPacketLossRateUpdate(float packet_loss_rate) { + for (auto& encoder : encoders_) { + encoder->OnPacketLossRateUpdate(packet_loss_rate); + } +} + +void MultiplexEncoderAdapter::OnRttUpdate(int64_t rtt_ms) { + for (auto& encoder : encoders_) { + encoder->OnRttUpdate(rtt_ms); + } +} + +void MultiplexEncoderAdapter::OnLossNotification( + const LossNotification& loss_notification) { + for (auto& encoder : encoders_) { + encoder->OnLossNotification(loss_notification); + } +} + +int MultiplexEncoderAdapter::Release() { + for (auto& encoder : encoders_) { + const int rv = encoder->Release(); + if (rv) + return rv; + } + encoders_.clear(); + adapter_callbacks_.clear(); + MutexLock lock(&mutex_); + stashed_images_.clear(); + + return WEBRTC_VIDEO_CODEC_OK; +} + +VideoEncoder::EncoderInfo MultiplexEncoderAdapter::GetEncoderInfo() const { + return encoder_info_; +} + +EncodedImageCallback::Result MultiplexEncoderAdapter::OnEncodedImage( + AlphaCodecStream stream_idx, + const EncodedImage& encodedImage, + const CodecSpecificInfo* codecSpecificInfo) { + // Save the image + MultiplexImageComponent image_component; + image_component.component_index = stream_idx; + image_component.codec_type = + PayloadStringToCodecType(associated_format_.name); + image_component.encoded_image = encodedImage; + + MutexLock lock(&mutex_); + const auto& stashed_image_itr = + stashed_images_.find(encodedImage.Timestamp()); + const auto& stashed_image_next_itr = std::next(stashed_image_itr, 1); + RTC_DCHECK(stashed_image_itr != stashed_images_.end()); + MultiplexImage& stashed_image = stashed_image_itr->second; + const uint8_t frame_count = stashed_image.component_count; + + stashed_image.image_components.push_back(image_component); + + if 
(stashed_image.image_components.size() == frame_count) { + // Complete case + for (auto iter = stashed_images_.begin(); + iter != stashed_images_.end() && iter != stashed_image_next_itr; + iter++) { + // No image at all, skip. + if (iter->second.image_components.size() == 0) + continue; + + // We have to send out those stashed frames, otherwise the delta frame + // dependency chain is broken. + combined_image_ = + MultiplexEncodedImagePacker::PackAndRelease(iter->second); + + CodecSpecificInfo codec_info = *codecSpecificInfo; + codec_info.codecType = kVideoCodecMultiplex; + encoded_complete_callback_->OnEncodedImage(combined_image_, &codec_info); + } + + stashed_images_.erase(stashed_images_.begin(), stashed_image_next_itr); + } + return EncodedImageCallback::Result(EncodedImageCallback::Result::OK); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc new file mode 100644 index 0000000000..be0f5deb52 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <stddef.h> + +#include <cstdint> +#include <memory> +#include <utility> +#include <vector> + +#include "absl/types/optional.h" +#include "api/scoped_refptr.h" +#include "api/test/mock_video_decoder_factory.h" +#include "api/test/mock_video_encoder_factory.h" +#include "api/video/encoded_image.h" +#include "api/video/video_frame.h" +#include "api/video/video_frame_buffer.h" +#include "api/video/video_rotation.h" +#include "api/video_codecs/sdp_video_format.h" +#include "api/video_codecs/video_codec.h" +#include "api/video_codecs/video_decoder.h" +#include "api/video_codecs/video_encoder.h" +#include "common_video/include/video_frame_buffer.h" +#include "common_video/libyuv/include/webrtc_libyuv.h" +#include "media/base/media_constants.h" +#include "modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h" +#include "modules/video_coding/codecs/multiplex/include/multiplex_decoder_adapter.h" +#include "modules/video_coding/codecs/multiplex/include/multiplex_encoder_adapter.h" +#include "modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.h" +#include "modules/video_coding/codecs/test/video_codec_unittest.h" +#include "modules/video_coding/codecs/vp9/include/vp9.h" +#include "modules/video_coding/include/video_codec_interface.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/video_codec_settings.h" + +using ::testing::_; +using ::testing::Return; + +namespace webrtc { + +constexpr const char* kMultiplexAssociatedCodecName = cricket::kVp9CodecName; +const VideoCodecType kMultiplexAssociatedCodecType = + PayloadStringToCodecType(kMultiplexAssociatedCodecName); + +class TestMultiplexAdapter : public VideoCodecUnitTest, + public ::testing::WithParamInterface< + bool /* supports_augmenting_data */> { + public: + TestMultiplexAdapter() + : decoder_factory_(new webrtc::MockVideoDecoderFactory), + encoder_factory_(new 
webrtc::MockVideoEncoderFactory), + supports_augmenting_data_(GetParam()) {} + + protected: + std::unique_ptr<VideoDecoder> CreateDecoder() override { + return std::make_unique<MultiplexDecoderAdapter>( + decoder_factory_.get(), SdpVideoFormat(kMultiplexAssociatedCodecName), + supports_augmenting_data_); + } + + std::unique_ptr<VideoEncoder> CreateEncoder() override { + return std::make_unique<MultiplexEncoderAdapter>( + encoder_factory_.get(), SdpVideoFormat(kMultiplexAssociatedCodecName), + supports_augmenting_data_); + } + + void ModifyCodecSettings(VideoCodec* codec_settings) override { + webrtc::test::CodecSettings(kMultiplexAssociatedCodecType, codec_settings); + codec_settings->VP9()->numberOfTemporalLayers = 1; + codec_settings->VP9()->numberOfSpatialLayers = 1; + codec_settings->codecType = webrtc::kVideoCodecMultiplex; + } + + std::unique_ptr<VideoFrame> CreateDataAugmentedInputFrame( + VideoFrame* video_frame) { + rtc::scoped_refptr<VideoFrameBuffer> video_buffer = + video_frame->video_frame_buffer(); + std::unique_ptr<uint8_t[]> data = + std::unique_ptr<uint8_t[]>(new uint8_t[16]); + for (int i = 0; i < 16; i++) { + data[i] = i; + } + auto augmented_video_frame_buffer = + rtc::make_ref_counted<AugmentedVideoFrameBuffer>(video_buffer, + std::move(data), 16); + return std::make_unique<VideoFrame>( + VideoFrame::Builder() + .set_video_frame_buffer(augmented_video_frame_buffer) + .set_timestamp_rtp(video_frame->timestamp()) + .set_timestamp_ms(video_frame->render_time_ms()) + .set_rotation(video_frame->rotation()) + .set_id(video_frame->id()) + .build()); + } + + std::unique_ptr<VideoFrame> CreateI420AInputFrame() { + VideoFrame input_frame = NextInputFrame(); + rtc::scoped_refptr<webrtc::I420BufferInterface> yuv_buffer = + input_frame.video_frame_buffer()->ToI420(); + rtc::scoped_refptr<I420ABufferInterface> yuva_buffer = WrapI420ABuffer( + yuv_buffer->width(), yuv_buffer->height(), yuv_buffer->DataY(), + yuv_buffer->StrideY(), yuv_buffer->DataU(), 
yuv_buffer->StrideU(), + yuv_buffer->DataV(), yuv_buffer->StrideV(), yuv_buffer->DataY(), + yuv_buffer->StrideY(), + // To keep reference alive. + [yuv_buffer] {}); + return std::make_unique<VideoFrame>(VideoFrame::Builder() + .set_video_frame_buffer(yuva_buffer) + .set_timestamp_rtp(123) + .set_timestamp_ms(345) + .set_rotation(kVideoRotation_0) + .build()); + } + + std::unique_ptr<VideoFrame> CreateInputFrame(bool contains_alpha) { + std::unique_ptr<VideoFrame> video_frame; + if (contains_alpha) { + video_frame = CreateI420AInputFrame(); + } else { + VideoFrame next_frame = NextInputFrame(); + video_frame = std::make_unique<VideoFrame>( + VideoFrame::Builder() + .set_video_frame_buffer(next_frame.video_frame_buffer()) + .set_timestamp_rtp(next_frame.timestamp()) + .set_timestamp_ms(next_frame.render_time_ms()) + .set_rotation(next_frame.rotation()) + .set_id(next_frame.id()) + .build()); + } + if (supports_augmenting_data_) { + video_frame = CreateDataAugmentedInputFrame(video_frame.get()); + } + + return video_frame; + } + + void CheckData(rtc::scoped_refptr<VideoFrameBuffer> video_frame_buffer) { + if (!supports_augmenting_data_) { + return; + } + AugmentedVideoFrameBuffer* augmented_buffer = + static_cast<AugmentedVideoFrameBuffer*>(video_frame_buffer.get()); + EXPECT_EQ(augmented_buffer->GetAugmentingDataSize(), 16); + uint8_t* data = augmented_buffer->GetAugmentingData(); + for (int i = 0; i < 16; i++) { + EXPECT_EQ(data[i], i); + } + } + + std::unique_ptr<VideoFrame> ExtractAXXFrame(const VideoFrame& video_frame) { + rtc::scoped_refptr<VideoFrameBuffer> video_frame_buffer = + video_frame.video_frame_buffer(); + if (supports_augmenting_data_) { + AugmentedVideoFrameBuffer* augmentedBuffer = + static_cast<AugmentedVideoFrameBuffer*>(video_frame_buffer.get()); + video_frame_buffer = augmentedBuffer->GetVideoFrameBuffer(); + } + const I420ABufferInterface* yuva_buffer = video_frame_buffer->GetI420A(); + rtc::scoped_refptr<I420BufferInterface> axx_buffer = 
WrapI420Buffer( + yuva_buffer->width(), yuva_buffer->height(), yuva_buffer->DataA(), + yuva_buffer->StrideA(), yuva_buffer->DataU(), yuva_buffer->StrideU(), + yuva_buffer->DataV(), yuva_buffer->StrideV(), [video_frame_buffer] {}); + return std::make_unique<VideoFrame>(VideoFrame::Builder() + .set_video_frame_buffer(axx_buffer) + .set_timestamp_rtp(123) + .set_timestamp_ms(345) + .set_rotation(kVideoRotation_0) + .build()); + } + + private: + void SetUp() override { + EXPECT_CALL(*decoder_factory_, Die); + // The decoders/encoders will be owned by the caller of + // CreateVideoDecoder()/CreateVideoEncoder(). + EXPECT_CALL(*decoder_factory_, CreateVideoDecoder) + .Times(2) + .WillRepeatedly([] { return VP9Decoder::Create(); }); + + EXPECT_CALL(*encoder_factory_, Die); + EXPECT_CALL(*encoder_factory_, CreateVideoEncoder) + .Times(2) + .WillRepeatedly([] { return VP9Encoder::Create(); }); + + VideoCodecUnitTest::SetUp(); + } + + const std::unique_ptr<webrtc::MockVideoDecoderFactory> decoder_factory_; + const std::unique_ptr<webrtc::MockVideoEncoderFactory> encoder_factory_; + const bool supports_augmenting_data_; +}; + +// TODO(emircan): Currently VideoCodecUnitTest tests do a complete setup +// step that goes beyond constructing `decoder_`. Simplify these tests to do +// less. 
+TEST_P(TestMultiplexAdapter, ConstructAndDestructDecoder) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Release()); +} + +TEST_P(TestMultiplexAdapter, ConstructAndDestructEncoder) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); +} + +TEST_P(TestMultiplexAdapter, EncodeDecodeI420Frame) { + std::unique_ptr<VideoFrame> input_frame = CreateInputFrame(false); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*input_frame, nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + EXPECT_EQ(kVideoCodecMultiplex, codec_specific_info.codecType); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, -1)); + std::unique_ptr<VideoFrame> decoded_frame; + absl::optional<uint8_t> decoded_qp; + ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp)); + ASSERT_TRUE(decoded_frame); + EXPECT_GT(I420PSNR(input_frame.get(), decoded_frame.get()), 36); + CheckData(decoded_frame->video_frame_buffer()); +} + +TEST_P(TestMultiplexAdapter, EncodeDecodeI420AFrame) { + std::unique_ptr<VideoFrame> yuva_frame = CreateInputFrame(true); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*yuva_frame, nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + EXPECT_EQ(kVideoCodecMultiplex, codec_specific_info.codecType); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0)); + std::unique_ptr<VideoFrame> decoded_frame; + absl::optional<uint8_t> decoded_qp; + ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp)); + ASSERT_TRUE(decoded_frame); + EXPECT_GT(I420PSNR(yuva_frame.get(), decoded_frame.get()), 36); + + // Find PSNR for AXX bits. 
+ std::unique_ptr<VideoFrame> input_axx_frame = ExtractAXXFrame(*yuva_frame); + std::unique_ptr<VideoFrame> output_axx_frame = + ExtractAXXFrame(*decoded_frame); + EXPECT_GT(I420PSNR(input_axx_frame.get(), output_axx_frame.get()), 47); + + CheckData(decoded_frame->video_frame_buffer()); +} + +TEST_P(TestMultiplexAdapter, CheckSingleFrameEncodedBitstream) { + std::unique_ptr<VideoFrame> input_frame = CreateInputFrame(false); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*input_frame, nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + EXPECT_EQ(kVideoCodecMultiplex, codec_specific_info.codecType); + EXPECT_FALSE(encoded_frame.SpatialIndex()); + + const MultiplexImage& unpacked_frame = + MultiplexEncodedImagePacker::Unpack(encoded_frame); + EXPECT_EQ(0, unpacked_frame.image_index); + EXPECT_EQ(1, unpacked_frame.component_count); + const MultiplexImageComponent& component = unpacked_frame.image_components[0]; + EXPECT_EQ(0, component.component_index); + EXPECT_NE(nullptr, component.encoded_image.data()); + EXPECT_EQ(VideoFrameType::kVideoFrameKey, component.encoded_image._frameType); +} + +TEST_P(TestMultiplexAdapter, CheckDoubleFramesEncodedBitstream) { + std::unique_ptr<VideoFrame> yuva_frame = CreateInputFrame(true); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*yuva_frame, nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + EXPECT_EQ(kVideoCodecMultiplex, codec_specific_info.codecType); + EXPECT_FALSE(encoded_frame.SpatialIndex()); + + const MultiplexImage& unpacked_frame = + MultiplexEncodedImagePacker::Unpack(encoded_frame); + EXPECT_EQ(0, unpacked_frame.image_index); + EXPECT_EQ(2, unpacked_frame.component_count); + EXPECT_EQ(unpacked_frame.image_components.size(), + unpacked_frame.component_count); + for (int i = 0; i < 
unpacked_frame.component_count; ++i) { + const MultiplexImageComponent& component = + unpacked_frame.image_components[i]; + EXPECT_EQ(i, component.component_index); + EXPECT_NE(nullptr, component.encoded_image.data()); + EXPECT_EQ(VideoFrameType::kVideoFrameKey, + component.encoded_image._frameType); + } +} + +TEST_P(TestMultiplexAdapter, ImageIndexIncreases) { + std::unique_ptr<VideoFrame> yuva_frame = CreateInputFrame(true); + const size_t expected_num_encoded_frames = 3; + for (size_t i = 0; i < expected_num_encoded_frames; ++i) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*yuva_frame, nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + const MultiplexImage& unpacked_frame = + MultiplexEncodedImagePacker::Unpack(encoded_frame); + EXPECT_EQ(i, unpacked_frame.image_index); + EXPECT_EQ( + i ? VideoFrameType::kVideoFrameDelta : VideoFrameType::kVideoFrameKey, + encoded_frame._frameType); + } +} + +INSTANTIATE_TEST_SUITE_P(TestMultiplexAdapter, + TestMultiplexAdapter, + ::testing::Bool()); + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/android_codec_factory_helper.cc b/third_party/libwebrtc/modules/video_coding/codecs/test/android_codec_factory_helper.cc new file mode 100644 index 0000000000..d1be684cbb --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/android_codec_factory_helper.cc @@ -0,0 +1,78 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/test/android_codec_factory_helper.h" + +#include <jni.h> +#include <pthread.h> +#include <stddef.h> + +#include <memory> + +#include "modules/utility/include/jvm_android.h" +#include "rtc_base/checks.h" +#include "sdk/android/native_api/codecs/wrapper.h" +#include "sdk/android/native_api/jni/class_loader.h" +#include "sdk/android/native_api/jni/jvm.h" +#include "sdk/android/native_api/jni/scoped_java_ref.h" +#include "sdk/android/src/jni/jvm.h" + +namespace webrtc { +namespace test { + +namespace { + +static pthread_once_t g_initialize_once = PTHREAD_ONCE_INIT; + +void EnsureInitializedOnce() { + RTC_CHECK(::webrtc::jni::GetJVM() != nullptr); + + JNIEnv* jni = ::webrtc::jni::AttachCurrentThreadIfNeeded(); + JavaVM* jvm = NULL; + RTC_CHECK_EQ(0, jni->GetJavaVM(&jvm)); + + // Initialize the Java environment (currently only used by the audio manager). + webrtc::JVM::Initialize(jvm); +} + +} // namespace + +void InitializeAndroidObjects() { + RTC_CHECK_EQ(0, pthread_once(&g_initialize_once, &EnsureInitializedOnce)); +} + +std::unique_ptr<VideoEncoderFactory> CreateAndroidEncoderFactory() { + JNIEnv* env = AttachCurrentThreadIfNeeded(); + ScopedJavaLocalRef<jclass> factory_class = + GetClass(env, "org/webrtc/HardwareVideoEncoderFactory"); + jmethodID factory_constructor = env->GetMethodID( + factory_class.obj(), "<init>", "(Lorg/webrtc/EglBase$Context;ZZ)V"); + ScopedJavaLocalRef<jobject> factory_object( + env, env->NewObject(factory_class.obj(), factory_constructor, + nullptr /* shared_context */, + false /* enable_intel_vp8_encoder */, + true /* enable_h264_high_profile */)); + return JavaToNativeVideoEncoderFactory(env, factory_object.obj()); +} + +std::unique_ptr<VideoDecoderFactory> CreateAndroidDecoderFactory() { + JNIEnv* env = AttachCurrentThreadIfNeeded(); + ScopedJavaLocalRef<jclass> factory_class = + GetClass(env, "org/webrtc/HardwareVideoDecoderFactory"); + jmethodID factory_constructor = env->GetMethodID( + 
factory_class.obj(), "<init>", "(Lorg/webrtc/EglBase$Context;)V"); + ScopedJavaLocalRef<jobject> factory_object( + env, env->NewObject(factory_class.obj(), factory_constructor, + nullptr /* shared_context */)); + return JavaToNativeVideoDecoderFactory(env, factory_object.obj()); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/android_codec_factory_helper.h b/third_party/libwebrtc/modules/video_coding/codecs/test/android_codec_factory_helper.h new file mode 100644 index 0000000000..ad9cf35162 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/android_codec_factory_helper.h @@ -0,0 +1,30 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_VIDEO_CODING_CODECS_TEST_ANDROID_CODEC_FACTORY_HELPER_H_ +#define MODULES_VIDEO_CODING_CODECS_TEST_ANDROID_CODEC_FACTORY_HELPER_H_ + +#include <memory> + +#include "api/video_codecs/video_decoder_factory.h" +#include "api/video_codecs/video_encoder_factory.h" + +namespace webrtc { +namespace test { + +void InitializeAndroidObjects(); + +std::unique_ptr<VideoEncoderFactory> CreateAndroidEncoderFactory(); +std::unique_ptr<VideoDecoderFactory> CreateAndroidDecoderFactory(); + +} // namespace test +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_TEST_ANDROID_CODEC_FACTORY_HELPER_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/batch/empty-runtime-deps b/third_party/libwebrtc/modules/video_coding/codecs/test/batch/empty-runtime-deps new file mode 100644 index 0000000000..6702195ca9 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/batch/empty-runtime-deps @@ -0,0 +1 @@ +does-not-exist diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/batch/run-instantiation-tests.sh b/third_party/libwebrtc/modules/video_coding/codecs/test/batch/run-instantiation-tests.sh new file mode 100755 index 0000000000..28083b1808 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/batch/run-instantiation-tests.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +if [ $# -ne 1 ]; then + echo "Usage: run-instantiation-tests.sh ADB-DEVICE-ID" + exit 1 +fi + +# Paths: update these based on your git checkout and gn output folder names. 
+WEBRTC_DIR=$HOME/src/webrtc/src +BUILD_DIR=$WEBRTC_DIR/out/Android_Release + +# Other settings. +ADB=`which adb` +SERIAL=$1 +TIMEOUT=7200 + +# Ensure we are using the latest version. +ninja -C $BUILD_DIR modules_tests + +# Transfer the required files by trying to run a test that doesn't exist. +echo "===> Transferring required resources to device $1." +$WEBRTC_DIR/build/android/test_runner.py gtest \ + --output-directory $BUILD_DIR \ + --suite modules_tests \ + --gtest_filter "DoesNotExist" \ + --shard-timeout $TIMEOUT \ + --runtime-deps-path $BUILD_DIR/gen.runtime/modules/modules_tests__test_runner_script.runtime_deps \ + --adb-path $ADB \ + --device $SERIAL \ + --verbose + +# Run all tests as separate test invocations. +mkdir $SERIAL +pushd $SERIAL +$WEBRTC_DIR/build/android/test_runner.py gtest \ + --output-directory $BUILD_DIR \ + --suite modules_tests \ + --gtest_filter "*InstantiationTest*" \ + --gtest_also_run_disabled_tests \ + --shard-timeout $TIMEOUT \ + --runtime-deps-path ../empty-runtime-deps \ + --test-launcher-retry-limit 0 \ + --adb-path $ADB \ + --device $SERIAL \ + --verbose \ + --num-retries 0 \ + 2>&1 | tee -a instantiation-tests.log +popd diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/batch/run-videoprocessor-tests.sh b/third_party/libwebrtc/modules/video_coding/codecs/test/batch/run-videoprocessor-tests.sh new file mode 100755 index 0000000000..25c971ba61 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/batch/run-videoprocessor-tests.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +if [ $# -ne 1 ]; then + echo "Usage: run.sh ADB-DEVICE-ID" + exit 1 +fi + +# Paths: update these based on your git checkout and gn output folder names. +WEBRTC_DIR=$HOME/src/webrtc/src +BUILD_DIR=$WEBRTC_DIR/out/Android_Release + +# Clips: update these to encode/decode other content. +CLIPS=('Foreman') +RESOLUTIONS=('128x96' '160x120' '176x144' '320x240' '352x288') +FRAMERATES=(30) + +# Other settings. +ADB=`which adb` +SERIAL=$1 +TIMEOUT=7200 + +# Ensure we are using the latest version. +ninja -C $BUILD_DIR modules_tests + +# Transfer the required files by trying to run a test that doesn't exist. +echo "===> Transferring required resources to device $1." +$WEBRTC_DIR/build/android/test_runner.py gtest \ + --output-directory $BUILD_DIR \ + --suite modules_tests \ + --gtest_filter "DoesNotExist" \ + --shard-timeout $TIMEOUT \ + --runtime-deps-path $BUILD_DIR/gen.runtime/modules/modules_tests__test_runner_script.runtime_deps \ + --adb-path $ADB \ + --device $SERIAL \ + --verbose + +# Run all tests as separate test invocations. +mkdir $SERIAL +pushd $SERIAL +for clip in "${CLIPS[@]}"; do + for resolution in "${RESOLUTIONS[@]}"; do + for framerate in "${FRAMERATES[@]}"; do + test_name="${clip}_${resolution}_${framerate}" + log_name="${test_name}.log" + + echo "===> Running ${test_name} on device $1." 
+ + $WEBRTC_DIR/build/android/test_runner.py gtest \ + --output-directory $BUILD_DIR \ + --suite modules_tests \ + --gtest_filter "CodecSettings/*${test_name}*" \ + --shard-timeout $TIMEOUT \ + --runtime-deps-path ../empty-runtime-deps \ + --test-launcher-retry-limit 0 \ + --adb-path $ADB \ + --device $SERIAL \ + --verbose \ + 2>&1 | tee -a ${log_name} + done + done +done +popd diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/encoded_video_frame_producer.cc b/third_party/libwebrtc/modules/video_coding/codecs/test/encoded_video_frame_producer.cc new file mode 100644 index 0000000000..899826eee4 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/encoded_video_frame_producer.cc @@ -0,0 +1,77 @@ +/* + * Copyright 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/test/encoded_video_frame_producer.h" + +#include <memory> +#include <vector> + +#include "api/test/create_frame_generator.h" +#include "api/test/frame_generator_interface.h" +#include "api/transport/rtp/dependency_descriptor.h" +#include "api/video/video_frame.h" +#include "api/video/video_frame_type.h" +#include "api/video_codecs/video_encoder.h" +#include "modules/video_coding/include/video_codec_interface.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +class EncoderCallback : public EncodedImageCallback { + public: + explicit EncoderCallback( + std::vector<EncodedVideoFrameProducer::EncodedFrame>& output_frames) + : output_frames_(output_frames) {} + + private: + Result OnEncodedImage(const EncodedImage& encoded_image, + const CodecSpecificInfo* codec_specific_info) override { + output_frames_.push_back({encoded_image, *codec_specific_info}); + return Result(Result::Error::OK); + } + + std::vector<EncodedVideoFrameProducer::EncodedFrame>& output_frames_; +}; + +} // namespace + +std::vector<EncodedVideoFrameProducer::EncodedFrame> +EncodedVideoFrameProducer::Encode() { + std::unique_ptr<test::FrameGeneratorInterface> frame_buffer_generator = + test::CreateSquareFrameGenerator( + resolution_.Width(), resolution_.Height(), + test::FrameGeneratorInterface::OutputType::kI420, absl::nullopt); + + std::vector<EncodedFrame> encoded_frames; + EncoderCallback encoder_callback(encoded_frames); + RTC_CHECK_EQ(encoder_.RegisterEncodeCompleteCallback(&encoder_callback), + WEBRTC_VIDEO_CODEC_OK); + + uint32_t rtp_tick = 90000 / framerate_fps_; + for (int i = 0; i < num_input_frames_; ++i) { + VideoFrame frame = + VideoFrame::Builder() + .set_video_frame_buffer(frame_buffer_generator->NextFrame().buffer) + .set_timestamp_rtp(rtp_timestamp_) + .build(); + rtp_timestamp_ += rtp_tick; + RTC_CHECK_EQ(encoder_.Encode(frame, &next_frame_type_), + 
WEBRTC_VIDEO_CODEC_OK); + next_frame_type_[0] = VideoFrameType::kVideoFrameDelta; + } + + RTC_CHECK_EQ(encoder_.RegisterEncodeCompleteCallback(nullptr), + WEBRTC_VIDEO_CODEC_OK); + return encoded_frames; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/encoded_video_frame_producer.h b/third_party/libwebrtc/modules/video_coding/codecs/test/encoded_video_frame_producer.h new file mode 100644 index 0000000000..2216287b92 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/encoded_video_frame_producer.h @@ -0,0 +1,92 @@ +/* + * Copyright 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_TEST_ENCODED_VIDEO_FRAME_PRODUCER_H_ +#define MODULES_VIDEO_CODING_CODECS_TEST_ENCODED_VIDEO_FRAME_PRODUCER_H_ + +#include <stdint.h> + +#include <vector> + +#include "api/transport/rtp/dependency_descriptor.h" +#include "api/video/encoded_image.h" +#include "api/video_codecs/video_encoder.h" +#include "modules/video_coding/include/video_codec_interface.h" + +namespace webrtc { + +// Wrapper around VideoEncoder::Encode for convenient input (generates frames) +// and output (returns encoded frames instead of passing them to callback) +class EncodedVideoFrameProducer { + public: + struct EncodedFrame { + EncodedImage encoded_image; + CodecSpecificInfo codec_specific_info; + }; + + // `encoder` should be initialized, but shouldn't have `EncoderCallback` set. 
+ explicit EncodedVideoFrameProducer(VideoEncoder& encoder) + : encoder_(encoder) {} + EncodedVideoFrameProducer(const EncodedVideoFrameProducer&) = delete; + EncodedVideoFrameProducer& operator=(const EncodedVideoFrameProducer&) = + delete; + + // Number of the input frames to pass to the encoder. + EncodedVideoFrameProducer& SetNumInputFrames(int value); + // Encode next frame as key frame. + EncodedVideoFrameProducer& ForceKeyFrame(); + // Resolution of the input frames. + EncodedVideoFrameProducer& SetResolution(RenderResolution value); + + EncodedVideoFrameProducer& SetFramerateFps(int value); + + // Generates input video frames and encodes them with `encoder` provided in + // the constructor. Returns frame passed to the `OnEncodedImage` by wrapping + // `EncodedImageCallback` underneath. + std::vector<EncodedFrame> Encode(); + + private: + VideoEncoder& encoder_; + + uint32_t rtp_timestamp_ = 1000; + int num_input_frames_ = 1; + int framerate_fps_ = 30; + RenderResolution resolution_ = {320, 180}; + std::vector<VideoFrameType> next_frame_type_ = { + VideoFrameType::kVideoFrameKey}; +}; + +inline EncodedVideoFrameProducer& EncodedVideoFrameProducer::SetNumInputFrames( + int value) { + RTC_DCHECK_GT(value, 0); + num_input_frames_ = value; + return *this; +} + +inline EncodedVideoFrameProducer& EncodedVideoFrameProducer::ForceKeyFrame() { + next_frame_type_ = {VideoFrameType::kVideoFrameKey}; + return *this; +} + +inline EncodedVideoFrameProducer& EncodedVideoFrameProducer::SetResolution( + RenderResolution value) { + resolution_ = value; + return *this; +} + +inline EncodedVideoFrameProducer& EncodedVideoFrameProducer::SetFramerateFps( + int value) { + RTC_DCHECK_GT(value, 0); + framerate_fps_ = value; + return *this; +} + +} // namespace webrtc +#endif // MODULES_VIDEO_CODING_CODECS_TEST_ENCODED_VIDEO_FRAME_PRODUCER_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/objc_codec_factory_helper.h 
b/third_party/libwebrtc/modules/video_coding/codecs/test/objc_codec_factory_helper.h new file mode 100644 index 0000000000..475d0fdd08 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/objc_codec_factory_helper.h @@ -0,0 +1,28 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_TEST_OBJC_CODEC_FACTORY_HELPER_H_ +#define MODULES_VIDEO_CODING_CODECS_TEST_OBJC_CODEC_FACTORY_HELPER_H_ + +#include <memory> + +#include "api/video_codecs/video_decoder_factory.h" +#include "api/video_codecs/video_encoder_factory.h" + +namespace webrtc { +namespace test { + +std::unique_ptr<VideoEncoderFactory> CreateObjCEncoderFactory(); +std::unique_ptr<VideoDecoderFactory> CreateObjCDecoderFactory(); + +} // namespace test +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_TEST_OBJC_CODEC_FACTORY_HELPER_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/objc_codec_factory_helper.mm b/third_party/libwebrtc/modules/video_coding/codecs/test/objc_codec_factory_helper.mm new file mode 100644 index 0000000000..ed82376251 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/objc_codec_factory_helper.mm @@ -0,0 +1,30 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/test/objc_codec_factory_helper.h" + +#import "sdk/objc/components/video_codec/RTCVideoDecoderFactoryH264.h" +#import "sdk/objc/components/video_codec/RTCVideoEncoderFactoryH264.h" +#include "sdk/objc/native/api/video_decoder_factory.h" +#include "sdk/objc/native/api/video_encoder_factory.h" + +namespace webrtc { +namespace test { + +std::unique_ptr<VideoEncoderFactory> CreateObjCEncoderFactory() { + return ObjCToNativeVideoEncoderFactory([[RTC_OBJC_TYPE(RTCVideoEncoderFactoryH264) alloc] init]); +} + +std::unique_ptr<VideoDecoderFactory> CreateObjCDecoderFactory() { + return ObjCToNativeVideoDecoderFactory([[RTC_OBJC_TYPE(RTCVideoDecoderFactoryH264) alloc] init]); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/plot_webrtc_test_logs.py b/third_party/libwebrtc/modules/video_coding/codecs/test/plot_webrtc_test_logs.py new file mode 100755 index 0000000000..29e2d6f65a --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/plot_webrtc_test_logs.py @@ -0,0 +1,438 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Plots statistics from WebRTC integration test logs. + +Usage: $ python plot_webrtc_test_logs.py filename.txt +""" + +import numpy +import sys +import re + +import matplotlib.pyplot as plt + +# Log events. +EVENT_START = 'RUN ] CodecSettings/VideoCodecTestParameterized.' +EVENT_END = 'OK ] CodecSettings/VideoCodecTestParameterized.' + +# Metrics to plot, tuple: (name to parse in file, label to use when plotting). 
+WIDTH = ('width', 'width') +HEIGHT = ('height', 'height') +FILENAME = ('filename', 'clip') +CODEC_TYPE = ('codec_type', 'Codec') +ENCODER_IMPLEMENTATION_NAME = ('enc_impl_name', 'enc name') +DECODER_IMPLEMENTATION_NAME = ('dec_impl_name', 'dec name') +CODEC_IMPLEMENTATION_NAME = ('codec_impl_name', 'codec name') +CORES = ('num_cores', 'CPU cores used') +DENOISING = ('denoising', 'denoising') +RESILIENCE = ('resilience', 'resilience') +ERROR_CONCEALMENT = ('error_concealment', 'error concealment') +CPU_USAGE = ('cpu_usage_percent', 'CPU usage (%)') +BITRATE = ('target_bitrate_kbps', 'target bitrate (kbps)') +FRAMERATE = ('input_framerate_fps', 'fps') +QP = ('avg_qp', 'QP avg') +PSNR = ('avg_psnr', 'PSNR (dB)') +SSIM = ('avg_ssim', 'SSIM') +ENC_BITRATE = ('bitrate_kbps', 'encoded bitrate (kbps)') +NUM_FRAMES = ('num_input_frames', 'num frames') +NUM_DROPPED_FRAMES = ('num_dropped_frames', 'num dropped frames') +TIME_TO_TARGET = ('time_to_reach_target_bitrate_sec', + 'time to reach target rate (sec)') +ENCODE_SPEED_FPS = ('enc_speed_fps', 'encode speed (fps)') +DECODE_SPEED_FPS = ('dec_speed_fps', 'decode speed (fps)') +AVG_KEY_FRAME_SIZE = ('avg_key_frame_size_bytes', 'avg key frame size (bytes)') +AVG_DELTA_FRAME_SIZE = ('avg_delta_frame_size_bytes', + 'avg delta frame size (bytes)') + +# Settings. +SETTINGS = [ + WIDTH, + HEIGHT, + FILENAME, + NUM_FRAMES, +] + +# Settings, options for x-axis. +X_SETTINGS = [ + CORES, + FRAMERATE, + DENOISING, + RESILIENCE, + ERROR_CONCEALMENT, + BITRATE, # TODO(asapersson): Needs to be last. +] + +# Settings, options for subplots. +SUBPLOT_SETTINGS = [ + CODEC_TYPE, + ENCODER_IMPLEMENTATION_NAME, + DECODER_IMPLEMENTATION_NAME, + CODEC_IMPLEMENTATION_NAME, +] + X_SETTINGS + +# Results. 
+RESULTS = [ + PSNR, + SSIM, + ENC_BITRATE, + NUM_DROPPED_FRAMES, + TIME_TO_TARGET, + ENCODE_SPEED_FPS, + DECODE_SPEED_FPS, + QP, + CPU_USAGE, + AVG_KEY_FRAME_SIZE, + AVG_DELTA_FRAME_SIZE, +] + +METRICS_TO_PARSE = SETTINGS + SUBPLOT_SETTINGS + RESULTS + +Y_METRICS = [res[1] for res in RESULTS] + +# Parameters for plotting. +FIG_SIZE_SCALE_FACTOR_X = 1.6 +FIG_SIZE_SCALE_FACTOR_Y = 1.8 +GRID_COLOR = [0.45, 0.45, 0.45] + + +def ParseSetting(filename, setting): + """Parses setting from file. + + Args: + filename: The name of the file. + setting: Name of setting to parse (e.g. width). + + Returns: + A list holding parsed settings, e.g. ['width: 128.0', 'width: 160.0'] """ + + settings = [] + + settings_file = open(filename) + while True: + line = settings_file.readline() + if not line: + break + if re.search(r'%s' % EVENT_START, line): + # Parse event. + parsed = {} + while True: + line = settings_file.readline() + if not line: + break + if re.search(r'%s' % EVENT_END, line): + # Add parsed setting to list. + if setting in parsed: + s = setting + ': ' + str(parsed[setting]) + if s not in settings: + settings.append(s) + break + + TryFindMetric(parsed, line) + + settings_file.close() + return settings + + +def ParseMetrics(filename, setting1, setting2): + """Parses metrics from file. + + Args: + filename: The name of the file. + setting1: First setting for sorting metrics (e.g. width). + setting2: Second setting for sorting metrics (e.g. CPU cores used). + + Returns: + A dictionary holding parsed metrics. 
+ + For example: + metrics[key1][key2][measurement] + + metrics = { + "width: 352": { + "CPU cores used: 1.0": { + "encode time (us)": [0.718005, 0.806925, 0.909726, 0.931835, 0.953642], + "PSNR (dB)": [25.546029, 29.465518, 34.723535, 36.428493, 38.686551], + "bitrate (kbps)": [50, 100, 300, 500, 1000] + }, + "CPU cores used: 2.0": { + "encode time (us)": [0.718005, 0.806925, 0.909726, 0.931835, 0.953642], + "PSNR (dB)": [25.546029, 29.465518, 34.723535, 36.428493, 38.686551], + "bitrate (kbps)": [50, 100, 300, 500, 1000] + }, + }, + "width: 176": { + "CPU cores used: 1.0": { + "encode time (us)": [0.857897, 0.91608, 0.959173, 0.971116, 0.980961], + "PSNR (dB)": [30.243646, 33.375592, 37.574387, 39.42184, 41.437897], + "bitrate (kbps)": [50, 100, 300, 500, 1000] + }, + } + } """ + + metrics = {} + + # Parse events. + settings_file = open(filename) + while True: + line = settings_file.readline() + if not line: + break + if re.search(r'%s' % EVENT_START, line): + # Parse event. + parsed = {} + while True: + line = settings_file.readline() + if not line: + break + if re.search(r'%s' % EVENT_END, line): + # Add parsed values to metrics. + key1 = setting1 + ': ' + str(parsed[setting1]) + key2 = setting2 + ': ' + str(parsed[setting2]) + if key1 not in metrics: + metrics[key1] = {} + if key2 not in metrics[key1]: + metrics[key1][key2] = {} + + for label in parsed: + if label not in metrics[key1][key2]: + metrics[key1][key2][label] = [] + metrics[key1][key2][label].append(parsed[label]) + + break + + TryFindMetric(parsed, line) + + settings_file.close() + return metrics + + +def TryFindMetric(parsed, line): + for metric in METRICS_TO_PARSE: + name = metric[0] + label = metric[1] + if re.search(r'%s' % name, line): + found, value = GetMetric(name, line) + if found: + parsed[label] = value + return + + +def GetMetric(name, string): + # Float (e.g. bitrate = 98.8253). 
+ pattern = r'%s\s*[:=]\s*([+-]?\d+\.*\d*)' % name + m = re.search(r'%s' % pattern, string) + if m is not None: + return StringToFloat(m.group(1)) + + # Alphanumeric characters (e.g. codec type : VP8). + pattern = r'%s\s*[:=]\s*(\w+)' % name + m = re.search(r'%s' % pattern, string) + if m is not None: + return True, m.group(1) + + return False, -1 + + +def StringToFloat(value): + try: + value = float(value) + except ValueError: + print "Not a float, skipped %s" % value + return False, -1 + + return True, value + + +def Plot(y_metric, x_metric, metrics): + """Plots y_metric vs x_metric per key in metrics. + + For example: + y_metric = 'PSNR (dB)' + x_metric = 'bitrate (kbps)' + metrics = { + "CPU cores used: 1.0": { + "PSNR (dB)": [25.546029, 29.465518, 34.723535, 36.428493, 38.686551], + "bitrate (kbps)": [50, 100, 300, 500, 1000] + }, + "CPU cores used: 2.0": { + "PSNR (dB)": [25.546029, 29.465518, 34.723535, 36.428493, 38.686551], + "bitrate (kbps)": [50, 100, 300, 500, 1000] + }, + } + """ + for key in sorted(metrics): + data = metrics[key] + if y_metric not in data: + print "Failed to find metric: %s" % y_metric + continue + + y = numpy.array(data[y_metric]) + x = numpy.array(data[x_metric]) + if len(y) != len(x): + print "Length mismatch for %s, %s" % (y, x) + continue + + label = y_metric + ' - ' + str(key) + + plt.plot(x, + y, + label=label, + linewidth=1.5, + marker='o', + markersize=5, + markeredgewidth=0.0) + + +def PlotFigure(settings, y_metrics, x_metric, metrics, title): + """Plots metrics in y_metrics list. One figure is plotted and each entry + in the list is plotted in a subplot (and sorted per settings). + + For example: + settings = ['width: 128.0', 'width: 160.0']. Sort subplot per setting. + y_metrics = ['PSNR (dB)', 'PSNR (dB)']. Metric to plot per subplot. 
+ x_metric = 'bitrate (kbps)' + + """ + + plt.figure() + plt.suptitle(title, fontsize='large', fontweight='bold') + settings.sort() + rows = len(settings) + cols = 1 + pos = 1 + while pos <= rows: + plt.rc('grid', color=GRID_COLOR) + ax = plt.subplot(rows, cols, pos) + plt.grid() + plt.setp(ax.get_xticklabels(), visible=(pos == rows), fontsize='large') + plt.setp(ax.get_yticklabels(), fontsize='large') + setting = settings[pos - 1] + Plot(y_metrics[pos - 1], x_metric, metrics[setting]) + if setting.startswith(WIDTH[1]): + plt.title(setting, fontsize='medium') + plt.legend(fontsize='large', loc='best') + pos += 1 + + plt.xlabel(x_metric, fontsize='large') + plt.subplots_adjust(left=0.06, + right=0.98, + bottom=0.05, + top=0.94, + hspace=0.08) + + +def GetTitle(filename, setting): + title = '' + if setting != CODEC_IMPLEMENTATION_NAME[1] and setting != CODEC_TYPE[1]: + codec_types = ParseSetting(filename, CODEC_TYPE[1]) + for i in range(0, len(codec_types)): + title += codec_types[i] + ', ' + + if setting != CORES[1]: + cores = ParseSetting(filename, CORES[1]) + for i in range(0, len(cores)): + title += cores[i].split('.')[0] + ', ' + + if setting != FRAMERATE[1]: + framerate = ParseSetting(filename, FRAMERATE[1]) + for i in range(0, len(framerate)): + title += framerate[i].split('.')[0] + ', ' + + if (setting != CODEC_IMPLEMENTATION_NAME[1] + and setting != ENCODER_IMPLEMENTATION_NAME[1]): + enc_names = ParseSetting(filename, ENCODER_IMPLEMENTATION_NAME[1]) + for i in range(0, len(enc_names)): + title += enc_names[i] + ', ' + + if (setting != CODEC_IMPLEMENTATION_NAME[1] + and setting != DECODER_IMPLEMENTATION_NAME[1]): + dec_names = ParseSetting(filename, DECODER_IMPLEMENTATION_NAME[1]) + for i in range(0, len(dec_names)): + title += dec_names[i] + ', ' + + filenames = ParseSetting(filename, FILENAME[1]) + title += filenames[0].split('_')[0] + + num_frames = ParseSetting(filename, NUM_FRAMES[1]) + for i in range(0, len(num_frames)): + title += ' (' + 
num_frames[i].split('.')[0] + ')' + + return title + + +def ToString(input_list): + return ToStringWithoutMetric(input_list, ('', '')) + + +def ToStringWithoutMetric(input_list, metric): + i = 1 + output_str = "" + for m in input_list: + if m != metric: + output_str = output_str + ("%s. %s\n" % (i, m[1])) + i += 1 + return output_str + + +def GetIdx(text_list): + return int(raw_input(text_list)) - 1 + + +def main(): + filename = sys.argv[1] + + # Setup. + idx_metric = GetIdx("Choose metric:\n0. All\n%s" % ToString(RESULTS)) + if idx_metric == -1: + # Plot all metrics. One subplot for each metric. + # Per subplot: metric vs bitrate (per resolution). + cores = ParseSetting(filename, CORES[1]) + setting1 = CORES[1] + setting2 = WIDTH[1] + sub_keys = [cores[0]] * len(Y_METRICS) + y_metrics = Y_METRICS + x_metric = BITRATE[1] + else: + resolutions = ParseSetting(filename, WIDTH[1]) + idx = GetIdx("Select metric for x-axis:\n%s" % ToString(X_SETTINGS)) + if X_SETTINGS[idx] == BITRATE: + idx = GetIdx("Plot per:\n%s" % + ToStringWithoutMetric(SUBPLOT_SETTINGS, BITRATE)) + idx_setting = METRICS_TO_PARSE.index(SUBPLOT_SETTINGS[idx]) + # Plot one metric. One subplot for each resolution. + # Per subplot: metric vs bitrate (per setting). + setting1 = WIDTH[1] + setting2 = METRICS_TO_PARSE[idx_setting][1] + sub_keys = resolutions + y_metrics = [RESULTS[idx_metric][1]] * len(sub_keys) + x_metric = BITRATE[1] + else: + # Plot one metric. One subplot for each resolution. + # Per subplot: metric vs setting (per bitrate). + setting1 = WIDTH[1] + setting2 = BITRATE[1] + sub_keys = resolutions + y_metrics = [RESULTS[idx_metric][1]] * len(sub_keys) + x_metric = X_SETTINGS[idx][1] + + metrics = ParseMetrics(filename, setting1, setting2) + + # Stretch fig size. 
+ figsize = plt.rcParams["figure.figsize"] + figsize[0] *= FIG_SIZE_SCALE_FACTOR_X + figsize[1] *= FIG_SIZE_SCALE_FACTOR_Y + plt.rcParams["figure.figsize"] = figsize + + PlotFigure(sub_keys, y_metrics, x_metric, metrics, + GetTitle(filename, setting2)) + + plt.show() + + +if __name__ == '__main__': + main() diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/video_codec_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/test/video_codec_unittest.cc new file mode 100644 index 0000000000..b81f658dd0 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/video_codec_unittest.cc @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/codecs/test/video_codec_unittest.h" + +#include <utility> + +#include "api/test/create_frame_generator.h" +#include "api/video_codecs/video_encoder.h" +#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "test/video_codec_settings.h" + +static const int kEncodeTimeoutMs = 100; +static const int kDecodeTimeoutMs = 25; +// Set bitrate to get higher quality. +static const int kStartBitrate = 300; +static const int kMaxBitrate = 4000; +static const int kWidth = 176; // Width of the input image. +static const int kHeight = 144; // Height of the input image. +static const int kMaxFramerate = 30; // Arbitrary value. 
+ +namespace webrtc { +namespace { +const VideoEncoder::Capabilities kCapabilities(false); +} + +EncodedImageCallback::Result +VideoCodecUnitTest::FakeEncodeCompleteCallback::OnEncodedImage( + const EncodedImage& frame, + const CodecSpecificInfo* codec_specific_info) { + MutexLock lock(&test_->encoded_frame_section_); + test_->encoded_frames_.push_back(frame); + RTC_DCHECK(codec_specific_info); + test_->codec_specific_infos_.push_back(*codec_specific_info); + if (!test_->wait_for_encoded_frames_threshold_) { + test_->encoded_frame_event_.Set(); + return Result(Result::OK); + } + + if (test_->encoded_frames_.size() == + test_->wait_for_encoded_frames_threshold_) { + test_->wait_for_encoded_frames_threshold_ = 1; + test_->encoded_frame_event_.Set(); + } + return Result(Result::OK); +} + +void VideoCodecUnitTest::FakeDecodeCompleteCallback::Decoded( + VideoFrame& frame, + absl::optional<int32_t> decode_time_ms, + absl::optional<uint8_t> qp) { + MutexLock lock(&test_->decoded_frame_section_); + test_->decoded_frame_.emplace(frame); + test_->decoded_qp_ = qp; + test_->decoded_frame_event_.Set(); +} + +void VideoCodecUnitTest::SetUp() { + webrtc::test::CodecSettings(kVideoCodecVP8, &codec_settings_); + codec_settings_.startBitrate = kStartBitrate; + codec_settings_.maxBitrate = kMaxBitrate; + codec_settings_.maxFramerate = kMaxFramerate; + codec_settings_.width = kWidth; + codec_settings_.height = kHeight; + + ModifyCodecSettings(&codec_settings_); + + input_frame_generator_ = test::CreateSquareFrameGenerator( + codec_settings_.width, codec_settings_.height, + test::FrameGeneratorInterface::OutputType::kI420, absl::optional<int>()); + + encoder_ = CreateEncoder(); + decoder_ = CreateDecoder(); + encoder_->RegisterEncodeCompleteCallback(&encode_complete_callback_); + decoder_->RegisterDecodeCompleteCallback(&decode_complete_callback_); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode( + &codec_settings_, + VideoEncoder::Settings(kCapabilities, 1 /* number of 
cores */, + 0 /* max payload size (unused) */))); + + VideoDecoder::Settings decoder_settings; + decoder_settings.set_codec_type(codec_settings_.codecType); + decoder_settings.set_max_render_resolution( + {codec_settings_.width, codec_settings_.height}); + EXPECT_TRUE(decoder_->Configure(decoder_settings)); +} + +void VideoCodecUnitTest::ModifyCodecSettings(VideoCodec* codec_settings) {} + +VideoFrame VideoCodecUnitTest::NextInputFrame() { + test::FrameGeneratorInterface::VideoFrameData frame_data = + input_frame_generator_->NextFrame(); + VideoFrame input_frame = VideoFrame::Builder() + .set_video_frame_buffer(frame_data.buffer) + .set_update_rect(frame_data.update_rect) + .build(); + + const uint32_t timestamp = + last_input_frame_timestamp_ + + kVideoPayloadTypeFrequency / codec_settings_.maxFramerate; + input_frame.set_timestamp(timestamp); + + last_input_frame_timestamp_ = timestamp; + return input_frame; +} + +bool VideoCodecUnitTest::WaitForEncodedFrame( + EncodedImage* frame, + CodecSpecificInfo* codec_specific_info) { + std::vector<EncodedImage> frames; + std::vector<CodecSpecificInfo> codec_specific_infos; + if (!WaitForEncodedFrames(&frames, &codec_specific_infos)) + return false; + EXPECT_EQ(frames.size(), static_cast<size_t>(1)); + EXPECT_EQ(frames.size(), codec_specific_infos.size()); + *frame = frames[0]; + *codec_specific_info = codec_specific_infos[0]; + return true; +} + +void VideoCodecUnitTest::SetWaitForEncodedFramesThreshold(size_t num_frames) { + MutexLock lock(&encoded_frame_section_); + wait_for_encoded_frames_threshold_ = num_frames; +} + +bool VideoCodecUnitTest::WaitForEncodedFrames( + std::vector<EncodedImage>* frames, + std::vector<CodecSpecificInfo>* codec_specific_info) { + EXPECT_TRUE(encoded_frame_event_.Wait(kEncodeTimeoutMs)) + << "Timed out while waiting for encoded frame."; + // This becomes unsafe if there are multiple threads waiting for frames. 
+ MutexLock lock(&encoded_frame_section_); + EXPECT_FALSE(encoded_frames_.empty()); + EXPECT_FALSE(codec_specific_infos_.empty()); + EXPECT_EQ(encoded_frames_.size(), codec_specific_infos_.size()); + if (!encoded_frames_.empty()) { + *frames = encoded_frames_; + encoded_frames_.clear(); + RTC_DCHECK(!codec_specific_infos_.empty()); + *codec_specific_info = codec_specific_infos_; + codec_specific_infos_.clear(); + return true; + } else { + return false; + } +} + +bool VideoCodecUnitTest::WaitForDecodedFrame(std::unique_ptr<VideoFrame>* frame, + absl::optional<uint8_t>* qp) { + bool ret = decoded_frame_event_.Wait(kDecodeTimeoutMs); + EXPECT_TRUE(ret) << "Timed out while waiting for a decoded frame."; + // This becomes unsafe if there are multiple threads waiting for frames. + MutexLock lock(&decoded_frame_section_); + EXPECT_TRUE(decoded_frame_); + if (decoded_frame_) { + frame->reset(new VideoFrame(std::move(*decoded_frame_))); + *qp = decoded_qp_; + decoded_frame_.reset(); + return true; + } else { + return false; + } +} + +size_t VideoCodecUnitTest::GetNumEncodedFrames() { + MutexLock lock(&encoded_frame_section_); + return encoded_frames_.size(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/video_codec_unittest.h b/third_party/libwebrtc/modules/video_coding/codecs/test/video_codec_unittest.h new file mode 100644 index 0000000000..7d05882b63 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/video_codec_unittest.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_VIDEO_CODING_CODECS_TEST_VIDEO_CODEC_UNITTEST_H_ +#define MODULES_VIDEO_CODING_CODECS_TEST_VIDEO_CODEC_UNITTEST_H_ + +#include <memory> +#include <vector> + +#include "api/test/frame_generator_interface.h" +#include "api/video_codecs/video_decoder.h" +#include "api/video_codecs/video_encoder.h" +#include "modules/video_coding/include/video_codec_interface.h" +#include "modules/video_coding/utility/vp8_header_parser.h" +#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h" +#include "rtc_base/event.h" +#include "rtc_base/synchronization/mutex.h" +#include "rtc_base/thread_annotations.h" +#include "test/gtest.h" + +namespace webrtc { + +class VideoCodecUnitTest : public ::testing::Test { + public: + VideoCodecUnitTest() + : encode_complete_callback_(this), + decode_complete_callback_(this), + wait_for_encoded_frames_threshold_(1), + last_input_frame_timestamp_(0) {} + + protected: + class FakeEncodeCompleteCallback : public webrtc::EncodedImageCallback { + public: + explicit FakeEncodeCompleteCallback(VideoCodecUnitTest* test) + : test_(test) {} + + Result OnEncodedImage(const EncodedImage& frame, + const CodecSpecificInfo* codec_specific_info); + + private: + VideoCodecUnitTest* const test_; + }; + + class FakeDecodeCompleteCallback : public webrtc::DecodedImageCallback { + public: + explicit FakeDecodeCompleteCallback(VideoCodecUnitTest* test) + : test_(test) {} + + int32_t Decoded(VideoFrame& frame) override { + RTC_DCHECK_NOTREACHED(); + return -1; + } + int32_t Decoded(VideoFrame& frame, int64_t decode_time_ms) override { + RTC_DCHECK_NOTREACHED(); + return -1; + } + void Decoded(VideoFrame& frame, + absl::optional<int32_t> decode_time_ms, + absl::optional<uint8_t> qp) override; + + private: + VideoCodecUnitTest* const test_; + }; + + virtual std::unique_ptr<VideoEncoder> CreateEncoder() = 0; + virtual std::unique_ptr<VideoDecoder> CreateDecoder() = 0; + + void SetUp() override; + + virtual void 
+ ModifyCodecSettings(VideoCodec* codec_settings); + + VideoFrame NextInputFrame(); + + // Helper method for waiting for a single encoded frame. + bool WaitForEncodedFrame(EncodedImage* frame, + CodecSpecificInfo* codec_specific_info); + + // Helper methods for waiting for multiple encoded frames. Caller must + // define how many frames are to be waited for via `num_frames` before calling + // Encode(). Then, they can expect to retrieve them via WaitForEncodedFrames(). + void SetWaitForEncodedFramesThreshold(size_t num_frames); + bool WaitForEncodedFrames( + std::vector<EncodedImage>* frames, + std::vector<CodecSpecificInfo>* codec_specific_info); + + // Helper method for waiting for a single decoded frame. + bool WaitForDecodedFrame(std::unique_ptr<VideoFrame>* frame, + absl::optional<uint8_t>* qp); + + size_t GetNumEncodedFrames(); + + VideoCodec codec_settings_; + + std::unique_ptr<VideoEncoder> encoder_; + std::unique_ptr<VideoDecoder> decoder_; + std::unique_ptr<test::FrameGeneratorInterface> input_frame_generator_; + + private: + FakeEncodeCompleteCallback encode_complete_callback_; + FakeDecodeCompleteCallback decode_complete_callback_; + + rtc::Event encoded_frame_event_; + Mutex encoded_frame_section_; + size_t wait_for_encoded_frames_threshold_; + std::vector<EncodedImage> encoded_frames_ + RTC_GUARDED_BY(encoded_frame_section_); + std::vector<CodecSpecificInfo> codec_specific_infos_ + RTC_GUARDED_BY(encoded_frame_section_); + + rtc::Event decoded_frame_event_; + Mutex decoded_frame_section_; + absl::optional<VideoFrame> decoded_frame_ + RTC_GUARDED_BY(decoded_frame_section_); + absl::optional<uint8_t> decoded_qp_ RTC_GUARDED_BY(decoded_frame_section_); + + uint32_t last_input_frame_timestamp_; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_TEST_VIDEO_CODEC_UNITTEST_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/video_encoder_decoder_instantiation_tests.cc 
b/third_party/libwebrtc/modules/video_coding/codecs/test/video_encoder_decoder_instantiation_tests.cc new file mode 100644 index 0000000000..41f2304748 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/video_encoder_decoder_instantiation_tests.cc @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <memory> +#include <vector> + +#include "api/video_codecs/sdp_video_format.h" +#include "api/video_codecs/video_decoder.h" +#include "api/video_codecs/video_decoder_factory.h" +#include "api/video_codecs/video_encoder.h" +#include "api/video_codecs/video_encoder_factory.h" +#if defined(WEBRTC_ANDROID) +#include "modules/video_coding/codecs/test/android_codec_factory_helper.h" +#elif defined(WEBRTC_IOS) +#include "modules/video_coding/codecs/test/objc_codec_factory_helper.h" +#endif +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/video_codec_settings.h" + +namespace webrtc { +namespace test { + +namespace { + +using ::testing::NotNull; + +const VideoEncoder::Capabilities kCapabilities(false); + +int32_t InitEncoder(VideoCodecType codec_type, VideoEncoder* encoder) { + VideoCodec codec; + CodecSettings(codec_type, &codec); + codec.width = 640; + codec.height = 480; + codec.maxFramerate = 30; + RTC_CHECK(encoder); + return encoder->InitEncode( + &codec, VideoEncoder::Settings(kCapabilities, 1 /* number_of_cores */, + 1200 /* max_payload_size */)); +} + +VideoDecoder::Settings DecoderSettings(VideoCodecType codec_type) { + VideoDecoder::Settings settings; + settings.set_max_render_resolution({640, 480}); + settings.set_codec_type(codec_type); + 
return settings; +} + +} // namespace + +class VideoEncoderDecoderInstantiationTest + : public ::testing::Test, + public ::testing::WithParamInterface<::testing::tuple<int, int>> { + protected: + VideoEncoderDecoderInstantiationTest() + : vp8_format_("VP8"), + vp9_format_("VP9"), + h264cbp_format_("H264"), + num_encoders_(::testing::get<0>(GetParam())), + num_decoders_(::testing::get<1>(GetParam())) { +#if defined(WEBRTC_ANDROID) + InitializeAndroidObjects(); + encoder_factory_ = CreateAndroidEncoderFactory(); + decoder_factory_ = CreateAndroidDecoderFactory(); +#elif defined(WEBRTC_IOS) + encoder_factory_ = CreateObjCEncoderFactory(); + decoder_factory_ = CreateObjCDecoderFactory(); +#else + RTC_DCHECK_NOTREACHED() << "Only support Android and iOS."; +#endif + } + + ~VideoEncoderDecoderInstantiationTest() { + for (auto& encoder : encoders_) { + encoder->Release(); + } + for (auto& decoder : decoders_) { + decoder->Release(); + } + } + + const SdpVideoFormat vp8_format_; + const SdpVideoFormat vp9_format_; + const SdpVideoFormat h264cbp_format_; + std::unique_ptr<VideoEncoderFactory> encoder_factory_; + std::unique_ptr<VideoDecoderFactory> decoder_factory_; + + const int num_encoders_; + const int num_decoders_; + std::vector<std::unique_ptr<VideoEncoder>> encoders_; + std::vector<std::unique_ptr<VideoDecoder>> decoders_; +}; + +INSTANTIATE_TEST_SUITE_P(MultipleEncoders, + VideoEncoderDecoderInstantiationTest, + ::testing::Combine(::testing::Range(1, 4), + ::testing::Range(1, 2))); + +INSTANTIATE_TEST_SUITE_P(MultipleDecoders, + VideoEncoderDecoderInstantiationTest, + ::testing::Combine(::testing::Range(1, 2), + ::testing::Range(1, 9))); + +INSTANTIATE_TEST_SUITE_P(MultipleEncodersDecoders, + VideoEncoderDecoderInstantiationTest, + ::testing::Combine(::testing::Range(1, 4), + ::testing::Range(1, 9))); + +// TODO(brandtr): Check that the factories actually support the codecs before +// trying to instantiate. 
Currently, we will just crash with a Java exception +// if the factory does not support the codec. +TEST_P(VideoEncoderDecoderInstantiationTest, DISABLED_InstantiateVp8Codecs) { + for (int i = 0; i < num_encoders_; ++i) { + std::unique_ptr<VideoEncoder> encoder = + encoder_factory_->CreateVideoEncoder(vp8_format_); + EXPECT_EQ(0, InitEncoder(kVideoCodecVP8, encoder.get())); + encoders_.emplace_back(std::move(encoder)); + } + + for (int i = 0; i < num_decoders_; ++i) { + std::unique_ptr<VideoDecoder> decoder = + decoder_factory_->CreateVideoDecoder(vp8_format_); + ASSERT_THAT(decoder, NotNull()); + EXPECT_TRUE(decoder->Configure(DecoderSettings(kVideoCodecVP8))); + decoders_.emplace_back(std::move(decoder)); + } +} + +TEST_P(VideoEncoderDecoderInstantiationTest, + DISABLED_InstantiateH264CBPCodecs) { + for (int i = 0; i < num_encoders_; ++i) { + std::unique_ptr<VideoEncoder> encoder = + encoder_factory_->CreateVideoEncoder(h264cbp_format_); + EXPECT_EQ(0, InitEncoder(kVideoCodecH264, encoder.get())); + encoders_.emplace_back(std::move(encoder)); + } + + for (int i = 0; i < num_decoders_; ++i) { + std::unique_ptr<VideoDecoder> decoder = + decoder_factory_->CreateVideoDecoder(h264cbp_format_); + ASSERT_THAT(decoder, NotNull()); + EXPECT_TRUE(decoder->Configure(DecoderSettings(kVideoCodecH264))); + decoders_.push_back(std::move(decoder)); + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_av1.cc b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_av1.cc new file mode 100644 index 0000000000..4fa343e706 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_av1.cc @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <memory> +#include <vector> + +#include "api/test/create_videocodec_test_fixture.h" +#include "api/test/video/function_video_encoder_factory.h" +#include "api/video_codecs/sdp_video_format.h" +#include "media/base/media_constants.h" +#include "media/engine/internal_decoder_factory.h" +#include "media/engine/internal_encoder_factory.h" +#include "media/engine/simulcast_encoder_adapter.h" +#include "modules/video_coding/codecs/av1/libaom_av1_decoder.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { +namespace test { +namespace { +// Test clips settings. +constexpr int kCifWidth = 352; +constexpr int kCifHeight = 288; +constexpr int kNumFramesLong = 300; + +VideoCodecTestFixture::Config CreateConfig(std::string filename) { + VideoCodecTestFixture::Config config; + config.filename = filename; + config.filepath = ResourcePath(config.filename, "yuv"); + config.num_frames = kNumFramesLong; + config.use_single_core = true; + return config; +} + +TEST(VideoCodecTestAv1, HighBitrate) { + auto config = CreateConfig("foreman_cif"); + config.SetCodecSettings(cricket::kAv1CodecName, 1, 1, 1, false, true, true, + kCifWidth, kCifHeight); + config.codec_settings.SetScalabilityMode(ScalabilityMode::kL1T1); + config.num_frames = kNumFramesLong; + auto fixture = CreateVideoCodecTestFixture(config); + + std::vector<RateProfile> rate_profiles = {{500, 30, 0}}; + + std::vector<RateControlThresholds> rc_thresholds = { + {12, 1, 0, 1, 0.3, 0.1, 0, 1}}; + + std::vector<QualityThresholds> quality_thresholds = {{37, 34, 0.94, 0.91}}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +TEST(VideoCodecTestAv1, VeryLowBitrate) { + auto config = CreateConfig("foreman_cif"); + 
config.SetCodecSettings(cricket::kAv1CodecName, 1, 1, 1, false, true, true, + kCifWidth, kCifHeight); + config.codec_settings.SetScalabilityMode(ScalabilityMode::kL1T1); + auto fixture = CreateVideoCodecTestFixture(config); + + std::vector<RateProfile> rate_profiles = {{50, 30, 0}}; + + std::vector<RateControlThresholds> rc_thresholds = { + {15, 8, 75, 2, 2, 2, 2, 1}}; + + std::vector<QualityThresholds> quality_thresholds = {{28, 24.8, 0.70, 0.55}}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +#if !defined(WEBRTC_ANDROID) +constexpr int kHdWidth = 1280; +constexpr int kHdHeight = 720; +TEST(VideoCodecTestAv1, Hd) { + auto config = CreateConfig("ConferenceMotion_1280_720_50"); + config.SetCodecSettings(cricket::kAv1CodecName, 1, 1, 1, false, true, true, + kHdWidth, kHdHeight); + config.codec_settings.SetScalabilityMode(ScalabilityMode::kL1T1); + config.num_frames = kNumFramesLong; + auto fixture = CreateVideoCodecTestFixture(config); + + std::vector<RateProfile> rate_profiles = {{1000, 50, 0}}; + + std::vector<RateControlThresholds> rc_thresholds = { + {13, 3, 0, 1, 0.3, 0.1, 0, 1}}; + + std::vector<QualityThresholds> quality_thresholds = { + {35.9, 31.5, 0.925, 0.865}}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} +#endif + +} // namespace +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_fixture_config_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_fixture_config_unittest.cc new file mode 100644 index 0000000000..126aa93ee8 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_fixture_config_unittest.cc @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stddef.h> + +#include "api/test/videocodec_test_fixture.h" +#include "api/video_codecs/video_codec.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/video_codec_settings.h" + +using ::testing::ElementsAre; + +namespace webrtc { +namespace test { + +using Config = VideoCodecTestFixture::Config; + +namespace { +const size_t kNumTemporalLayers = 2; +} // namespace + +TEST(Config, NumberOfCoresWithUseSingleCore) { + Config config; + config.use_single_core = true; + EXPECT_EQ(1u, config.NumberOfCores()); +} + +TEST(Config, NumberOfCoresWithoutUseSingleCore) { + Config config; + config.use_single_core = false; + EXPECT_GE(config.NumberOfCores(), 1u); +} + +TEST(Config, NumberOfTemporalLayersIsOne) { + Config config; + webrtc::test::CodecSettings(kVideoCodecH264, &config.codec_settings); + EXPECT_EQ(1u, config.NumberOfTemporalLayers()); +} + +TEST(Config, NumberOfTemporalLayers_Vp8) { + Config config; + webrtc::test::CodecSettings(kVideoCodecVP8, &config.codec_settings); + config.codec_settings.VP8()->numberOfTemporalLayers = kNumTemporalLayers; + EXPECT_EQ(kNumTemporalLayers, config.NumberOfTemporalLayers()); +} + +TEST(Config, NumberOfTemporalLayers_Vp9) { + Config config; + webrtc::test::CodecSettings(kVideoCodecVP9, &config.codec_settings); + config.codec_settings.VP9()->numberOfTemporalLayers = kNumTemporalLayers; + EXPECT_EQ(kNumTemporalLayers, config.NumberOfTemporalLayers()); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_fixture_impl.cc 
b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_fixture_impl.cc new file mode 100644 index 0000000000..cd940c9111 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_fixture_impl.cc @@ -0,0 +1,831 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/codecs/test/videocodec_test_fixture_impl.h" + +#include <stdint.h> +#include <stdio.h> + +#include <algorithm> +#include <cmath> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "absl/strings/str_replace.h" +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/transport/field_trial_based_config.h" +#include "api/video/video_bitrate_allocation.h" +#include "api/video_codecs/h264_profile_level_id.h" +#include "api/video_codecs/sdp_video_format.h" +#include "api/video_codecs/video_codec.h" +#include "api/video_codecs/video_decoder.h" +#include "api/video_codecs/video_decoder_factory_template.h" +#include "api/video_codecs/video_decoder_factory_template_dav1d_adapter.h" +#include "api/video_codecs/video_decoder_factory_template_libvpx_vp8_adapter.h" +#include "api/video_codecs/video_decoder_factory_template_libvpx_vp9_adapter.h" +#include "api/video_codecs/video_decoder_factory_template_open_h264_adapter.h" +#include "api/video_codecs/video_encoder_config.h" +#include "api/video_codecs/video_encoder_factory.h" +#include "api/video_codecs/video_encoder_factory_template.h" +#include "api/video_codecs/video_encoder_factory_template_libaom_av1_adapter.h" +#include 
"api/video_codecs/video_encoder_factory_template_libvpx_vp8_adapter.h" +#include "api/video_codecs/video_encoder_factory_template_libvpx_vp9_adapter.h" +#include "api/video_codecs/video_encoder_factory_template_open_h264_adapter.h" +#include "common_video/h264/h264_common.h" +#include "media/base/media_constants.h" +#include "media/engine/simulcast.h" +#include "modules/video_coding/codecs/h264/include/h264_globals.h" +#include "modules/video_coding/codecs/vp9/svc_config.h" +#include "modules/video_coding/utility/ivf_file_writer.h" +#include "rtc_base/checks.h" +#include "rtc_base/cpu_time.h" +#include "rtc_base/logging.h" +#include "rtc_base/strings/string_builder.h" +#include "rtc_base/time_utils.h" +#include "system_wrappers/include/cpu_info.h" +#include "system_wrappers/include/sleep.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" +#include "test/testsupport/frame_writer.h" +#include "test/testsupport/perf_test.h" +#include "test/video_codec_settings.h" + +namespace webrtc { +namespace test { + +using VideoStatistics = VideoCodecTestStats::VideoStatistics; + +namespace { +const int kBaseKeyFrameInterval = 3000; +const double kBitratePriority = 1.0; +const int kDefaultMaxFramerateFps = 30; +const int kMaxQp = 56; + +void ConfigureSimulcast(VideoCodec* codec_settings) { + FieldTrialBasedConfig trials; + const std::vector<webrtc::VideoStream> streams = cricket::GetSimulcastConfig( + /*min_layer=*/1, codec_settings->numberOfSimulcastStreams, + codec_settings->width, codec_settings->height, kBitratePriority, kMaxQp, + /* is_screenshare = */ false, true, trials); + + for (size_t i = 0; i < streams.size(); ++i) { + SimulcastStream* ss = &codec_settings->simulcastStream[i]; + ss->width = static_cast<uint16_t>(streams[i].width); + ss->height = static_cast<uint16_t>(streams[i].height); + ss->numberOfTemporalLayers = + static_cast<unsigned char>(*streams[i].num_temporal_layers); + ss->maxBitrate = streams[i].max_bitrate_bps / 1000; + 
ss->targetBitrate = streams[i].target_bitrate_bps / 1000; + ss->minBitrate = streams[i].min_bitrate_bps / 1000; + ss->qpMax = streams[i].max_qp; + ss->active = true; + } +} + +void ConfigureSvc(VideoCodec* codec_settings) { + RTC_CHECK_EQ(kVideoCodecVP9, codec_settings->codecType); + + const std::vector<SpatialLayer> layers = GetSvcConfig( + codec_settings->width, codec_settings->height, kDefaultMaxFramerateFps, + /*first_active_layer=*/0, codec_settings->VP9()->numberOfSpatialLayers, + codec_settings->VP9()->numberOfTemporalLayers, + /* is_screen_sharing = */ false); + ASSERT_EQ(codec_settings->VP9()->numberOfSpatialLayers, layers.size()) + << "GetSvcConfig returned fewer spatial layers than configured."; + + for (size_t i = 0; i < layers.size(); ++i) { + codec_settings->spatialLayers[i] = layers[i]; + } +} + +std::string CodecSpecificToString(const VideoCodec& codec) { + char buf[1024]; + rtc::SimpleStringBuilder ss(buf); + switch (codec.codecType) { + case kVideoCodecVP8: + ss << "\nnum_temporal_layers: " + << static_cast<int>(codec.VP8().numberOfTemporalLayers); + ss << "\ndenoising: " << codec.VP8().denoisingOn; + ss << "\nautomatic_resize: " << codec.VP8().automaticResizeOn; + ss << "\nkey_frame_interval: " << codec.VP8().keyFrameInterval; + break; + case kVideoCodecVP9: + ss << "\nnum_temporal_layers: " + << static_cast<int>(codec.VP9().numberOfTemporalLayers); + ss << "\nnum_spatial_layers: " + << static_cast<int>(codec.VP9().numberOfSpatialLayers); + ss << "\ndenoising: " << codec.VP9().denoisingOn; + ss << "\nkey_frame_interval: " << codec.VP9().keyFrameInterval; + ss << "\nadaptive_qp_mode: " << codec.VP9().adaptiveQpMode; + ss << "\nautomatic_resize: " << codec.VP9().automaticResizeOn; + ss << "\nflexible_mode: " << codec.VP9().flexibleMode; + break; + case kVideoCodecH264: + ss << "\nkey_frame_interval: " << codec.H264().keyFrameInterval; + ss << "\nnum_temporal_layers: " + << static_cast<int>(codec.H264().numberOfTemporalLayers); + break; + default: + 
break; + } + return ss.str(); +} + +bool RunEncodeInRealTime(const VideoCodecTestFixtureImpl::Config& config) { + if (config.measure_cpu || config.encode_in_real_time) { + return true; + } + return false; +} + +std::string FilenameWithParams( + const VideoCodecTestFixtureImpl::Config& config) { + return config.filename + "_" + config.CodecName() + "_" + + std::to_string(config.codec_settings.startBitrate); +} + +SdpVideoFormat CreateSdpVideoFormat( + const VideoCodecTestFixtureImpl::Config& config) { + if (config.codec_settings.codecType == kVideoCodecH264) { + const char* packetization_mode = + config.h264_codec_settings.packetization_mode == + H264PacketizationMode::NonInterleaved + ? "1" + : "0"; + SdpVideoFormat::Parameters codec_params = { + {cricket::kH264FmtpProfileLevelId, + *H264ProfileLevelIdToString(H264ProfileLevelId( + config.h264_codec_settings.profile, H264Level::kLevel3_1))}, + {cricket::kH264FmtpPacketizationMode, packetization_mode}, + {cricket::kH264FmtpLevelAsymmetryAllowed, "1"}}; + + return SdpVideoFormat(config.codec_name, codec_params); + } else if (config.codec_settings.codecType == kVideoCodecVP9) { + return SdpVideoFormat(config.codec_name, {{"profile-id", "0"}}); + } + + return SdpVideoFormat(config.codec_name); +} + +} // namespace + +VideoCodecTestFixtureImpl::Config::Config() = default; + +void VideoCodecTestFixtureImpl::Config::SetCodecSettings( + std::string codec_name, + size_t num_simulcast_streams, + size_t num_spatial_layers, + size_t num_temporal_layers, + bool denoising_on, + bool frame_dropper_on, + bool spatial_resize_on, + size_t width, + size_t height) { + this->codec_name = codec_name; + VideoCodecType codec_type = PayloadStringToCodecType(codec_name); + webrtc::test::CodecSettings(codec_type, &codec_settings); + + // TODO(brandtr): Move the setting of `width` and `height` to the tests, and + // DCHECK that they are set before initializing the codec instead. 
+ codec_settings.width = static_cast<uint16_t>(width); + codec_settings.height = static_cast<uint16_t>(height); + + RTC_CHECK(num_simulcast_streams >= 1 && + num_simulcast_streams <= kMaxSimulcastStreams); + RTC_CHECK(num_spatial_layers >= 1 && num_spatial_layers <= kMaxSpatialLayers); + RTC_CHECK(num_temporal_layers >= 1 && + num_temporal_layers <= kMaxTemporalStreams); + + // Simulcast is only available with VP8. + RTC_CHECK(num_simulcast_streams < 2 || codec_type == kVideoCodecVP8); + + // Spatial scalability is only available with VP9. + RTC_CHECK(num_spatial_layers < 2 || codec_type == kVideoCodecVP9); + + // Some base code requires numberOfSimulcastStreams to be set to zero + // when simulcast is not used. + codec_settings.numberOfSimulcastStreams = + num_simulcast_streams <= 1 ? 0 + : static_cast<uint8_t>(num_simulcast_streams); + + codec_settings.SetFrameDropEnabled(frame_dropper_on); + switch (codec_settings.codecType) { + case kVideoCodecVP8: + codec_settings.VP8()->numberOfTemporalLayers = + static_cast<uint8_t>(num_temporal_layers); + codec_settings.VP8()->denoisingOn = denoising_on; + codec_settings.VP8()->automaticResizeOn = spatial_resize_on; + codec_settings.VP8()->keyFrameInterval = kBaseKeyFrameInterval; + break; + case kVideoCodecVP9: + codec_settings.VP9()->numberOfTemporalLayers = + static_cast<uint8_t>(num_temporal_layers); + codec_settings.VP9()->denoisingOn = denoising_on; + codec_settings.VP9()->keyFrameInterval = kBaseKeyFrameInterval; + codec_settings.VP9()->automaticResizeOn = spatial_resize_on; + codec_settings.VP9()->numberOfSpatialLayers = + static_cast<uint8_t>(num_spatial_layers); + break; + case kVideoCodecAV1: + codec_settings.qpMax = 63; + break; + case kVideoCodecH264: + codec_settings.H264()->keyFrameInterval = kBaseKeyFrameInterval; + codec_settings.H264()->numberOfTemporalLayers = + static_cast<uint8_t>(num_temporal_layers); + break; + default: + break; + } + + if (codec_settings.numberOfSimulcastStreams > 1) { + 
ConfigureSimulcast(&codec_settings); + } else if (codec_settings.codecType == kVideoCodecVP9 && + codec_settings.VP9()->numberOfSpatialLayers > 1) { + ConfigureSvc(&codec_settings); + } +} + +size_t VideoCodecTestFixtureImpl::Config::NumberOfCores() const { + return use_single_core ? 1 : CpuInfo::DetectNumberOfCores(); +} + +size_t VideoCodecTestFixtureImpl::Config::NumberOfTemporalLayers() const { + if (codec_settings.codecType == kVideoCodecVP8) { + return codec_settings.VP8().numberOfTemporalLayers; + } else if (codec_settings.codecType == kVideoCodecVP9) { + return codec_settings.VP9().numberOfTemporalLayers; + } else if (codec_settings.codecType == kVideoCodecH264) { + return codec_settings.H264().numberOfTemporalLayers; + } else { + return 1; + } +} + +size_t VideoCodecTestFixtureImpl::Config::NumberOfSpatialLayers() const { + if (codec_settings.codecType == kVideoCodecVP9) { + return codec_settings.VP9().numberOfSpatialLayers; + } else { + return 1; + } +} + +size_t VideoCodecTestFixtureImpl::Config::NumberOfSimulcastStreams() const { + return codec_settings.numberOfSimulcastStreams; +} + +std::string VideoCodecTestFixtureImpl::Config::ToString() const { + std::string codec_type = CodecTypeToPayloadString(codec_settings.codecType); + rtc::StringBuilder ss; + ss << "test_name: " << test_name; + ss << "\nfilename: " << filename; + ss << "\nnum_frames: " << num_frames; + ss << "\nmax_payload_size_bytes: " << max_payload_size_bytes; + ss << "\ndecode: " << decode; + ss << "\nuse_single_core: " << use_single_core; + ss << "\nmeasure_cpu: " << measure_cpu; + ss << "\nnum_cores: " << NumberOfCores(); + ss << "\ncodec_type: " << codec_type; + ss << "\n\n--> codec_settings"; + ss << "\nwidth: " << codec_settings.width; + ss << "\nheight: " << codec_settings.height; + ss << "\nmax_framerate_fps: " << codec_settings.maxFramerate; + ss << "\nstart_bitrate_kbps: " << codec_settings.startBitrate; + ss << "\nmax_bitrate_kbps: " << codec_settings.maxBitrate; + ss << 
"\nmin_bitrate_kbps: " << codec_settings.minBitrate; + ss << "\nmax_qp: " << codec_settings.qpMax; + ss << "\nnum_simulcast_streams: " + << static_cast<int>(codec_settings.numberOfSimulcastStreams); + ss << "\n\n--> codec_settings." << codec_type; + ss << "complexity: " + << static_cast<int>(codec_settings.GetVideoEncoderComplexity()); + ss << "\nframe_dropping: " << codec_settings.GetFrameDropEnabled(); + ss << "\n" << CodecSpecificToString(codec_settings); + if (codec_settings.numberOfSimulcastStreams > 1) { + for (int i = 0; i < codec_settings.numberOfSimulcastStreams; ++i) { + ss << "\n\n--> codec_settings.simulcastStream[" << i << "]"; + const SimulcastStream& simulcast_stream = + codec_settings.simulcastStream[i]; + ss << "\nwidth: " << simulcast_stream.width; + ss << "\nheight: " << simulcast_stream.height; + ss << "\nnum_temporal_layers: " + << static_cast<int>(simulcast_stream.numberOfTemporalLayers); + ss << "\nmin_bitrate_kbps: " << simulcast_stream.minBitrate; + ss << "\ntarget_bitrate_kbps: " << simulcast_stream.targetBitrate; + ss << "\nmax_bitrate_kbps: " << simulcast_stream.maxBitrate; + ss << "\nmax_qp: " << simulcast_stream.qpMax; + ss << "\nactive: " << simulcast_stream.active; + } + } + ss << "\n"; + return ss.Release(); +} + +std::string VideoCodecTestFixtureImpl::Config::CodecName() const { + std::string name = codec_name; + if (name.empty()) { + name = CodecTypeToPayloadString(codec_settings.codecType); + } + if (codec_settings.codecType == kVideoCodecH264) { + if (h264_codec_settings.profile == H264Profile::kProfileConstrainedHigh) { + return name + "-CHP"; + } else { + RTC_DCHECK_EQ(h264_codec_settings.profile, + H264Profile::kProfileConstrainedBaseline); + return name + "-CBP"; + } + } + return name; +} + +// TODO(kthelgason): Move this out of the test fixture impl and +// make available as a shared utility class. 
+void VideoCodecTestFixtureImpl::H264KeyframeChecker::CheckEncodedFrame( + webrtc::VideoCodecType codec, + const EncodedImage& encoded_frame) const { + EXPECT_EQ(kVideoCodecH264, codec); + bool contains_sps = false; + bool contains_pps = false; + bool contains_idr = false; + const std::vector<webrtc::H264::NaluIndex> nalu_indices = + webrtc::H264::FindNaluIndices(encoded_frame.data(), encoded_frame.size()); + for (const webrtc::H264::NaluIndex& index : nalu_indices) { + webrtc::H264::NaluType nalu_type = webrtc::H264::ParseNaluType( + encoded_frame.data()[index.payload_start_offset]); + if (nalu_type == webrtc::H264::NaluType::kSps) { + contains_sps = true; + } else if (nalu_type == webrtc::H264::NaluType::kPps) { + contains_pps = true; + } else if (nalu_type == webrtc::H264::NaluType::kIdr) { + contains_idr = true; + } + } + if (encoded_frame._frameType == VideoFrameType::kVideoFrameKey) { + EXPECT_TRUE(contains_sps) << "Keyframe should contain SPS."; + EXPECT_TRUE(contains_pps) << "Keyframe should contain PPS."; + EXPECT_TRUE(contains_idr) << "Keyframe should contain IDR."; + } else if (encoded_frame._frameType == VideoFrameType::kVideoFrameDelta) { + EXPECT_FALSE(contains_sps) << "Delta frame should not contain SPS."; + EXPECT_FALSE(contains_pps) << "Delta frame should not contain PPS."; + EXPECT_FALSE(contains_idr) << "Delta frame should not contain IDR."; + } else { + RTC_DCHECK_NOTREACHED(); + } +} + +class VideoCodecTestFixtureImpl::CpuProcessTime final { + public: + explicit CpuProcessTime(const Config& config) : config_(config) {} + ~CpuProcessTime() {} + + void Start() { + if (config_.measure_cpu) { + cpu_time_ -= rtc::GetProcessCpuTimeNanos(); + wallclock_time_ -= rtc::SystemTimeNanos(); + } + } + void Stop() { + if (config_.measure_cpu) { + cpu_time_ += rtc::GetProcessCpuTimeNanos(); + wallclock_time_ += rtc::SystemTimeNanos(); + } + } + void Print() const { + if (config_.measure_cpu) { + RTC_LOG(LS_INFO) << "cpu_usage_percent: " + << GetUsagePercent() 
/ config_.NumberOfCores(); + } + } + + private: + double GetUsagePercent() const { + return static_cast<double>(cpu_time_) / wallclock_time_ * 100.0; + } + + const Config config_; + int64_t cpu_time_ = 0; + int64_t wallclock_time_ = 0; +}; + +VideoCodecTestFixtureImpl::VideoCodecTestFixtureImpl(Config config) + : encoder_factory_(std::make_unique<webrtc::VideoEncoderFactoryTemplate< + webrtc::LibvpxVp8EncoderTemplateAdapter, + webrtc::LibvpxVp9EncoderTemplateAdapter, + webrtc::OpenH264EncoderTemplateAdapter, + webrtc::LibaomAv1EncoderTemplateAdapter>>()), + decoder_factory_(std::make_unique<webrtc::VideoDecoderFactoryTemplate< + webrtc::LibvpxVp8DecoderTemplateAdapter, + webrtc::LibvpxVp9DecoderTemplateAdapter, + webrtc::OpenH264DecoderTemplateAdapter, + webrtc::Dav1dDecoderTemplateAdapter>>()), + config_(config) {} + +VideoCodecTestFixtureImpl::VideoCodecTestFixtureImpl( + Config config, + std::unique_ptr<VideoDecoderFactory> decoder_factory, + std::unique_ptr<VideoEncoderFactory> encoder_factory) + : encoder_factory_(std::move(encoder_factory)), + decoder_factory_(std::move(decoder_factory)), + config_(config) {} + +VideoCodecTestFixtureImpl::~VideoCodecTestFixtureImpl() = default; + +// Processes all frames in the clip and verifies the result. +void VideoCodecTestFixtureImpl::RunTest( + const std::vector<RateProfile>& rate_profiles, + const std::vector<RateControlThresholds>* rc_thresholds, + const std::vector<QualityThresholds>* quality_thresholds, + const BitstreamThresholds* bs_thresholds) { + RTC_DCHECK(!rate_profiles.empty()); + + // To emulate operation on a production VideoStreamEncoder, we call the + // codecs on a task queue. 
+ TaskQueueForTest task_queue("VidProc TQ"); + + bool is_setup_succeeded = SetUpAndInitObjects( + &task_queue, rate_profiles[0].target_kbps, rate_profiles[0].input_fps); + EXPECT_TRUE(is_setup_succeeded); + if (!is_setup_succeeded) { + ReleaseAndCloseObjects(&task_queue); + return; + } + + PrintSettings(&task_queue); + ProcessAllFrames(&task_queue, rate_profiles); + ReleaseAndCloseObjects(&task_queue); + + AnalyzeAllFrames(rate_profiles, rc_thresholds, quality_thresholds, + bs_thresholds); +} + +void VideoCodecTestFixtureImpl::ProcessAllFrames( + TaskQueueForTest* task_queue, + const std::vector<RateProfile>& rate_profiles) { + // Set initial rates. + auto rate_profile = rate_profiles.begin(); + task_queue->PostTask([this, rate_profile] { + processor_->SetRates(rate_profile->target_kbps, rate_profile->input_fps); + }); + + cpu_process_time_->Start(); + + for (size_t frame_num = 0; frame_num < config_.num_frames; ++frame_num) { + auto next_rate_profile = std::next(rate_profile); + if (next_rate_profile != rate_profiles.end() && + frame_num == next_rate_profile->frame_num) { + rate_profile = next_rate_profile; + task_queue->PostTask([this, rate_profile] { + processor_->SetRates(rate_profile->target_kbps, + rate_profile->input_fps); + }); + } + + task_queue->PostTask([this] { processor_->ProcessFrame(); }); + + if (RunEncodeInRealTime(config_)) { + // Roughly pace the frames. + const int frame_duration_ms = + std::ceil(rtc::kNumMillisecsPerSec / rate_profile->input_fps); + SleepMs(frame_duration_ms); + } + } + + task_queue->PostTask([this] { processor_->Finalize(); }); + + // Wait until we know that the last frame has been sent for encode. + task_queue->SendTask([] {}); + + // Give the VideoProcessor pipeline some time to process the last frame, + // and then release the codecs. 
+ SleepMs(1 * rtc::kNumMillisecsPerSec); + cpu_process_time_->Stop(); +} + +void VideoCodecTestFixtureImpl::AnalyzeAllFrames( + const std::vector<RateProfile>& rate_profiles, + const std::vector<RateControlThresholds>* rc_thresholds, + const std::vector<QualityThresholds>* quality_thresholds, + const BitstreamThresholds* bs_thresholds) { + + for (size_t rate_profile_idx = 0; rate_profile_idx < rate_profiles.size(); + ++rate_profile_idx) { + const size_t first_frame_num = rate_profiles[rate_profile_idx].frame_num; + const size_t last_frame_num = + rate_profile_idx + 1 < rate_profiles.size() + ? rate_profiles[rate_profile_idx + 1].frame_num - 1 + : config_.num_frames - 1; + RTC_CHECK(last_frame_num >= first_frame_num); + + VideoStatistics send_stat = stats_.SliceAndCalcAggregatedVideoStatistic( + first_frame_num, last_frame_num); + RTC_LOG(LS_INFO) << "==> Send stats"; + RTC_LOG(LS_INFO) << send_stat.ToString("send_") << "\n"; + + std::vector<VideoStatistics> layer_stats = + stats_.SliceAndCalcLayerVideoStatistic(first_frame_num, last_frame_num); + RTC_LOG(LS_INFO) << "==> Receive stats"; + for (const auto& layer_stat : layer_stats) { + RTC_LOG(LS_INFO) << layer_stat.ToString("recv_") << "\n"; + + // For perf dashboard. 
+ char modifier_buf[256]; + rtc::SimpleStringBuilder modifier(modifier_buf); + modifier << "_r" << rate_profile_idx << "_sl" << layer_stat.spatial_idx; + + auto PrintResultHelper = [&modifier, this](const std::string& measurement, + double value, + const std::string& units) { + PrintResult(measurement, modifier.str(), config_.test_name, value, + units, /*important=*/false); + }; + + if (layer_stat.temporal_idx == config_.NumberOfTemporalLayers() - 1) { + PrintResultHelper("enc_speed", layer_stat.enc_speed_fps, "fps"); + PrintResultHelper("avg_key_frame_size", + layer_stat.avg_key_frame_size_bytes, "bytes"); + PrintResultHelper("num_key_frames", layer_stat.num_key_frames, + "frames"); + printf("\n"); + } + + modifier << "tl" << layer_stat.temporal_idx; + PrintResultHelper("dec_speed", layer_stat.dec_speed_fps, "fps"); + PrintResultHelper("avg_delta_frame_size", + layer_stat.avg_delta_frame_size_bytes, "bytes"); + PrintResultHelper("bitrate", layer_stat.bitrate_kbps, "kbps"); + PrintResultHelper("framerate", layer_stat.framerate_fps, "fps"); + PrintResultHelper("avg_psnr_y", layer_stat.avg_psnr_y, "dB"); + PrintResultHelper("avg_psnr_u", layer_stat.avg_psnr_u, "dB"); + PrintResultHelper("avg_psnr_v", layer_stat.avg_psnr_v, "dB"); + PrintResultHelper("min_psnr_yuv", layer_stat.min_psnr, "dB"); + PrintResultHelper("avg_qp", layer_stat.avg_qp, ""); + printf("\n"); + if (layer_stat.temporal_idx == config_.NumberOfTemporalLayers() - 1) { + printf("\n"); + } + } + + const RateControlThresholds* rc_threshold = + rc_thresholds ? &(*rc_thresholds)[rate_profile_idx] : nullptr; + const QualityThresholds* quality_threshold = + quality_thresholds ? 
&(*quality_thresholds)[rate_profile_idx] : nullptr; + + VerifyVideoStatistic(send_stat, rc_threshold, quality_threshold, + bs_thresholds, + rate_profiles[rate_profile_idx].target_kbps, + rate_profiles[rate_profile_idx].input_fps); + } + + if (config_.print_frame_level_stats) { + RTC_LOG(LS_INFO) << "==> Frame stats"; + std::vector<VideoCodecTestStats::FrameStatistics> frame_stats = + stats_.GetFrameStatistics(); + for (const auto& frame_stat : frame_stats) { + RTC_LOG(LS_INFO) << frame_stat.ToString(); + } + } + + cpu_process_time_->Print(); +} + +void VideoCodecTestFixtureImpl::VerifyVideoStatistic( + const VideoStatistics& video_stat, + const RateControlThresholds* rc_thresholds, + const QualityThresholds* quality_thresholds, + const BitstreamThresholds* bs_thresholds, + size_t target_bitrate_kbps, + double input_framerate_fps) { + if (rc_thresholds) { + const float bitrate_mismatch_percent = + 100 * std::fabs(1.0f * video_stat.bitrate_kbps - target_bitrate_kbps) / + target_bitrate_kbps; + const float framerate_mismatch_percent = + 100 * std::fabs(video_stat.framerate_fps - input_framerate_fps) / + input_framerate_fps; + EXPECT_LE(bitrate_mismatch_percent, + rc_thresholds->max_avg_bitrate_mismatch_percent); + EXPECT_LE(video_stat.time_to_reach_target_bitrate_sec, + rc_thresholds->max_time_to_reach_target_bitrate_sec); + EXPECT_LE(framerate_mismatch_percent, + rc_thresholds->max_avg_framerate_mismatch_percent); + EXPECT_LE(video_stat.avg_delay_sec, + rc_thresholds->max_avg_buffer_level_sec); + EXPECT_LE(video_stat.max_key_frame_delay_sec, + rc_thresholds->max_max_key_frame_delay_sec); + EXPECT_LE(video_stat.max_delta_frame_delay_sec, + rc_thresholds->max_max_delta_frame_delay_sec); + EXPECT_LE(video_stat.num_spatial_resizes, + rc_thresholds->max_num_spatial_resizes); + EXPECT_LE(video_stat.num_key_frames, rc_thresholds->max_num_key_frames); + } + + if (quality_thresholds) { + EXPECT_GT(video_stat.avg_psnr, quality_thresholds->min_avg_psnr); + 
EXPECT_GT(video_stat.min_psnr, quality_thresholds->min_min_psnr); + + // SSIM calculation is not optimized and thus it is disabled in real-time + // mode. + if (!config_.encode_in_real_time) { + EXPECT_GT(video_stat.avg_ssim, quality_thresholds->min_avg_ssim); + EXPECT_GT(video_stat.min_ssim, quality_thresholds->min_min_ssim); + } + } + + if (bs_thresholds) { + EXPECT_LE(video_stat.max_nalu_size_bytes, + bs_thresholds->max_max_nalu_size_bytes); + } +} + +bool VideoCodecTestFixtureImpl::CreateEncoderAndDecoder() { + SdpVideoFormat encoder_format(CreateSdpVideoFormat(config_)); + SdpVideoFormat decoder_format = encoder_format; + + // Override encoder and decoder formats with explicitly provided ones. + if (config_.encoder_format) { + RTC_DCHECK_EQ(config_.encoder_format->name, config_.codec_name); + encoder_format = *config_.encoder_format; + } + + if (config_.decoder_format) { + RTC_DCHECK_EQ(config_.decoder_format->name, config_.codec_name); + decoder_format = *config_.decoder_format; + } + + encoder_ = encoder_factory_->CreateVideoEncoder(encoder_format); + EXPECT_TRUE(encoder_) << "Encoder not successfully created."; + if (encoder_ == nullptr) { + return false; + } + + const size_t num_simulcast_or_spatial_layers = std::max( + config_.NumberOfSimulcastStreams(), config_.NumberOfSpatialLayers()); + for (size_t i = 0; i < num_simulcast_or_spatial_layers; ++i) { + std::unique_ptr<VideoDecoder> decoder = + decoder_factory_->CreateVideoDecoder(decoder_format); + EXPECT_TRUE(decoder) << "Decoder not successfully created."; + if (decoder == nullptr) { + return false; + } + decoders_.push_back(std::move(decoder)); + } + + return true; +} + +void VideoCodecTestFixtureImpl::DestroyEncoderAndDecoder() { + decoders_.clear(); + encoder_.reset(); +} + +VideoCodecTestStats& VideoCodecTestFixtureImpl::GetStats() { + return stats_; +} + +bool VideoCodecTestFixtureImpl::SetUpAndInitObjects( + TaskQueueForTest* task_queue, + size_t initial_bitrate_kbps, + double 
initial_framerate_fps) { + config_.codec_settings.minBitrate = 0; + config_.codec_settings.startBitrate = static_cast<int>(initial_bitrate_kbps); + config_.codec_settings.maxFramerate = std::ceil(initial_framerate_fps); + + int clip_width = config_.clip_width.value_or(config_.codec_settings.width); + int clip_height = config_.clip_height.value_or(config_.codec_settings.height); + + // Create file objects for quality analysis. + source_frame_reader_.reset(new YuvFrameReaderImpl( + config_.filepath, clip_width, clip_height, + config_.reference_width.value_or(clip_width), + config_.reference_height.value_or(clip_height), + YuvFrameReaderImpl::RepeatMode::kPingPong, config_.clip_fps, + config_.codec_settings.maxFramerate)); + EXPECT_TRUE(source_frame_reader_->Init()); + + RTC_DCHECK(encoded_frame_writers_.empty()); + RTC_DCHECK(decoded_frame_writers_.empty()); + + stats_.Clear(); + + cpu_process_time_.reset(new CpuProcessTime(config_)); + + bool is_codec_created = false; + task_queue->SendTask([this, &is_codec_created]() { + is_codec_created = CreateEncoderAndDecoder(); + }); + + if (!is_codec_created) { + return false; + } + + if (config_.visualization_params.save_encoded_ivf || + config_.visualization_params.save_decoded_y4m) { + std::string encoder_name = GetCodecName(task_queue, /*is_encoder=*/true); + encoder_name = absl::StrReplaceAll(encoder_name, {{":", ""}, {" ", "-"}}); + + const size_t num_simulcast_or_spatial_layers = std::max( + config_.NumberOfSimulcastStreams(), config_.NumberOfSpatialLayers()); + const size_t num_temporal_layers = config_.NumberOfTemporalLayers(); + for (size_t simulcast_svc_idx = 0; + simulcast_svc_idx < num_simulcast_or_spatial_layers; + ++simulcast_svc_idx) { + const std::string output_filename_base = + JoinFilename(config_.output_path, + FilenameWithParams(config_) + "_" + encoder_name + + "_sl" + std::to_string(simulcast_svc_idx)); + + if (config_.visualization_params.save_encoded_ivf) { + for (size_t temporal_idx = 0; temporal_idx 
< num_temporal_layers; + ++temporal_idx) { + const std::string output_file_path = output_filename_base + "tl" + + std::to_string(temporal_idx) + + ".ivf"; + FileWrapper ivf_file = FileWrapper::OpenWriteOnly(output_file_path); + + const VideoProcessor::LayerKey layer_key(simulcast_svc_idx, + temporal_idx); + encoded_frame_writers_[layer_key] = + IvfFileWriter::Wrap(std::move(ivf_file), /*byte_limit=*/0); + } + } + + if (config_.visualization_params.save_decoded_y4m) { + FrameWriter* decoded_frame_writer = new Y4mFrameWriterImpl( + output_filename_base + ".y4m", config_.codec_settings.width, + config_.codec_settings.height, config_.codec_settings.maxFramerate); + EXPECT_TRUE(decoded_frame_writer->Init()); + decoded_frame_writers_.push_back( + std::unique_ptr<FrameWriter>(decoded_frame_writer)); + } + } + } + + task_queue->SendTask( + [this]() { + processor_ = std::make_unique<VideoProcessor>( + encoder_.get(), &decoders_, source_frame_reader_.get(), config_, + &stats_, &encoded_frame_writers_, + decoded_frame_writers_.empty() ? nullptr : &decoded_frame_writers_); + }); + return true; +} + +void VideoCodecTestFixtureImpl::ReleaseAndCloseObjects( + TaskQueueForTest* task_queue) { + task_queue->SendTask([this]() { + processor_.reset(); + // The VideoProcessor must be destroyed before the codecs. + DestroyEncoderAndDecoder(); + }); + + source_frame_reader_->Close(); + + // Close visualization files. 
+ for (auto& encoded_frame_writer : encoded_frame_writers_) { + EXPECT_TRUE(encoded_frame_writer.second->Close()); + } + encoded_frame_writers_.clear(); + for (auto& decoded_frame_writer : decoded_frame_writers_) { + decoded_frame_writer->Close(); + } + decoded_frame_writers_.clear(); +} + +std::string VideoCodecTestFixtureImpl::GetCodecName( + TaskQueueForTest* task_queue, + bool is_encoder) const { + std::string codec_name; + task_queue->SendTask([this, is_encoder, &codec_name] { + if (is_encoder) { + codec_name = encoder_->GetEncoderInfo().implementation_name; + } else { + codec_name = decoders_.at(0)->ImplementationName(); + } + }); + return codec_name; +} + +void VideoCodecTestFixtureImpl::PrintSettings( + TaskQueueForTest* task_queue) const { + RTC_LOG(LS_INFO) << "==> Config"; + RTC_LOG(LS_INFO) << config_.ToString(); + + RTC_LOG(LS_INFO) << "==> Codec names"; + RTC_LOG(LS_INFO) << "enc_impl_name: " + << GetCodecName(task_queue, /*is_encoder=*/true); + RTC_LOG(LS_INFO) << "dec_impl_name: " + << GetCodecName(task_queue, /*is_encoder=*/false); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_fixture_impl.h b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_fixture_impl.h new file mode 100644 index 0000000000..005b7c0a8e --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_fixture_impl.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_VIDEO_CODING_CODECS_TEST_VIDEOCODEC_TEST_FIXTURE_IMPL_H_ +#define MODULES_VIDEO_CODING_CODECS_TEST_VIDEOCODEC_TEST_FIXTURE_IMPL_H_ + +#include <memory> +#include <string> +#include <vector> + +#include "api/test/videocodec_test_fixture.h" +#include "api/video_codecs/video_decoder_factory.h" +#include "api/video_codecs/video_encoder_factory.h" +#include "common_video/h264/h264_common.h" +#include "modules/video_coding/codecs/test/videocodec_test_stats_impl.h" +#include "modules/video_coding/codecs/test/videoprocessor.h" +#include "modules/video_coding/utility/ivf_file_writer.h" +#include "rtc_base/task_queue_for_test.h" +#include "test/testsupport/frame_reader.h" +#include "test/testsupport/frame_writer.h" + +namespace webrtc { +namespace test { + +// Integration test for video processor. It does rate control and frame quality +// analysis using frame statistics collected by video processor and logs the +// results. If thresholds are specified it checks that corresponding metrics +// are in desirable range. +class VideoCodecTestFixtureImpl : public VideoCodecTestFixture { + // Verifies that all H.264 keyframes contain SPS/PPS/IDR NALUs. 
+ public: + class H264KeyframeChecker : public EncodedFrameChecker { + public: + void CheckEncodedFrame(webrtc::VideoCodecType codec, + const EncodedImage& encoded_frame) const override; + }; + + explicit VideoCodecTestFixtureImpl(Config config); + VideoCodecTestFixtureImpl( + Config config, + std::unique_ptr<VideoDecoderFactory> decoder_factory, + std::unique_ptr<VideoEncoderFactory> encoder_factory); + ~VideoCodecTestFixtureImpl() override; + + void RunTest(const std::vector<RateProfile>& rate_profiles, + const std::vector<RateControlThresholds>* rc_thresholds, + const std::vector<QualityThresholds>* quality_thresholds, + const BitstreamThresholds* bs_thresholds) override; + + VideoCodecTestStats& GetStats() override; + + private: + class CpuProcessTime; + + bool CreateEncoderAndDecoder(); + void DestroyEncoderAndDecoder(); + bool SetUpAndInitObjects(TaskQueueForTest* task_queue, + size_t initial_bitrate_kbps, + double initial_framerate_fps); + void ReleaseAndCloseObjects(TaskQueueForTest* task_queue); + + void ProcessAllFrames(TaskQueueForTest* task_queue, + const std::vector<RateProfile>& rate_profiles); + void AnalyzeAllFrames( + const std::vector<RateProfile>& rate_profiles, + const std::vector<RateControlThresholds>* rc_thresholds, + const std::vector<QualityThresholds>* quality_thresholds, + const BitstreamThresholds* bs_thresholds); + + void VerifyVideoStatistic( + const VideoCodecTestStats::VideoStatistics& video_stat, + const RateControlThresholds* rc_thresholds, + const QualityThresholds* quality_thresholds, + const BitstreamThresholds* bs_thresholds, + size_t target_bitrate_kbps, + double input_framerate_fps); + + std::string GetCodecName(TaskQueueForTest* task_queue, bool is_encoder) const; + void PrintSettings(TaskQueueForTest* task_queue) const; + + // Codecs. 
+ const std::unique_ptr<VideoEncoderFactory> encoder_factory_; + std::unique_ptr<VideoEncoder> encoder_; + const std::unique_ptr<VideoDecoderFactory> decoder_factory_; + VideoProcessor::VideoDecoderList decoders_; + + // Helper objects. + Config config_; + VideoCodecTestStatsImpl stats_; + std::unique_ptr<FrameReader> source_frame_reader_; + VideoProcessor::IvfFileWriterMap encoded_frame_writers_; + VideoProcessor::FrameWriterList decoded_frame_writers_; + std::unique_ptr<VideoProcessor> processor_; + std::unique_ptr<CpuProcessTime> cpu_process_time_; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_TEST_VIDEOCODEC_TEST_FIXTURE_IMPL_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_libvpx.cc b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_libvpx.cc new file mode 100644 index 0000000000..062375bd60 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_libvpx.cc @@ -0,0 +1,465 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <memory> +#include <vector> + +#include "api/test/create_videocodec_test_fixture.h" +#include "api/test/video/function_video_encoder_factory.h" +#include "api/video_codecs/sdp_video_format.h" +#include "media/base/media_constants.h" +#include "media/engine/internal_decoder_factory.h" +#include "media/engine/internal_encoder_factory.h" +#include "media/engine/simulcast_encoder_adapter.h" +#include "modules/video_coding/utility/vp8_header_parser.h" +#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { +namespace test { + +using VideoStatistics = VideoCodecTestStats::VideoStatistics; + +namespace { +// Codec settings. +const int kCifWidth = 352; +const int kCifHeight = 288; +const int kNumFramesShort = 100; +const int kNumFramesLong = 300; +const size_t kBitrateRdPerfKbps[] = {100, 200, 300, 400, 500, 600, + 700, 800, 1000, 1250, 1400, 1600, + 1800, 2000, 2200, 2500}; +const size_t kNumFirstFramesToSkipAtRdPerfAnalysis = 60; + +class QpFrameChecker : public VideoCodecTestFixture::EncodedFrameChecker { + public: + void CheckEncodedFrame(webrtc::VideoCodecType codec, + const EncodedImage& encoded_frame) const override { + int qp; + if (codec == kVideoCodecVP8) { + EXPECT_TRUE(vp8::GetQp(encoded_frame.data(), encoded_frame.size(), &qp)); + } else if (codec == kVideoCodecVP9) { + EXPECT_TRUE(vp9::GetQp(encoded_frame.data(), encoded_frame.size(), &qp)); + } else { + RTC_DCHECK_NOTREACHED(); + } + EXPECT_EQ(encoded_frame.qp_, qp) << "Encoder QP != parsed bitstream QP."; + } +}; + +VideoCodecTestFixture::Config CreateConfig() { + VideoCodecTestFixture::Config config; + config.filename = "foreman_cif"; + config.filepath = ResourcePath(config.filename, "yuv"); + config.num_frames = kNumFramesLong; + config.use_single_core = true; + return config; +} + +void PrintRdPerf(std::map<size_t, std::vector<VideoStatistics>> rd_stats) { + printf("--> Summary\n"); 
+ printf("%11s %5s %6s %11s %12s %11s %13s %13s %5s %7s %7s %7s %13s %13s\n", + "uplink_kbps", "width", "height", "spatial_idx", "temporal_idx", + "target_kbps", "downlink_kbps", "framerate_fps", "psnr", "psnr_y", + "psnr_u", "psnr_v", "enc_speed_fps", "dec_speed_fps"); + for (const auto& rd_stat : rd_stats) { + const size_t bitrate_kbps = rd_stat.first; + for (const auto& layer_stat : rd_stat.second) { + printf( + "%11zu %5zu %6zu %11zu %12zu %11zu %13zu %13.2f %5.2f %7.2f %7.2f " + "%7.2f" + "%13.2f %13.2f\n", + bitrate_kbps, layer_stat.width, layer_stat.height, + layer_stat.spatial_idx, layer_stat.temporal_idx, + layer_stat.target_bitrate_kbps, layer_stat.bitrate_kbps, + layer_stat.framerate_fps, layer_stat.avg_psnr, layer_stat.avg_psnr_y, + layer_stat.avg_psnr_u, layer_stat.avg_psnr_v, + layer_stat.enc_speed_fps, layer_stat.dec_speed_fps); + } + } +} +} // namespace + +#if defined(RTC_ENABLE_VP9) +TEST(VideoCodecTestLibvpx, HighBitrateVP9) { + auto config = CreateConfig(); + config.SetCodecSettings(cricket::kVp9CodecName, 1, 1, 1, false, true, false, + kCifWidth, kCifHeight); + config.num_frames = kNumFramesShort; + const auto frame_checker = std::make_unique<QpFrameChecker>(); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateVideoCodecTestFixture(config); + + std::vector<RateProfile> rate_profiles = {{500, 30, 0}}; + + std::vector<RateControlThresholds> rc_thresholds = { + {5, 1, 0, 1, 0.3, 0.1, 0, 1}}; + + std::vector<QualityThresholds> quality_thresholds = {{37, 36, 0.94, 0.92}}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +TEST(VideoCodecTestLibvpx, ChangeBitrateVP9) { + auto config = CreateConfig(); + config.SetCodecSettings(cricket::kVp9CodecName, 1, 1, 1, false, true, false, + kCifWidth, kCifHeight); + const auto frame_checker = std::make_unique<QpFrameChecker>(); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateVideoCodecTestFixture(config); + + 
std::vector<RateProfile> rate_profiles = { + {200, 30, 0}, // target_kbps, input_fps, frame_num + {700, 30, 100}, + {500, 30, 200}}; + + std::vector<RateControlThresholds> rc_thresholds = { + {5, 2, 0, 1, 0.5, 0.1, 0, 1}, + {15, 3, 0, 1, 0.5, 0.1, 0, 0}, + {11, 2, 0, 1, 0.5, 0.1, 0, 0}}; + + std::vector<QualityThresholds> quality_thresholds = { + {34, 33, 0.90, 0.88}, {38, 35, 0.95, 0.91}, {35, 34, 0.93, 0.90}}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +TEST(VideoCodecTestLibvpx, ChangeFramerateVP9) { + auto config = CreateConfig(); + config.SetCodecSettings(cricket::kVp9CodecName, 1, 1, 1, false, true, false, + kCifWidth, kCifHeight); + const auto frame_checker = std::make_unique<QpFrameChecker>(); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateVideoCodecTestFixture(config); + + std::vector<RateProfile> rate_profiles = { + {100, 24, 0}, // target_kbps, input_fps, frame_num + {100, 15, 100}, + {100, 10, 200}}; + + // Framerate mismatch should be lower for lower framerate. + std::vector<RateControlThresholds> rc_thresholds = { + {10, 2, 40, 1, 0.5, 0.2, 0, 1}, + {8, 2, 5, 1, 0.5, 0.2, 0, 0}, + {5, 2, 0, 1, 0.5, 0.3, 0, 0}}; + + // Quality should be higher for lower framerates for the same content. 
+ std::vector<QualityThresholds> quality_thresholds = { + {33, 32, 0.88, 0.86}, {33.5, 32, 0.90, 0.86}, {33.5, 31.5, 0.90, 0.85}}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +TEST(VideoCodecTestLibvpx, DenoiserOnVP9) { + auto config = CreateConfig(); + config.SetCodecSettings(cricket::kVp9CodecName, 1, 1, 1, true, true, false, + kCifWidth, kCifHeight); + config.num_frames = kNumFramesShort; + const auto frame_checker = std::make_unique<QpFrameChecker>(); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateVideoCodecTestFixture(config); + + std::vector<RateProfile> rate_profiles = {{500, 30, 0}}; + + std::vector<RateControlThresholds> rc_thresholds = { + {5, 1, 0, 1, 0.3, 0.1, 0, 1}}; + + std::vector<QualityThresholds> quality_thresholds = {{37.5, 36, 0.94, 0.93}}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +TEST(VideoCodecTestLibvpx, VeryLowBitrateVP9) { + auto config = CreateConfig(); + config.SetCodecSettings(cricket::kVp9CodecName, 1, 1, 1, false, true, true, + kCifWidth, kCifHeight); + const auto frame_checker = std::make_unique<QpFrameChecker>(); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateVideoCodecTestFixture(config); + + std::vector<RateProfile> rate_profiles = {{50, 30, 0}}; + + std::vector<RateControlThresholds> rc_thresholds = { + {15, 3, 75, 1, 0.5, 0.4, 2, 1}}; + + std::vector<QualityThresholds> quality_thresholds = {{28, 25, 0.80, 0.65}}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +// TODO(marpan): Add temporal layer test for VP9, once changes are in +// vp9 wrapper for this. 
+ +#endif // defined(RTC_ENABLE_VP9) + +TEST(VideoCodecTestLibvpx, HighBitrateVP8) { + auto config = CreateConfig(); + config.SetCodecSettings(cricket::kVp8CodecName, 1, 1, 1, true, true, false, + kCifWidth, kCifHeight); + config.num_frames = kNumFramesShort; + const auto frame_checker = std::make_unique<QpFrameChecker>(); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateVideoCodecTestFixture(config); + + std::vector<RateProfile> rate_profiles = {{500, 30, 0}}; + + std::vector<RateControlThresholds> rc_thresholds = { + {5, 1, 0, 1, 0.2, 0.1, 0, 1}}; + +#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) + std::vector<QualityThresholds> quality_thresholds = {{35, 33, 0.91, 0.89}}; +#else + std::vector<QualityThresholds> quality_thresholds = {{37, 35, 0.93, 0.91}}; +#endif + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +TEST(VideoCodecTestLibvpx, MAYBE_ChangeBitrateVP8) { + auto config = CreateConfig(); + config.SetCodecSettings(cricket::kVp8CodecName, 1, 1, 1, true, true, false, + kCifWidth, kCifHeight); + const auto frame_checker = std::make_unique<QpFrameChecker>(); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateVideoCodecTestFixture(config); + + std::vector<RateProfile> rate_profiles = { + {200, 30, 0}, // target_kbps, input_fps, frame_num + {800, 30, 100}, + {500, 30, 200}}; + + std::vector<RateControlThresholds> rc_thresholds = { + {5, 1, 0, 1, 0.2, 0.1, 0, 1}, + {15.5, 1, 0, 1, 0.2, 0.1, 0, 0}, + {15, 1, 0, 1, 0.2, 0.1, 0, 0}}; + +#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) + std::vector<QualityThresholds> quality_thresholds = { + {31.8, 31, 0.86, 0.85}, {36, 34.8, 0.92, 0.90}, {33.5, 32, 0.90, 0.88}}; +#else + std::vector<QualityThresholds> quality_thresholds = { + {33, 32, 0.89, 0.88}, {38, 36, 0.94, 0.93}, {35, 34, 0.92, 0.91}}; +#endif + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + 
+TEST(VideoCodecTestLibvpx, MAYBE_ChangeFramerateVP8) { + auto config = CreateConfig(); + config.SetCodecSettings(cricket::kVp8CodecName, 1, 1, 1, true, true, false, + kCifWidth, kCifHeight); + const auto frame_checker = std::make_unique<QpFrameChecker>(); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateVideoCodecTestFixture(config); + + std::vector<RateProfile> rate_profiles = { + {80, 24, 0}, // target_kbps, input_fps, frame_index_rate_update + {80, 15, 100}, + {80, 10, 200}}; + +#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) + std::vector<RateControlThresholds> rc_thresholds = { + {10, 2.42, 60, 1, 0.3, 0.3, 0, 1}, + {10, 2, 30, 1, 0.3, 0.3, 0, 0}, + {10, 2, 10, 1, 0.3, 0.2, 0, 0}}; +#else + std::vector<RateControlThresholds> rc_thresholds = { + {10, 2, 20, 1, 0.3, 0.15, 0, 1}, + {5, 2, 5, 1, 0.3, 0.15, 0, 0}, + {4, 2, 1, 1, 0.3, 0.2, 0, 0}}; +#endif + +#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) + std::vector<QualityThresholds> quality_thresholds = { + {31, 30, 0.85, 0.84}, {31.4, 30.5, 0.86, 0.84}, {30.5, 29, 0.83, 0.78}}; +#else + std::vector<QualityThresholds> quality_thresholds = { + {31, 30, 0.87, 0.85}, {32, 31, 0.88, 0.85}, {32, 30, 0.87, 0.82}}; +#endif + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +#if defined(WEBRTC_ANDROID) +#define MAYBE_TemporalLayersVP8 DISABLED_TemporalLayersVP8 +#else +#define MAYBE_TemporalLayersVP8 TemporalLayersVP8 +#endif +TEST(VideoCodecTestLibvpx, MAYBE_TemporalLayersVP8) { + auto config = CreateConfig(); + config.SetCodecSettings(cricket::kVp8CodecName, 1, 1, 3, true, true, false, + kCifWidth, kCifHeight); + const auto frame_checker = std::make_unique<QpFrameChecker>(); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateVideoCodecTestFixture(config); + + std::vector<RateProfile> rate_profiles = {{200, 30, 0}, {400, 30, 150}}; + +#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) + 
std::vector<RateControlThresholds> rc_thresholds = { + {10, 1, 2.1, 1, 0.2, 0.1, 0, 1}, {12, 2, 3, 1, 0.2, 0.1, 0, 1}}; +#else + std::vector<RateControlThresholds> rc_thresholds = { + {5, 1, 0, 1, 0.2, 0.1, 0, 1}, {10, 2, 0, 1, 0.2, 0.1, 0, 1}}; +#endif +// Min SSIM drops because of high motion scene with complex background (trees). +#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) + std::vector<QualityThresholds> quality_thresholds = {{31, 30, 0.85, 0.83}, + {31, 28, 0.85, 0.75}}; +#else + std::vector<QualityThresholds> quality_thresholds = {{32, 30, 0.88, 0.85}, + {33, 30, 0.89, 0.83}}; +#endif + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +#if defined(WEBRTC_ANDROID) +#define MAYBE_MultiresVP8 DISABLED_MultiresVP8 +#else +#define MAYBE_MultiresVP8 MultiresVP8 +#endif +TEST(VideoCodecTestLibvpx, MAYBE_MultiresVP8) { + auto config = CreateConfig(); + config.filename = "ConferenceMotion_1280_720_50"; + config.filepath = ResourcePath(config.filename, "yuv"); + config.num_frames = 100; + config.SetCodecSettings(cricket::kVp8CodecName, 3, 1, 3, true, true, false, + 1280, 720); + const auto frame_checker = std::make_unique<QpFrameChecker>(); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateVideoCodecTestFixture(config); + + std::vector<RateProfile> rate_profiles = {{1500, 30, 0}}; +#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) + std::vector<RateControlThresholds> rc_thresholds = { + {4.1, 1.04, 7, 0.18, 0.14, 0.08, 0, 1}}; +#else + std::vector<RateControlThresholds> rc_thresholds = { + {5, 1, 5, 1, 0.3, 0.1, 0, 1}}; +#endif + std::vector<QualityThresholds> quality_thresholds = {{34, 32, 0.90, 0.88}}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +#if defined(WEBRTC_ANDROID) +#define MAYBE_SimulcastVP8 DISABLED_SimulcastVP8 +#else +#define MAYBE_SimulcastVP8 SimulcastVP8 +#endif +TEST(VideoCodecTestLibvpx, MAYBE_SimulcastVP8) { + auto
config = CreateConfig(); + config.filename = "ConferenceMotion_1280_720_50"; + config.filepath = ResourcePath(config.filename, "yuv"); + config.num_frames = 100; + config.SetCodecSettings(cricket::kVp8CodecName, 3, 1, 3, true, true, false, + 1280, 720); + const auto frame_checker = std::make_unique<QpFrameChecker>(); + config.encoded_frame_checker = frame_checker.get(); + + InternalEncoderFactory internal_encoder_factory; + std::unique_ptr<VideoEncoderFactory> adapted_encoder_factory = + std::make_unique<FunctionVideoEncoderFactory>([&]() { + return std::make_unique<SimulcastEncoderAdapter>( + &internal_encoder_factory, SdpVideoFormat(cricket::kVp8CodecName)); + }); + std::unique_ptr<InternalDecoderFactory> internal_decoder_factory( + new InternalDecoderFactory()); + + auto fixture = + CreateVideoCodecTestFixture(config, std::move(internal_decoder_factory), + std::move(adapted_encoder_factory)); + + std::vector<RateProfile> rate_profiles = {{1500, 30, 0}}; + + std::vector<RateControlThresholds> rc_thresholds = { + {20, 5, 90, 1, 0.5, 0.3, 0, 1}}; + std::vector<QualityThresholds> quality_thresholds = {{34, 32, 0.90, 0.88}}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +#if defined(WEBRTC_ANDROID) +#define MAYBE_SvcVP9 DISABLED_SvcVP9 +#else +#define MAYBE_SvcVP9 SvcVP9 +#endif +TEST(VideoCodecTestLibvpx, MAYBE_SvcVP9) { + auto config = CreateConfig(); + config.filename = "ConferenceMotion_1280_720_50"; + config.filepath = ResourcePath(config.filename, "yuv"); + config.num_frames = 100; + config.SetCodecSettings(cricket::kVp9CodecName, 1, 3, 3, true, true, false, + 1280, 720); + const auto frame_checker = std::make_unique<QpFrameChecker>(); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateVideoCodecTestFixture(config); + + std::vector<RateProfile> rate_profiles = {{1500, 30, 0}}; + + std::vector<RateControlThresholds> rc_thresholds = { + {5, 1, 5, 1, 0.3, 0.1, 0, 1}}; + 
std::vector<QualityThresholds> quality_thresholds = {{36, 34, 0.93, 0.90}}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +TEST(VideoCodecTestLibvpx, DISABLED_MultiresVP8RdPerf) { + auto config = CreateConfig(); + config.filename = "FourPeople_1280x720_30"; + config.filepath = ResourcePath(config.filename, "yuv"); + config.num_frames = 300; + config.print_frame_level_stats = true; + config.SetCodecSettings(cricket::kVp8CodecName, 3, 1, 3, true, true, false, + 1280, 720); + const auto frame_checker = std::make_unique<QpFrameChecker>(); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateVideoCodecTestFixture(config); + + std::map<size_t, std::vector<VideoStatistics>> rd_stats; + for (size_t bitrate_kbps : kBitrateRdPerfKbps) { + std::vector<RateProfile> rate_profiles = {{bitrate_kbps, 30, 0}}; + + fixture->RunTest(rate_profiles, nullptr, nullptr, nullptr); + + rd_stats[bitrate_kbps] = + fixture->GetStats().SliceAndCalcLayerVideoStatistic( + kNumFirstFramesToSkipAtRdPerfAnalysis, config.num_frames - 1); + } + + PrintRdPerf(rd_stats); +} + +TEST(VideoCodecTestLibvpx, DISABLED_SvcVP9RdPerf) { + auto config = CreateConfig(); + config.filename = "FourPeople_1280x720_30"; + config.filepath = ResourcePath(config.filename, "yuv"); + config.num_frames = 300; + config.print_frame_level_stats = true; + config.SetCodecSettings(cricket::kVp9CodecName, 1, 3, 3, true, true, false, + 1280, 720); + const auto frame_checker = std::make_unique<QpFrameChecker>(); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateVideoCodecTestFixture(config); + + std::map<size_t, std::vector<VideoStatistics>> rd_stats; + for (size_t bitrate_kbps : kBitrateRdPerfKbps) { + std::vector<RateProfile> rate_profiles = {{bitrate_kbps, 30, 0}}; + + fixture->RunTest(rate_profiles, nullptr, nullptr, nullptr); + + rd_stats[bitrate_kbps] = + fixture->GetStats().SliceAndCalcLayerVideoStatistic( + 
kNumFirstFramesToSkipAtRdPerfAnalysis, config.num_frames - 1); + } + + PrintRdPerf(rd_stats); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_mediacodec.cc b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_mediacodec.cc new file mode 100644 index 0000000000..8a1cf01a66 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_mediacodec.cc @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <memory> +#include <string> +#include <tuple> +#include <vector> + +#include "api/test/create_videocodec_test_fixture.h" +#include "media/base/media_constants.h" +#include "modules/video_coding/codecs/test/android_codec_factory_helper.h" +#include "modules/video_coding/codecs/test/videocodec_test_fixture_impl.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { +namespace test { + +namespace { +const int kForemanNumFrames = 300; +const int kForemanFramerateFps = 30; + +const size_t kConstRateIntervalSec = 10; +const std::vector<webrtc::test::RateProfile> kBitRateHighLowHigh = { + {/*target_kbps=*/3000, /*input_fps=*/30, /*frame_num=*/0}, + {/*target_kbps=*/1500, /*input_fps=*/30, /*frame_num=*/300}, + {/*target_kbps=*/750, /*input_fps=*/30, /*frame_num=*/600}, + {/*target_kbps=*/1500, /*input_fps=*/30, /*frame_num=*/900}, + {/*target_kbps=*/3000, /*input_fps=*/30, /*frame_num=*/1200}}; + +const std::vector<webrtc::test::RateProfile> kBitRateLowHighLow = { + {/*target_kbps=*/750, /*input_fps=*/30, /*frame_num=*/0}, + 
{/*target_kbps=*/1500, /*input_fps=*/30, /*frame_num=*/300}, + {/*target_kbps=*/3000, /*input_fps=*/30, /*frame_num=*/600}, + {/*target_kbps=*/1500, /*input_fps=*/30, /*frame_num=*/900}, + {/*target_kbps=*/720, /*input_fps=*/30, /*frame_num=*/1200}}; + +const std::vector<webrtc::test::RateProfile> kFrameRateHighLowHigh = { + {/*target_kbps=*/2000, /*input_fps=*/30, /*frame_num=*/0}, + {/*target_kbps=*/2000, /*input_fps=*/15, /*frame_num=*/300}, + {/*target_kbps=*/2000, /*input_fps=*/7.5, /*frame_num=*/450}, + {/*target_kbps=*/2000, /*input_fps=*/15, /*frame_num=*/525}, + {/*target_kbps=*/2000, /*input_fps=*/30, /*frame_num=*/675}}; + +const std::vector<webrtc::test::RateProfile> kFrameRateLowHighLow = { + {/*target_kbps=*/2000, /*input_fps=*/7.5, /*frame_num=*/0}, + {/*target_kbps=*/2000, /*input_fps=*/15, /*frame_num=*/75}, + {/*target_kbps=*/2000, /*input_fps=*/30, /*frame_num=*/225}, + {/*target_kbps=*/2000, /*input_fps=*/15, /*frame_num=*/525}, + {/*target_kbps=*/2000, /*input_fps=*/7.5, /*frame_num=*/775}}; + +VideoCodecTestFixture::Config CreateConfig() { + VideoCodecTestFixture::Config config; + config.filename = "foreman_cif"; + config.filepath = ResourcePath(config.filename, "yuv"); + config.num_frames = kForemanNumFrames; + // In order to not overwhelm the OpenMAX buffers in the Android MediaCodec. + config.encode_in_real_time = true; + return config; +} + +std::unique_ptr<VideoCodecTestFixture> CreateTestFixtureWithConfig( + VideoCodecTestFixture::Config config) { + InitializeAndroidObjects(); // Idempotent. 
+ auto encoder_factory = CreateAndroidEncoderFactory(); + auto decoder_factory = CreateAndroidDecoderFactory(); + return CreateVideoCodecTestFixture(config, std::move(decoder_factory), + std::move(encoder_factory)); +} +} // namespace + +TEST(VideoCodecTestMediaCodec, ForemanCif500kbpsVp8) { + auto config = CreateConfig(); + config.SetCodecSettings(cricket::kVp8CodecName, 1, 1, 1, false, false, false, + 352, 288); + auto fixture = CreateTestFixtureWithConfig(config); + + std::vector<RateProfile> rate_profiles = {{500, kForemanFramerateFps, 0}}; + + // The thresholds below may have to be tweaked to let even poor MediaCodec + // implementations pass. If this test fails on the bots, disable it and + // ping brandtr@. + std::vector<RateControlThresholds> rc_thresholds = { + {10, 1, 1, 0.1, 0.2, 0.1, 0, 1}}; + + std::vector<QualityThresholds> quality_thresholds = {{36, 31, 0.92, 0.86}}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +TEST(VideoCodecTestMediaCodec, ForemanCif500kbpsH264CBP) { + auto config = CreateConfig(); + const auto frame_checker = + std::make_unique<VideoCodecTestFixtureImpl::H264KeyframeChecker>(); + config.encoded_frame_checker = frame_checker.get(); + config.SetCodecSettings(cricket::kH264CodecName, 1, 1, 1, false, false, false, + 352, 288); + auto fixture = CreateTestFixtureWithConfig(config); + + std::vector<RateProfile> rate_profiles = {{500, kForemanFramerateFps, 0}}; + + // The thresholds below may have to be tweaked to let even poor MediaCodec + // implementations pass. If this test fails on the bots, disable it and + // ping brandtr@. + std::vector<RateControlThresholds> rc_thresholds = { + {10, 1, 1, 0.1, 0.2, 0.1, 0, 1}}; + + std::vector<QualityThresholds> quality_thresholds = {{36, 31, 0.92, 0.86}}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +// TODO(brandtr): Enable this test when we have trybots/buildbots with +// HW encoders that support CHP. 
+TEST(VideoCodecTestMediaCodec, DISABLED_ForemanCif500kbpsH264CHP) { + auto config = CreateConfig(); + const auto frame_checker = + std::make_unique<VideoCodecTestFixtureImpl::H264KeyframeChecker>(); + + config.h264_codec_settings.profile = H264Profile::kProfileConstrainedHigh; + config.encoded_frame_checker = frame_checker.get(); + config.SetCodecSettings(cricket::kH264CodecName, 1, 1, 1, false, false, false, + 352, 288); + auto fixture = CreateTestFixtureWithConfig(config); + + std::vector<RateProfile> rate_profiles = {{500, kForemanFramerateFps, 0}}; + + // The thresholds below may have to be tweaked to let even poor MediaCodec + // implementations pass. If this test fails on the bots, disable it and + // ping brandtr@. + std::vector<RateControlThresholds> rc_thresholds = { + {5, 1, 0, 0.1, 0.2, 0.1, 0, 1}}; + + std::vector<QualityThresholds> quality_thresholds = {{37, 35, 0.93, 0.91}}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +TEST(VideoCodecTestMediaCodec, ForemanMixedRes100kbpsVp8H264) { + auto config = CreateConfig(); + const int kNumFrames = 30; + const std::vector<std::string> codecs = {cricket::kVp8CodecName, + cricket::kH264CodecName}; + const std::vector<std::tuple<int, int>> resolutions = { + {128, 96}, {176, 144}, {320, 240}, {480, 272}}; + const std::vector<RateProfile> rate_profiles = { + {100, kForemanFramerateFps, 0}}; + const std::vector<QualityThresholds> quality_thresholds = { + {29, 26, 0.8, 0.75}}; + + for (const auto& codec : codecs) { + for (const auto& resolution : resolutions) { + const int width = std::get<0>(resolution); + const int height = std::get<1>(resolution); + config.filename = std::string("foreman_") + std::to_string(width) + "x" + + std::to_string(height); + config.filepath = ResourcePath(config.filename, "yuv"); + config.num_frames = kNumFrames; + config.SetCodecSettings(codec, 1, 1, 1, false, false, false, width, + height); + + auto fixture = CreateTestFixtureWithConfig(config); 
+ fixture->RunTest(rate_profiles, nullptr /* rc_thresholds */, + &quality_thresholds, nullptr /* bs_thresholds */); + } + } +} + +class VideoCodecTestMediaCodecRateAdaptation + : public ::testing::TestWithParam< + std::tuple<std::vector<webrtc::test::RateProfile>, std::string>> {}; + +TEST_P(VideoCodecTestMediaCodecRateAdaptation, DISABLED_RateAdaptation) { + const std::vector<webrtc::test::RateProfile> rate_profile = + std::get<0>(GetParam()); + const std::string codec_name = std::get<1>(GetParam()); + + VideoCodecTestFixture::Config config; + config.filename = "FourPeople_1280x720_30"; + config.filepath = ResourcePath(config.filename, "yuv"); + config.num_frames = rate_profile.back().frame_num + + static_cast<size_t>(kConstRateIntervalSec * + rate_profile.back().input_fps); + config.encode_in_real_time = true; + config.SetCodecSettings(codec_name, 1, 1, 1, false, false, false, 1280, 720); + + auto fixture = CreateTestFixtureWithConfig(config); + fixture->RunTest(rate_profile, nullptr, nullptr, nullptr); + + for (size_t i = 0; i < rate_profile.size(); ++i) { + const size_t num_frames = + static_cast<size_t>(rate_profile[i].input_fps * kConstRateIntervalSec); + + auto stats = fixture->GetStats().SliceAndCalcLayerVideoStatistic( + rate_profile[i].frame_num, rate_profile[i].frame_num + num_frames - 1); + ASSERT_EQ(stats.size(), 1u); + + // Bitrate mismatch is <= 10%. + EXPECT_LE(stats[0].avg_bitrate_mismatch_pct, 10); + EXPECT_GE(stats[0].avg_bitrate_mismatch_pct, -10); + + // Avg frame transmission delay and processing latency is <=100..250ms + // depending on frame rate. + const double expected_delay_sec = + std::min(std::max(1 / rate_profile[i].input_fps, 0.1), 0.25); + EXPECT_LE(stats[0].avg_delay_sec, expected_delay_sec); + EXPECT_LE(stats[0].avg_encode_latency_sec, expected_delay_sec); + EXPECT_LE(stats[0].avg_decode_latency_sec, expected_delay_sec); + + // Frame drops are not expected. 
+ EXPECT_EQ(stats[0].num_encoded_frames, num_frames); + EXPECT_EQ(stats[0].num_decoded_frames, num_frames); + + // Periodic keyframes are not expected. + EXPECT_EQ(stats[0].num_key_frames, i == 0 ? 1u : 0); + + // Ensure codec delivers a reasonable spatial quality. + EXPECT_GE(stats[0].avg_psnr_y, 35); + } +} + +INSTANTIATE_TEST_SUITE_P( + RateAdaptation, + VideoCodecTestMediaCodecRateAdaptation, + ::testing::Combine(::testing::Values(kBitRateLowHighLow, + kBitRateHighLowHigh, + kFrameRateLowHighLow, + kFrameRateHighLowHigh), + ::testing::Values(cricket::kVp8CodecName, + cricket::kVp9CodecName, + cricket::kH264CodecName))); + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_openh264.cc b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_openh264.cc new file mode 100644 index 0000000000..6513074bad --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_openh264.cc @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <memory> +#include <vector> + +#include "api/test/create_videocodec_test_fixture.h" +#include "media/base/media_constants.h" +#include "modules/video_coding/codecs/test/videocodec_test_fixture_impl.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { +namespace test { + +namespace { +// Codec settings. 
+const int kCifWidth = 352; +const int kCifHeight = 288; +const int kNumFrames = 100; + +VideoCodecTestFixture::Config CreateConfig() { + VideoCodecTestFixture::Config config; + config.filename = "foreman_cif"; + config.filepath = ResourcePath(config.filename, "yuv"); + config.num_frames = kNumFrames; + // Only allow encoder/decoder to use single core, for predictability. + config.use_single_core = true; + return config; +} +} // namespace + +TEST(VideoCodecTestOpenH264, ConstantHighBitrate) { + auto frame_checker = + std::make_unique<VideoCodecTestFixtureImpl::H264KeyframeChecker>(); + auto config = CreateConfig(); + config.SetCodecSettings(cricket::kH264CodecName, 1, 1, 1, false, true, false, + kCifWidth, kCifHeight); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateVideoCodecTestFixture(config); + + std::vector<RateProfile> rate_profiles = {{500, 30, 0}}; + + std::vector<RateControlThresholds> rc_thresholds = { + {5, 1, 0, 0.1, 0.2, 0.1, 0, 1}}; + + std::vector<QualityThresholds> quality_thresholds = {{37, 35, 0.93, 0.91}}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, nullptr); +} + +// H264: Enable SingleNalUnit packetization mode. Encoder should split +// large frames into multiple slices and limit length of NAL units. 
+TEST(VideoCodecTestOpenH264, SingleNalUnit) { + auto frame_checker = + std::make_unique<VideoCodecTestFixtureImpl::H264KeyframeChecker>(); + auto config = CreateConfig(); + config.h264_codec_settings.packetization_mode = + H264PacketizationMode::SingleNalUnit; + config.max_payload_size_bytes = 500; + config.SetCodecSettings(cricket::kH264CodecName, 1, 1, 1, false, true, false, + kCifWidth, kCifHeight); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateVideoCodecTestFixture(config); + + std::vector<RateProfile> rate_profiles = {{500, 30, 0}}; + + std::vector<RateControlThresholds> rc_thresholds = { + {5, 1, 0, 0.1, 0.2, 0.1, 0, 1}}; + + std::vector<QualityThresholds> quality_thresholds = {{37, 35, 0.93, 0.91}}; + + BitstreamThresholds bs_thresholds = {config.max_payload_size_bytes}; + + fixture->RunTest(rate_profiles, &rc_thresholds, &quality_thresholds, + &bs_thresholds); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_stats_impl.cc b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_stats_impl.cc new file mode 100644 index 0000000000..efb7502e5d --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_stats_impl.cc @@ -0,0 +1,405 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/test/videocodec_test_stats_impl.h" + +#include <algorithm> +#include <cmath> +#include <iterator> +#include <limits> +#include <numeric> + +#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/running_statistics.h" +#include "rtc_base/strings/string_builder.h" + +namespace webrtc { +namespace test { + +using FrameStatistics = VideoCodecTestStats::FrameStatistics; +using VideoStatistics = VideoCodecTestStats::VideoStatistics; + +namespace { +const int kMaxBitrateMismatchPercent = 20; +} + +VideoCodecTestStatsImpl::VideoCodecTestStatsImpl() = default; +VideoCodecTestStatsImpl::~VideoCodecTestStatsImpl() = default; + +void VideoCodecTestStatsImpl::AddFrame(const FrameStatistics& frame_stat) { + const size_t timestamp = frame_stat.rtp_timestamp; + const size_t layer_idx = frame_stat.spatial_idx; + RTC_DCHECK(rtp_timestamp_to_frame_num_[layer_idx].find(timestamp) == + rtp_timestamp_to_frame_num_[layer_idx].end()); + rtp_timestamp_to_frame_num_[layer_idx][timestamp] = frame_stat.frame_number; + layer_stats_[layer_idx].push_back(frame_stat); +} + +FrameStatistics* VideoCodecTestStatsImpl::GetFrame(size_t frame_num, + size_t layer_idx) { + RTC_CHECK_LT(frame_num, layer_stats_[layer_idx].size()); + return &layer_stats_[layer_idx][frame_num]; +} + +FrameStatistics* VideoCodecTestStatsImpl::GetFrameWithTimestamp( + size_t timestamp, + size_t layer_idx) { + RTC_DCHECK(rtp_timestamp_to_frame_num_[layer_idx].find(timestamp) != + rtp_timestamp_to_frame_num_[layer_idx].end()); + + return GetFrame(rtp_timestamp_to_frame_num_[layer_idx][timestamp], layer_idx); +} + +std::vector<FrameStatistics> VideoCodecTestStatsImpl::GetFrameStatistics() { + size_t capacity = 0; + for (const auto& layer_stat : layer_stats_) { + capacity += layer_stat.second.size(); + } + + std::vector<FrameStatistics> frame_statistics; + frame_statistics.reserve(capacity); + for (const auto& layer_stat : 
layer_stats_) { + std::copy(layer_stat.second.cbegin(), layer_stat.second.cend(), + std::back_inserter(frame_statistics)); + } + + return frame_statistics; +} + +std::vector<VideoStatistics> +VideoCodecTestStatsImpl::SliceAndCalcLayerVideoStatistic( + size_t first_frame_num, + size_t last_frame_num) { + std::vector<VideoStatistics> layer_stats; + + size_t num_spatial_layers = 0; + size_t num_temporal_layers = 0; + GetNumberOfEncodedLayers(first_frame_num, last_frame_num, &num_spatial_layers, + &num_temporal_layers); + RTC_CHECK_GT(num_spatial_layers, 0); + RTC_CHECK_GT(num_temporal_layers, 0); + + for (size_t spatial_idx = 0; spatial_idx < num_spatial_layers; + ++spatial_idx) { + for (size_t temporal_idx = 0; temporal_idx < num_temporal_layers; + ++temporal_idx) { + VideoStatistics layer_stat = SliceAndCalcVideoStatistic( + first_frame_num, last_frame_num, spatial_idx, temporal_idx, false); + layer_stats.push_back(layer_stat); + } + } + + return layer_stats; +} + +VideoStatistics VideoCodecTestStatsImpl::SliceAndCalcAggregatedVideoStatistic( + size_t first_frame_num, + size_t last_frame_num) { + size_t num_spatial_layers = 0; + size_t num_temporal_layers = 0; + GetNumberOfEncodedLayers(first_frame_num, last_frame_num, &num_spatial_layers, + &num_temporal_layers); + RTC_CHECK_GT(num_spatial_layers, 0); + RTC_CHECK_GT(num_temporal_layers, 0); + + return SliceAndCalcVideoStatistic(first_frame_num, last_frame_num, + num_spatial_layers - 1, + num_temporal_layers - 1, true); +} + +size_t VideoCodecTestStatsImpl::Size(size_t spatial_idx) { + return layer_stats_[spatial_idx].size(); +} + +void VideoCodecTestStatsImpl::Clear() { + layer_stats_.clear(); + rtp_timestamp_to_frame_num_.clear(); +} + +FrameStatistics VideoCodecTestStatsImpl::AggregateFrameStatistic( + size_t frame_num, + size_t spatial_idx, + bool aggregate_independent_layers) { + FrameStatistics frame_stat = *GetFrame(frame_num, spatial_idx); + bool inter_layer_predicted = frame_stat.inter_layer_predicted; + 
while (spatial_idx-- > 0) { + if (aggregate_independent_layers || inter_layer_predicted) { + FrameStatistics* base_frame_stat = GetFrame(frame_num, spatial_idx); + frame_stat.length_bytes += base_frame_stat->length_bytes; + frame_stat.target_bitrate_kbps += base_frame_stat->target_bitrate_kbps; + + inter_layer_predicted = base_frame_stat->inter_layer_predicted; + } + } + + return frame_stat; +} + +size_t VideoCodecTestStatsImpl::CalcLayerTargetBitrateKbps( + size_t first_frame_num, + size_t last_frame_num, + size_t spatial_idx, + size_t temporal_idx, + bool aggregate_independent_layers) { + size_t target_bitrate_kbps = 0; + + // We don't know if superframe includes all required spatial layers because + // of possible frame drops. Run through all frames in specified range, find + // and return maximum target bitrate. Assume that target bitrate in frame + // statistic is specified per temporal layer. + for (size_t frame_num = first_frame_num; frame_num <= last_frame_num; + ++frame_num) { + FrameStatistics superframe = AggregateFrameStatistic( + frame_num, spatial_idx, aggregate_independent_layers); + + if (superframe.temporal_idx <= temporal_idx) { + target_bitrate_kbps = + std::max(target_bitrate_kbps, superframe.target_bitrate_kbps); + } + } + + RTC_DCHECK_GT(target_bitrate_kbps, 0); + return target_bitrate_kbps; +} + +VideoStatistics VideoCodecTestStatsImpl::SliceAndCalcVideoStatistic( + size_t first_frame_num, + size_t last_frame_num, + size_t spatial_idx, + size_t temporal_idx, + bool aggregate_independent_layers) { + VideoStatistics video_stat; + + float buffer_level_bits = 0.0f; + webrtc_impl::RunningStatistics<float> buffer_level_sec; + + webrtc_impl::RunningStatistics<size_t> key_frame_size_bytes; + webrtc_impl::RunningStatistics<size_t> delta_frame_size_bytes; + + webrtc_impl::RunningStatistics<size_t> frame_encoding_time_us; + webrtc_impl::RunningStatistics<size_t> frame_decoding_time_us; + + webrtc_impl::RunningStatistics<float> psnr_y; + 
webrtc_impl::RunningStatistics<float> psnr_u; + webrtc_impl::RunningStatistics<float> psnr_v; + webrtc_impl::RunningStatistics<float> psnr; + webrtc_impl::RunningStatistics<float> ssim; + webrtc_impl::RunningStatistics<int> qp; + + size_t rtp_timestamp_first_frame = 0; + size_t rtp_timestamp_prev_frame = 0; + + FrameStatistics last_successfully_decoded_frame(0, 0, 0); + + const size_t target_bitrate_kbps = + CalcLayerTargetBitrateKbps(first_frame_num, last_frame_num, spatial_idx, + temporal_idx, aggregate_independent_layers); + const size_t target_bitrate_bps = 1000 * target_bitrate_kbps; + RTC_CHECK_GT(target_bitrate_kbps, 0); // We divide by `target_bitrate_kbps`. + + for (size_t frame_num = first_frame_num; frame_num <= last_frame_num; + ++frame_num) { + FrameStatistics frame_stat = AggregateFrameStatistic( + frame_num, spatial_idx, aggregate_independent_layers); + + float time_since_first_frame_sec = + 1.0f * (frame_stat.rtp_timestamp - rtp_timestamp_first_frame) / + kVideoPayloadTypeFrequency; + float time_since_prev_frame_sec = + 1.0f * (frame_stat.rtp_timestamp - rtp_timestamp_prev_frame) / + kVideoPayloadTypeFrequency; + + if (frame_stat.temporal_idx > temporal_idx) { + continue; + } + + buffer_level_bits -= time_since_prev_frame_sec * 1000 * target_bitrate_kbps; + buffer_level_bits = std::max(0.0f, buffer_level_bits); + buffer_level_bits += 8.0 * frame_stat.length_bytes; + buffer_level_sec.AddSample(buffer_level_bits / + (1000 * target_bitrate_kbps)); + + video_stat.length_bytes += frame_stat.length_bytes; + + if (frame_stat.encoding_successful) { + ++video_stat.num_encoded_frames; + + if (frame_stat.frame_type == VideoFrameType::kVideoFrameKey) { + key_frame_size_bytes.AddSample(frame_stat.length_bytes); + ++video_stat.num_key_frames; + } else { + delta_frame_size_bytes.AddSample(frame_stat.length_bytes); + } + + frame_encoding_time_us.AddSample(frame_stat.encode_time_us); + qp.AddSample(frame_stat.qp); + + video_stat.max_nalu_size_bytes = 
std::max(video_stat.max_nalu_size_bytes, + frame_stat.max_nalu_size_bytes); + } + + if (frame_stat.decoding_successful) { + ++video_stat.num_decoded_frames; + + video_stat.width = std::max(video_stat.width, frame_stat.decoded_width); + video_stat.height = + std::max(video_stat.height, frame_stat.decoded_height); + + if (video_stat.num_decoded_frames > 1) { + if (last_successfully_decoded_frame.decoded_width != + frame_stat.decoded_width || + last_successfully_decoded_frame.decoded_height != + frame_stat.decoded_height) { + ++video_stat.num_spatial_resizes; + } + } + + frame_decoding_time_us.AddSample(frame_stat.decode_time_us); + last_successfully_decoded_frame = frame_stat; + } + + if (frame_stat.quality_analysis_successful) { + psnr_y.AddSample(frame_stat.psnr_y); + psnr_u.AddSample(frame_stat.psnr_u); + psnr_v.AddSample(frame_stat.psnr_v); + psnr.AddSample(frame_stat.psnr); + ssim.AddSample(frame_stat.ssim); + } + + if (video_stat.num_input_frames > 0) { + if (video_stat.time_to_reach_target_bitrate_sec == 0.0f) { + RTC_CHECK_GT(time_since_first_frame_sec, 0); + const float curr_kbps = + 8.0 * video_stat.length_bytes / 1000 / time_since_first_frame_sec; + const float bitrate_mismatch_percent = + 100 * std::fabs(curr_kbps - target_bitrate_kbps) / + target_bitrate_kbps; + if (bitrate_mismatch_percent < kMaxBitrateMismatchPercent) { + video_stat.time_to_reach_target_bitrate_sec = + time_since_first_frame_sec; + } + } + } + + rtp_timestamp_prev_frame = frame_stat.rtp_timestamp; + if (video_stat.num_input_frames == 0) { + rtp_timestamp_first_frame = frame_stat.rtp_timestamp; + } + + ++video_stat.num_input_frames; + } + + const size_t num_frames = last_frame_num - first_frame_num + 1; + const size_t timestamp_delta = + GetFrame(first_frame_num + 1, spatial_idx)->rtp_timestamp - + GetFrame(first_frame_num, spatial_idx)->rtp_timestamp; + RTC_CHECK_GT(timestamp_delta, 0); + const float input_framerate_fps = + 1.0 * kVideoPayloadTypeFrequency / timestamp_delta; + 
RTC_CHECK_GT(input_framerate_fps, 0); + const float duration_sec = num_frames / input_framerate_fps; + + video_stat.target_bitrate_kbps = target_bitrate_kbps; + video_stat.input_framerate_fps = input_framerate_fps; + + video_stat.spatial_idx = spatial_idx; + video_stat.temporal_idx = temporal_idx; + + RTC_CHECK_GT(duration_sec, 0); + const float bitrate_bps = 8 * video_stat.length_bytes / duration_sec; + video_stat.bitrate_kbps = static_cast<size_t>((bitrate_bps + 500) / 1000); + video_stat.framerate_fps = video_stat.num_encoded_frames / duration_sec; + + // http://bugs.webrtc.org/10400: On Windows, we only get millisecond + // granularity in the frame encode/decode timing measurements. + // So we need to softly avoid a div-by-zero here. + const float mean_encode_time_us = + frame_encoding_time_us.GetMean().value_or(0); + video_stat.enc_speed_fps = mean_encode_time_us > 0.0f + ? 1000000.0f / mean_encode_time_us + : std::numeric_limits<float>::max(); + const float mean_decode_time_us = + frame_decoding_time_us.GetMean().value_or(0); + video_stat.dec_speed_fps = mean_decode_time_us > 0.0f + ? 
1000000.0f / mean_decode_time_us + : std::numeric_limits<float>::max(); + + video_stat.avg_encode_latency_sec = + frame_encoding_time_us.GetMean().value_or(0) / 1000000.0f; + video_stat.max_encode_latency_sec = + frame_encoding_time_us.GetMax().value_or(0) / 1000000.0f; + + video_stat.avg_decode_latency_sec = + frame_decoding_time_us.GetMean().value_or(0) / 1000000.0f; + video_stat.max_decode_latency_sec = + frame_decoding_time_us.GetMax().value_or(0) / 1000000.0f; + + // Time in seconds needed to send the largest frame at the target bitrate. + // Computed in floating point: every operand is a size_t, so integer division + // would truncate any sub-second delay to 0. + auto MaxDelaySec = [target_bitrate_kbps]( + const webrtc_impl::RunningStatistics<size_t>& stats) { + return 8.0f * stats.GetMax().value_or(0) / 1000 / target_bitrate_kbps; + }; + + video_stat.avg_delay_sec = buffer_level_sec.GetMean().value_or(0); + video_stat.max_key_frame_delay_sec = MaxDelaySec(key_frame_size_bytes); + video_stat.max_delta_frame_delay_sec = MaxDelaySec(delta_frame_size_bytes); + + video_stat.avg_bitrate_mismatch_pct = + 100 * (bitrate_bps - target_bitrate_bps) / target_bitrate_bps; + video_stat.avg_framerate_mismatch_pct = + 100 * (video_stat.framerate_fps - input_framerate_fps) / + input_framerate_fps; + + video_stat.avg_key_frame_size_bytes = + key_frame_size_bytes.GetMean().value_or(0); + video_stat.avg_delta_frame_size_bytes = + delta_frame_size_bytes.GetMean().value_or(0); + video_stat.avg_qp = qp.GetMean().value_or(0); + + video_stat.avg_psnr_y = psnr_y.GetMean().value_or(0); + video_stat.avg_psnr_u = psnr_u.GetMean().value_or(0); + video_stat.avg_psnr_v = psnr_v.GetMean().value_or(0); + video_stat.avg_psnr = psnr.GetMean().value_or(0); + video_stat.min_psnr = + psnr.GetMin().value_or(std::numeric_limits<float>::max()); + video_stat.avg_ssim = ssim.GetMean().value_or(0); + video_stat.min_ssim = + ssim.GetMin().value_or(std::numeric_limits<float>::max()); + + return video_stat; +} + +void VideoCodecTestStatsImpl::GetNumberOfEncodedLayers( + size_t first_frame_num, + size_t last_frame_num, + size_t* num_encoded_spatial_layers, + size_t* num_encoded_temporal_layers) { + 
*num_encoded_spatial_layers = 0; + *num_encoded_temporal_layers = 0; + + const size_t num_spatial_layers = layer_stats_.size(); + + for (size_t frame_num = first_frame_num; frame_num <= last_frame_num; + ++frame_num) { + for (size_t spatial_idx = 0; spatial_idx < num_spatial_layers; + ++spatial_idx) { + FrameStatistics* frame_stat = GetFrame(frame_num, spatial_idx); + if (frame_stat->encoding_successful) { + *num_encoded_spatial_layers = + std::max(*num_encoded_spatial_layers, frame_stat->spatial_idx + 1); + *num_encoded_temporal_layers = std::max(*num_encoded_temporal_layers, + frame_stat->temporal_idx + 1); + } + } + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_stats_impl.h b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_stats_impl.h new file mode 100644 index 0000000000..61850d3622 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_stats_impl.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_TEST_VIDEOCODEC_TEST_STATS_IMPL_H_ +#define MODULES_VIDEO_CODING_CODECS_TEST_VIDEOCODEC_TEST_STATS_IMPL_H_ + +#include <stddef.h> + +#include <map> +#include <string> +#include <vector> + +#include "api/test/videocodec_test_stats.h" // NOLINT(build/include) + +namespace webrtc { +namespace test { + +// Statistics for a sequence of processed frames. This class is not thread safe. 
+class VideoCodecTestStatsImpl : public VideoCodecTestStats { + public: + VideoCodecTestStatsImpl(); + ~VideoCodecTestStatsImpl() override; + + // Creates a FrameStatistics for the next frame to be processed. + void AddFrame(const FrameStatistics& frame_stat); + + // Returns the FrameStatistics corresponding to `frame_number` or `timestamp`. + FrameStatistics* GetFrame(size_t frame_number, size_t spatial_idx); + FrameStatistics* GetFrameWithTimestamp(size_t timestamp, size_t spatial_idx); + + // Implements VideoCodecTestStats. + std::vector<FrameStatistics> GetFrameStatistics() override; + std::vector<VideoStatistics> SliceAndCalcLayerVideoStatistic( + size_t first_frame_num, + size_t last_frame_num) override; + + VideoStatistics SliceAndCalcAggregatedVideoStatistic(size_t first_frame_num, + size_t last_frame_num); + + size_t Size(size_t spatial_idx); + + void Clear(); + + private: + VideoCodecTestStats::FrameStatistics AggregateFrameStatistic( + size_t frame_num, + size_t spatial_idx, + bool aggregate_independent_layers); + + size_t CalcLayerTargetBitrateKbps(size_t first_frame_num, + size_t last_frame_num, + size_t spatial_idx, + size_t temporal_idx, + bool aggregate_independent_layers); + + VideoCodecTestStats::VideoStatistics SliceAndCalcVideoStatistic( + size_t first_frame_num, + size_t last_frame_num, + size_t spatial_idx, + size_t temporal_idx, + bool aggregate_independent_layers); + + void GetNumberOfEncodedLayers(size_t first_frame_num, + size_t last_frame_num, + size_t* num_encoded_spatial_layers, + size_t* num_encoded_temporal_layers); + + // layer_idx -> stats. + std::map<size_t, std::vector<FrameStatistics>> layer_stats_; + // layer_idx -> rtp_timestamp -> frame_num. 
+ std::map<size_t, std::map<size_t, size_t>> rtp_timestamp_to_frame_num_; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_TEST_VIDEOCODEC_TEST_STATS_IMPL_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_stats_impl_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_stats_impl_unittest.cc new file mode 100644 index 0000000000..6477b6ab8c --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_stats_impl_unittest.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/codecs/test/videocodec_test_stats_impl.h" + +#include <vector> + +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { + +using FrameStatistics = VideoCodecTestStatsImpl::FrameStatistics; + +namespace { + +const size_t kTimestamp = 12345; + +using ::testing::AllOf; +using ::testing::Contains; +using ::testing::Field; + +} // namespace + +TEST(StatsTest, AddAndGetFrame) { + VideoCodecTestStatsImpl stats; + stats.AddFrame(FrameStatistics(0, kTimestamp, 0)); + FrameStatistics* frame_stat = stats.GetFrame(0u, 0); + EXPECT_EQ(0u, frame_stat->frame_number); + EXPECT_EQ(kTimestamp, frame_stat->rtp_timestamp); +} + +TEST(StatsTest, AddAndGetFrames) { + VideoCodecTestStatsImpl stats; + const size_t kNumFrames = 1000; + for (size_t i = 0; i < kNumFrames; ++i) { + stats.AddFrame(FrameStatistics(i, kTimestamp + i, 0)); + FrameStatistics* frame_stat = stats.GetFrame(i, 0); + EXPECT_EQ(i, frame_stat->frame_number); + 
EXPECT_EQ(kTimestamp + i, frame_stat->rtp_timestamp); + } + EXPECT_EQ(kNumFrames, stats.Size(0)); + // Get frame. + size_t i = 22; + FrameStatistics* frame_stat = stats.GetFrameWithTimestamp(kTimestamp + i, 0); + EXPECT_EQ(i, frame_stat->frame_number); + EXPECT_EQ(kTimestamp + i, frame_stat->rtp_timestamp); +} + +TEST(StatsTest, AddFrameLayering) { + VideoCodecTestStatsImpl stats; + for (size_t i = 0; i < 3; ++i) { + stats.AddFrame(FrameStatistics(0, kTimestamp + i, i)); + FrameStatistics* frame_stat = stats.GetFrame(0u, i); + EXPECT_EQ(0u, frame_stat->frame_number); + EXPECT_EQ(kTimestamp, frame_stat->rtp_timestamp - i); + EXPECT_EQ(1u, stats.Size(i)); + } +} + +TEST(StatsTest, GetFrameStatistics) { + VideoCodecTestStatsImpl stats; + + stats.AddFrame(FrameStatistics(0, kTimestamp, 0)); + stats.AddFrame(FrameStatistics(0, kTimestamp, 1)); + stats.AddFrame(FrameStatistics(1, kTimestamp + 3000, 0)); + stats.AddFrame(FrameStatistics(1, kTimestamp + 3000, 1)); + + const std::vector<FrameStatistics> frame_stats = stats.GetFrameStatistics(); + + auto field_matcher = [](size_t frame_number, size_t spatial_idx) { + return AllOf(Field(&FrameStatistics::frame_number, frame_number), + Field(&FrameStatistics::spatial_idx, spatial_idx)); + }; + EXPECT_THAT(frame_stats, Contains(field_matcher(0, 0))); + EXPECT_THAT(frame_stats, Contains(field_matcher(0, 1))); + EXPECT_THAT(frame_stats, Contains(field_matcher(1, 0))); + EXPECT_THAT(frame_stats, Contains(field_matcher(1, 1))); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_videotoolbox.cc b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_videotoolbox.cc new file mode 100644 index 0000000000..6df974362f --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/videocodec_test_videotoolbox.cc @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <memory> +#include <vector> + +#include "api/test/create_videocodec_test_fixture.h" +#include "media/base/media_constants.h" +#include "modules/video_coding/codecs/test/objc_codec_factory_helper.h" +#include "modules/video_coding/codecs/test/videocodec_test_fixture_impl.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { +namespace test { + +namespace { +const int kForemanNumFrames = 300; + +VideoCodecTestFixture::Config CreateConfig() { + VideoCodecTestFixture::Config config; + config.filename = "foreman_cif"; + config.filepath = ResourcePath(config.filename, "yuv"); + config.num_frames = kForemanNumFrames; + return config; +} + +std::unique_ptr<VideoCodecTestFixture> CreateTestFixtureWithConfig( + VideoCodecTestFixture::Config config) { + auto decoder_factory = CreateObjCDecoderFactory(); + auto encoder_factory = CreateObjCEncoderFactory(); + return CreateVideoCodecTestFixture(config, std::move(decoder_factory), + std::move(encoder_factory)); +} +} // namespace + +// TODO(webrtc:9099): Disabled until the issue is fixed. +// HW codecs don't work on simulators. Only run these tests on device. +// #if TARGET_OS_IPHONE && !TARGET_IPHONE_SIMULATOR +// #define MAYBE_TEST TEST +// #else +#define MAYBE_TEST(s, name) TEST(s, DISABLED_##name) +// #endif + +// TODO(kthelgason): Use RC Thresholds when the internal bitrateAdjuster is no +// longer in use. 
+MAYBE_TEST(VideoCodecTestVideoToolbox, ForemanCif500kbpsH264CBP) { + const auto frame_checker = + std::make_unique<VideoCodecTestFixtureImpl::H264KeyframeChecker>(); + auto config = CreateConfig(); + config.SetCodecSettings(cricket::kH264CodecName, 1, 1, 1, false, false, false, + 352, 288); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateTestFixtureWithConfig(config); + + std::vector<RateProfile> rate_profiles = {{500, 30, 0}}; + + std::vector<QualityThresholds> quality_thresholds = {{33, 29, 0.9, 0.82}}; + + fixture->RunTest(rate_profiles, nullptr, &quality_thresholds, nullptr); +} + +MAYBE_TEST(VideoCodecTestVideoToolbox, ForemanCif500kbpsH264CHP) { + const auto frame_checker = + std::make_unique<VideoCodecTestFixtureImpl::H264KeyframeChecker>(); + auto config = CreateConfig(); + config.h264_codec_settings.profile = H264Profile::kProfileConstrainedHigh; + config.SetCodecSettings(cricket::kH264CodecName, 1, 1, 1, false, false, false, + 352, 288); + config.encoded_frame_checker = frame_checker.get(); + auto fixture = CreateTestFixtureWithConfig(config); + + std::vector<RateProfile> rate_profiles = {{500, 30, 0}}; + + std::vector<QualityThresholds> quality_thresholds = {{33, 30, 0.91, 0.83}}; + + fixture->RunTest(rate_profiles, nullptr, &quality_thresholds, nullptr); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/videoprocessor.cc b/third_party/libwebrtc/modules/video_coding/codecs/test/videoprocessor.cc new file mode 100644 index 0000000000..353a00df79 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/videoprocessor.cc @@ -0,0 +1,700 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/codecs/test/videoprocessor.h" + +#include <string.h> + +#include <algorithm> +#include <cstddef> +#include <limits> +#include <memory> +#include <utility> + +#include "api/scoped_refptr.h" +#include "api/video/builtin_video_bitrate_allocator_factory.h" +#include "api/video/i420_buffer.h" +#include "api/video/video_bitrate_allocator_factory.h" +#include "api/video/video_frame_buffer.h" +#include "api/video/video_rotation.h" +#include "api/video_codecs/video_codec.h" +#include "api/video_codecs/video_encoder.h" +#include "common_video/h264/h264_common.h" +#include "common_video/libyuv/include/webrtc_libyuv.h" +#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" +#include "modules/video_coding/codecs/interface/common_constants.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "rtc_base/checks.h" +#include "rtc_base/time_utils.h" +#include "test/gtest.h" +#include "third_party/libyuv/include/libyuv/compare.h" +#include "third_party/libyuv/include/libyuv/scale.h" + +namespace webrtc { +namespace test { + +namespace { +const int kMsToRtpTimestamp = kVideoPayloadTypeFrequency / 1000; +const int kMaxBufferedInputFrames = 20; + +const VideoEncoder::Capabilities kCapabilities(false); + +size_t GetMaxNaluSizeBytes(const EncodedImage& encoded_frame, + const VideoCodecTestFixture::Config& config) { + if (config.codec_settings.codecType != kVideoCodecH264) + return 0; + + std::vector<webrtc::H264::NaluIndex> nalu_indices = + webrtc::H264::FindNaluIndices(encoded_frame.data(), encoded_frame.size()); + + RTC_CHECK(!nalu_indices.empty()); + + size_t max_size = 0; + for (const webrtc::H264::NaluIndex& index : nalu_indices) + max_size = std::max(max_size, index.payload_size); + + return max_size; +} + +size_t GetTemporalLayerIndex(const CodecSpecificInfo& codec_specific) { + size_t temporal_idx = 0; + if 
(codec_specific.codecType == kVideoCodecVP8) { + temporal_idx = codec_specific.codecSpecific.VP8.temporalIdx; + } else if (codec_specific.codecType == kVideoCodecVP9) { + temporal_idx = codec_specific.codecSpecific.VP9.temporal_idx; + } + if (temporal_idx == kNoTemporalIdx) { + temporal_idx = 0; + } + return temporal_idx; +} + +int GetElapsedTimeMicroseconds(int64_t start_ns, int64_t stop_ns) { + int64_t diff_us = (stop_ns - start_ns) / rtc::kNumNanosecsPerMicrosec; + RTC_DCHECK_GE(diff_us, std::numeric_limits<int>::min()); + RTC_DCHECK_LE(diff_us, std::numeric_limits<int>::max()); + return static_cast<int>(diff_us); +} + +void CalculateFrameQuality(const I420BufferInterface& ref_buffer, + const I420BufferInterface& dec_buffer, + VideoCodecTestStats::FrameStatistics* frame_stat, + bool calc_ssim) { + if (ref_buffer.width() != dec_buffer.width() || + ref_buffer.height() != dec_buffer.height()) { + RTC_CHECK_GE(ref_buffer.width(), dec_buffer.width()); + RTC_CHECK_GE(ref_buffer.height(), dec_buffer.height()); + // Downscale reference frame. 
+ rtc::scoped_refptr<I420Buffer> scaled_buffer = + I420Buffer::Create(dec_buffer.width(), dec_buffer.height()); + I420Scale(ref_buffer.DataY(), ref_buffer.StrideY(), ref_buffer.DataU(), + ref_buffer.StrideU(), ref_buffer.DataV(), ref_buffer.StrideV(), + ref_buffer.width(), ref_buffer.height(), + scaled_buffer->MutableDataY(), scaled_buffer->StrideY(), + scaled_buffer->MutableDataU(), scaled_buffer->StrideU(), + scaled_buffer->MutableDataV(), scaled_buffer->StrideV(), + scaled_buffer->width(), scaled_buffer->height(), + libyuv::kFilterBox); + + CalculateFrameQuality(*scaled_buffer, dec_buffer, frame_stat, calc_ssim); + } else { + const uint64_t sse_y = libyuv::ComputeSumSquareErrorPlane( + dec_buffer.DataY(), dec_buffer.StrideY(), ref_buffer.DataY(), + ref_buffer.StrideY(), dec_buffer.width(), dec_buffer.height()); + + const uint64_t sse_u = libyuv::ComputeSumSquareErrorPlane( + dec_buffer.DataU(), dec_buffer.StrideU(), ref_buffer.DataU(), + ref_buffer.StrideU(), dec_buffer.width() / 2, dec_buffer.height() / 2); + + const uint64_t sse_v = libyuv::ComputeSumSquareErrorPlane( + dec_buffer.DataV(), dec_buffer.StrideV(), ref_buffer.DataV(), + ref_buffer.StrideV(), dec_buffer.width() / 2, dec_buffer.height() / 2); + + const size_t num_y_samples = dec_buffer.width() * dec_buffer.height(); + const size_t num_u_samples = + dec_buffer.width() / 2 * dec_buffer.height() / 2; + + frame_stat->psnr_y = libyuv::SumSquareErrorToPsnr(sse_y, num_y_samples); + frame_stat->psnr_u = libyuv::SumSquareErrorToPsnr(sse_u, num_u_samples); + frame_stat->psnr_v = libyuv::SumSquareErrorToPsnr(sse_v, num_u_samples); + frame_stat->psnr = libyuv::SumSquareErrorToPsnr( + sse_y + sse_u + sse_v, num_y_samples + 2 * num_u_samples); + + if (calc_ssim) { + frame_stat->ssim = I420SSIM(ref_buffer, dec_buffer); + } + } +} + +} // namespace + +VideoProcessor::VideoProcessor(webrtc::VideoEncoder* encoder, + VideoDecoderList* decoders, + FrameReader* input_frame_reader, + const VideoCodecTestFixture::Config& 
config, + VideoCodecTestStatsImpl* stats, + IvfFileWriterMap* encoded_frame_writers, + FrameWriterList* decoded_frame_writers) + : config_(config), + num_simulcast_or_spatial_layers_( + std::max(config_.NumberOfSimulcastStreams(), + config_.NumberOfSpatialLayers())), + analyze_frame_quality_(!config_.measure_cpu), + stats_(stats), + encoder_(encoder), + decoders_(decoders), + bitrate_allocator_( + CreateBuiltinVideoBitrateAllocatorFactory() + ->CreateVideoBitrateAllocator(config_.codec_settings)), + framerate_fps_(0), + encode_callback_(this), + input_frame_reader_(input_frame_reader), + merged_encoded_frames_(num_simulcast_or_spatial_layers_), + encoded_frame_writers_(encoded_frame_writers), + decoded_frame_writers_(decoded_frame_writers), + last_inputed_frame_num_(0), + last_inputed_timestamp_(0), + first_encoded_frame_(num_simulcast_or_spatial_layers_, true), + last_encoded_frame_num_(num_simulcast_or_spatial_layers_), + first_decoded_frame_(num_simulcast_or_spatial_layers_, true), + last_decoded_frame_num_(num_simulcast_or_spatial_layers_), + last_decoded_frame_buffer_(num_simulcast_or_spatial_layers_), + post_encode_time_ns_(0), + is_finalized_(false) { + // Sanity checks. + RTC_CHECK(TaskQueueBase::Current()) + << "VideoProcessor must be run on a task queue."; + RTC_CHECK(stats_); + RTC_CHECK(encoder_); + RTC_CHECK(decoders_); + RTC_CHECK_EQ(decoders_->size(), num_simulcast_or_spatial_layers_); + RTC_CHECK(input_frame_reader_); + RTC_CHECK(encoded_frame_writers_); + RTC_CHECK(!decoded_frame_writers || + decoded_frame_writers->size() == num_simulcast_or_spatial_layers_); + + // Setup required callbacks for the encoder and decoder and initialize them. + RTC_CHECK_EQ(encoder_->RegisterEncodeCompleteCallback(&encode_callback_), + WEBRTC_VIDEO_CODEC_OK); + + // Initialize codecs so that they are ready to receive frames. 
+ RTC_CHECK_EQ(encoder_->InitEncode( + &config_.codec_settings, + VideoEncoder::Settings( + kCapabilities, static_cast<int>(config_.NumberOfCores()), + config_.max_payload_size_bytes)), + WEBRTC_VIDEO_CODEC_OK); + + for (size_t i = 0; i < num_simulcast_or_spatial_layers_; ++i) { + decode_callback_.push_back( + std::make_unique<VideoProcessorDecodeCompleteCallback>(this, i)); + VideoDecoder::Settings decoder_settings; + decoder_settings.set_max_render_resolution( + {config_.codec_settings.width, config_.codec_settings.height}); + decoder_settings.set_codec_type(config_.codec_settings.codecType); + decoder_settings.set_number_of_cores(config_.NumberOfCores()); + RTC_CHECK(decoders_->at(i)->Configure(decoder_settings)); + RTC_CHECK_EQ(decoders_->at(i)->RegisterDecodeCompleteCallback( + decode_callback_.at(i).get()), + WEBRTC_VIDEO_CODEC_OK); + } +} + +VideoProcessor::~VideoProcessor() { + RTC_DCHECK_RUN_ON(&sequence_checker_); + + if (!is_finalized_) { + Finalize(); + } + + // Explicitly reset codecs, in case they don't do that themselves when they + // go out of scope. + RTC_CHECK_EQ(encoder_->Release(), WEBRTC_VIDEO_CODEC_OK); + encoder_->RegisterEncodeCompleteCallback(nullptr); + for (auto& decoder : *decoders_) { + RTC_CHECK_EQ(decoder->Release(), WEBRTC_VIDEO_CODEC_OK); + decoder->RegisterDecodeCompleteCallback(nullptr); + } + + // Sanity check. + RTC_CHECK_LE(input_frames_.size(), kMaxBufferedInputFrames); +} + +void VideoProcessor::ProcessFrame() { + RTC_DCHECK_RUN_ON(&sequence_checker_); + RTC_DCHECK(!is_finalized_); + + const size_t frame_number = last_inputed_frame_num_++; + + // Get input frame and store for future quality calculation. 
+ rtc::scoped_refptr<I420BufferInterface> buffer = + input_frame_reader_->ReadFrame(); + RTC_CHECK(buffer) << "Tried to read too many frames from the file."; + const size_t timestamp = + last_inputed_timestamp_ + + static_cast<size_t>(kVideoPayloadTypeFrequency / framerate_fps_); + VideoFrame input_frame = + VideoFrame::Builder() + .set_video_frame_buffer(buffer) + .set_timestamp_rtp(static_cast<uint32_t>(timestamp)) + .set_timestamp_ms(static_cast<int64_t>(timestamp / kMsToRtpTimestamp)) + .set_rotation(webrtc::kVideoRotation_0) + .build(); + // Store input frame as a reference for quality calculations. + if (config_.decode && !config_.measure_cpu) { + if (input_frames_.size() == kMaxBufferedInputFrames) { + input_frames_.erase(input_frames_.begin()); + } + + if (config_.reference_width != -1 && config_.reference_height != -1 && + (input_frame.width() != config_.reference_width || + input_frame.height() != config_.reference_height)) { + rtc::scoped_refptr<I420Buffer> scaled_buffer = I420Buffer::Create( + config_.codec_settings.width, config_.codec_settings.height); + scaled_buffer->ScaleFrom(*input_frame.video_frame_buffer()->ToI420()); + + VideoFrame scaled_reference_frame = input_frame; + scaled_reference_frame.set_video_frame_buffer(scaled_buffer); + input_frames_.emplace(frame_number, scaled_reference_frame); + + if (config_.reference_width == config_.codec_settings.width && + config_.reference_height == config_.codec_settings.height) { + // Both encoding and comparison uses the same down-scale factor, reuse + // it for encoder below. + input_frame = scaled_reference_frame; + } + } else { + input_frames_.emplace(frame_number, input_frame); + } + } + last_inputed_timestamp_ = timestamp; + + post_encode_time_ns_ = 0; + + // Create frame statistics object for all simulcast/spatial layers. 
+ for (size_t i = 0; i < num_simulcast_or_spatial_layers_; ++i) { + FrameStatistics frame_stat(frame_number, timestamp, i); + stats_->AddFrame(frame_stat); + } + + // For the highest measurement accuracy of the encode time, the start/stop + // time recordings should wrap the Encode call as tightly as possible. + const int64_t encode_start_ns = rtc::TimeNanos(); + for (size_t i = 0; i < num_simulcast_or_spatial_layers_; ++i) { + FrameStatistics* frame_stat = stats_->GetFrame(frame_number, i); + frame_stat->encode_start_ns = encode_start_ns; + } + + if (input_frame.width() != config_.codec_settings.width || + input_frame.height() != config_.codec_settings.height) { + rtc::scoped_refptr<I420Buffer> scaled_buffer = I420Buffer::Create( + config_.codec_settings.width, config_.codec_settings.height); + scaled_buffer->ScaleFrom(*input_frame.video_frame_buffer()->ToI420()); + input_frame.set_video_frame_buffer(scaled_buffer); + } + + // Encode. + const std::vector<VideoFrameType> frame_types = + (frame_number == 0) + ? 
std::vector<VideoFrameType>{VideoFrameType::kVideoFrameKey} + : std::vector<VideoFrameType>{VideoFrameType::kVideoFrameDelta}; + const int encode_return_code = encoder_->Encode(input_frame, &frame_types); + for (size_t i = 0; i < num_simulcast_or_spatial_layers_; ++i) { + FrameStatistics* frame_stat = stats_->GetFrame(frame_number, i); + frame_stat->encode_return_code = encode_return_code; + } +} + +void VideoProcessor::SetRates(size_t bitrate_kbps, double framerate_fps) { + RTC_DCHECK_RUN_ON(&sequence_checker_); + RTC_DCHECK(!is_finalized_); + + framerate_fps_ = framerate_fps; + bitrate_allocation_ = + bitrate_allocator_->Allocate(VideoBitrateAllocationParameters( + static_cast<uint32_t>(bitrate_kbps * 1000), framerate_fps_)); + encoder_->SetRates( + VideoEncoder::RateControlParameters(bitrate_allocation_, framerate_fps_)); +} + +int32_t VideoProcessor::VideoProcessorDecodeCompleteCallback::Decoded( + VideoFrame& image) { + // Post the callback to the right task queue, if needed. + if (!task_queue_->IsCurrent()) { + // There might be a limited amount of output buffers, make a copy to make + // sure we don't block the decoder. + VideoFrame copy = VideoFrame::Builder() + .set_video_frame_buffer(I420Buffer::Copy( + *image.video_frame_buffer()->ToI420())) + .set_rotation(image.rotation()) + .set_timestamp_us(image.timestamp_us()) + .set_id(image.id()) + .build(); + copy.set_timestamp(image.timestamp()); + + task_queue_->PostTask([this, copy]() { + video_processor_->FrameDecoded(copy, simulcast_svc_idx_); + }); + return 0; + } + video_processor_->FrameDecoded(image, simulcast_svc_idx_); + return 0; +} + +void VideoProcessor::FrameEncoded( + const webrtc::EncodedImage& encoded_image, + const webrtc::CodecSpecificInfo& codec_specific) { + RTC_DCHECK_RUN_ON(&sequence_checker_); + + // For the highest measurement accuracy of the encode time, the start/stop + // time recordings should wrap the Encode call as tightly as possible. 
+ const int64_t encode_stop_ns = rtc::TimeNanos(); + + const VideoCodecType codec_type = codec_specific.codecType; + if (config_.encoded_frame_checker) { + config_.encoded_frame_checker->CheckEncodedFrame(codec_type, encoded_image); + } + + // Layer metadata. + size_t spatial_idx = encoded_image.SpatialIndex().value_or(0); + size_t temporal_idx = GetTemporalLayerIndex(codec_specific); + + FrameStatistics* frame_stat = + stats_->GetFrameWithTimestamp(encoded_image.Timestamp(), spatial_idx); + const size_t frame_number = frame_stat->frame_number; + + // Ensure that the encode order is monotonically increasing, within this + // simulcast/spatial layer. + RTC_CHECK(first_encoded_frame_[spatial_idx] || + last_encoded_frame_num_[spatial_idx] < frame_number); + + // Ensure SVC spatial layers are delivered in ascending order. + const size_t num_spatial_layers = config_.NumberOfSpatialLayers(); + if (!first_encoded_frame_[spatial_idx] && num_spatial_layers > 1) { + for (size_t i = 0; i < spatial_idx; ++i) { + RTC_CHECK_LE(last_encoded_frame_num_[i], frame_number); + } + for (size_t i = spatial_idx + 1; i < num_simulcast_or_spatial_layers_; + ++i) { + RTC_CHECK_GT(frame_number, last_encoded_frame_num_[i]); + } + } + first_encoded_frame_[spatial_idx] = false; + last_encoded_frame_num_[spatial_idx] = frame_number; + + // Update frame statistics. 
+ frame_stat->encoding_successful = true; + frame_stat->encode_time_us = GetElapsedTimeMicroseconds( + frame_stat->encode_start_ns, encode_stop_ns - post_encode_time_ns_); + frame_stat->target_bitrate_kbps = + bitrate_allocation_.GetTemporalLayerSum(spatial_idx, temporal_idx) / 1000; + frame_stat->target_framerate_fps = framerate_fps_; + frame_stat->length_bytes = encoded_image.size(); + frame_stat->frame_type = encoded_image._frameType; + frame_stat->temporal_idx = temporal_idx; + frame_stat->max_nalu_size_bytes = GetMaxNaluSizeBytes(encoded_image, config_); + frame_stat->qp = encoded_image.qp_; + + if (codec_type == kVideoCodecVP9) { + const CodecSpecificInfoVP9& vp9_info = codec_specific.codecSpecific.VP9; + frame_stat->inter_layer_predicted = vp9_info.inter_layer_predicted; + frame_stat->non_ref_for_inter_layer_pred = + vp9_info.non_ref_for_inter_layer_pred; + } else { + frame_stat->inter_layer_predicted = false; + frame_stat->non_ref_for_inter_layer_pred = true; + } + + const webrtc::EncodedImage* encoded_image_for_decode = &encoded_image; + if (config_.decode || !encoded_frame_writers_->empty()) { + if (num_spatial_layers > 1) { + encoded_image_for_decode = BuildAndStoreSuperframe( + encoded_image, codec_type, frame_number, spatial_idx, + frame_stat->inter_layer_predicted); + } + } + + if (config_.decode) { + DecodeFrame(*encoded_image_for_decode, spatial_idx); + + if (codec_specific.end_of_picture && num_spatial_layers > 1) { + // If inter-layer prediction is enabled and upper layer was dropped then + // base layer should be passed to upper layer decoder. Otherwise decoder + // won't be able to decode next superframe. + const EncodedImage* base_image = nullptr; + const FrameStatistics* base_stat = nullptr; + for (size_t i = 0; i < num_spatial_layers; ++i) { + const bool layer_dropped = (first_decoded_frame_[i] || + last_decoded_frame_num_[i] < frame_number); + + // Ensure current layer was decoded. 
+ RTC_CHECK(layer_dropped == false || i != spatial_idx); + + if (!layer_dropped) { + base_image = &merged_encoded_frames_[i]; + base_stat = + stats_->GetFrameWithTimestamp(encoded_image.Timestamp(), i); + } else if (base_image && !base_stat->non_ref_for_inter_layer_pred) { + DecodeFrame(*base_image, i); + } + } + } + } else { + frame_stat->decode_return_code = WEBRTC_VIDEO_CODEC_NO_OUTPUT; + } + + // Since frames in higher TLs typically depend on frames in lower TLs, + // write out frames in lower TLs to bitstream dumps of higher TLs. + for (size_t write_temporal_idx = temporal_idx; + write_temporal_idx < config_.NumberOfTemporalLayers(); + ++write_temporal_idx) { + const VideoProcessor::LayerKey layer_key(spatial_idx, write_temporal_idx); + auto it = encoded_frame_writers_->find(layer_key); + if (it != encoded_frame_writers_->cend()) { + RTC_CHECK(it->second->WriteFrame(*encoded_image_for_decode, + config_.codec_settings.codecType)); + } + } + + if (!config_.encode_in_real_time) { + // To get pure encode time for next layers, measure time spent in encode + // callback and subtract it from encode time of next layers. + post_encode_time_ns_ += rtc::TimeNanos() - encode_stop_ns; + } +} + +void VideoProcessor::CalcFrameQuality(const I420BufferInterface& decoded_frame, + FrameStatistics* frame_stat) { + RTC_DCHECK_RUN_ON(&sequence_checker_); + + const auto reference_frame = input_frames_.find(frame_stat->frame_number); + RTC_CHECK(reference_frame != input_frames_.cend()) + << "The codecs are either buffering too much, dropping too much, or " + "being too slow relative to the input frame rate."; + + // SSIM calculation is not optimized. Skip it in real-time mode. 
+ const bool calc_ssim = !config_.encode_in_real_time; + CalculateFrameQuality(*reference_frame->second.video_frame_buffer()->ToI420(), + decoded_frame, frame_stat, calc_ssim); + + frame_stat->quality_analysis_successful = true; +} + +void VideoProcessor::WriteDecodedFrame(const I420BufferInterface& decoded_frame, + FrameWriter& frame_writer) { + int input_video_width = config_.codec_settings.width; + int input_video_height = config_.codec_settings.height; + + rtc::scoped_refptr<I420Buffer> scaled_buffer; + const I420BufferInterface* scaled_frame; + + if (decoded_frame.width() == input_video_width && + decoded_frame.height() == input_video_height) { + scaled_frame = &decoded_frame; + } else { + EXPECT_DOUBLE_EQ( + static_cast<double>(input_video_width) / input_video_height, + static_cast<double>(decoded_frame.width()) / decoded_frame.height()); + + scaled_buffer = I420Buffer::Create(input_video_width, input_video_height); + scaled_buffer->ScaleFrom(decoded_frame); + + scaled_frame = scaled_buffer.get(); + } + + // Ensure there is no padding. + RTC_CHECK_EQ(scaled_frame->StrideY(), input_video_width); + RTC_CHECK_EQ(scaled_frame->StrideU(), input_video_width / 2); + RTC_CHECK_EQ(scaled_frame->StrideV(), input_video_width / 2); + + RTC_CHECK_EQ(3 * input_video_width * input_video_height / 2, + frame_writer.FrameLength()); + + RTC_CHECK(frame_writer.WriteFrame(scaled_frame->DataY())); +} + +void VideoProcessor::FrameDecoded(const VideoFrame& decoded_frame, + size_t spatial_idx) { + RTC_DCHECK_RUN_ON(&sequence_checker_); + + // For the highest measurement accuracy of the decode time, the start/stop + // time recordings should wrap the Decode call as tightly as possible. 
+ const int64_t decode_stop_ns = rtc::TimeNanos(); + + FrameStatistics* frame_stat = + stats_->GetFrameWithTimestamp(decoded_frame.timestamp(), spatial_idx); + const size_t frame_number = frame_stat->frame_number; + + if (!first_decoded_frame_[spatial_idx]) { + for (size_t dropped_frame_number = last_decoded_frame_num_[spatial_idx] + 1; + dropped_frame_number < frame_number; ++dropped_frame_number) { + FrameStatistics* dropped_frame_stat = + stats_->GetFrame(dropped_frame_number, spatial_idx); + + if (analyze_frame_quality_ && config_.analyze_quality_of_dropped_frames) { + // Calculate frame quality comparing input frame with last decoded one. + CalcFrameQuality(*last_decoded_frame_buffer_[spatial_idx], + dropped_frame_stat); + } + + if (decoded_frame_writers_ != nullptr) { + // Fill drops with last decoded frame to make them look like freeze at + // playback and to keep decoded layers in sync. + WriteDecodedFrame(*last_decoded_frame_buffer_[spatial_idx], + *decoded_frame_writers_->at(spatial_idx)); + } + } + } + + // Ensure that the decode order is monotonically increasing, within this + // simulcast/spatial layer. + RTC_CHECK(first_decoded_frame_[spatial_idx] || + last_decoded_frame_num_[spatial_idx] < frame_number); + first_decoded_frame_[spatial_idx] = false; + last_decoded_frame_num_[spatial_idx] = frame_number; + + // Update frame statistics. + frame_stat->decoding_successful = true; + frame_stat->decode_time_us = + GetElapsedTimeMicroseconds(frame_stat->decode_start_ns, decode_stop_ns); + frame_stat->decoded_width = decoded_frame.width(); + frame_stat->decoded_height = decoded_frame.height(); + + // Skip quality metrics calculation to not affect CPU usage. + if (analyze_frame_quality_ || decoded_frame_writers_) { + // Save last decoded frame to handle possible future drops. 
+ rtc::scoped_refptr<I420BufferInterface> i420buffer = + decoded_frame.video_frame_buffer()->ToI420(); + + // Copy decoded frame to a buffer without padding/stride such that we can + // dump Y, U and V planes into a file in one shot. + last_decoded_frame_buffer_[spatial_idx] = I420Buffer::Copy( + i420buffer->width(), i420buffer->height(), i420buffer->DataY(), + i420buffer->StrideY(), i420buffer->DataU(), i420buffer->StrideU(), + i420buffer->DataV(), i420buffer->StrideV()); + } + + if (analyze_frame_quality_) { + CalcFrameQuality(*decoded_frame.video_frame_buffer()->ToI420(), frame_stat); + } + + if (decoded_frame_writers_ != nullptr) { + WriteDecodedFrame(*last_decoded_frame_buffer_[spatial_idx], + *decoded_frame_writers_->at(spatial_idx)); + } + + // Erase all buffered input frames that we have moved past for all + // simulcast/spatial layers. Never buffer more than + // `kMaxBufferedInputFrames` frames, to protect against long runs of + // consecutive frame drops for a particular layer. 
+ const auto min_last_decoded_frame_num = std::min_element( + last_decoded_frame_num_.cbegin(), last_decoded_frame_num_.cend()); + const size_t min_buffered_frame_num = + std::max(0, static_cast<int>(frame_number) - kMaxBufferedInputFrames + 1); + RTC_CHECK(min_last_decoded_frame_num != last_decoded_frame_num_.cend()); + const auto input_frames_erase_before = input_frames_.lower_bound( + std::max(*min_last_decoded_frame_num, min_buffered_frame_num)); + input_frames_.erase(input_frames_.cbegin(), input_frames_erase_before); +} + +void VideoProcessor::DecodeFrame(const EncodedImage& encoded_image, + size_t spatial_idx) { + RTC_DCHECK_RUN_ON(&sequence_checker_); + FrameStatistics* frame_stat = + stats_->GetFrameWithTimestamp(encoded_image.Timestamp(), spatial_idx); + + frame_stat->decode_start_ns = rtc::TimeNanos(); + frame_stat->decode_return_code = + decoders_->at(spatial_idx)->Decode(encoded_image, false, 0); +} + +const webrtc::EncodedImage* VideoProcessor::BuildAndStoreSuperframe( + const EncodedImage& encoded_image, + const VideoCodecType codec, + size_t frame_number, + size_t spatial_idx, + bool inter_layer_predicted) { + // Should only be called for SVC. + RTC_CHECK_GT(config_.NumberOfSpatialLayers(), 1); + + EncodedImage base_image; + RTC_CHECK_EQ(base_image.size(), 0); + + // Each SVC layer is decoded with dedicated decoder. Find the nearest + // non-dropped base frame and merge it and current frame into superframe. 
+ if (inter_layer_predicted) { + for (int base_idx = static_cast<int>(spatial_idx) - 1; base_idx >= 0; + --base_idx) { + EncodedImage lower_layer = merged_encoded_frames_.at(base_idx); + if (lower_layer.Timestamp() == encoded_image.Timestamp()) { + base_image = lower_layer; + break; + } + } + } + const size_t payload_size_bytes = base_image.size() + encoded_image.size(); + + auto buffer = EncodedImageBuffer::Create(payload_size_bytes); + if (base_image.size()) { + RTC_CHECK(base_image.data()); + memcpy(buffer->data(), base_image.data(), base_image.size()); + } + memcpy(buffer->data() + base_image.size(), encoded_image.data(), + encoded_image.size()); + + EncodedImage copied_image = encoded_image; + copied_image.SetEncodedData(buffer); + if (base_image.size()) + copied_image._frameType = base_image._frameType; + + // Replace previous EncodedImage for this spatial layer. + merged_encoded_frames_.at(spatial_idx) = std::move(copied_image); + + return &merged_encoded_frames_.at(spatial_idx); +} + +void VideoProcessor::Finalize() { + RTC_DCHECK_RUN_ON(&sequence_checker_); + RTC_DCHECK(!is_finalized_); + is_finalized_ = true; + + if (!(analyze_frame_quality_ && config_.analyze_quality_of_dropped_frames) && + decoded_frame_writers_ == nullptr) { + return; + } + + for (size_t spatial_idx = 0; spatial_idx < num_simulcast_or_spatial_layers_; + ++spatial_idx) { + if (first_decoded_frame_[spatial_idx]) { + continue; // No decoded frames on this spatial layer. 
+ } + + for (size_t dropped_frame_number = last_decoded_frame_num_[spatial_idx] + 1; + dropped_frame_number < last_inputed_frame_num_; + ++dropped_frame_number) { + FrameStatistics* frame_stat = + stats_->GetFrame(dropped_frame_number, spatial_idx); + + RTC_DCHECK(!frame_stat->decoding_successful); + + if (analyze_frame_quality_ && config_.analyze_quality_of_dropped_frames) { + CalcFrameQuality(*last_decoded_frame_buffer_[spatial_idx], frame_stat); + } + + if (decoded_frame_writers_ != nullptr) { + WriteDecodedFrame(*last_decoded_frame_buffer_[spatial_idx], + *decoded_frame_writers_->at(spatial_idx)); + } + } + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/videoprocessor.h b/third_party/libwebrtc/modules/video_coding/codecs/test/videoprocessor.h new file mode 100644 index 0000000000..4c89c790a9 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/videoprocessor.h @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_VIDEO_CODING_CODECS_TEST_VIDEOPROCESSOR_H_ +#define MODULES_VIDEO_CODING_CODECS_TEST_VIDEOPROCESSOR_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <map> +#include <memory> +#include <utility> +#include <vector> + +#include "absl/types/optional.h" +#include "api/sequence_checker.h" +#include "api/task_queue/task_queue_base.h" +#include "api/test/videocodec_test_fixture.h" +#include "api/video/encoded_image.h" +#include "api/video/i420_buffer.h" +#include "api/video/video_bitrate_allocation.h" +#include "api/video/video_bitrate_allocator.h" +#include "api/video/video_frame.h" +#include "api/video_codecs/video_decoder.h" +#include "api/video_codecs/video_encoder.h" +#include "modules/include/module_common_types.h" +#include "modules/video_coding/codecs/test/videocodec_test_stats_impl.h" +#include "modules/video_coding/include/video_codec_interface.h" +#include "modules/video_coding/utility/ivf_file_writer.h" +#include "rtc_base/buffer.h" +#include "rtc_base/checks.h" +#include "rtc_base/system/no_unique_address.h" +#include "rtc_base/thread_annotations.h" +#include "test/testsupport/frame_reader.h" +#include "test/testsupport/frame_writer.h" + +namespace webrtc { +namespace test { + +// Handles encoding/decoding of video using the VideoEncoder/VideoDecoder +// interfaces. This is done in a sequential manner in order to be able to +// measure times properly. +// The class processes a frame at the time for the configured input file. +// It maintains state of where in the source input file the processing is at. +class VideoProcessor { + public: + using VideoDecoderList = std::vector<std::unique_ptr<VideoDecoder>>; + using LayerKey = std::pair<int /* spatial_idx */, int /* temporal_idx */>; + using IvfFileWriterMap = std::map<LayerKey, std::unique_ptr<IvfFileWriter>>; + // TODO(brandtr): Consider changing FrameWriterList to be a FrameWriterMap, + // to be able to save different TLs separately. 
+ using FrameWriterList = std::vector<std::unique_ptr<FrameWriter>>; + using FrameStatistics = VideoCodecTestStats::FrameStatistics; + + VideoProcessor(webrtc::VideoEncoder* encoder, + VideoDecoderList* decoders, + FrameReader* input_frame_reader, + const VideoCodecTestFixture::Config& config, + VideoCodecTestStatsImpl* stats, + IvfFileWriterMap* encoded_frame_writers, + FrameWriterList* decoded_frame_writers); + ~VideoProcessor(); + + VideoProcessor(const VideoProcessor&) = delete; + VideoProcessor& operator=(const VideoProcessor&) = delete; + + // Reads a frame and sends it to the encoder. When the encode callback + // is received, the encoded frame is buffered. After encoding is finished + // buffered frame is sent to decoder. Quality evaluation is done in + // the decode callback. + void ProcessFrame(); + + // Updates the encoder with target rates. Must be called at least once. + void SetRates(size_t bitrate_kbps, double framerate_fps); + + // Signals processor to finalize frame processing and handle possible tail + // drops. If not called explicitly, this will be called in dtor. It is + // unexpected to get ProcessFrame() or SetRates() calls after Finalize(). + void Finalize(); + + private: + class VideoProcessorEncodeCompleteCallback + : public webrtc::EncodedImageCallback { + public: + explicit VideoProcessorEncodeCompleteCallback( + VideoProcessor* video_processor) + : video_processor_(video_processor), + task_queue_(TaskQueueBase::Current()) { + RTC_DCHECK(video_processor_); + RTC_DCHECK(task_queue_); + } + + Result OnEncodedImage( + const webrtc::EncodedImage& encoded_image, + const webrtc::CodecSpecificInfo* codec_specific_info) override { + RTC_CHECK(codec_specific_info); + + // Post the callback to the right task queue, if needed. 
+ if (!task_queue_->IsCurrent()) { + VideoProcessor* video_processor = video_processor_; + task_queue_->PostTask([video_processor, encoded_image, + codec_specific_info = *codec_specific_info] { + video_processor->FrameEncoded(encoded_image, codec_specific_info); + }); + return Result(Result::OK, 0); + } + + video_processor_->FrameEncoded(encoded_image, *codec_specific_info); + return Result(Result::OK, 0); + } + + private: + VideoProcessor* const video_processor_; + TaskQueueBase* const task_queue_; + }; + + class VideoProcessorDecodeCompleteCallback + : public webrtc::DecodedImageCallback { + public: + explicit VideoProcessorDecodeCompleteCallback( + VideoProcessor* video_processor, + size_t simulcast_svc_idx) + : video_processor_(video_processor), + simulcast_svc_idx_(simulcast_svc_idx), + task_queue_(TaskQueueBase::Current()) { + RTC_DCHECK(video_processor_); + RTC_DCHECK(task_queue_); + } + + int32_t Decoded(webrtc::VideoFrame& image) override; + + int32_t Decoded(webrtc::VideoFrame& image, + int64_t decode_time_ms) override { + return Decoded(image); + } + + void Decoded(webrtc::VideoFrame& image, + absl::optional<int32_t> decode_time_ms, + absl::optional<uint8_t> qp) override { + Decoded(image); + } + + private: + VideoProcessor* const video_processor_; + const size_t simulcast_svc_idx_; + TaskQueueBase* const task_queue_; + }; + + // Invoked by the callback adapter when a frame has completed encoding. + void FrameEncoded(const webrtc::EncodedImage& encoded_image, + const webrtc::CodecSpecificInfo& codec_specific); + + // Invoked by the callback adapter when a frame has completed decoding. + void FrameDecoded(const webrtc::VideoFrame& image, size_t simulcast_svc_idx); + + void DecodeFrame(const EncodedImage& encoded_image, size_t simulcast_svc_idx); + + // In order to supply the SVC decoders with super frames containing all + // lower layer frames, we merge and store the layer frames in this method. 
+ const webrtc::EncodedImage* BuildAndStoreSuperframe( + const EncodedImage& encoded_image, + VideoCodecType codec, + size_t frame_number, + size_t simulcast_svc_idx, + bool inter_layer_predicted) RTC_RUN_ON(sequence_checker_); + + void CalcFrameQuality(const I420BufferInterface& decoded_frame, + FrameStatistics* frame_stat); + + void WriteDecodedFrame(const I420BufferInterface& decoded_frame, + FrameWriter& frame_writer); + + void HandleTailDrops(); + + // Test config. + const VideoCodecTestFixture::Config config_; + const size_t num_simulcast_or_spatial_layers_; + const bool analyze_frame_quality_; + + // Frame statistics. + VideoCodecTestStatsImpl* const stats_; + + // Codecs. + webrtc::VideoEncoder* const encoder_; + VideoDecoderList* const decoders_; + const std::unique_ptr<VideoBitrateAllocator> bitrate_allocator_; + VideoBitrateAllocation bitrate_allocation_ RTC_GUARDED_BY(sequence_checker_); + double framerate_fps_ RTC_GUARDED_BY(sequence_checker_); + + // Adapters for the codec callbacks. + VideoProcessorEncodeCompleteCallback encode_callback_; + // Assign separate callback object to each decoder. This allows us to identify + // decoded layer in frame decode callback. + // simulcast_svc_idx -> decode callback. + std::vector<std::unique_ptr<VideoProcessorDecodeCompleteCallback>> + decode_callback_; + + // Each call to ProcessFrame() will read one frame from `input_frame_reader_`. + FrameReader* const input_frame_reader_; + + // Input frames are used as reference for frame quality evaluations. + // Async codecs might queue frames. To handle that we keep input frame + // and release it after corresponding coded frame is decoded and quality + // measurement is done. + // frame_number -> frame. + std::map<size_t, VideoFrame> input_frames_ RTC_GUARDED_BY(sequence_checker_); + + // Encoder delivers coded frame layer-by-layer. We store coded frames and + // then, after all layers are encoded, decode them. 
Such separation of + // frame processing on superframe level simplifies encoding/decoding time + // measurement. + // simulcast_svc_idx -> merged SVC encoded frame. + std::vector<EncodedImage> merged_encoded_frames_ + RTC_GUARDED_BY(sequence_checker_); + + // These (optional) file writers are used to persistently store the encoded + // and decoded bitstreams. Each frame writer is enabled by being non-null. + IvfFileWriterMap* const encoded_frame_writers_; + FrameWriterList* const decoded_frame_writers_; + + // Metadata for input/encoded/decoded frames. Used for frame identification, + // frame drop detection, etc. We assume that encoded/decoded frames are + // ordered within each simulcast/spatial layer, but we do not make any + // assumptions of frame ordering between layers. + size_t last_inputed_frame_num_ RTC_GUARDED_BY(sequence_checker_); + size_t last_inputed_timestamp_ RTC_GUARDED_BY(sequence_checker_); + // simulcast_svc_idx -> encode status. + std::vector<bool> first_encoded_frame_ RTC_GUARDED_BY(sequence_checker_); + // simulcast_svc_idx -> frame_number. + std::vector<size_t> last_encoded_frame_num_ RTC_GUARDED_BY(sequence_checker_); + // simulcast_svc_idx -> decode status. + std::vector<bool> first_decoded_frame_ RTC_GUARDED_BY(sequence_checker_); + // simulcast_svc_idx -> frame_number. + std::vector<size_t> last_decoded_frame_num_ RTC_GUARDED_BY(sequence_checker_); + // simulcast_svc_idx -> buffer. + std::vector<rtc::scoped_refptr<I420Buffer>> last_decoded_frame_buffer_ + RTC_GUARDED_BY(sequence_checker_); + + // Time spent in frame encode callback. It is accumulated for layers and + // reset when frame encode starts. When the next layer is encoded, the + // post-encode time is subtracted from the measured encode time. Thus we get + // the pure encode time. + int64_t post_encode_time_ns_ RTC_GUARDED_BY(sequence_checker_); + + // Indicates whether Finalize() was called or not. 
+ bool is_finalized_ RTC_GUARDED_BY(sequence_checker_); + + // This class must be operated on a TaskQueue. + RTC_NO_UNIQUE_ADDRESS SequenceChecker sequence_checker_; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_TEST_VIDEOPROCESSOR_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/test/videoprocessor_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/test/videoprocessor_unittest.cc new file mode 100644 index 0000000000..6af775cece --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/test/videoprocessor_unittest.cc @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/codecs/test/videoprocessor.h" + +#include <memory> + +#include "api/scoped_refptr.h" +#include "api/test/mock_video_decoder.h" +#include "api/test/mock_video_encoder.h" +#include "api/test/videocodec_test_fixture.h" +#include "api/video/i420_buffer.h" +#include "media/base/media_constants.h" +#include "modules/video_coding/codecs/test/videocodec_test_stats_impl.h" +#include "rtc_base/task_queue_for_test.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/testsupport/mock/mock_frame_reader.h" + +using ::testing::_; +using ::testing::AllOf; +using ::testing::Field; +using ::testing::Property; +using ::testing::ResultOf; +using ::testing::Return; + +namespace webrtc { +namespace test { + +namespace { + +const int kWidth = 352; +const int kHeight = 288; +const int kFrameSize = kWidth * kHeight * 3 / 2; // I420. 
+ +} // namespace + +class VideoProcessorTest : public ::testing::Test { + protected: + VideoProcessorTest() : q_("VP queue") { + config_.SetCodecSettings(cricket::kVp8CodecName, 1, 1, 1, false, false, + false, kWidth, kHeight); + + decoder_mock_ = new MockVideoDecoder(); + decoders_.push_back(std::unique_ptr<VideoDecoder>(decoder_mock_)); + + ExpectInit(); + EXPECT_CALL(frame_reader_mock_, FrameLength()) + .WillRepeatedly(Return(kFrameSize)); + q_.SendTask( + [this] { + video_processor_ = std::make_unique<VideoProcessor>( + &encoder_mock_, &decoders_, &frame_reader_mock_, config_, &stats_, + &encoded_frame_writers_, /*decoded_frame_writers=*/nullptr); + }); + } + + ~VideoProcessorTest() { + q_.SendTask([this] { video_processor_.reset(); }); + } + + void ExpectInit() { + EXPECT_CALL(encoder_mock_, InitEncode(_, _)); + EXPECT_CALL(encoder_mock_, RegisterEncodeCompleteCallback); + EXPECT_CALL(*decoder_mock_, Configure); + EXPECT_CALL(*decoder_mock_, RegisterDecodeCompleteCallback); + } + + void ExpectRelease() { + EXPECT_CALL(encoder_mock_, Release()).Times(1); + EXPECT_CALL(encoder_mock_, RegisterEncodeCompleteCallback(_)).Times(1); + EXPECT_CALL(*decoder_mock_, Release()).Times(1); + EXPECT_CALL(*decoder_mock_, RegisterDecodeCompleteCallback(_)).Times(1); + } + + TaskQueueForTest q_; + + VideoCodecTestFixture::Config config_; + + MockVideoEncoder encoder_mock_; + MockVideoDecoder* decoder_mock_; + std::vector<std::unique_ptr<VideoDecoder>> decoders_; + MockFrameReader frame_reader_mock_; + VideoCodecTestStatsImpl stats_; + VideoProcessor::IvfFileWriterMap encoded_frame_writers_; + std::unique_ptr<VideoProcessor> video_processor_; +}; + +TEST_F(VideoProcessorTest, InitRelease) { + ExpectRelease(); +} + +TEST_F(VideoProcessorTest, ProcessFrames_FixedFramerate) { + const int kBitrateKbps = 456; + const int kFramerateFps = 31; + EXPECT_CALL( + encoder_mock_, + SetRates(Field(&VideoEncoder::RateControlParameters::framerate_fps, + static_cast<double>(kFramerateFps)))) + 
.Times(1); + q_.SendTask([=] { video_processor_->SetRates(kBitrateKbps, kFramerateFps); }); + + EXPECT_CALL(frame_reader_mock_, ReadFrame()) + .WillRepeatedly(Return(I420Buffer::Create(kWidth, kHeight))); + EXPECT_CALL( + encoder_mock_, + Encode(Property(&VideoFrame::timestamp, 1 * 90000 / kFramerateFps), _)) + .Times(1); + q_.SendTask([this] { video_processor_->ProcessFrame(); }); + + EXPECT_CALL( + encoder_mock_, + Encode(Property(&VideoFrame::timestamp, 2 * 90000 / kFramerateFps), _)) + .Times(1); + q_.SendTask([this] { video_processor_->ProcessFrame(); }); + + ExpectRelease(); +} + +TEST_F(VideoProcessorTest, ProcessFrames_VariableFramerate) { + const int kBitrateKbps = 456; + const int kStartFramerateFps = 27; + const int kStartTimestamp = 90000 / kStartFramerateFps; + EXPECT_CALL( + encoder_mock_, + SetRates(Field(&VideoEncoder::RateControlParameters::framerate_fps, + static_cast<double>(kStartFramerateFps)))) + .Times(1); + q_.SendTask( + [=] { video_processor_->SetRates(kBitrateKbps, kStartFramerateFps); }); + + EXPECT_CALL(frame_reader_mock_, ReadFrame()) + .WillRepeatedly(Return(I420Buffer::Create(kWidth, kHeight))); + EXPECT_CALL(encoder_mock_, + Encode(Property(&VideoFrame::timestamp, kStartTimestamp), _)) + .Times(1); + q_.SendTask([this] { video_processor_->ProcessFrame(); }); + + const int kNewFramerateFps = 13; + EXPECT_CALL( + encoder_mock_, + SetRates(Field(&VideoEncoder::RateControlParameters::framerate_fps, + static_cast<double>(kNewFramerateFps)))) + .Times(1); + q_.SendTask( + [=] { video_processor_->SetRates(kBitrateKbps, kNewFramerateFps); }); + + EXPECT_CALL(encoder_mock_, + Encode(Property(&VideoFrame::timestamp, + kStartTimestamp + 90000 / kNewFramerateFps), + _)) + .Times(1); + q_.SendTask([this] { video_processor_->ProcessFrame(); }); + + ExpectRelease(); +} + +TEST_F(VideoProcessorTest, SetRates) { + const uint32_t kBitrateKbps = 123; + const int kFramerateFps = 17; + + EXPECT_CALL( + encoder_mock_, + SetRates(AllOf(ResultOf( + 
[](const VideoEncoder::RateControlParameters& params) { + return params.bitrate.get_sum_kbps(); + }, + kBitrateKbps), + Field(&VideoEncoder::RateControlParameters::framerate_fps, + static_cast<double>(kFramerateFps))))) + .Times(1); + q_.SendTask([=] { video_processor_->SetRates(kBitrateKbps, kFramerateFps); }); + + const uint32_t kNewBitrateKbps = 456; + const int kNewFramerateFps = 34; + EXPECT_CALL( + encoder_mock_, + SetRates(AllOf(ResultOf( + [](const VideoEncoder::RateControlParameters& params) { + return params.bitrate.get_sum_kbps(); + }, + kNewBitrateKbps), + Field(&VideoEncoder::RateControlParameters::framerate_fps, + static_cast<double>(kNewFramerateFps))))) + .Times(1); + q_.SendTask( + [=] { video_processor_->SetRates(kNewBitrateKbps, kNewFramerateFps); }); + + ExpectRelease(); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp8/default_temporal_layers.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp8/default_temporal_layers.cc new file mode 100644 index 0000000000..94860da1b6 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/default_temporal_layers.cc @@ -0,0 +1,884 @@ +/* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/vp8/default_temporal_layers.h" + +#include <stdlib.h> + +#include <algorithm> +#include <array> +#include <memory> +#include <set> +#include <utility> +#include <vector> + +#include "modules/video_coding/include/video_codec_interface.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { +DefaultTemporalLayers::PendingFrame::PendingFrame() = default; +DefaultTemporalLayers::PendingFrame::PendingFrame( + uint32_t timestamp, + bool expired, + uint8_t updated_buffers_mask, + const DependencyInfo& dependency_info) + : timestamp(timestamp), + expired(expired), + updated_buffer_mask(updated_buffers_mask), + dependency_info(dependency_info) {} + +namespace { +using BufferFlags = Vp8FrameConfig::BufferFlags; +using FreezeEntropy = Vp8FrameConfig::FreezeEntropy; +using Vp8BufferReference = Vp8FrameConfig::Vp8BufferReference; + +constexpr BufferFlags kNone = BufferFlags::kNone; +constexpr BufferFlags kReference = BufferFlags::kReference; +constexpr BufferFlags kUpdate = BufferFlags::kUpdate; +constexpr BufferFlags kReferenceAndUpdate = BufferFlags::kReferenceAndUpdate; +constexpr FreezeEntropy kFreezeEntropy = FreezeEntropy::kFreezeEntropy; + +static constexpr uint8_t kUninitializedPatternIndex = + std::numeric_limits<uint8_t>::max(); +static constexpr std::array<Vp8BufferReference, 3> kAllBuffers = { + {Vp8BufferReference::kLast, Vp8BufferReference::kGolden, + Vp8BufferReference::kAltref}}; + +std::vector<unsigned int> GetTemporalIds(size_t num_layers) { + switch (num_layers) { + case 1: + // Temporal layer structure (single layer): + // 0 0 0 0 ... + return {0}; + case 2: + // Temporal layer structure: + // 1 1 ... + // 0 0 ... + return {0, 1}; + case 3: + // Temporal layer structure: + // 2 2 2 2 ... + // 1 1 ... + // 0 0 ... + return {0, 2, 1, 2}; + case 4: + // Temporal layer structure: + // 3 3 3 3 3 3 3 3 ... 
+ // 2 2 2 2 ... + // 1 1 ... + // 0 0 ... + return {0, 3, 2, 3, 1, 3, 2, 3}; + default: + RTC_DCHECK_NOTREACHED(); + break; + } + RTC_DCHECK_NOTREACHED(); + return {0}; +} + +// Returns a bitmask with the Vp8BufferReference bit set for every buffer +// (last, golden, altref) whose flags in `config` include kUpdate. +uint8_t GetUpdatedBuffers(const Vp8FrameConfig& config) { + uint8_t flags = 0; + if (config.last_buffer_flags & BufferFlags::kUpdate) { + flags |= static_cast<uint8_t>(Vp8BufferReference::kLast); + } + if (config.golden_buffer_flags & BufferFlags::kUpdate) { + flags |= static_cast<uint8_t>(Vp8BufferReference::kGolden); + } + if (config.arf_buffer_flags & BufferFlags::kUpdate) { + flags |= static_cast<uint8_t>(Vp8BufferReference::kAltref); + } + return flags; +} + +// Maps a buffer reference to its array index: kLast -> 0, kGolden -> 1, +// kAltref -> 2. kNone is not a valid argument (RTC_CHECK_NOTREACHED). +size_t BufferToIndex(Vp8BufferReference buffer) { + switch (buffer) { + case Vp8FrameConfig::Vp8BufferReference::kLast: + return 0; + case Vp8FrameConfig::Vp8BufferReference::kGolden: + return 1; + case Vp8FrameConfig::Vp8BufferReference::kAltref: + return 2; + case Vp8FrameConfig::Vp8BufferReference::kNone: + RTC_CHECK_NOTREACHED(); + } +} + +} // namespace + +constexpr size_t DefaultTemporalLayers::kNumReferenceBuffers; + +std::vector<DefaultTemporalLayers::DependencyInfo> +DefaultTemporalLayers::GetDependencyInfo(size_t num_layers) { + // For indexing in the patterns described below (which temporal layers they + // belong to), see the diagram above. + // Layer sync is done similarly for all patterns (except single stream) and + // happens every 8 frames: + // TL1 layer syncs periodically by only referencing TL0 ('last'), but still + // updating 'golden', so it can be used as a reference by future TL1 frames. + // TL2 layer syncs just before TL1 by only depending on TL0 (and not depending + // on TL1's buffer before TL1 has layer synced). 
+ // TODO(pbos): Consider cyclically updating 'arf' (and 'golden' for 1TL) for + // the base layer in 1-3TL instead of 'last' periodically on long intervals, + // so that if scene changes occur (user walks between rooms or rotates webcam) + // the 'arf' (or 'golden' respectively) is not stuck on a no-longer relevant + // keyframe. + + switch (num_layers) { + case 1: + // Always reference and update the same buffer. + return {{"S", {kReferenceAndUpdate, kNone, kNone}}}; + case 2: + // All layers can reference but not update the 'alt' buffer, this means + // that the 'alt' buffer reference is effectively the last keyframe. + // TL0 also references and updates the 'last' buffer. + // TL1 also references 'last' and references and updates 'golden'. + if (!field_trial::IsDisabled("WebRTC-UseShortVP8TL2Pattern")) { + // Shortened 4-frame pattern: + // 1---1 1---1 ... + // / / / / + // 0---0---0---0 ... + return {{"SS", {kReferenceAndUpdate, kNone, kNone}}, + {"-S", {kReference, kUpdate, kNone}}, + {"SR", {kReferenceAndUpdate, kNone, kNone}}, + {"-D", {kReference, kReference, kNone, kFreezeEntropy}}}; + } else { + // "Default" 8-frame pattern: + // 1---1---1---1 1---1---1---1 ... + // / / / / / / / / + // 0---0---0---0---0---0---0---0 ... + return {{"SS", {kReferenceAndUpdate, kNone, kNone}}, + {"-S", {kReference, kUpdate, kNone}}, + {"SR", {kReferenceAndUpdate, kNone, kNone}}, + {"-R", {kReference, kReferenceAndUpdate, kNone}}, + {"SR", {kReferenceAndUpdate, kNone, kNone}}, + {"-R", {kReference, kReferenceAndUpdate, kNone}}, + {"SR", {kReferenceAndUpdate, kNone, kNone}}, + {"-D", {kReference, kReference, kNone, kFreezeEntropy}}}; + } + case 3: + if (field_trial::IsEnabled("WebRTC-UseShortVP8TL3Pattern")) { + // This field trial is intended to check if it is worth using a shorter + // temporal pattern, trading some coding efficiency for less risk of + // dropped frames. 
+ // The coding efficiency will decrease somewhat since the higher layer + // state is more volatile, but it will be offset slightly by updating + // the altref buffer with TL2 frames, instead of just referencing lower + // layers. + // If a frame is dropped in a higher layer, the jitter + // buffer on the receive side won't be able to decode any higher layer + // frame until the next sync frame. So we expect a noticeable decrease + // in frame drops on links with high packet loss. + + // TL0 references and updates the 'last' buffer. + // TL1 references 'last' and references and updates 'golden'. + // TL2 references both 'last' & 'golden' and references and updates + // 'arf'. + // 2-------2 2-------2 2 + // / __/ / __/ / + // / __1 / __1 / + // /___/ /___/ / + // 0---------------0---------------0----- + // 0 1 2 3 4 5 6 7 8 9 ... + return {{"SSS", {kReferenceAndUpdate, kNone, kNone}}, + {"--S", {kReference, kNone, kUpdate}}, + {"-DR", {kReference, kUpdate, kNone}}, + {"--D", {kReference, kReference, kReference, kFreezeEntropy}}}; + } else { + // All layers can reference but not update the 'alt' buffer, this means + // that the 'alt' buffer reference is effectively the last keyframe. + // TL0 also references and updates the 'last' buffer. + // TL1 also references 'last' and references and updates 'golden'. + // TL2 references both 'last' and 'golden' but updates no buffer. + // 2 __2 _____2 __2 2 + // / /____/ / / / + // / 1---------/-----1 / + // /_____/ /_____/ / + // 0---------------0---------------0----- + // 0 1 2 3 4 5 6 7 8 9 ... 
+ return {{"SSS", {kReferenceAndUpdate, kNone, kNone}}, + {"--D", {kReference, kNone, kNone, kFreezeEntropy}}, + {"-SS", {kReference, kUpdate, kNone}}, + {"--D", {kReference, kReference, kNone, kFreezeEntropy}}, + {"SRR", {kReferenceAndUpdate, kNone, kNone}}, + {"--D", {kReference, kReference, kNone, kFreezeEntropy}}, + {"-DS", {kReference, kReferenceAndUpdate, kNone}}, + {"--D", {kReference, kReference, kNone, kFreezeEntropy}}}; + } + case 4: + // TL0 references and updates only the 'last' buffer. + // TL1 references 'last' and updates and references 'golden'. + // TL2 references 'last' and 'golden', and references and updates 'arf'. + // TL3 references all buffers but update none of them. + // TODO(philipel): Set decode target information for this structure. + return {{"----", {kReferenceAndUpdate, kNone, kNone}}, + {"----", {kReference, kNone, kNone, kFreezeEntropy}}, + {"----", {kReference, kNone, kUpdate}}, + {"----", {kReference, kNone, kReference, kFreezeEntropy}}, + {"----", {kReference, kUpdate, kNone}}, + {"----", {kReference, kReference, kReference, kFreezeEntropy}}, + {"----", {kReference, kReference, kReferenceAndUpdate}}, + {"----", {kReference, kReference, kReference, kFreezeEntropy}}, + {"----", {kReferenceAndUpdate, kNone, kNone}}, + {"----", {kReference, kReference, kReference, kFreezeEntropy}}, + {"----", {kReference, kReference, kReferenceAndUpdate}}, + {"----", {kReference, kReference, kReference, kFreezeEntropy}}, + {"----", {kReference, kReferenceAndUpdate, kNone}}, + {"----", {kReference, kReference, kReference, kFreezeEntropy}}, + {"----", {kReference, kReference, kReferenceAndUpdate}}, + {"----", {kReference, kReference, kReference, kFreezeEntropy}}}; + default: + RTC_DCHECK_NOTREACHED(); + break; + } + RTC_DCHECK_NOTREACHED(); + return {{"", {kNone, kNone, kNone}}}; +} + +std::bitset<DefaultTemporalLayers::kNumReferenceBuffers> +DefaultTemporalLayers::DetermineStaticBuffers( + const std::vector<DependencyInfo>& temporal_pattern) { + 
std::bitset<kNumReferenceBuffers> buffers; + buffers.set(); + for (const DependencyInfo& info : temporal_pattern) { + uint8_t updated_buffers = GetUpdatedBuffers(info.frame_config); + + for (Vp8BufferReference buffer : kAllBuffers) { + if (static_cast<uint8_t>(buffer) & updated_buffers) { + buffers.reset(BufferToIndex(buffer)); + } + } + } + return buffers; +} + +DefaultTemporalLayers::DefaultTemporalLayers(int number_of_temporal_layers) + : num_layers_(std::max(1, number_of_temporal_layers)), + temporal_ids_(GetTemporalIds(num_layers_)), + temporal_pattern_(GetDependencyInfo(num_layers_)), + is_static_buffer_(DetermineStaticBuffers(temporal_pattern_)), + pattern_idx_(kUninitializedPatternIndex), + new_bitrates_bps_(std::vector<uint32_t>(num_layers_, 0u)) { + RTC_CHECK_GE(kMaxTemporalStreams, number_of_temporal_layers); + RTC_CHECK_GE(number_of_temporal_layers, 0); + RTC_CHECK_LE(number_of_temporal_layers, 4); + // pattern_idx_ wraps around temporal_pattern_.size(); this is incorrect if + // temporal_ids_ are ever longer. If this is no longer correct it needs to + // wrap at max(temporal_ids_.size(), temporal_pattern_.size()). + RTC_DCHECK_LE(temporal_ids_.size(), temporal_pattern_.size()); + + // NOTE(review): `checker_` is assigned inside RTC_DCHECK, and the other + // visible uses of it are wrapped in RTC_DCHECK as well; presumably the + // checker is meant to exist only in DCHECK-enabled builds - confirm. + RTC_DCHECK( + checker_ = TemporalLayersChecker::CreateTemporalLayersChecker( + Vp8TemporalLayersType::kFixedPattern, number_of_temporal_layers)); + + // Always need to start with a keyframe, so pre-populate all frame counters. + frames_since_buffer_refresh_.fill(0); +} + +DefaultTemporalLayers::~DefaultTemporalLayers() = default; + +void DefaultTemporalLayers::SetQpLimits(size_t stream_index, + int min_qp, + int max_qp) { + RTC_DCHECK_LT(stream_index, StreamCount()); + // Ignore. +} + +size_t DefaultTemporalLayers::StreamCount() const { + return 1; +} + +bool DefaultTemporalLayers::SupportsEncoderFrameDropping( + size_t stream_index) const { + RTC_DCHECK_LT(stream_index, StreamCount()); + // This class allows the encoder to drop frames as it sees fit. 
+ return true; +} + +void DefaultTemporalLayers::OnRatesUpdated( + size_t stream_index, + const std::vector<uint32_t>& bitrates_bps, + int framerate_fps) { + RTC_DCHECK_LT(stream_index, StreamCount()); + RTC_DCHECK_GT(bitrates_bps.size(), 0); + RTC_DCHECK_LE(bitrates_bps.size(), num_layers_); + // `bitrates_bps` uses individual rate per layer, but Vp8EncoderConfig wants + // the accumulated rate, so sum them up. + new_bitrates_bps_ = bitrates_bps; + new_bitrates_bps_->resize(num_layers_); + for (size_t i = 1; i < num_layers_; ++i) { + (*new_bitrates_bps_)[i] += (*new_bitrates_bps_)[i - 1]; + } +} + +Vp8EncoderConfig DefaultTemporalLayers::UpdateConfiguration( + size_t stream_index) { + RTC_DCHECK_LT(stream_index, StreamCount()); + + Vp8EncoderConfig config; + + if (!new_bitrates_bps_) { + return config; + } + + config.temporal_layer_config.emplace(); + Vp8EncoderConfig::TemporalLayerConfig& ts_config = + config.temporal_layer_config.value(); + + for (size_t i = 0; i < num_layers_; ++i) { + ts_config.ts_target_bitrate[i] = (*new_bitrates_bps_)[i] / 1000; + // ..., 4, 2, 1 + ts_config.ts_rate_decimator[i] = 1 << (num_layers_ - i - 1); + } + + ts_config.ts_number_layers = num_layers_; + ts_config.ts_periodicity = temporal_ids_.size(); + std::copy(temporal_ids_.begin(), temporal_ids_.end(), + ts_config.ts_layer_id.begin()); + + new_bitrates_bps_.reset(); + + return config; +} + +bool DefaultTemporalLayers::IsSyncFrame(const Vp8FrameConfig& config) const { + // Since we always assign TL0 to 'last' in these patterns, we can infer layer + // sync by checking if temporal id > 0 and we only reference TL0 or buffers + // containing the last key-frame. + if (config.packetizer_temporal_idx == 0) { + // TL0 frames are per definition not sync frames. + return false; + } + + if ((config.last_buffer_flags & BufferFlags::kReference) == 0) { + // Sync frames must reference TL0. 
+ return false; + } + + if ((config.golden_buffer_flags & BufferFlags::kReference) && + !is_static_buffer_[BufferToIndex(Vp8BufferReference::kGolden)]) { + // Referencing a golden frame that contains a non-(base layer|key frame). + return false; + } + if ((config.arf_buffer_flags & BufferFlags::kReference) && + !is_static_buffer_[BufferToIndex(Vp8BufferReference::kAltref)]) { + // Referencing an altref frame that contains a non-(base layer|key frame). + return false; + } + + return true; +} + +Vp8FrameConfig DefaultTemporalLayers::NextFrameConfig(size_t stream_index, + uint32_t timestamp) { + RTC_DCHECK_LT(stream_index, StreamCount()); + RTC_DCHECK_GT(num_layers_, 0); + RTC_DCHECK_GT(temporal_pattern_.size(), 0); + + RTC_DCHECK_GT(kUninitializedPatternIndex, temporal_pattern_.size()); + const bool first_frame = (pattern_idx_ == kUninitializedPatternIndex); + + pattern_idx_ = (pattern_idx_ + 1) % temporal_pattern_.size(); + DependencyInfo dependency_info = temporal_pattern_[pattern_idx_]; + Vp8FrameConfig& tl_config = dependency_info.frame_config; + tl_config.encoder_layer_id = tl_config.packetizer_temporal_idx = + temporal_ids_[pattern_idx_ % temporal_ids_.size()]; + + if (pattern_idx_ == 0) { + // Start of new pattern iteration, set up clear state by invalidating any + // pending frames, so that we don't make an invalid reference to a buffer + // containing data from a previous iteration. + for (auto& frame : pending_frames_) { + frame.expired = true; + } + } + + if (first_frame) { + tl_config = Vp8FrameConfig::GetIntraFrameConfig(); + } else { + // Last is always ok to reference as it contains the base layer. For other + // buffers though, we need to check if the buffer has actually been + // refreshed this cycle of the temporal pattern. If the encoder dropped + // a frame, it might not have. 
+ ValidateReferences(&tl_config.golden_buffer_flags, + Vp8BufferReference::kGolden); + ValidateReferences(&tl_config.arf_buffer_flags, + Vp8BufferReference::kAltref); + // Update search order to let the encoder know which buffers contain the + // most recent data. + UpdateSearchOrder(&tl_config); + // Figure out if this is a sync frame (non-base-layer frame with only + // base-layer references). + tl_config.layer_sync = IsSyncFrame(tl_config); + + // Increment frame age, this needs to be in sync with `pattern_idx_`, + // so must update it here. Resetting age to 0 must be done when encoding is + // complete though, and so in the case of pipelining encoder it might lag. + // To prevent this data from spilling over into the next iteration, + // the `pending_frames_` entries are marked expired at the start of each + // pattern iteration. If delay is constant, + // the relative age should still be OK for the search order. + for (size_t& n : frames_since_buffer_refresh_) { + ++n; + } + } + + // Add frame to set of pending frames, awaiting completion. + pending_frames_.emplace_back(timestamp, false, GetUpdatedBuffers(tl_config), + dependency_info); + + // Checker does not yet support encoder frame dropping, so validate flags + // here before they can be dropped. + // TODO(sprang): Update checker to support dropping. + RTC_DCHECK(checker_->CheckTemporalConfig(first_frame, tl_config)); + + return tl_config; +} + +void DefaultTemporalLayers::ValidateReferences(BufferFlags* flags, + Vp8BufferReference ref) const { + // Check if the buffer specified by `ref` is actually referenced, and if so + // if it is also a dynamically updating one (buffers always just containing + // keyframes are always safe to reference). + if ((*flags & BufferFlags::kReference) && + !is_static_buffer_[BufferToIndex(ref)]) { + if (NumFramesSinceBufferRefresh(ref) >= pattern_idx_) { + // No valid buffer state, or buffer contains frame that is older than the + // current pattern. This reference is not valid, so remove it. 
+ *flags = static_cast<BufferFlags>(*flags & ~BufferFlags::kReference); + } + } +} + +void DefaultTemporalLayers::UpdateSearchOrder(Vp8FrameConfig* config) { + // Figure out which of the buffers we can reference, and order them so that + // the most recently refreshed is first. Otherwise prioritize last first, + // golden second, and altref third. + using BufferRefAge = std::pair<Vp8BufferReference, size_t>; + std::vector<BufferRefAge> eligible_buffers; + if (config->last_buffer_flags & BufferFlags::kReference) { + eligible_buffers.emplace_back( + Vp8BufferReference::kLast, + NumFramesSinceBufferRefresh(Vp8BufferReference::kLast)); + } + if (config->golden_buffer_flags & BufferFlags::kReference) { + eligible_buffers.emplace_back( + Vp8BufferReference::kGolden, + NumFramesSinceBufferRefresh(Vp8BufferReference::kGolden)); + } + if (config->arf_buffer_flags & BufferFlags::kReference) { + eligible_buffers.emplace_back( + Vp8BufferReference::kAltref, + NumFramesSinceBufferRefresh(Vp8BufferReference::kAltref)); + } + + std::sort(eligible_buffers.begin(), eligible_buffers.end(), + [](const BufferRefAge& lhs, const BufferRefAge& rhs) { + if (lhs.second != rhs.second) { + // Lower count has highest precedence. + return lhs.second < rhs.second; + } + return lhs.first < rhs.first; + }); + + // Populate the search order fields where possible. 
+ if (!eligible_buffers.empty()) { + config->first_reference = eligible_buffers.front().first; + if (eligible_buffers.size() > 1) + config->second_reference = eligible_buffers[1].first; + } +} + +size_t DefaultTemporalLayers::NumFramesSinceBufferRefresh( + Vp8FrameConfig::Vp8BufferReference ref) const { + return frames_since_buffer_refresh_[BufferToIndex(ref)]; +} + +void DefaultTemporalLayers::ResetNumFramesSinceBufferRefresh( + Vp8FrameConfig::Vp8BufferReference ref) { + frames_since_buffer_refresh_[BufferToIndex(ref)] = 0; +} + +void DefaultTemporalLayers::CullPendingFramesBefore(uint32_t timestamp) { + while (!pending_frames_.empty() && + pending_frames_.front().timestamp != timestamp) { + pending_frames_.pop_front(); + } +} + +void DefaultTemporalLayers::OnEncodeDone(size_t stream_index, + uint32_t rtp_timestamp, + size_t size_bytes, + bool is_keyframe, + int qp, + CodecSpecificInfo* info) { + RTC_DCHECK_LT(stream_index, StreamCount()); + RTC_DCHECK_GT(num_layers_, 0); + + if (size_bytes == 0) { + RTC_LOG(LS_WARNING) << "Empty frame; treating as dropped."; + OnFrameDropped(stream_index, rtp_timestamp); + return; + } + + CullPendingFramesBefore(rtp_timestamp); + RTC_CHECK(!pending_frames_.empty()); + PendingFrame& frame = pending_frames_.front(); + RTC_DCHECK_EQ(frame.timestamp, rtp_timestamp); + const Vp8FrameConfig& frame_config = frame.dependency_info.frame_config; + if (is_keyframe) { + // Signal key-frame so checker resets state. + RTC_DCHECK(checker_->CheckTemporalConfig(true, frame_config)); + } + + CodecSpecificInfoVP8& vp8_info = info->codecSpecific.VP8; + if (num_layers_ == 1) { + vp8_info.temporalIdx = kNoTemporalIdx; + vp8_info.layerSync = false; + } else { + if (is_keyframe) { + // Restart the temporal pattern on keyframes. + pattern_idx_ = 0; + vp8_info.temporalIdx = 0; + vp8_info.layerSync = true; // Keyframes are always sync frames. 
+ + for (Vp8BufferReference buffer : kAllBuffers) { + if (is_static_buffer_[BufferToIndex(buffer)]) { + // Update frame count of all kf-only buffers, regardless of state of + // `pending_frames_`. + ResetNumFramesSinceBufferRefresh(buffer); + } else { + // Key-frames update all buffers, this should be reflected when + // the buffer-refresh state is updated further down in this function. + frame.updated_buffer_mask |= static_cast<uint8_t>(buffer); + } + } + } else { + // Delta frame, update codec specifics with temporal id and sync flag. + vp8_info.temporalIdx = frame_config.packetizer_temporal_idx; + vp8_info.layerSync = frame_config.layer_sync; + } + } + + vp8_info.useExplicitDependencies = true; + RTC_DCHECK_EQ(vp8_info.referencedBuffersCount, 0u); + RTC_DCHECK_EQ(vp8_info.updatedBuffersCount, 0u); + + GenericFrameInfo& generic_frame_info = info->generic_frame_info.emplace(); + + for (int i = 0; i < static_cast<int>(Vp8FrameConfig::Buffer::kCount); ++i) { + bool references = false; + bool updates = is_keyframe; + + if (!is_keyframe && + frame_config.References(static_cast<Vp8FrameConfig::Buffer>(i))) { + RTC_DCHECK_LT(vp8_info.referencedBuffersCount, + arraysize(CodecSpecificInfoVP8::referencedBuffers)); + references = true; + vp8_info.referencedBuffers[vp8_info.referencedBuffersCount++] = i; + } + + if (is_keyframe || + frame_config.Updates(static_cast<Vp8FrameConfig::Buffer>(i))) { + RTC_DCHECK_LT(vp8_info.updatedBuffersCount, + arraysize(CodecSpecificInfoVP8::updatedBuffers)); + updates = true; + vp8_info.updatedBuffers[vp8_info.updatedBuffersCount++] = i; + } + + if (references || updates) { + generic_frame_info.encoder_buffers.emplace_back(i, references, updates); + } + } + + // The templates are always present on keyframes, and then referred to by + // subsequent frames. 
+ if (is_keyframe) { + info->template_structure = GetTemplateStructure(num_layers_); + generic_frame_info.decode_target_indications = + temporal_pattern_.front().decode_target_indications; + generic_frame_info.temporal_id = 0; + } else { + generic_frame_info.decode_target_indications = + frame.dependency_info.decode_target_indications; + generic_frame_info.temporal_id = frame_config.packetizer_temporal_idx; + } + + if (!frame.expired) { + for (Vp8BufferReference buffer : kAllBuffers) { + if (frame.updated_buffer_mask & static_cast<uint8_t>(buffer)) { + ResetNumFramesSinceBufferRefresh(buffer); + } + } + } + + pending_frames_.pop_front(); +} + +void DefaultTemporalLayers::OnFrameDropped(size_t stream_index, + uint32_t rtp_timestamp) { + CullPendingFramesBefore(rtp_timestamp); + RTC_CHECK(!pending_frames_.empty()); + RTC_DCHECK_EQ(pending_frames_.front().timestamp, rtp_timestamp); + pending_frames_.pop_front(); +} + +void DefaultTemporalLayers::OnPacketLossRateUpdate(float packet_loss_rate) {} + +void DefaultTemporalLayers::OnRttUpdate(int64_t rtt_ms) {} + +void DefaultTemporalLayers::OnLossNotification( + const VideoEncoder::LossNotification& loss_notification) {} + +FrameDependencyStructure DefaultTemporalLayers::GetTemplateStructure( + int num_layers) const { + RTC_CHECK_LT(num_layers, 5); + RTC_CHECK_GT(num_layers, 0); + + FrameDependencyStructure template_structure; + template_structure.num_decode_targets = num_layers; + + switch (num_layers) { + case 1: { + template_structure.templates.resize(2); + template_structure.templates[0].T(0).Dtis("S"); + template_structure.templates[1].T(0).Dtis("S").FrameDiffs({1}); + return template_structure; + } + case 2: { + template_structure.templates.resize(5); + template_structure.templates[0].T(0).Dtis("SS"); + template_structure.templates[1].T(0).Dtis("SS").FrameDiffs({2}); + template_structure.templates[2].T(0).Dtis("SR").FrameDiffs({2}); + template_structure.templates[3].T(1).Dtis("-S").FrameDiffs({1}); + 
template_structure.templates[4].T(1).Dtis("-D").FrameDiffs({2, 1}); + return template_structure; + } + case 3: { + if (field_trial::IsEnabled("WebRTC-UseShortVP8TL3Pattern")) { + template_structure.templates.resize(5); + template_structure.templates[0].T(0).Dtis("SSS"); + template_structure.templates[1].T(0).Dtis("SSS").FrameDiffs({4}); + template_structure.templates[2].T(1).Dtis("-DR").FrameDiffs({2}); + template_structure.templates[3].T(2).Dtis("--S").FrameDiffs({1}); + template_structure.templates[4].T(2).Dtis("--D").FrameDiffs({2, 1}); + } else { + template_structure.templates.resize(7); + template_structure.templates[0].T(0).Dtis("SSS"); + template_structure.templates[1].T(0).Dtis("SSS").FrameDiffs({4}); + template_structure.templates[2].T(0).Dtis("SRR").FrameDiffs({4}); + template_structure.templates[3].T(1).Dtis("-SS").FrameDiffs({2}); + template_structure.templates[4].T(1).Dtis("-DS").FrameDiffs({4, 2}); + template_structure.templates[5].T(2).Dtis("--D").FrameDiffs({1}); + template_structure.templates[6].T(2).Dtis("--D").FrameDiffs({3, 1}); + } + return template_structure; + } + case 4: { + template_structure.templates.resize(8); + template_structure.templates[0].T(0).Dtis("SSSS"); + template_structure.templates[1].T(0).Dtis("SSSS").FrameDiffs({8}); + template_structure.templates[2].T(1).Dtis("-SRR").FrameDiffs({4}); + template_structure.templates[3].T(1).Dtis("-SRR").FrameDiffs({4, 8}); + template_structure.templates[4].T(2).Dtis("--SR").FrameDiffs({2}); + template_structure.templates[5].T(2).Dtis("--SR").FrameDiffs({2, 4}); + template_structure.templates[6].T(3).Dtis("---D").FrameDiffs({1}); + template_structure.templates[7].T(3).Dtis("---D").FrameDiffs({1, 3}); + return template_structure; + } + default: + RTC_DCHECK_NOTREACHED(); + // To make the compiler happy! + return template_structure; + } +} + +// Returns list of temporal dependencies for each frame in the temporal pattern. +// Values are lists of indices in the pattern. 
+std::vector<std::set<uint8_t>> GetTemporalDependencies( + int num_temporal_layers) { + switch (num_temporal_layers) { + case 1: + return {{0}}; + case 2: + if (!field_trial::IsDisabled("WebRTC-UseShortVP8TL2Pattern")) { + return {{2}, {0}, {0}, {1, 2}}; + } else { + return {{6}, {0}, {0}, {1, 2}, {2}, {3, 4}, {4}, {5, 6}}; + } + case 3: + if (field_trial::IsEnabled("WebRTC-UseShortVP8TL3Pattern")) { + return {{0}, {0}, {0}, {0, 1, 2}}; + } else { + return {{4}, {0}, {0}, {0, 2}, {0}, {2, 4}, {2, 4}, {4, 6}}; + } + case 4: + return {{8}, {0}, {0}, {0, 2}, + {0}, {0, 2, 4}, {0, 2, 4}, {0, 4, 6}, + {0}, {4, 6, 8}, {4, 6, 8}, {4, 8, 10}, + {4, 8}, {8, 10, 12}, {8, 10, 12}, {8, 12, 14}}; + default: + RTC_DCHECK_NOTREACHED(); + return {}; + } +} + +DefaultTemporalLayersChecker::DefaultTemporalLayersChecker( + int num_temporal_layers) + : TemporalLayersChecker(num_temporal_layers), + num_layers_(std::max(1, num_temporal_layers)), + temporal_ids_(GetTemporalIds(num_layers_)), + temporal_dependencies_(GetTemporalDependencies(num_layers_)), + pattern_idx_(255) { + int i = 0; + while (temporal_ids_.size() < temporal_dependencies_.size()) { + temporal_ids_.push_back(temporal_ids_[i++]); + } +} + +DefaultTemporalLayersChecker::~DefaultTemporalLayersChecker() = default; + +bool DefaultTemporalLayersChecker::CheckTemporalConfig( + bool frame_is_keyframe, + const Vp8FrameConfig& frame_config) { + if (!TemporalLayersChecker::CheckTemporalConfig(frame_is_keyframe, + frame_config)) { + return false; + } + if (frame_config.drop_frame) { + return true; + } + + if (frame_is_keyframe) { + pattern_idx_ = 0; + last_ = BufferState(); + golden_ = BufferState(); + arf_ = BufferState(); + return true; + } + + ++pattern_idx_; + if (pattern_idx_ == temporal_ids_.size()) { + // All non key-frame buffers should be updated each pattern cycle. 
+ if (!last_.is_keyframe && !last_.is_updated_this_cycle) { + RTC_LOG(LS_ERROR) << "Last buffer was not updated during pattern cycle."; + return false; + } + if (!arf_.is_keyframe && !arf_.is_updated_this_cycle) { + RTC_LOG(LS_ERROR) << "Arf buffer was not updated during pattern cycle."; + return false; + } + if (!golden_.is_keyframe && !golden_.is_updated_this_cycle) { + RTC_LOG(LS_ERROR) + << "Golden buffer was not updated during pattern cycle."; + return false; + } + last_.is_updated_this_cycle = false; + arf_.is_updated_this_cycle = false; + golden_.is_updated_this_cycle = false; + pattern_idx_ = 0; + } + uint8_t expected_tl_idx = temporal_ids_[pattern_idx_]; + if (frame_config.packetizer_temporal_idx != expected_tl_idx) { + RTC_LOG(LS_ERROR) << "Frame has an incorrect temporal index. Expected: " + << static_cast<int>(expected_tl_idx) << " Actual: " + << static_cast<int>(frame_config.packetizer_temporal_idx); + return false; + } + + bool need_sync = temporal_ids_[pattern_idx_] > 0 && + temporal_ids_[pattern_idx_] != kNoTemporalIdx; + std::vector<int> dependencies; + + if (frame_config.last_buffer_flags & BufferFlags::kReference) { + uint8_t referenced_layer = temporal_ids_[last_.pattern_idx]; + if (referenced_layer > 0) { + need_sync = false; + } + if (!last_.is_keyframe) { + dependencies.push_back(last_.pattern_idx); + } + } else if (frame_config.first_reference == Vp8BufferReference::kLast || + frame_config.second_reference == Vp8BufferReference::kLast) { + RTC_LOG(LS_ERROR) + << "Last buffer not referenced, but present in search order."; + return false; + } + + if (frame_config.arf_buffer_flags & BufferFlags::kReference) { + uint8_t referenced_layer = temporal_ids_[arf_.pattern_idx]; + if (referenced_layer > 0) { + need_sync = false; + } + if (!arf_.is_keyframe) { + dependencies.push_back(arf_.pattern_idx); + } + } else if (frame_config.first_reference == Vp8BufferReference::kAltref || + frame_config.second_reference == Vp8BufferReference::kAltref) { + 
RTC_LOG(LS_ERROR) + << "Altret buffer not referenced, but present in search order."; + return false; + } + + if (frame_config.golden_buffer_flags & BufferFlags::kReference) { + uint8_t referenced_layer = temporal_ids_[golden_.pattern_idx]; + if (referenced_layer > 0) { + need_sync = false; + } + if (!golden_.is_keyframe) { + dependencies.push_back(golden_.pattern_idx); + } + } else if (frame_config.first_reference == Vp8BufferReference::kGolden || + frame_config.second_reference == Vp8BufferReference::kGolden) { + RTC_LOG(LS_ERROR) + << "Golden buffer not referenced, but present in search order."; + return false; + } + + if (need_sync != frame_config.layer_sync) { + RTC_LOG(LS_ERROR) << "Sync bit is set incorrectly on a frame. Expected: " + << need_sync << " Actual: " << frame_config.layer_sync; + return false; + } + + if (!frame_is_keyframe) { + size_t i; + for (i = 0; i < dependencies.size(); ++i) { + if (temporal_dependencies_[pattern_idx_].find(dependencies[i]) == + temporal_dependencies_[pattern_idx_].end()) { + RTC_LOG(LS_ERROR) + << "Illegal temporal dependency out of defined pattern " + "from position " + << static_cast<int>(pattern_idx_) << " to position " + << static_cast<int>(dependencies[i]); + return false; + } + } + } + + if (frame_config.last_buffer_flags & BufferFlags::kUpdate) { + last_.is_updated_this_cycle = true; + last_.pattern_idx = pattern_idx_; + last_.is_keyframe = false; + } + if (frame_config.arf_buffer_flags & BufferFlags::kUpdate) { + arf_.is_updated_this_cycle = true; + arf_.pattern_idx = pattern_idx_; + arf_.is_keyframe = false; + } + if (frame_config.golden_buffer_flags & BufferFlags::kUpdate) { + golden_.is_updated_this_cycle = true; + golden_.pattern_idx = pattern_idx_; + golden_.is_keyframe = false; + } + return true; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp8/default_temporal_layers.h b/third_party/libwebrtc/modules/video_coding/codecs/vp8/default_temporal_layers.h new file mode 
100644 index 0000000000..bc6574c54c --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/default_temporal_layers.h @@ -0,0 +1,168 @@ +/* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +/* + * This file defines classes for doing temporal layers with VP8. + */ +#ifndef MODULES_VIDEO_CODING_CODECS_VP8_DEFAULT_TEMPORAL_LAYERS_H_ +#define MODULES_VIDEO_CODING_CODECS_VP8_DEFAULT_TEMPORAL_LAYERS_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <bitset> +#include <deque> +#include <limits> +#include <memory> +#include <set> +#include <utility> +#include <vector> + +#include "absl/types/optional.h" +#include "api/video_codecs/vp8_frame_config.h" +#include "api/video_codecs/vp8_temporal_layers.h" +#include "modules/video_coding/codecs/vp8/include/temporal_layers_checker.h" +#include "modules/video_coding/include/video_codec_interface.h" + +namespace webrtc { + +class DefaultTemporalLayers final : public Vp8FrameBufferController { + public: + explicit DefaultTemporalLayers(int number_of_temporal_layers); + ~DefaultTemporalLayers() override; + + void SetQpLimits(size_t stream_index, int min_qp, int max_qp) override; + + size_t StreamCount() const override; + + bool SupportsEncoderFrameDropping(size_t stream_index) const override; + + // Returns the recommended VP8 encode flags needed. May refresh the decoder + // and/or update the reference buffers. + Vp8FrameConfig NextFrameConfig(size_t stream_index, + uint32_t timestamp) override; + + // New target bitrate, per temporal layer. 
+ void OnRatesUpdated(size_t stream_index, + const std::vector<uint32_t>& bitrates_bps, + int framerate_fps) override; + + Vp8EncoderConfig UpdateConfiguration(size_t stream_index) override; + + // Callback methods on frame completion. OnEncodeDone() or OnFrameDropped() + // should be called once for each NextFrameConfig() call (using the RTP + // timestamp as ID), and the calls MUST be in the same order. + void OnEncodeDone(size_t stream_index, + uint32_t rtp_timestamp, + size_t size_bytes, + bool is_keyframe, + int qp, + CodecSpecificInfo* info) override; + void OnFrameDropped(size_t stream_index, uint32_t rtp_timestamp) override; + + void OnPacketLossRateUpdate(float packet_loss_rate) override; + + void OnRttUpdate(int64_t rtt_ms) override; + + void OnLossNotification( + const VideoEncoder::LossNotification& loss_notification) override; + + private: + static constexpr size_t kNumReferenceBuffers = 3; // Last, golden, altref. + struct DependencyInfo { + DependencyInfo() = default; + DependencyInfo(absl::string_view indication_symbols, + Vp8FrameConfig frame_config) + : decode_target_indications( + webrtc_impl::StringToDecodeTargetIndications(indication_symbols)), + frame_config(frame_config) {} + + absl::InlinedVector<DecodeTargetIndication, 10> decode_target_indications; + Vp8FrameConfig frame_config; + }; + struct PendingFrame { + PendingFrame(); + PendingFrame(uint32_t timestamp, + bool expired, + uint8_t updated_buffers_mask, + const DependencyInfo& dependency_info); + uint32_t timestamp = 0; + // Flag indicating if this frame has expired, ie it belongs to a previous + // iteration of the temporal pattern. + bool expired = false; + // Bitmask of Vp8BufferReference flags, indicating which buffers this frame + // updates. + uint8_t updated_buffer_mask = 0; + // The frame config returned by NextFrameConfig() for this frame. 
+ DependencyInfo dependency_info; + }; + + static std::vector<DependencyInfo> GetDependencyInfo(size_t num_layers); + static std::bitset<kNumReferenceBuffers> DetermineStaticBuffers( + const std::vector<DependencyInfo>& temporal_pattern); + bool IsSyncFrame(const Vp8FrameConfig& config) const; + void ValidateReferences(Vp8FrameConfig::BufferFlags* flags, + Vp8FrameConfig::Vp8BufferReference ref) const; + void UpdateSearchOrder(Vp8FrameConfig* config); + size_t NumFramesSinceBufferRefresh( + Vp8FrameConfig::Vp8BufferReference ref) const; + void ResetNumFramesSinceBufferRefresh(Vp8FrameConfig::Vp8BufferReference ref); + void CullPendingFramesBefore(uint32_t timestamp); + + const size_t num_layers_; + const std::vector<unsigned int> temporal_ids_; + const std::vector<DependencyInfo> temporal_pattern_; + // Per reference buffer flag indicating if it is static, meaning it is only + // updated by key-frames. + const std::bitset<kNumReferenceBuffers> is_static_buffer_; + FrameDependencyStructure GetTemplateStructure(int num_layers) const; + + uint8_t pattern_idx_; + // Updated cumulative bitrates, per temporal layer. + absl::optional<std::vector<uint32_t>> new_bitrates_bps_; + + // Status for each pending frame; completion callbacks pop from the front. + std::deque<PendingFrame> pending_frames_; + + // One counter per reference buffer, indicating number of frames since last + // refresh. For non-base-layer frames (ie golden, altref buffers), this is + // reset when the pattern loops. + std::array<size_t, kNumReferenceBuffers> frames_since_buffer_refresh_; + + // Optional utility used to verify reference validity. 
+ std::unique_ptr<TemporalLayersChecker> checker_; +}; + +class DefaultTemporalLayersChecker : public TemporalLayersChecker { + public: + explicit DefaultTemporalLayersChecker(int number_of_temporal_layers); + ~DefaultTemporalLayersChecker() override; + + bool CheckTemporalConfig(bool frame_is_keyframe, + const Vp8FrameConfig& frame_config) override; + + private: + struct BufferState { + BufferState() + : is_updated_this_cycle(false), is_keyframe(true), pattern_idx(0) {} + + bool is_updated_this_cycle; + bool is_keyframe; + uint8_t pattern_idx; + }; + const size_t num_layers_; + std::vector<unsigned int> temporal_ids_; + const std::vector<std::set<uint8_t>> temporal_dependencies_; + BufferState last_; + BufferState arf_; + BufferState golden_; + uint8_t pattern_idx_; +}; + +} // namespace webrtc +#endif // MODULES_VIDEO_CODING_CODECS_VP8_DEFAULT_TEMPORAL_LAYERS_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc new file mode 100644 index 0000000000..ae027a9d8a --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc @@ -0,0 +1,781 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/vp8/default_temporal_layers.h" + +#include <cstdint> +#include <memory> + +#include "api/video/video_bitrate_allocation.h" +#include "api/video_codecs/video_codec.h" +#include "api/video_codecs/vp8_frame_config.h" +#include "modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h" +#include "modules/video_coding/include/video_codec_interface.h" +#include "modules/video_coding/utility/simulcast_rate_allocator.h" +#include "test/field_trial.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "vpx/vp8cx.h" + +// TODO(bugs.webrtc.org/10582): Test the behavior of UpdateConfiguration(). + +namespace webrtc { +namespace test { +namespace { + +using ::testing::Each; + +enum { + kTemporalUpdateLast = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF, + kTemporalUpdateGoldenWithoutDependency = + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST, + kTemporalUpdateGolden = + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST, + kTemporalUpdateAltrefWithoutDependency = + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_LAST, + kTemporalUpdateAltref = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST, + kTemporalUpdateNone = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY, + kTemporalUpdateNoneNoRefAltRef = + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY, + kTemporalUpdateNoneNoRefGolden = + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY, + kTemporalUpdateNoneNoRefGoldenAltRef = + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY, + kTemporalUpdateGoldenWithoutDependencyRefAltRef = + VP8_EFLAG_NO_REF_GF | 
VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST, + kTemporalUpdateGoldenRefAltRef = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST, + kTemporalUpdateLastRefAltRef = + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF, + kTemporalUpdateLastAndGoldenRefAltRef = + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF, +}; + +using BufferFlags = Vp8FrameConfig::BufferFlags; +using Vp8BufferReference = Vp8FrameConfig::Vp8BufferReference; + +constexpr uint8_t kNone = static_cast<uint8_t>(Vp8BufferReference::kNone); +constexpr uint8_t kLast = static_cast<uint8_t>(Vp8BufferReference::kLast); +constexpr uint8_t kGolden = static_cast<uint8_t>(Vp8BufferReference::kGolden); +constexpr uint8_t kAltref = static_cast<uint8_t>(Vp8BufferReference::kAltref); +constexpr uint8_t kAll = kLast | kGolden | kAltref; + +constexpr int ToVp8CodecFlags(uint8_t referenced_buffers, + uint8_t updated_buffers, + bool update_entropy) { + return (((referenced_buffers & kLast) == 0) ? VP8_EFLAG_NO_REF_LAST : 0) | + (((referenced_buffers & kGolden) == 0) ? VP8_EFLAG_NO_REF_GF : 0) | + (((referenced_buffers & kAltref) == 0) ? VP8_EFLAG_NO_REF_ARF : 0) | + (((updated_buffers & kLast) == 0) ? VP8_EFLAG_NO_UPD_LAST : 0) | + (((updated_buffers & kGolden) == 0) ? VP8_EFLAG_NO_UPD_GF : 0) | + (((updated_buffers & kAltref) == 0) ? VP8_EFLAG_NO_UPD_ARF : 0) | + (update_entropy ? 
0 : VP8_EFLAG_NO_UPD_ENTROPY); +} + +constexpr int kKeyFrameFlags = ToVp8CodecFlags(kNone, kAll, true); + +std::vector<uint32_t> GetTemporalLayerRates(int target_bitrate_kbps, + int framerate_fps, + int num_temporal_layers) { + VideoCodec codec; + codec.codecType = VideoCodecType::kVideoCodecVP8; + codec.numberOfSimulcastStreams = 1; + codec.maxBitrate = target_bitrate_kbps; + codec.maxFramerate = framerate_fps; + codec.simulcastStream[0].targetBitrate = target_bitrate_kbps; + codec.simulcastStream[0].maxBitrate = target_bitrate_kbps; + codec.simulcastStream[0].numberOfTemporalLayers = num_temporal_layers; + codec.simulcastStream[0].active = true; + SimulcastRateAllocator allocator(codec); + return allocator + .Allocate( + VideoBitrateAllocationParameters(target_bitrate_kbps, framerate_fps)) + .GetTemporalLayerAllocation(0); +} + +constexpr int kDefaultBitrateBps = 500; +constexpr int kDefaultFramerate = 30; +constexpr int kDefaultBytesPerFrame = + (kDefaultBitrateBps / 8) / kDefaultFramerate; +constexpr int kDefaultQp = 2; +} // namespace + +class TemporalLayersTest : public ::testing::Test { + public: + ~TemporalLayersTest() override = default; + + CodecSpecificInfo* IgnoredCodecSpecificInfo() { + codec_specific_info_ = std::make_unique<CodecSpecificInfo>(); + return codec_specific_info_.get(); + } + + private: + std::unique_ptr<CodecSpecificInfo> codec_specific_info_; +}; + +TEST_F(TemporalLayersTest, 2Layers) { + constexpr int kNumLayers = 2; + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); + tl.OnRatesUpdated(0, + GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); + tl.UpdateConfiguration(0); + + constexpr size_t kPatternSize = 4; + constexpr size_t kRepetitions = 4; + + const int expected_flags[kPatternSize] = { + ToVp8CodecFlags(kLast, kLast, true), + ToVp8CodecFlags(kLast, kGolden, true), + ToVp8CodecFlags(kLast, kLast, true), + ToVp8CodecFlags(kLast | kGolden, 
kNone, false), + }; + const int expected_temporal_idx[kPatternSize] = {0, 1, 0, 1}; + const bool expected_layer_sync[kPatternSize] = {false, true, false, false}; + + uint32_t timestamp = 0; + for (size_t i = 0; i < kPatternSize * kRepetitions; ++i) { + const size_t ind = i % kPatternSize; + const bool is_keyframe = (i == 0); + CodecSpecificInfo info; + Vp8FrameConfig tl_config = tl.NextFrameConfig(0, timestamp); + EXPECT_EQ(is_keyframe ? kKeyFrameFlags : expected_flags[ind], + LibvpxVp8Encoder::EncodeFlags(tl_config)) + << i; + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, is_keyframe, + kDefaultQp, &info); + EXPECT_TRUE(checker.CheckTemporalConfig(is_keyframe, tl_config)); + EXPECT_EQ(expected_temporal_idx[ind], info.codecSpecific.VP8.temporalIdx); + EXPECT_EQ(expected_temporal_idx[ind], tl_config.packetizer_temporal_idx); + EXPECT_EQ(expected_temporal_idx[ind], tl_config.encoder_layer_id); + EXPECT_EQ(is_keyframe || expected_layer_sync[ind], + info.codecSpecific.VP8.layerSync); + EXPECT_EQ(expected_layer_sync[ind], tl_config.layer_sync); + timestamp += 3000; + } +} + +TEST_F(TemporalLayersTest, 3Layers) { + constexpr int kNumLayers = 3; + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); + tl.OnRatesUpdated(0, + GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); + tl.UpdateConfiguration(0); + + int expected_flags[16] = { + kTemporalUpdateLast, + kTemporalUpdateNoneNoRefGoldenAltRef, + kTemporalUpdateGoldenWithoutDependency, + kTemporalUpdateNoneNoRefAltRef, + kTemporalUpdateLast, + kTemporalUpdateNoneNoRefAltRef, + kTemporalUpdateGolden, + kTemporalUpdateNoneNoRefAltRef, + kTemporalUpdateLast, + kTemporalUpdateNoneNoRefGoldenAltRef, + kTemporalUpdateGoldenWithoutDependency, + kTemporalUpdateNoneNoRefAltRef, + kTemporalUpdateLast, + kTemporalUpdateNoneNoRefAltRef, + kTemporalUpdateGolden, + kTemporalUpdateNoneNoRefAltRef, + }; + int expected_temporal_idx[16] = {0, 2, 
1, 2, 0, 2, 1, 2, + 0, 2, 1, 2, 0, 2, 1, 2}; + + bool expected_layer_sync[16] = {false, true, true, false, false, false, + false, false, false, true, true, false, + false, false, false, false}; + + unsigned int timestamp = 0; + for (int i = 0; i < 16; ++i) { + const bool is_keyframe = (i == 0); + CodecSpecificInfo info; + Vp8FrameConfig tl_config = tl.NextFrameConfig(0, timestamp); + EXPECT_EQ(is_keyframe ? kKeyFrameFlags : expected_flags[i], + LibvpxVp8Encoder::EncodeFlags(tl_config)) + << i; + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, is_keyframe, + kDefaultQp, &info); + EXPECT_TRUE(checker.CheckTemporalConfig(is_keyframe, tl_config)); + EXPECT_EQ(expected_temporal_idx[i], info.codecSpecific.VP8.temporalIdx); + EXPECT_EQ(expected_temporal_idx[i], tl_config.packetizer_temporal_idx); + EXPECT_EQ(expected_temporal_idx[i], tl_config.encoder_layer_id); + EXPECT_EQ(is_keyframe || expected_layer_sync[i], + info.codecSpecific.VP8.layerSync); + EXPECT_EQ(expected_layer_sync[i], tl_config.layer_sync); + timestamp += 3000; + } +} + +TEST_F(TemporalLayersTest, Alternative3Layers) { + constexpr int kNumLayers = 3; + ScopedFieldTrials field_trial("WebRTC-UseShortVP8TL3Pattern/Enabled/"); + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); + tl.OnRatesUpdated(0, + GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); + tl.UpdateConfiguration(0); + + int expected_flags[8] = {kTemporalUpdateLast, + kTemporalUpdateAltrefWithoutDependency, + kTemporalUpdateGoldenWithoutDependency, + kTemporalUpdateNone, + kTemporalUpdateLast, + kTemporalUpdateAltrefWithoutDependency, + kTemporalUpdateGoldenWithoutDependency, + kTemporalUpdateNone}; + int expected_temporal_idx[8] = {0, 2, 1, 2, 0, 2, 1, 2}; + + bool expected_layer_sync[8] = {false, true, true, false, + false, true, true, false}; + + unsigned int timestamp = 0; + for (int i = 0; i < 8; ++i) { + const bool is_keyframe = (i == 0); + 
CodecSpecificInfo info; + Vp8FrameConfig tl_config = tl.NextFrameConfig(0, timestamp); + EXPECT_EQ(is_keyframe ? kKeyFrameFlags : expected_flags[i], + LibvpxVp8Encoder::EncodeFlags(tl_config)) + << i; + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, is_keyframe, + kDefaultQp, &info); + EXPECT_TRUE(checker.CheckTemporalConfig(is_keyframe, tl_config)); + EXPECT_EQ(expected_temporal_idx[i], info.codecSpecific.VP8.temporalIdx); + EXPECT_EQ(expected_temporal_idx[i], tl_config.packetizer_temporal_idx); + EXPECT_EQ(expected_temporal_idx[i], tl_config.encoder_layer_id); + EXPECT_EQ(is_keyframe || expected_layer_sync[i], + info.codecSpecific.VP8.layerSync); + EXPECT_EQ(expected_layer_sync[i], tl_config.layer_sync); + timestamp += 3000; + } +} + +TEST_F(TemporalLayersTest, SearchOrder) { + constexpr int kNumLayers = 3; + ScopedFieldTrials field_trial("WebRTC-UseShortVP8TL3Pattern/Enabled/"); + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); + tl.OnRatesUpdated(0, + GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); + tl.UpdateConfiguration(0); + + // Use a repeating pattern of tl 0, 2, 1, 2. + // Tl 0, 1, 2 update last, golden, altref respectively. + + // Start with a key-frame. tl_config flags can be ignored. + uint32_t timestamp = 0; + Vp8FrameConfig tl_config = tl.NextFrameConfig(0, timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, true, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // TL2 frame. First one only references TL0. Updates altref. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + EXPECT_EQ(tl_config.first_reference, Vp8BufferReference::kLast); + EXPECT_EQ(tl_config.second_reference, Vp8BufferReference::kNone); + + // TL1 frame. Can only reference TL0. Updated golden. 
+ tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + EXPECT_EQ(tl_config.first_reference, Vp8BufferReference::kLast); + EXPECT_EQ(tl_config.second_reference, Vp8BufferReference::kNone); + + // TL2 frame. Can reference all three buffers. Golden was the last to be + // updated, the next to last was altref. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + EXPECT_EQ(tl_config.first_reference, Vp8BufferReference::kGolden); + EXPECT_EQ(tl_config.second_reference, Vp8BufferReference::kAltref); +} + +TEST_F(TemporalLayersTest, SearchOrderWithDrop) { + constexpr int kNumLayers = 3; + ScopedFieldTrials field_trial("WebRTC-UseShortVP8TL3Pattern/Enabled/"); + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); + tl.OnRatesUpdated(0, + GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); + tl.UpdateConfiguration(0); + + // Use a repeating pattern of tl 0, 2, 1, 2. + // Tl 0, 1, 2 update last, golden, altref respectively. + + // Start with a key-frame. tl_config flags can be ignored. + uint32_t timestamp = 0; + Vp8FrameConfig tl_config = tl.NextFrameConfig(0, timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, true, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // TL2 frame. First one only references TL0. Updates altref. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + EXPECT_EQ(tl_config.first_reference, Vp8BufferReference::kLast); + EXPECT_EQ(tl_config.second_reference, Vp8BufferReference::kNone); + + // Dropped TL1 frame. Can only reference TL0. Should have updated golden. 
+ tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, 0, false, 0, nullptr); + + // TL2 frame. Can normally reference all three buffers, but golden has not + // been populated this cycle. Altref was last to be updated, before that last. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + EXPECT_EQ(tl_config.first_reference, Vp8BufferReference::kAltref); + EXPECT_EQ(tl_config.second_reference, Vp8BufferReference::kLast); +} + +TEST_F(TemporalLayersTest, DoesNotReferenceDroppedFrames) { + constexpr int kNumLayers = 3; + // Use a repeating pattern of tl 0, 2, 1, 2. + // Tl 0, 1, 2 update last, golden, altref respectively. + ScopedFieldTrials field_trial("WebRTC-UseShortVP8TL3Pattern/Enabled/"); + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); + tl.OnRatesUpdated(0, + GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); + tl.UpdateConfiguration(0); + + // Start with a keyframe. + uint32_t timestamp = 0; + Vp8FrameConfig tl_config = tl.NextFrameConfig(0, timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, true, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // Dropped TL2 frame. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, 0, false, 0, nullptr); + + // Dropped TL1 frame. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, 0, false, 0, nullptr); + + // TL2 frame. Can reference all three buffers, valid since golden and altref + // both contain the last keyframe. 
+ tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + EXPECT_TRUE(tl_config.last_buffer_flags & BufferFlags::kReference); + EXPECT_TRUE(tl_config.golden_buffer_flags & BufferFlags::kReference); + EXPECT_TRUE(tl_config.arf_buffer_flags & BufferFlags::kReference); + + // Restart of cycle! + + // TL0 base layer frame, updating and referencing last. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // TL2 frame, updating altref. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // TL1 frame, updating golden. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // TL2 frame. Can still reference all buffers since they have been updated + // this cycle. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + EXPECT_TRUE(tl_config.last_buffer_flags & BufferFlags::kReference); + EXPECT_TRUE(tl_config.golden_buffer_flags & BufferFlags::kReference); + EXPECT_TRUE(tl_config.arf_buffer_flags & BufferFlags::kReference); + + // Restart of cycle! + + // TL0 base layer frame, updating and referencing last. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // Dropped TL2 frame. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, 0, false, 0, nullptr); + + // Dropped TL1 frame. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, 0, false, 0, nullptr); + + // TL2 frame. 
This time golden and altref contain data from the previous cycle + // and cannot be referenced. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + EXPECT_TRUE(tl_config.last_buffer_flags & BufferFlags::kReference); + EXPECT_FALSE(tl_config.golden_buffer_flags & BufferFlags::kReference); + EXPECT_FALSE(tl_config.arf_buffer_flags & BufferFlags::kReference); +} + +TEST_F(TemporalLayersTest, DoesNotReferenceUnlessGuaranteedToExist) { + constexpr int kNumLayers = 3; + // Use a repeating pattern of tl 0, 2, 1, 2. + // Tl 0, 1 updates last, golden respectively. Altref is always last keyframe. + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); + tl.OnRatesUpdated(0, + GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); + tl.UpdateConfiguration(0); + + // Start with a keyframe. + uint32_t timestamp = 0; + Vp8FrameConfig tl_config = tl.NextFrameConfig(0, timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, true, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // Do a full cycle of the pattern. + for (int i = 0; i < 7; ++i) { + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + } + + // TL0 base layer frame, starting the cycle over. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // TL2 frame. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // Encoder has a hiccup and builds a queue, so frame encoding is delayed. + // TL1 frame, updating golden. 
+ tl_config = tl.NextFrameConfig(0, ++timestamp); + + // TL2 frame, that should be referencing golden, but we can't be certain it's + // not going to be dropped, so that is not allowed. + tl_config = tl.NextFrameConfig(0, timestamp + 1); + EXPECT_TRUE(tl_config.last_buffer_flags & BufferFlags::kReference); + EXPECT_FALSE(tl_config.golden_buffer_flags & BufferFlags::kReference); + EXPECT_FALSE(tl_config.arf_buffer_flags & BufferFlags::kReference); + + // TL0 base layer frame. + tl_config = tl.NextFrameConfig(0, timestamp + 2); + + // The previous four enqueued frames finally get encoded, and the updated + // buffers are now OK to reference. + // Enqueued TL1 frame ready. + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + // Enqueued TL2 frame. + tl.OnEncodeDone(0, ++timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + // Enqueued TL0 frame. + tl.OnEncodeDone(0, ++timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // TL2 frame, all buffers are now in a known good state, OK to reference. + tl_config = tl.NextFrameConfig(0, ++timestamp + 1); + EXPECT_TRUE(tl_config.last_buffer_flags & BufferFlags::kReference); + EXPECT_TRUE(tl_config.golden_buffer_flags & BufferFlags::kReference); + EXPECT_FALSE(tl_config.arf_buffer_flags & BufferFlags::kReference); +} + +TEST_F(TemporalLayersTest, DoesNotReferenceUnlessGuaranteedToExistLongDelay) { + constexpr int kNumLayers = 3; + // Use a repeating pattern of tl 0, 2, 1, 2. + // Tl 0, 1 updates last, golden, altref respectively. + ScopedFieldTrials field_trial("WebRTC-UseShortVP8TL3Pattern/Enabled/"); + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); + tl.OnRatesUpdated(0, + GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); + tl.UpdateConfiguration(0); + + // Start with a keyframe. 
+ uint32_t timestamp = 0; + Vp8FrameConfig tl_config = tl.NextFrameConfig(0, timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, true, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // Do a full cycle of the pattern. + for (int i = 0; i < 3; ++i) { + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + } + + // TL0 base layer frame, starting the cycle over. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // TL2 frame. + tl_config = tl.NextFrameConfig(0, ++timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // Encoder has a hiccup and builds a queue, so frame encoding is delayed. + // Encoded, but delayed frames in TL 1, 2. + tl_config = tl.NextFrameConfig(0, timestamp + 1); + tl_config = tl.NextFrameConfig(0, timestamp + 2); + + // Restart of the pattern! + + // Encoded, but delayed frames in TL 2, 1. + tl_config = tl.NextFrameConfig(0, timestamp + 3); + tl_config = tl.NextFrameConfig(0, timestamp + 4); + + // TL1 frame from last cycle is ready. + tl.OnEncodeDone(0, timestamp + 1, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + // TL2 frame from last cycle is ready. + tl.OnEncodeDone(0, timestamp + 2, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // TL2 frame, that should be referencing all buffers, but altref and golden + // haven not been updated this cycle. (Don't be fooled by the late frames from + // the last cycle!) 
+ tl_config = tl.NextFrameConfig(0, timestamp + 5); + EXPECT_TRUE(tl_config.last_buffer_flags & BufferFlags::kReference); + EXPECT_FALSE(tl_config.golden_buffer_flags & BufferFlags::kReference); + EXPECT_FALSE(tl_config.arf_buffer_flags & BufferFlags::kReference); +} + +TEST_F(TemporalLayersTest, KeyFrame) { + constexpr int kNumLayers = 3; + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); + tl.OnRatesUpdated(0, + GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); + tl.UpdateConfiguration(0); + + int expected_flags[8] = { + kTemporalUpdateLastRefAltRef, + kTemporalUpdateNoneNoRefGoldenAltRef, + kTemporalUpdateGoldenWithoutDependency, + kTemporalUpdateNoneNoRefAltRef, + kTemporalUpdateLast, + kTemporalUpdateNoneNoRefAltRef, + kTemporalUpdateGolden, + kTemporalUpdateNone, + }; + int expected_temporal_idx[8] = {0, 2, 1, 2, 0, 2, 1, 2}; + bool expected_layer_sync[8] = {true, true, true, false, + false, false, false, false}; + + uint32_t timestamp = 0; + for (int i = 0; i < 7; ++i) { + // Temporal pattern starts from 0 after key frame. Let the first `i` - 1 + // frames be delta frames, and the `i`th one key frame. + for (int j = 1; j <= i; ++j) { + // Since last frame was always a keyframe and thus index 0 in the pattern, + // this loop starts at index 1. 
+ Vp8FrameConfig tl_config = tl.NextFrameConfig(0, timestamp); + EXPECT_EQ(expected_flags[j], LibvpxVp8Encoder::EncodeFlags(tl_config)) + << j; + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + EXPECT_TRUE(checker.CheckTemporalConfig(false, tl_config)); + EXPECT_EQ(expected_temporal_idx[j], tl_config.packetizer_temporal_idx); + EXPECT_EQ(expected_temporal_idx[j], tl_config.encoder_layer_id); + EXPECT_EQ(expected_layer_sync[j], tl_config.layer_sync); + timestamp += 3000; + } + + CodecSpecificInfo info; + Vp8FrameConfig tl_config = tl.NextFrameConfig(0, timestamp); + tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, true, kDefaultQp, + &info); + EXPECT_TRUE(info.codecSpecific.VP8.layerSync) + << "Key frame should be marked layer sync."; + EXPECT_EQ(0, info.codecSpecific.VP8.temporalIdx) + << "Key frame should always be packetized as layer 0"; + EXPECT_EQ(0, info.generic_frame_info->temporal_id) + << "Key frame should always be packetized as layer 0"; + EXPECT_THAT(info.generic_frame_info->decode_target_indications, + Each(DecodeTargetIndication::kSwitch)) + << "Key frame is universal switch"; + EXPECT_TRUE(checker.CheckTemporalConfig(true, tl_config)); + } +} + +TEST_F(TemporalLayersTest, SetsTlCountOnFirstConfigUpdate) { + // Create an instance and fetch config update without setting any rate. + constexpr int kNumLayers = 2; + DefaultTemporalLayers tl(kNumLayers); + Vp8EncoderConfig config = tl.UpdateConfiguration(0); + + // Config should indicate correct number of temporal layers, but zero bitrate. + ASSERT_TRUE(config.temporal_layer_config.has_value()); + EXPECT_EQ(config.temporal_layer_config->ts_number_layers, + uint32_t{kNumLayers}); + std::array<uint32_t, Vp8EncoderConfig::TemporalLayerConfig::kMaxLayers> + kZeroRate = {}; + EXPECT_EQ(config.temporal_layer_config->ts_target_bitrate, kZeroRate); + + // On second call, no new update. 
+ config = tl.UpdateConfiguration(0); + EXPECT_FALSE(config.temporal_layer_config.has_value()); +} + +class TemporalLayersReferenceTest : public TemporalLayersTest, + public ::testing::WithParamInterface<int> { + public: + TemporalLayersReferenceTest() + : timestamp_(1), + last_sync_timestamp_(timestamp_), + tl0_reference_(nullptr) {} + virtual ~TemporalLayersReferenceTest() {} + + protected: + static const int kMaxPatternLength = 32; + + struct BufferState { + BufferState() : BufferState(-1, 0, false) {} + BufferState(int temporal_idx, uint32_t timestamp, bool sync) + : temporal_idx(temporal_idx), timestamp(timestamp), sync(sync) {} + int temporal_idx; + uint32_t timestamp; + bool sync; + }; + + bool UpdateSyncRefState(const BufferFlags& flags, BufferState* buffer_state) { + if (flags & BufferFlags::kReference) { + if (buffer_state->temporal_idx == -1) + return true; // References key-frame. + if (buffer_state->temporal_idx == 0) { + // No more than one reference to TL0 frame. + EXPECT_EQ(nullptr, tl0_reference_); + tl0_reference_ = buffer_state; + return true; + } + return false; // References higher layer. + } + return true; // No reference, does not affect sync frame status. + } + + void ValidateReference(const BufferFlags& flags, + const BufferState& buffer_state, + int temporal_layer) { + if (flags & BufferFlags::kReference) { + if (temporal_layer > 0 && buffer_state.timestamp > 0) { + // Check that high layer reference does not go past last sync frame. + EXPECT_GE(buffer_state.timestamp, last_sync_timestamp_); + } + // No reference to buffer in higher layer. 
+ EXPECT_LE(buffer_state.temporal_idx, temporal_layer); + } + } + + uint32_t timestamp_ = 1; + uint32_t last_sync_timestamp_ = timestamp_; + BufferState* tl0_reference_; + + BufferState last_state; + BufferState golden_state; + BufferState altref_state; +}; + +INSTANTIATE_TEST_SUITE_P(DefaultTemporalLayersTest, + TemporalLayersReferenceTest, + ::testing::Range(1, kMaxTemporalStreams + 1)); + +TEST_P(TemporalLayersReferenceTest, ValidFrameConfigs) { + const int num_layers = GetParam(); + DefaultTemporalLayers tl(num_layers); + tl.OnRatesUpdated( + 0, GetTemporalLayerRates(kDefaultBytesPerFrame, kDefaultFramerate, 1), + kDefaultFramerate); + tl.UpdateConfiguration(0); + + // Run through the pattern and store the frame dependencies, plus keep track + // of the buffer state; which buffers references which temporal layers (if + // (any). If a given buffer is never updated, it is legal to reference it + // even for sync frames. In order to be general, don't assume TL0 always + // updates `last`. + std::vector<Vp8FrameConfig> tl_configs(kMaxPatternLength); + for (int i = 0; i < kMaxPatternLength; ++i) { + Vp8FrameConfig tl_config = tl.NextFrameConfig(0, timestamp_); + tl.OnEncodeDone(0, timestamp_, kDefaultBytesPerFrame, i == 0, kDefaultQp, + IgnoredCodecSpecificInfo()); + ++timestamp_; + EXPECT_FALSE(tl_config.drop_frame); + tl_configs.push_back(tl_config); + int temporal_idx = tl_config.encoder_layer_id; + // For the default layers, always keep encoder and rtp layers in sync. + EXPECT_EQ(tl_config.packetizer_temporal_idx, temporal_idx); + + // Determine if this frame is in a higher layer but references only TL0 + // or untouched buffers, if so verify it is marked as a layer sync. + bool is_sync_frame = true; + tl0_reference_ = nullptr; + if (temporal_idx <= 0) { + is_sync_frame = false; // TL0 by definition not a sync frame. 
+ } else if (!UpdateSyncRefState(tl_config.last_buffer_flags, &last_state)) { + is_sync_frame = false; + } else if (!UpdateSyncRefState(tl_config.golden_buffer_flags, + &golden_state)) { + is_sync_frame = false; + } else if (!UpdateSyncRefState(tl_config.arf_buffer_flags, &altref_state)) { + is_sync_frame = false; + } + if (is_sync_frame) { + // Cache timestamp for last found sync frame, so that we can verify no + // references back past this frame. + ASSERT_TRUE(tl0_reference_); + last_sync_timestamp_ = tl0_reference_->timestamp; + } + EXPECT_EQ(tl_config.layer_sync, is_sync_frame); + + // Validate no reference from lower to high temporal layer, or backwards + // past last reference frame. + ValidateReference(tl_config.last_buffer_flags, last_state, temporal_idx); + ValidateReference(tl_config.golden_buffer_flags, golden_state, + temporal_idx); + ValidateReference(tl_config.arf_buffer_flags, altref_state, temporal_idx); + + // Update the current layer state. + BufferState state = {temporal_idx, timestamp_, is_sync_frame}; + if (tl_config.last_buffer_flags & BufferFlags::kUpdate) + last_state = state; + if (tl_config.golden_buffer_flags & BufferFlags::kUpdate) + golden_state = state; + if (tl_config.arf_buffer_flags & BufferFlags::kUpdate) + altref_state = state; + } +} +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp8/include/temporal_layers_checker.h b/third_party/libwebrtc/modules/video_coding/codecs/vp8/include/temporal_layers_checker.h new file mode 100644 index 0000000000..3d1671a676 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/include/temporal_layers_checker.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
// Interface for a class that verifies correctness of temporal layer
// configurations (dependencies, sync flag, etc).
// Intended to be used in tests as well as with real apps in debug mode.
//
// The member functions are only declared here; their definitions live in a
// separate source file.
class TemporalLayersChecker {
 public:
  // `num_temporal_layers` is the number of temporal layers the checked
  // configurations are expected to use.
  explicit TemporalLayersChecker(int num_temporal_layers);
  virtual ~TemporalLayersChecker() {}

  // Checks `frame_config` for the next frame; `frame_is_keyframe` tells
  // whether that frame is a key frame. Virtual, so subclasses can add their
  // own checks.
  // NOTE(review): presumably returns true when the configuration is valid;
  // confirm against the definition.
  virtual bool CheckTemporalConfig(bool frame_is_keyframe,
                                   const Vp8FrameConfig& frame_config);

  // Factory returning a checker for the given temporal layers `type` and
  // layer count. The caller owns the returned object.
  static std::unique_ptr<TemporalLayersChecker> CreateTemporalLayersChecker(
      Vp8TemporalLayersType type,
      int num_temporal_layers);

 private:
  // Bookkeeping for one reference buffer. A default-constructed state
  // describes a key frame in temporal layer 0 with sequence number 0.
  struct BufferState {
    BufferState() : is_keyframe(true), temporal_layer(0), sequence_number(0) {}
    bool is_keyframe;
    uint8_t temporal_layer;
    uint32_t sequence_number;
  };
  // Per-buffer helper for the config check. `need_sync` and
  // `lowest_sequence_referenced` are in/out parameters.
  // NOTE(review): exact update rules are in the definition, not visible here.
  bool CheckAndUpdateBufferState(BufferState* state,
                                 bool* need_sync,
                                 bool frame_is_keyframe,
                                 uint8_t temporal_layer,
                                 Vp8FrameConfig::BufferFlags flags,
                                 uint32_t sequence_number,
                                 uint32_t* lowest_sequence_referenced);
  // State of the three VP8 reference buffers (last, altref, golden).
  BufferState last_;
  BufferState arf_;
  BufferState golden_;
  int num_temporal_layers_;
  uint32_t sequence_number_;
  uint32_t last_sync_sequence_number_;
  uint32_t last_tl0_sequence_number_;
};
0000000000..2fc647874f --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/include/vp8.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_VP8_INCLUDE_VP8_H_ +#define MODULES_VIDEO_CODING_CODECS_VP8_INCLUDE_VP8_H_ + +#include <memory> +#include <vector> + +#include "api/video_codecs/video_encoder.h" +#include "api/video_codecs/vp8_frame_buffer_controller.h" +#include "modules/video_coding/include/video_codec_interface.h" + +namespace webrtc { + +// TODO(brandtr): Move these interfaces to the api/ folder. +class VP8Encoder { + public: + struct Settings { + // Allows for overriding the Vp8FrameBufferController used by the encoder. + // If unset, a default Vp8FrameBufferController will be instantiated + // internally. + std::unique_ptr<Vp8FrameBufferControllerFactory> + frame_buffer_controller_factory = nullptr; + + // Allows for overriding the resolution/bitrate limits exposed through + // VideoEncoder::GetEncoderInfo(). No override is done if empty. 
// VP8-specific fields carried alongside an RTP video packet.
//
// The members have no default initializers, so a default-initialized
// instance holds indeterminate values until InitRTPVideoHeaderVP8() is
// called (or the struct is value-initialized).
struct RTPVideoHeaderVP8 {
  // Resets every field to its "not set" state: the optional indices get their
  // kNo* sentinel values, the flags are cleared and the partition id is 0.
  void InitRTPVideoHeaderVP8() {
    nonReference = false;
    pictureId = kNoPictureId;
    tl0PicIdx = kNoTl0PicIdx;
    temporalIdx = kNoTemporalIdx;
    layerSync = false;
    keyIdx = kNoKeyIdx;
    partitionId = 0;
    beginningOfPartition = false;
  }

  bool nonReference;          // Frame is discardable.
  int16_t pictureId;          // Picture ID index, 15 bits;
                              // kNoPictureId if PictureID does not exist.
  int16_t tl0PicIdx;          // TL0PIC_IDX, 8 bits;
                              // kNoTl0PicIdx means no value provided.
  uint8_t temporalIdx;        // Temporal layer index, or kNoTemporalIdx.
  bool layerSync;             // This frame is a layer sync frame.
                              // Disabled if temporalIdx == kNoTemporalIdx.
  int keyIdx;                 // 5 bits; kNoKeyIdx means not used.
  int partitionId;            // VP8 partition ID
  bool beginningOfPartition;  // True if this packet is the first
                              // in a VP8 partition. Otherwise false
};
+ */ + +#include "modules/video_coding/codecs/vp8/libvpx_vp8_decoder.h" + +#include <stdio.h> +#include <string.h> + +#include <algorithm> +#include <memory> +#include <string> + +#include "absl/types/optional.h" +#include "api/scoped_refptr.h" +#include "api/video/i420_buffer.h" +#include "api/video/video_frame.h" +#include "api/video/video_frame_buffer.h" +#include "api/video/video_rotation.h" +#include "modules/video_coding/codecs/vp8/include/vp8.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/exp_filter.h" +#include "rtc_base/time_utils.h" +#include "system_wrappers/include/field_trial.h" +#include "system_wrappers/include/metrics.h" +#include "libyuv/include/libyuv/convert.h" +#include "vpx/vp8.h" +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" + +namespace webrtc { +namespace { +constexpr int kVp8ErrorPropagationTh = 30; +// vpx_decoder.h documentation indicates decode deadline is time in us, with +// "Set to zero for unlimited.", but actual implementation requires this to be +// a mode with 0 meaning allow delay and 1 not allowing it. +constexpr long kDecodeDeadlineRealtime = 1; // NOLINT + +const char kVp8PostProcArmFieldTrial[] = "WebRTC-VP8-Postproc-Config-Arm"; +const char kVp8PostProcFieldTrial[] = "WebRTC-VP8-Postproc-Config"; + +#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || \ + defined(WEBRTC_ANDROID) || defined(WEBRTC_ARCH_MIPS) +constexpr bool kIsArm = true; +#else +constexpr bool kIsArm = false; +#endif + +absl::optional<LibvpxVp8Decoder::DeblockParams> DefaultDeblockParams() { + return LibvpxVp8Decoder::DeblockParams(/*max_level=*/8, + /*degrade_qp=*/60, + /*min_qp=*/30); +} + +absl::optional<LibvpxVp8Decoder::DeblockParams> +GetPostProcParamsFromFieldTrialGroup() { + std::string group = webrtc::field_trial::FindFullName( + kIsArm ? 
kVp8PostProcArmFieldTrial : kVp8PostProcFieldTrial); + if (group.empty()) { + return DefaultDeblockParams(); + } + + LibvpxVp8Decoder::DeblockParams params; + if (sscanf(group.c_str(), "Enabled-%d,%d,%d", ¶ms.max_level, + ¶ms.min_qp, ¶ms.degrade_qp) != 3) { + return DefaultDeblockParams(); + } + + if (params.max_level < 0 || params.max_level > 16) { + return DefaultDeblockParams(); + } + + if (params.min_qp < 0 || params.degrade_qp <= params.min_qp) { + return DefaultDeblockParams(); + } + + return params; +} + +} // namespace + +std::unique_ptr<VideoDecoder> VP8Decoder::Create() { + return std::make_unique<LibvpxVp8Decoder>(); +} + +class LibvpxVp8Decoder::QpSmoother { + public: + QpSmoother() : last_sample_ms_(rtc::TimeMillis()), smoother_(kAlpha) {} + + int GetAvg() const { + float value = smoother_.filtered(); + return (value == rtc::ExpFilter::kValueUndefined) ? 0 + : static_cast<int>(value); + } + + void Add(float sample) { + int64_t now_ms = rtc::TimeMillis(); + smoother_.Apply(static_cast<float>(now_ms - last_sample_ms_), sample); + last_sample_ms_ = now_ms; + } + + void Reset() { smoother_.Reset(kAlpha); } + + private: + const float kAlpha = 0.95f; + int64_t last_sample_ms_; + rtc::ExpFilter smoother_; +}; + +LibvpxVp8Decoder::LibvpxVp8Decoder() + : use_postproc_( + kIsArm ? webrtc::field_trial::IsEnabled(kVp8PostProcArmFieldTrial) + : true), + buffer_pool_(false, 300 /* max_number_of_buffers*/), + decode_complete_callback_(NULL), + inited_(false), + decoder_(NULL), + propagation_cnt_(-1), + last_frame_width_(0), + last_frame_height_(0), + key_frame_required_(true), + deblock_params_(use_postproc_ ? GetPostProcParamsFromFieldTrialGroup() + : absl::nullopt), + qp_smoother_(use_postproc_ ? new QpSmoother() : nullptr), + preferred_output_format_(field_trial::IsEnabled("WebRTC-NV12Decode") + ? 
VideoFrameBuffer::Type::kNV12 + : VideoFrameBuffer::Type::kI420) {} + +LibvpxVp8Decoder::~LibvpxVp8Decoder() { + inited_ = true; // in order to do the actual release + Release(); +} + +bool LibvpxVp8Decoder::Configure(const Settings& settings) { + if (Release() < 0) { + return false; + } + if (decoder_ == NULL) { + decoder_ = new vpx_codec_ctx_t; + memset(decoder_, 0, sizeof(*decoder_)); + } + vpx_codec_dec_cfg_t cfg; + // Setting number of threads to a constant value (1) + cfg.threads = 1; + cfg.h = cfg.w = 0; // set after decode + + vpx_codec_flags_t flags = use_postproc_ ? VPX_CODEC_USE_POSTPROC : 0; + + if (vpx_codec_dec_init(decoder_, vpx_codec_vp8_dx(), &cfg, flags)) { + delete decoder_; + decoder_ = nullptr; + return false; + } + + propagation_cnt_ = -1; + inited_ = true; + + // Always start with a complete key frame. + key_frame_required_ = true; + if (absl::optional<int> buffer_pool_size = settings.buffer_pool_size()) { + if (!buffer_pool_.Resize(*buffer_pool_size)) { + return false; + } + } + return true; +} + +int LibvpxVp8Decoder::Decode(const EncodedImage& input_image, + bool missing_frames, + int64_t /*render_time_ms*/) { + if (!inited_) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (decode_complete_callback_ == NULL) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (input_image.data() == NULL && input_image.size() > 0) { + // Reset to avoid requesting key frames too often. + if (propagation_cnt_ > 0) + propagation_cnt_ = 0; + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + +// Post process configurations. + if (use_postproc_) { + vp8_postproc_cfg_t ppcfg; + // MFQE enabled to reduce key frame popping. + ppcfg.post_proc_flag = VP8_MFQE; + + if (kIsArm) { + RTC_DCHECK(deblock_params_.has_value()); + } + if (deblock_params_.has_value()) { + // For low resolutions, use stronger deblocking filter. 
+ int last_width_x_height = last_frame_width_ * last_frame_height_; + if (last_width_x_height > 0 && last_width_x_height <= 320 * 240) { + // Enable the deblock and demacroblocker based on qp thresholds. + RTC_DCHECK(qp_smoother_); + int qp = qp_smoother_->GetAvg(); + if (qp > deblock_params_->min_qp) { + int level = deblock_params_->max_level; + if (qp < deblock_params_->degrade_qp) { + // Use lower level. + level = deblock_params_->max_level * + (qp - deblock_params_->min_qp) / + (deblock_params_->degrade_qp - deblock_params_->min_qp); + } + // Deblocking level only affects VP8_DEMACROBLOCK. + ppcfg.deblocking_level = std::max(level, 1); + ppcfg.post_proc_flag |= VP8_DEBLOCK | VP8_DEMACROBLOCK; + } + } + } else { + // Non-arm with no explicit deblock params set. + ppcfg.post_proc_flag |= VP8_DEBLOCK; + // For VGA resolutions and lower, enable the demacroblocker postproc. + if (last_frame_width_ * last_frame_height_ <= 640 * 360) { + ppcfg.post_proc_flag |= VP8_DEMACROBLOCK; + } + // Strength of deblocking filter. Valid range:[0,16] + ppcfg.deblocking_level = 3; + } + + vpx_codec_control(decoder_, VP8_SET_POSTPROC, &ppcfg); + } + + // Always start with a complete key frame. + if (key_frame_required_) { + if (input_image._frameType != VideoFrameType::kVideoFrameKey) + return WEBRTC_VIDEO_CODEC_ERROR; + key_frame_required_ = false; + } + // Restrict error propagation using key frame requests. + // Reset on a key frame refresh. + if (input_image._frameType == VideoFrameType::kVideoFrameKey) { + propagation_cnt_ = -1; + // Start count on first loss. + } else if (missing_frames && propagation_cnt_ == -1) { + propagation_cnt_ = 0; + } + if (propagation_cnt_ >= 0) { + propagation_cnt_++; + } + + vpx_codec_iter_t iter = NULL; + vpx_image_t* img; + int ret; + + // Check for missing frames. + if (missing_frames) { + // Call decoder with zero data length to signal missing frames. 
+ if (vpx_codec_decode(decoder_, NULL, 0, 0, kDecodeDeadlineRealtime)) { + // Reset to avoid requesting key frames too often. + if (propagation_cnt_ > 0) + propagation_cnt_ = 0; + return WEBRTC_VIDEO_CODEC_ERROR; + } + img = vpx_codec_get_frame(decoder_, &iter); + iter = NULL; + } + + const uint8_t* buffer = input_image.data(); + if (input_image.size() == 0) { + buffer = NULL; // Triggers full frame concealment. + } + if (vpx_codec_decode(decoder_, buffer, input_image.size(), 0, + kDecodeDeadlineRealtime)) { + // Reset to avoid requesting key frames too often. + if (propagation_cnt_ > 0) { + propagation_cnt_ = 0; + } + return WEBRTC_VIDEO_CODEC_ERROR; + } + + img = vpx_codec_get_frame(decoder_, &iter); + int qp; + vpx_codec_err_t vpx_ret = + vpx_codec_control(decoder_, VPXD_GET_LAST_QUANTIZER, &qp); + RTC_DCHECK_EQ(vpx_ret, VPX_CODEC_OK); + ret = ReturnFrame(img, input_image.Timestamp(), qp, input_image.ColorSpace()); + if (ret != 0) { + // Reset to avoid requesting key frames too often. + if (ret < 0 && propagation_cnt_ > 0) + propagation_cnt_ = 0; + return ret; + } + // Check Vs. threshold + if (propagation_cnt_ > kVp8ErrorPropagationTh) { + // Reset to avoid requesting key frames too often. + propagation_cnt_ = 0; + return WEBRTC_VIDEO_CODEC_ERROR; + } + return WEBRTC_VIDEO_CODEC_OK; +} + +int LibvpxVp8Decoder::ReturnFrame( + const vpx_image_t* img, + uint32_t timestamp, + int qp, + const webrtc::ColorSpace* explicit_color_space) { + if (img == NULL) { + // Decoder OK and NULL image => No show frame + return WEBRTC_VIDEO_CODEC_NO_OUTPUT; + } + if (qp_smoother_) { + if (last_frame_width_ != static_cast<int>(img->d_w) || + last_frame_height_ != static_cast<int>(img->d_h)) { + qp_smoother_->Reset(); + } + qp_smoother_->Add(qp); + } + last_frame_width_ = img->d_w; + last_frame_height_ = img->d_h; + // Allocate memory for decoded image. 
+ rtc::scoped_refptr<VideoFrameBuffer> buffer; + + if (preferred_output_format_ == VideoFrameBuffer::Type::kNV12) { + // Convert instead of making a copy. + // Note: libvpx doesn't support creating NV12 image directly. + // Due to the bitstream structure such a change would just hide the + // conversion operation inside the decode call. + rtc::scoped_refptr<NV12Buffer> nv12_buffer = + buffer_pool_.CreateNV12Buffer(img->d_w, img->d_h); + buffer = nv12_buffer; + if (nv12_buffer.get()) { + libyuv::I420ToNV12(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y], + img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U], + img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V], + nv12_buffer->MutableDataY(), nv12_buffer->StrideY(), + nv12_buffer->MutableDataUV(), nv12_buffer->StrideUV(), + img->d_w, img->d_h); + } + } else { + rtc::scoped_refptr<I420Buffer> i420_buffer = + buffer_pool_.CreateI420Buffer(img->d_w, img->d_h); + buffer = i420_buffer; + if (i420_buffer.get()) { + libyuv::I420Copy(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y], + img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U], + img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V], + i420_buffer->MutableDataY(), i420_buffer->StrideY(), + i420_buffer->MutableDataU(), i420_buffer->StrideU(), + i420_buffer->MutableDataV(), i420_buffer->StrideV(), + img->d_w, img->d_h); + } + } + + if (!buffer.get()) { + // Pool has too many pending frames. 
+ RTC_HISTOGRAM_BOOLEAN("WebRTC.Video.LibvpxVp8Decoder.TooManyPendingFrames", + 1); + return WEBRTC_VIDEO_CODEC_NO_OUTPUT; + } + + VideoFrame decoded_image = VideoFrame::Builder() + .set_video_frame_buffer(buffer) + .set_timestamp_rtp(timestamp) + .set_color_space(explicit_color_space) + .build(); + decode_complete_callback_->Decoded(decoded_image, absl::nullopt, qp); + + return WEBRTC_VIDEO_CODEC_OK; +} + +int LibvpxVp8Decoder::RegisterDecodeCompleteCallback( + DecodedImageCallback* callback) { + decode_complete_callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +int LibvpxVp8Decoder::Release() { + int ret_val = WEBRTC_VIDEO_CODEC_OK; + + if (decoder_ != NULL) { + if (inited_) { + if (vpx_codec_destroy(decoder_)) { + ret_val = WEBRTC_VIDEO_CODEC_MEMORY; + } + } + delete decoder_; + decoder_ = NULL; + } + buffer_pool_.Release(); + inited_ = false; + return ret_val; +} + +VideoDecoder::DecoderInfo LibvpxVp8Decoder::GetDecoderInfo() const { + DecoderInfo info; + info.implementation_name = "libvpx"; + info.is_hardware_accelerated = false; + return info; +} + +const char* LibvpxVp8Decoder::ImplementationName() const { + return "libvpx"; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.h b/third_party/libwebrtc/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.h new file mode 100644 index 0000000000..4d1e20d246 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
// VideoDecoder implementation for VP8 backed by libvpx.
class LibvpxVp8Decoder : public VideoDecoder {
 public:
  LibvpxVp8Decoder();
  ~LibvpxVp8Decoder() override;

  // (Re)initializes the libvpx decoder context. Returns false on failure.
  bool Configure(const Settings& settings) override;
  // Decodes `input_image`; decoded frames are delivered through the callback
  // set with RegisterDecodeCompleteCallback(). Returns a
  // WEBRTC_VIDEO_CODEC_* status code.
  int Decode(const EncodedImage& input_image,
             bool missing_frames,
             int64_t /*render_time_ms*/) override;

  // Sets the (non-owned) callback that receives decoded frames.
  int RegisterDecodeCompleteCallback(DecodedImageCallback* callback) override;
  // Frees the decoder context and pooled buffers.
  int Release() override;

  DecoderInfo GetDecoderInfo() const override;
  const char* ImplementationName() const override;

  // Thresholds controlling the post-processing deblocking filter.
  // Default construction yields max_level = 6, degrade_qp = 1, min_qp = 0.
  struct DeblockParams {
    DeblockParams() : max_level(6), degrade_qp(1), min_qp(0) {}
    DeblockParams(int max_level, int degrade_qp, int min_qp)
        : max_level(max_level), degrade_qp(degrade_qp), min_qp(min_qp) {}
    int max_level;   // Deblocking strength: [0, 16].
    int degrade_qp;  // If QP value is below, start lowering `max_level`.
    int min_qp;      // If QP value is below, turn off deblocking.
  };

 private:
  // Smoothed QP estimate; defined in the .cc file.
  class QpSmoother;
  // Copies/converts `img` into a pooled buffer and passes it to the callback.
  int ReturnFrame(const vpx_image_t* img,
                  uint32_t timeStamp,
                  int qp,
                  const webrtc::ColorSpace* explicit_color_space);
  // Whether libvpx post-processing is enabled. Declared first because
  // `deblock_params_` and `qp_smoother_` are initialized from it.
  const bool use_postproc_;

  VideoFrameBufferPool buffer_pool_;
  // Not owned.
  DecodedImageCallback* decode_complete_callback_;
  bool inited_;
  // Owned raw pointer; allocated in Configure(), freed in Release().
  vpx_codec_ctx_t* decoder_;
  // Frames decoded since the first loss; -1 while no loss is being tracked.
  int propagation_cnt_;
  // Dimensions of the most recently returned frame (0 before the first one).
  int last_frame_width_;
  int last_frame_height_;
  // True until a key frame has been accepted after (re)configuration.
  bool key_frame_required_;
  // Unset when post-processing is disabled.
  const absl::optional<DeblockParams> deblock_params_;
  // Null when post-processing is disabled.
  const std::unique_ptr<QpSmoother> qp_smoother_;

  // Decoder should produce this format if possible.
  const VideoFrameBuffer::Type preferred_output_format_;
};
+ */ + +#include "modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h" + +#include <string.h> + +#include <algorithm> +#include <cstdint> +#include <iterator> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "absl/algorithm/container.h" +#include "api/scoped_refptr.h" +#include "api/video/video_content_type.h" +#include "api/video/video_frame_buffer.h" +#include "api/video/video_timing.h" +#include "api/video_codecs/vp8_temporal_layers.h" +#include "api/video_codecs/vp8_temporal_layers_factory.h" +#include "modules/video_coding/codecs/interface/common_constants.h" +#include "modules/video_coding/codecs/vp8/include/vp8.h" +#include "modules/video_coding/codecs/vp8/vp8_scalability.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "modules/video_coding/svc/scalability_mode_util.h" +#include "modules/video_coding/utility/simulcast_rate_allocator.h" +#include "modules/video_coding/utility/simulcast_utility.h" +#include "rtc_base/checks.h" +#include "rtc_base/experiments/field_trial_parser.h" +#include "rtc_base/experiments/field_trial_units.h" +#include "rtc_base/logging.h" +#include "rtc_base/trace_event.h" +#include "system_wrappers/include/field_trial.h" +#include "libyuv/include/libyuv/scale.h" +#include "vpx/vp8cx.h" + +namespace webrtc { +namespace { +#if defined(WEBRTC_IOS) +constexpr char kVP8IosMaxNumberOfThreadFieldTrial[] = + "WebRTC-VP8IosMaxNumberOfThread"; +constexpr char kVP8IosMaxNumberOfThreadFieldTrialParameter[] = "max_thread"; +#endif + +constexpr char kVp8ForcePartitionResilience[] = + "WebRTC-VP8-ForcePartitionResilience"; + +// QP is obtained from VP8-bitstream for HW, so the QP corresponds to the +// bitstream range of [0, 127] and not the user-level range of [0,63]. 
+constexpr int kLowVp8QpThreshold = 29; +constexpr int kHighVp8QpThreshold = 95; + +constexpr int kTokenPartitions = VP8_ONE_TOKENPARTITION; +constexpr uint32_t kVp832ByteAlign = 32u; + +constexpr int kRtpTicksPerSecond = 90000; +constexpr int kRtpTicksPerMs = kRtpTicksPerSecond / 1000; + +// VP8 denoiser states. +enum denoiserState : uint32_t { + kDenoiserOff, + kDenoiserOnYOnly, + kDenoiserOnYUV, + kDenoiserOnYUVAggressive, + // Adaptive mode defaults to kDenoiserOnYUV on key frame, but may switch + // to kDenoiserOnYUVAggressive based on a computed noise metric. + kDenoiserOnAdaptive +}; + +// Greatest common divisior +int GCD(int a, int b) { + int c = a % b; + while (c != 0) { + a = b; + b = c; + c = a % b; + } + return b; +} + +static_assert(Vp8EncoderConfig::TemporalLayerConfig::kMaxPeriodicity == + VPX_TS_MAX_PERIODICITY, + "Vp8EncoderConfig::kMaxPeriodicity must be kept in sync with the " + "constant in libvpx."); +static_assert(Vp8EncoderConfig::TemporalLayerConfig::kMaxLayers == + VPX_TS_MAX_LAYERS, + "Vp8EncoderConfig::kMaxLayers must be kept in sync with the " + "constant in libvpx."); + +// Allow a newer value to override a current value only if the new value +// is set. +template <typename T> +bool MaybeSetNewValue(const absl::optional<T>& new_value, + absl::optional<T>* base_value) { + if (new_value.has_value() && new_value != *base_value) { + *base_value = new_value; + return true; + } else { + return false; + } +} + +// Adds configuration from `new_config` to `base_config`. Both configs consist +// of optionals, and only optionals which are set in `new_config` can have +// an effect. (That is, set values in `base_config` cannot be unset.) +// Returns `true` iff any changes were made to `base_config`. 
+bool MaybeExtendVp8EncoderConfig(const Vp8EncoderConfig& new_config, + Vp8EncoderConfig* base_config) { + bool changes_made = false; + changes_made |= MaybeSetNewValue(new_config.temporal_layer_config, + &base_config->temporal_layer_config); + changes_made |= MaybeSetNewValue(new_config.rc_target_bitrate, + &base_config->rc_target_bitrate); + changes_made |= MaybeSetNewValue(new_config.rc_max_quantizer, + &base_config->rc_max_quantizer); + changes_made |= MaybeSetNewValue(new_config.g_error_resilient, + &base_config->g_error_resilient); + return changes_made; +} + +void ApplyVp8EncoderConfigToVpxConfig(const Vp8EncoderConfig& encoder_config, + vpx_codec_enc_cfg_t* vpx_config) { + if (encoder_config.temporal_layer_config.has_value()) { + const Vp8EncoderConfig::TemporalLayerConfig& ts_config = + encoder_config.temporal_layer_config.value(); + vpx_config->ts_number_layers = ts_config.ts_number_layers; + std::copy(ts_config.ts_target_bitrate.begin(), + ts_config.ts_target_bitrate.end(), + std::begin(vpx_config->ts_target_bitrate)); + std::copy(ts_config.ts_rate_decimator.begin(), + ts_config.ts_rate_decimator.end(), + std::begin(vpx_config->ts_rate_decimator)); + vpx_config->ts_periodicity = ts_config.ts_periodicity; + std::copy(ts_config.ts_layer_id.begin(), ts_config.ts_layer_id.end(), + std::begin(vpx_config->ts_layer_id)); + } else { + vpx_config->ts_number_layers = 1; + vpx_config->ts_rate_decimator[0] = 1; + vpx_config->ts_periodicity = 1; + vpx_config->ts_layer_id[0] = 0; + } + + if (encoder_config.rc_target_bitrate.has_value()) { + vpx_config->rc_target_bitrate = encoder_config.rc_target_bitrate.value(); + } + + if (encoder_config.rc_max_quantizer.has_value()) { + vpx_config->rc_max_quantizer = encoder_config.rc_max_quantizer.value(); + } + + if (encoder_config.g_error_resilient.has_value()) { + vpx_config->g_error_resilient = encoder_config.g_error_resilient.value(); + } +} + +bool IsCompatibleVideoFrameBufferType(VideoFrameBuffer::Type left, + 
VideoFrameBuffer::Type right) { + if (left == VideoFrameBuffer::Type::kI420 || + left == VideoFrameBuffer::Type::kI420A) { + // LibvpxVp8Encoder does not care about the alpha channel, I420A and I420 + // are considered compatible. + return right == VideoFrameBuffer::Type::kI420 || + right == VideoFrameBuffer::Type::kI420A; + } + return left == right; +} + +void SetRawImagePlanes(vpx_image_t* raw_image, VideoFrameBuffer* buffer) { + switch (buffer->type()) { + case VideoFrameBuffer::Type::kI420: + case VideoFrameBuffer::Type::kI420A: { + const I420BufferInterface* i420_buffer = buffer->GetI420(); + RTC_DCHECK(i420_buffer); + raw_image->planes[VPX_PLANE_Y] = + const_cast<uint8_t*>(i420_buffer->DataY()); + raw_image->planes[VPX_PLANE_U] = + const_cast<uint8_t*>(i420_buffer->DataU()); + raw_image->planes[VPX_PLANE_V] = + const_cast<uint8_t*>(i420_buffer->DataV()); + raw_image->stride[VPX_PLANE_Y] = i420_buffer->StrideY(); + raw_image->stride[VPX_PLANE_U] = i420_buffer->StrideU(); + raw_image->stride[VPX_PLANE_V] = i420_buffer->StrideV(); + break; + } + case VideoFrameBuffer::Type::kNV12: { + const NV12BufferInterface* nv12_buffer = buffer->GetNV12(); + RTC_DCHECK(nv12_buffer); + raw_image->planes[VPX_PLANE_Y] = + const_cast<uint8_t*>(nv12_buffer->DataY()); + raw_image->planes[VPX_PLANE_U] = + const_cast<uint8_t*>(nv12_buffer->DataUV()); + raw_image->planes[VPX_PLANE_V] = raw_image->planes[VPX_PLANE_U] + 1; + raw_image->stride[VPX_PLANE_Y] = nv12_buffer->StrideY(); + raw_image->stride[VPX_PLANE_U] = nv12_buffer->StrideUV(); + raw_image->stride[VPX_PLANE_V] = nv12_buffer->StrideUV(); + break; + } + default: + RTC_DCHECK_NOTREACHED(); + } +} + +} // namespace + +std::unique_ptr<VideoEncoder> VP8Encoder::Create() { + return std::make_unique<LibvpxVp8Encoder>(LibvpxInterface::Create(), + VP8Encoder::Settings()); +} + +std::unique_ptr<VideoEncoder> VP8Encoder::Create( + VP8Encoder::Settings settings) { + return std::make_unique<LibvpxVp8Encoder>(LibvpxInterface::Create(), + 
std::move(settings)); +} + +vpx_enc_frame_flags_t LibvpxVp8Encoder::EncodeFlags( + const Vp8FrameConfig& references) { + RTC_DCHECK(!references.drop_frame); + + vpx_enc_frame_flags_t flags = 0; + + if ((references.last_buffer_flags & + Vp8FrameConfig::BufferFlags::kReference) == 0) + flags |= VP8_EFLAG_NO_REF_LAST; + if ((references.last_buffer_flags & Vp8FrameConfig::BufferFlags::kUpdate) == + 0) + flags |= VP8_EFLAG_NO_UPD_LAST; + if ((references.golden_buffer_flags & + Vp8FrameConfig::BufferFlags::kReference) == 0) + flags |= VP8_EFLAG_NO_REF_GF; + if ((references.golden_buffer_flags & Vp8FrameConfig::BufferFlags::kUpdate) == + 0) + flags |= VP8_EFLAG_NO_UPD_GF; + if ((references.arf_buffer_flags & Vp8FrameConfig::BufferFlags::kReference) == + 0) + flags |= VP8_EFLAG_NO_REF_ARF; + if ((references.arf_buffer_flags & Vp8FrameConfig::BufferFlags::kUpdate) == 0) + flags |= VP8_EFLAG_NO_UPD_ARF; + if (references.freeze_entropy) + flags |= VP8_EFLAG_NO_UPD_ENTROPY; + + return flags; +} + +LibvpxVp8Encoder::LibvpxVp8Encoder(std::unique_ptr<LibvpxInterface> interface, + VP8Encoder::Settings settings) + : libvpx_(std::move(interface)), + rate_control_settings_(RateControlSettings::ParseFromFieldTrials()), + frame_buffer_controller_factory_( + std::move(settings.frame_buffer_controller_factory)), + resolution_bitrate_limits_(std::move(settings.resolution_bitrate_limits)), + key_frame_request_(kMaxSimulcastStreams, false), + variable_framerate_experiment_(ParseVariableFramerateConfig( + "WebRTC-VP8VariableFramerateScreenshare")), + framerate_controller_(variable_framerate_experiment_.framerate_limit) { + // TODO(eladalon/ilnik): These reservations might be wasting memory. + // InitEncode() is resizing to the actual size, which might be smaller. 
+ raw_images_.reserve(kMaxSimulcastStreams); + encoded_images_.reserve(kMaxSimulcastStreams); + send_stream_.reserve(kMaxSimulcastStreams); + cpu_speed_.assign(kMaxSimulcastStreams, cpu_speed_default_); + encoders_.reserve(kMaxSimulcastStreams); + vpx_configs_.reserve(kMaxSimulcastStreams); + config_overrides_.reserve(kMaxSimulcastStreams); + downsampling_factors_.reserve(kMaxSimulcastStreams); +} + +LibvpxVp8Encoder::~LibvpxVp8Encoder() { + Release(); +} + +int LibvpxVp8Encoder::Release() { + int ret_val = WEBRTC_VIDEO_CODEC_OK; + + encoded_images_.clear(); + + if (inited_) { + for (auto it = encoders_.rbegin(); it != encoders_.rend(); ++it) { + if (libvpx_->codec_destroy(&*it)) { + ret_val = WEBRTC_VIDEO_CODEC_MEMORY; + } + } + } + encoders_.clear(); + + vpx_configs_.clear(); + config_overrides_.clear(); + send_stream_.clear(); + cpu_speed_.clear(); + + for (auto it = raw_images_.rbegin(); it != raw_images_.rend(); ++it) { + libvpx_->img_free(&*it); + } + raw_images_.clear(); + + frame_buffer_controller_.reset(); + inited_ = false; + return ret_val; +} + +void LibvpxVp8Encoder::SetRates(const RateControlParameters& parameters) { + if (!inited_) { + RTC_LOG(LS_WARNING) << "SetRates() while not initialize"; + return; + } + + if (encoders_[0].err) { + RTC_LOG(LS_WARNING) << "Encoder in error state."; + return; + } + + if (parameters.framerate_fps < 1.0) { + RTC_LOG(LS_WARNING) << "Unsupported framerate (must be >= 1.0): " + << parameters.framerate_fps; + return; + } + + if (parameters.bitrate.get_sum_bps() == 0) { + // Encoder paused, turn off all encoding. + const int num_streams = static_cast<size_t>(encoders_.size()); + for (int i = 0; i < num_streams; ++i) + SetStreamState(false, i); + return; + } + + codec_.maxFramerate = static_cast<uint32_t>(parameters.framerate_fps + 0.5); + + if (encoders_.size() > 1) { + // If we have more than 1 stream, reduce the qp_max for the low resolution + // stream if frame rate is not too low. 
The trade-off with lower qp_max is + // possibly more dropped frames, so we only do this if the frame rate is + // above some threshold (base temporal layer is down to 1/4 for 3 layers). + // We may want to condition this on bitrate later. + if (rate_control_settings_.Vp8BoostBaseLayerQuality() && + parameters.framerate_fps > 20.0) { + vpx_configs_[encoders_.size() - 1].rc_max_quantizer = 45; + } else { + // Go back to default value set in InitEncode. + vpx_configs_[encoders_.size() - 1].rc_max_quantizer = qp_max_; + } + } + + for (size_t i = 0; i < encoders_.size(); ++i) { + const size_t stream_idx = encoders_.size() - 1 - i; + + unsigned int target_bitrate_kbps = + parameters.bitrate.GetSpatialLayerSum(stream_idx) / 1000; + + bool send_stream = target_bitrate_kbps > 0; + if (send_stream || encoders_.size() > 1) + SetStreamState(send_stream, stream_idx); + + vpx_configs_[i].rc_target_bitrate = target_bitrate_kbps; + if (send_stream) { + frame_buffer_controller_->OnRatesUpdated( + stream_idx, parameters.bitrate.GetTemporalLayerAllocation(stream_idx), + static_cast<int>(parameters.framerate_fps + 0.5)); + } + + UpdateVpxConfiguration(stream_idx); + + vpx_codec_err_t err = + libvpx_->codec_enc_config_set(&encoders_[i], &vpx_configs_[i]); + if (err != VPX_CODEC_OK) { + RTC_LOG(LS_WARNING) << "Error configuring codec, error code: " << err + << ", details: " + << libvpx_->codec_error_detail(&encoders_[i]); + } + } +} + +void LibvpxVp8Encoder::OnPacketLossRateUpdate(float packet_loss_rate) { + // TODO(bugs.webrtc.org/10431): Replace condition by DCHECK. + if (frame_buffer_controller_) { + frame_buffer_controller_->OnPacketLossRateUpdate(packet_loss_rate); + } +} + +void LibvpxVp8Encoder::OnRttUpdate(int64_t rtt_ms) { + // TODO(bugs.webrtc.org/10431): Replace condition by DCHECK. 
+ if (frame_buffer_controller_) { + frame_buffer_controller_->OnRttUpdate(rtt_ms); + } +} + +void LibvpxVp8Encoder::OnLossNotification( + const LossNotification& loss_notification) { + if (frame_buffer_controller_) { + frame_buffer_controller_->OnLossNotification(loss_notification); + } +} + +void LibvpxVp8Encoder::SetStreamState(bool send_stream, int stream_idx) { + if (send_stream && !send_stream_[stream_idx]) { + // Need a key frame if we have not sent this stream before. + key_frame_request_[stream_idx] = true; + } + send_stream_[stream_idx] = send_stream; +} + +void LibvpxVp8Encoder::SetFecControllerOverride( + FecControllerOverride* fec_controller_override) { + // TODO(bugs.webrtc.org/10769): Update downstream and remove ability to + // pass nullptr. + // RTC_DCHECK(fec_controller_override); + RTC_DCHECK(!fec_controller_override_); + fec_controller_override_ = fec_controller_override; +} + +// TODO(eladalon): s/inst/codec_settings/g. +int LibvpxVp8Encoder::InitEncode(const VideoCodec* inst, + const VideoEncoder::Settings& settings) { + if (inst == NULL) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->maxFramerate < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + // allow zero to represent an unspecified maxBitRate + if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->width < 1 || inst->height < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (settings.number_of_cores < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + if (absl::optional<ScalabilityMode> scalability_mode = + inst->GetScalabilityMode(); + scalability_mode.has_value() && + !VP8SupportsScalabilityMode(*scalability_mode)) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + num_active_streams_ = 0; + for (int i = 0; i < inst->numberOfSimulcastStreams; ++i) { + if (inst->simulcastStream[i].active) { + ++num_active_streams_; + } + } + if (inst->numberOfSimulcastStreams == 0 && inst->active) 
{ + num_active_streams_ = 1; + } + + if (inst->VP8().automaticResizeOn && num_active_streams_ > 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + // Use the previous pixel format to avoid extra image allocations. + vpx_img_fmt_t pixel_format = + raw_images_.empty() ? VPX_IMG_FMT_I420 : raw_images_[0].fmt; + + int retVal = Release(); + if (retVal < 0) { + return retVal; + } + + int number_of_streams = SimulcastUtility::NumberOfSimulcastStreams(*inst); + if (number_of_streams > 1 && + !SimulcastUtility::ValidSimulcastParameters(*inst, number_of_streams)) { + return WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED; + } + + RTC_DCHECK(!frame_buffer_controller_); + if (frame_buffer_controller_factory_) { + frame_buffer_controller_ = frame_buffer_controller_factory_->Create( + *inst, settings, fec_controller_override_); + } else { + Vp8TemporalLayersFactory factory; + frame_buffer_controller_ = + factory.Create(*inst, settings, fec_controller_override_); + } + RTC_DCHECK(frame_buffer_controller_); + + number_of_cores_ = settings.number_of_cores; + timestamp_ = 0; + codec_ = *inst; + + // Code expects simulcastStream resolutions to be correct, make sure they are + // filled even when there are no simulcast layers. + if (codec_.numberOfSimulcastStreams == 0) { + codec_.simulcastStream[0].width = codec_.width; + codec_.simulcastStream[0].height = codec_.height; + } + + encoded_images_.resize(number_of_streams); + encoders_.resize(number_of_streams); + vpx_configs_.resize(number_of_streams); + config_overrides_.resize(number_of_streams); + downsampling_factors_.resize(number_of_streams); + raw_images_.resize(number_of_streams); + send_stream_.resize(number_of_streams); + send_stream_[0] = true; // For non-simulcast case. 
+ cpu_speed_.resize(number_of_streams); + std::fill(key_frame_request_.begin(), key_frame_request_.end(), false); + + int idx = number_of_streams - 1; + for (int i = 0; i < (number_of_streams - 1); ++i, --idx) { + int gcd = GCD(inst->simulcastStream[idx].width, + inst->simulcastStream[idx - 1].width); + downsampling_factors_[i].num = inst->simulcastStream[idx].width / gcd; + downsampling_factors_[i].den = inst->simulcastStream[idx - 1].width / gcd; + send_stream_[i] = false; + } + if (number_of_streams > 1) { + send_stream_[number_of_streams - 1] = false; + downsampling_factors_[number_of_streams - 1].num = 1; + downsampling_factors_[number_of_streams - 1].den = 1; + } + + // populate encoder configuration with default values + if (libvpx_->codec_enc_config_default(vpx_codec_vp8_cx(), &vpx_configs_[0], + 0)) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + // setting the time base of the codec + vpx_configs_[0].g_timebase.num = 1; + vpx_configs_[0].g_timebase.den = kRtpTicksPerSecond; + vpx_configs_[0].g_lag_in_frames = 0; // 0- no frame lagging + + // Set the error resilience mode for temporal layers (but not simulcast). + vpx_configs_[0].g_error_resilient = + (SimulcastUtility::NumberOfTemporalLayers(*inst, 0) > 1) + ? VPX_ERROR_RESILIENT_DEFAULT + : 0; + + // Override the error resilience mode if this is not simulcast, but we are + // using temporal layers. + if (field_trial::IsEnabled(kVp8ForcePartitionResilience) && + (number_of_streams == 1) && + (SimulcastUtility::NumberOfTemporalLayers(*inst, 0) > 1)) { + RTC_LOG(LS_INFO) << "Overriding g_error_resilient from " + << vpx_configs_[0].g_error_resilient << " to " + << VPX_ERROR_RESILIENT_PARTITIONS; + vpx_configs_[0].g_error_resilient = VPX_ERROR_RESILIENT_PARTITIONS; + } + + // rate control settings + vpx_configs_[0].rc_dropframe_thresh = FrameDropThreshold(0); + vpx_configs_[0].rc_end_usage = VPX_CBR; + vpx_configs_[0].g_pass = VPX_RC_ONE_PASS; + // Handle resizing outside of libvpx. 
+ vpx_configs_[0].rc_resize_allowed = 0; + vpx_configs_[0].rc_min_quantizer = + codec_.mode == VideoCodecMode::kScreensharing ? 12 : 2; + if (inst->qpMax >= vpx_configs_[0].rc_min_quantizer) { + qp_max_ = inst->qpMax; + } + if (rate_control_settings_.LibvpxVp8QpMax()) { + qp_max_ = std::max(rate_control_settings_.LibvpxVp8QpMax().value(), + static_cast<int>(vpx_configs_[0].rc_min_quantizer)); + } + vpx_configs_[0].rc_max_quantizer = qp_max_; + vpx_configs_[0].rc_undershoot_pct = 100; + vpx_configs_[0].rc_overshoot_pct = 15; + vpx_configs_[0].rc_buf_initial_sz = 500; + vpx_configs_[0].rc_buf_optimal_sz = 600; + vpx_configs_[0].rc_buf_sz = 1000; + + // Set the maximum target size of any key-frame. + rc_max_intra_target_ = MaxIntraTarget(vpx_configs_[0].rc_buf_optimal_sz); + + if (inst->VP8().keyFrameInterval > 0) { + vpx_configs_[0].kf_mode = VPX_KF_AUTO; + vpx_configs_[0].kf_max_dist = inst->VP8().keyFrameInterval; + } else { + vpx_configs_[0].kf_mode = VPX_KF_DISABLED; + } + + // Allow the user to set the complexity for the base stream. + switch (inst->GetVideoEncoderComplexity()) { + case VideoCodecComplexity::kComplexityHigh: + cpu_speed_[0] = -5; + break; + case VideoCodecComplexity::kComplexityHigher: + cpu_speed_[0] = -4; + break; + case VideoCodecComplexity::kComplexityMax: + cpu_speed_[0] = -3; + break; + default: + cpu_speed_[0] = -6; + break; + } + cpu_speed_default_ = cpu_speed_[0]; + // Set encoding complexity (cpu_speed) based on resolution and/or platform. + cpu_speed_[0] = GetCpuSpeed(inst->width, inst->height); + for (int i = 1; i < number_of_streams; ++i) { + cpu_speed_[i] = + GetCpuSpeed(inst->simulcastStream[number_of_streams - 1 - i].width, + inst->simulcastStream[number_of_streams - 1 - i].height); + } + vpx_configs_[0].g_w = inst->width; + vpx_configs_[0].g_h = inst->height; + + // Determine number of threads based on the image size and #cores. + // TODO(fbarchard): Consider number of Simulcast layers. 
+ vpx_configs_[0].g_threads = NumberOfThreads( + vpx_configs_[0].g_w, vpx_configs_[0].g_h, settings.number_of_cores); + + // Creating a wrapper to the image - setting image data to NULL. + // Actual pointer will be set in encode. Setting align to 1, as it + // is meaningless (no memory allocation is done here). + libvpx_->img_wrap(&raw_images_[0], pixel_format, inst->width, inst->height, 1, + NULL); + + // Note the order we use is different from webm, we have lowest resolution + // at position 0 and they have highest resolution at position 0. + const size_t stream_idx_cfg_0 = encoders_.size() - 1; + SimulcastRateAllocator init_allocator(codec_); + VideoBitrateAllocation allocation = + init_allocator.Allocate(VideoBitrateAllocationParameters( + inst->startBitrate * 1000, inst->maxFramerate)); + std::vector<uint32_t> stream_bitrates; + for (int i = 0; i == 0 || i < inst->numberOfSimulcastStreams; ++i) { + uint32_t bitrate = allocation.GetSpatialLayerSum(i) / 1000; + stream_bitrates.push_back(bitrate); + } + + vpx_configs_[0].rc_target_bitrate = stream_bitrates[stream_idx_cfg_0]; + if (stream_bitrates[stream_idx_cfg_0] > 0) { + uint32_t maxFramerate = + inst->simulcastStream[stream_idx_cfg_0].maxFramerate; + if (!maxFramerate) { + maxFramerate = inst->maxFramerate; + } + + frame_buffer_controller_->OnRatesUpdated( + stream_idx_cfg_0, + allocation.GetTemporalLayerAllocation(stream_idx_cfg_0), maxFramerate); + } + frame_buffer_controller_->SetQpLimits(stream_idx_cfg_0, + vpx_configs_[0].rc_min_quantizer, + vpx_configs_[0].rc_max_quantizer); + UpdateVpxConfiguration(stream_idx_cfg_0); + vpx_configs_[0].rc_dropframe_thresh = FrameDropThreshold(stream_idx_cfg_0); + + for (size_t i = 1; i < encoders_.size(); ++i) { + const size_t stream_idx = encoders_.size() - 1 - i; + memcpy(&vpx_configs_[i], &vpx_configs_[0], sizeof(vpx_configs_[0])); + + vpx_configs_[i].g_w = inst->simulcastStream[stream_idx].width; + vpx_configs_[i].g_h = inst->simulcastStream[stream_idx].height; + + 
// Use 1 thread for lower resolutions. + vpx_configs_[i].g_threads = 1; + + vpx_configs_[i].rc_dropframe_thresh = FrameDropThreshold(stream_idx); + + // Setting alignment to 32 - as that ensures at least 16 for all + // planes (32 for Y, 16 for U,V). Libvpx sets the requested stride for + // the y plane, but only half of it to the u and v planes. + libvpx_->img_alloc( + &raw_images_[i], pixel_format, inst->simulcastStream[stream_idx].width, + inst->simulcastStream[stream_idx].height, kVp832ByteAlign); + SetStreamState(stream_bitrates[stream_idx] > 0, stream_idx); + vpx_configs_[i].rc_target_bitrate = stream_bitrates[stream_idx]; + if (stream_bitrates[stream_idx] > 0) { + uint32_t maxFramerate = inst->simulcastStream[stream_idx].maxFramerate; + if (!maxFramerate) { + maxFramerate = inst->maxFramerate; + } + frame_buffer_controller_->OnRatesUpdated( + stream_idx, allocation.GetTemporalLayerAllocation(stream_idx), + maxFramerate); + } + frame_buffer_controller_->SetQpLimits(stream_idx, + vpx_configs_[i].rc_min_quantizer, + vpx_configs_[i].rc_max_quantizer); + UpdateVpxConfiguration(stream_idx); + } + + return InitAndSetControlSettings(); +} + +int LibvpxVp8Encoder::GetCpuSpeed(int width, int height) { +#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || \ + defined(WEBRTC_ANDROID) || defined(WEBRTC_ARCH_MIPS) + // On mobile platform, use a lower speed setting for lower resolutions for + // CPUs with 4 or more cores. + RTC_DCHECK_GT(number_of_cores_, 0); + if (experimental_cpu_speed_config_arm_ + .GetValue(width * height, number_of_cores_) + .has_value()) { + return experimental_cpu_speed_config_arm_ + .GetValue(width * height, number_of_cores_) + .value(); + } + + if (number_of_cores_ <= 3) + return -12; + + if (width * height <= 352 * 288) + return -8; + else if (width * height <= 640 * 480) + return -10; + else + return -12; +#else + // For non-ARM, increase encoding complexity (i.e., use lower speed setting) + // if resolution is below CIF. 
Otherwise, keep the default/user setting + // (`cpu_speed_default_`) set on InitEncode via VP8().complexity. + if (width * height < 352 * 288) + return (cpu_speed_default_ < -4) ? -4 : cpu_speed_default_; + else + return cpu_speed_default_; +#endif +} + +int LibvpxVp8Encoder::NumberOfThreads(int width, int height, int cpus) { +#if defined(WEBRTC_ANDROID) || defined(WEBRTC_ARCH_MIPS) + if (width * height >= 320 * 180) { + if (cpus >= 4) { + // 3 threads for CPUs with 4 and more cores since most of times only 4 + // cores will be active. + return 3; + } else if (cpus == 3 || cpus == 2) { + return 2; + } else { + return 1; + } + } + return 1; +#else +#if defined(WEBRTC_IOS) + std::string trial_string = + field_trial::FindFullName(kVP8IosMaxNumberOfThreadFieldTrial); + FieldTrialParameter<int> max_thread_number( + kVP8IosMaxNumberOfThreadFieldTrialParameter, 0); + ParseFieldTrial({&max_thread_number}, trial_string); + if (max_thread_number.Get() > 0) { + if (width * height < 320 * 180) { + return 1; // Use single thread for small screens + } + // thread number must be less than or equal to the number of CPUs. + return std::min(cpus, max_thread_number.Get()); + } +#endif // defined(WEBRTC_IOS) + if (width * height >= 1920 * 1080 && cpus > 8) { + return 8; // 8 threads for 1080p on high perf machines. + } else if (width * height > 1280 * 960 && cpus >= 6) { + // 3 threads for 1080p. + return 3; + } else if (width * height > 640 * 480 && cpus >= 3) { + // Default 2 threads for qHD/HD, but allow 3 if core count is high enough, + // as this will allow more margin for high-core/low clock machines or if + // not built with highest optimization. + if (cpus >= 6) { + return 3; + } + return 2; + } else { + // 1 thread for VGA or less. 
+ return 1; + } +#endif +} + +int LibvpxVp8Encoder::InitAndSetControlSettings() { + vpx_codec_flags_t flags = 0; + flags |= VPX_CODEC_USE_OUTPUT_PARTITION; + + if (encoders_.size() > 1) { + int error = libvpx_->codec_enc_init_multi( + &encoders_[0], vpx_codec_vp8_cx(), &vpx_configs_[0], encoders_.size(), + flags, &downsampling_factors_[0]); + if (error) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + } else { + if (libvpx_->codec_enc_init(&encoders_[0], vpx_codec_vp8_cx(), + &vpx_configs_[0], flags)) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + } + // Enable denoising for the highest resolution stream, and for + // the second highest resolution if we are doing more than 2 + // spatial layers/streams. + // TODO(holmer): Investigate possibility of adding a libvpx API + // for getting the denoised frame from the encoder and using that + // when encoding lower resolution streams. Would it work with the + // multi-res encoding feature? + denoiserState denoiser_state = kDenoiserOnYOnly; +#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || \ + defined(WEBRTC_ANDROID) || defined(WEBRTC_ARCH_MIPS) + denoiser_state = kDenoiserOnYOnly; +#else + denoiser_state = kDenoiserOnAdaptive; +#endif + libvpx_->codec_control( + &encoders_[0], VP8E_SET_NOISE_SENSITIVITY, + codec_.VP8()->denoisingOn ? denoiser_state : kDenoiserOff); + if (encoders_.size() > 2) { + libvpx_->codec_control( + &encoders_[1], VP8E_SET_NOISE_SENSITIVITY, + codec_.VP8()->denoisingOn ? denoiser_state : kDenoiserOff); + } + for (size_t i = 0; i < encoders_.size(); ++i) { + // Allow more screen content to be detected as static. + libvpx_->codec_control( + &(encoders_[i]), VP8E_SET_STATIC_THRESHOLD, + codec_.mode == VideoCodecMode::kScreensharing ? 
100u : 1u); + libvpx_->codec_control(&(encoders_[i]), VP8E_SET_CPUUSED, cpu_speed_[i]); + libvpx_->codec_control( + &(encoders_[i]), VP8E_SET_TOKEN_PARTITIONS, + static_cast<vp8e_token_partitions>(kTokenPartitions)); + libvpx_->codec_control(&(encoders_[i]), VP8E_SET_MAX_INTRA_BITRATE_PCT, + rc_max_intra_target_); + // VP8E_SET_SCREEN_CONTENT_MODE 2 = screen content with more aggressive + // rate control (drop frames on large target bitrate overshoot) + libvpx_->codec_control( + &(encoders_[i]), VP8E_SET_SCREEN_CONTENT_MODE, + codec_.mode == VideoCodecMode::kScreensharing ? 2u : 0u); + } + inited_ = true; + return WEBRTC_VIDEO_CODEC_OK; +} + +uint32_t LibvpxVp8Encoder::MaxIntraTarget(uint32_t optimalBuffersize) { + // Set max to the optimal buffer level (normalized by target BR), + // and scaled by a scalePar. + // Max target size = scalePar * optimalBufferSize * targetBR[Kbps]. + // This values is presented in percentage of perFrameBw: + // perFrameBw = targetBR[Kbps] * 1000 / frameRate. + // The target in % is as follows: + + float scalePar = 0.5; + uint32_t targetPct = optimalBuffersize * scalePar * codec_.maxFramerate / 10; + + // Don't go below 3 times the per frame bandwidth. + const uint32_t minIntraTh = 300; + return (targetPct < minIntraTh) ? minIntraTh : targetPct; +} + +uint32_t LibvpxVp8Encoder::FrameDropThreshold(size_t spatial_idx) const { + if (!codec_.GetFrameDropEnabled()) { + return 0; + } + + // If temporal layers are used, they get to override the frame dropping + // setting, as eg. ScreenshareLayers does not work as intended with frame + // dropping on and DefaultTemporalLayers will have performance issues with + // frame dropping off. + RTC_DCHECK(frame_buffer_controller_); + RTC_DCHECK_LT(spatial_idx, frame_buffer_controller_->StreamCount()); + return frame_buffer_controller_->SupportsEncoderFrameDropping(spatial_idx) + ? 
30 + : 0; +} + +size_t LibvpxVp8Encoder::SteadyStateSize(int sid, int tid) { + const int encoder_id = encoders_.size() - 1 - sid; + size_t bitrate_bps; + float fps; + if ((SimulcastUtility::IsConferenceModeScreenshare(codec_) && sid == 0) || + vpx_configs_[encoder_id].ts_number_layers <= 1) { + // In conference screenshare there's no defined per temporal layer bitrate + // and framerate. + bitrate_bps = vpx_configs_[encoder_id].rc_target_bitrate * 1000; + fps = codec_.maxFramerate; + } else { + bitrate_bps = vpx_configs_[encoder_id].ts_target_bitrate[tid] * 1000; + fps = codec_.maxFramerate / + fmax(vpx_configs_[encoder_id].ts_rate_decimator[tid], 1.0); + if (tid > 0) { + // Layer bitrate and fps are counted as a partial sums. + bitrate_bps -= vpx_configs_[encoder_id].ts_target_bitrate[tid - 1] * 1000; + fps = codec_.maxFramerate / + fmax(vpx_configs_[encoder_id].ts_rate_decimator[tid - 1], 1.0); + } + } + + if (fps < 1e-9) + return 0; + return static_cast<size_t>( + bitrate_bps / (8 * fps) * + (100 - + variable_framerate_experiment_.steady_state_undershoot_percentage) / + 100 + + 0.5); +} + +bool LibvpxVp8Encoder::UpdateVpxConfiguration(size_t stream_index) { + RTC_DCHECK(frame_buffer_controller_); + + const size_t config_index = vpx_configs_.size() - 1 - stream_index; + + RTC_DCHECK_LT(config_index, config_overrides_.size()); + Vp8EncoderConfig* config = &config_overrides_[config_index]; + + const Vp8EncoderConfig new_config = + frame_buffer_controller_->UpdateConfiguration(stream_index); + + if (new_config.reset_previous_configuration_overrides) { + *config = new_config; + return true; + } + + const bool changes_made = MaybeExtendVp8EncoderConfig(new_config, config); + + // Note that overrides must be applied even if they haven't changed. 
+ RTC_DCHECK_LT(config_index, vpx_configs_.size()); + vpx_codec_enc_cfg_t* vpx_config = &vpx_configs_[config_index]; + ApplyVp8EncoderConfigToVpxConfig(*config, vpx_config); + + return changes_made; +} + +int LibvpxVp8Encoder::Encode(const VideoFrame& frame, + const std::vector<VideoFrameType>* frame_types) { + RTC_DCHECK_EQ(frame.width(), codec_.width); + RTC_DCHECK_EQ(frame.height(), codec_.height); + + if (!inited_) + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + if (encoded_complete_callback_ == NULL) + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + + bool key_frame_requested = false; + for (size_t i = 0; i < key_frame_request_.size() && i < send_stream_.size(); + ++i) { + if (key_frame_request_[i] && send_stream_[i]) { + key_frame_requested = true; + break; + } + } + if (!key_frame_requested && frame_types) { + for (size_t i = 0; i < frame_types->size() && i < send_stream_.size(); + ++i) { + if ((*frame_types)[i] == VideoFrameType::kVideoFrameKey && + send_stream_[i]) { + key_frame_requested = true; + break; + } + } + } + + if (frame.update_rect().IsEmpty() && num_steady_state_frames_ >= 3 && + !key_frame_requested) { + if (variable_framerate_experiment_.enabled && + framerate_controller_.DropFrame(frame.timestamp() / kRtpTicksPerMs)) { + return WEBRTC_VIDEO_CODEC_OK; + } + framerate_controller_.AddFrame(frame.timestamp() / kRtpTicksPerMs); + } + + bool send_key_frame = key_frame_requested; + bool drop_frame = false; + bool retransmission_allowed = true; + Vp8FrameConfig tl_configs[kMaxSimulcastStreams]; + for (size_t i = 0; i < encoders_.size(); ++i) { + tl_configs[i] = + frame_buffer_controller_->NextFrameConfig(i, frame.timestamp()); + send_key_frame |= tl_configs[i].IntraFrame(); + drop_frame |= tl_configs[i].drop_frame; + RTC_DCHECK(i == 0 || + retransmission_allowed == tl_configs[i].retransmission_allowed); + retransmission_allowed = tl_configs[i].retransmission_allowed; + } + + if (drop_frame && !send_key_frame) { + return WEBRTC_VIDEO_CODEC_OK; + } + + 
vpx_enc_frame_flags_t flags[kMaxSimulcastStreams]; + for (size_t i = 0; i < encoders_.size(); ++i) { + flags[i] = send_key_frame ? VPX_EFLAG_FORCE_KF : EncodeFlags(tl_configs[i]); + } + + // Scale and map buffers and set `raw_images_` to hold pointers to the result. + // Because `raw_images_` are set to hold pointers to the prepared buffers, we + // need to keep these buffers alive through reference counting until after + // encoding is complete. + std::vector<rtc::scoped_refptr<VideoFrameBuffer>> prepared_buffers = + PrepareBuffers(frame.video_frame_buffer()); + if (prepared_buffers.empty()) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + struct CleanUpOnExit { + explicit CleanUpOnExit( + vpx_image_t* raw_image, + std::vector<rtc::scoped_refptr<VideoFrameBuffer>> prepared_buffers) + : raw_image_(raw_image), + prepared_buffers_(std::move(prepared_buffers)) {} + ~CleanUpOnExit() { + raw_image_->planes[VPX_PLANE_Y] = nullptr; + raw_image_->planes[VPX_PLANE_U] = nullptr; + raw_image_->planes[VPX_PLANE_V] = nullptr; + } + vpx_image_t* raw_image_; + std::vector<rtc::scoped_refptr<VideoFrameBuffer>> prepared_buffers_; + } clean_up_on_exit(&raw_images_[0], std::move(prepared_buffers)); + + if (send_key_frame) { + // Adapt the size of the key frame when in screenshare with 1 temporal + // layer. + if (encoders_.size() == 1 && + codec_.mode == VideoCodecMode::kScreensharing && + codec_.VP8()->numberOfTemporalLayers <= 1) { + const uint32_t forceKeyFrameIntraTh = 100; + libvpx_->codec_control(&(encoders_[0]), VP8E_SET_MAX_INTRA_BITRATE_PCT, + forceKeyFrameIntraTh); + } + + std::fill(key_frame_request_.begin(), key_frame_request_.end(), false); + } + + // Set the encoder frame flags and temporal layer_id for each spatial stream. + // Note that streams are defined starting from lowest resolution at + // position 0 to highest resolution at position |encoders_.size() - 1|, + // whereas `encoder_` is from highest to lowest resolution. 
+ for (size_t i = 0; i < encoders_.size(); ++i) { + const size_t stream_idx = encoders_.size() - 1 - i; + + if (UpdateVpxConfiguration(stream_idx)) { + if (libvpx_->codec_enc_config_set(&encoders_[i], &vpx_configs_[i])) + return WEBRTC_VIDEO_CODEC_ERROR; + } + + libvpx_->codec_control(&encoders_[i], VP8E_SET_FRAME_FLAGS, + static_cast<int>(flags[stream_idx])); + libvpx_->codec_control(&encoders_[i], VP8E_SET_TEMPORAL_LAYER_ID, + tl_configs[i].encoder_layer_id); + } + // TODO(holmer): Ideally the duration should be the timestamp diff of this + // frame and the next frame to be encoded, which we don't have. Instead we + // would like to use the duration of the previous frame. Unfortunately the + // rate control seems to be off with that setup. Using the average input + // frame rate to calculate an average duration for now. + RTC_DCHECK_GT(codec_.maxFramerate, 0); + uint32_t duration = kRtpTicksPerSecond / codec_.maxFramerate; + + int error = WEBRTC_VIDEO_CODEC_OK; + int num_tries = 0; + // If the first try returns WEBRTC_VIDEO_CODEC_TARGET_BITRATE_OVERSHOOT + // the frame must be reencoded with the same parameters again because + // target bitrate is exceeded and encoder state has been reset. + while (num_tries == 0 || + (num_tries == 1 && + error == WEBRTC_VIDEO_CODEC_TARGET_BITRATE_OVERSHOOT)) { + ++num_tries; + // Note we must pass 0 for `flags` field in encode call below since they are + // set above in `libvpx_interface_->vpx_codec_control_` function for each + // encoder/spatial layer. + error = libvpx_->codec_encode(&encoders_[0], &raw_images_[0], timestamp_, + duration, 0, VPX_DL_REALTIME); + // Reset specific intra frame thresholds, following the key frame. + if (send_key_frame) { + libvpx_->codec_control(&(encoders_[0]), VP8E_SET_MAX_INTRA_BITRATE_PCT, + rc_max_intra_target_); + } + if (error) + return WEBRTC_VIDEO_CODEC_ERROR; + // Examines frame timestamps only. 
+ error = GetEncodedPartitions(frame, retransmission_allowed); + } + // TODO(sprang): Shouldn't we use the frame timestamp instead? + timestamp_ += duration; + return error; +} + +void LibvpxVp8Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, + const vpx_codec_cx_pkt_t& pkt, + int stream_idx, + int encoder_idx, + uint32_t timestamp) { + RTC_DCHECK(codec_specific); + codec_specific->codecType = kVideoCodecVP8; + codec_specific->codecSpecific.VP8.keyIdx = + kNoKeyIdx; // TODO(hlundin) populate this + codec_specific->codecSpecific.VP8.nonReference = + (pkt.data.frame.flags & VPX_FRAME_IS_DROPPABLE) != 0; + + int qp = 0; + vpx_codec_control(&encoders_[encoder_idx], VP8E_GET_LAST_QUANTIZER_64, &qp); + bool is_keyframe = (pkt.data.frame.flags & VPX_FRAME_IS_KEY) != 0; + frame_buffer_controller_->OnEncodeDone(stream_idx, timestamp, + encoded_images_[encoder_idx].size(), + is_keyframe, qp, codec_specific); + if (is_keyframe && codec_specific->template_structure != absl::nullopt) { + // Number of resolutions must match number of spatial layers, VP8 structures + // expected to use single spatial layer. Templates must be ordered by + // spatial_id, so assumption there is exactly one spatial layer is same as + // assumption last template uses spatial_id = 0. + // This check catches potential scenario where template_structure is shared + // across multiple vp8 streams and they are distinguished using spatial_id. + // Assigning single resolution doesn't support such scenario, i.e. assumes + // vp8 simulcast is sent using multiple ssrcs. 
+ RTC_DCHECK(!codec_specific->template_structure->templates.empty()); + RTC_DCHECK_EQ( + codec_specific->template_structure->templates.back().spatial_id, 0); + codec_specific->template_structure->resolutions = { + RenderResolution(pkt.data.frame.width[0], pkt.data.frame.height[0])}; + } +} + +int LibvpxVp8Encoder::GetEncodedPartitions(const VideoFrame& input_image, + bool retransmission_allowed) { + int stream_idx = static_cast<int>(encoders_.size()) - 1; + int result = WEBRTC_VIDEO_CODEC_OK; + for (size_t encoder_idx = 0; encoder_idx < encoders_.size(); + ++encoder_idx, --stream_idx) { + vpx_codec_iter_t iter = NULL; + encoded_images_[encoder_idx].set_size(0); + encoded_images_[encoder_idx]._frameType = VideoFrameType::kVideoFrameDelta; + CodecSpecificInfo codec_specific; + const vpx_codec_cx_pkt_t* pkt = NULL; + + size_t encoded_size = 0; + while ((pkt = libvpx_->codec_get_cx_data(&encoders_[encoder_idx], &iter)) != + NULL) { + if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) { + encoded_size += pkt->data.frame.sz; + } + } + + auto buffer = EncodedImageBuffer::Create(encoded_size); + + iter = NULL; + size_t encoded_pos = 0; + while ((pkt = libvpx_->codec_get_cx_data(&encoders_[encoder_idx], &iter)) != + NULL) { + switch (pkt->kind) { + case VPX_CODEC_CX_FRAME_PKT: { + RTC_CHECK_LE(encoded_pos + pkt->data.frame.sz, buffer->size()); + memcpy(&buffer->data()[encoded_pos], pkt->data.frame.buf, + pkt->data.frame.sz); + encoded_pos += pkt->data.frame.sz; + break; + } + default: + break; + } + // End of frame + if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) { + // check if encoded frame is a key frame + if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { + encoded_images_[encoder_idx]._frameType = + VideoFrameType::kVideoFrameKey; + } + encoded_images_[encoder_idx].SetEncodedData(buffer); + encoded_images_[encoder_idx].set_size(encoded_pos); + encoded_images_[encoder_idx].SetSpatialIndex(stream_idx); + PopulateCodecSpecific(&codec_specific, *pkt, stream_idx, encoder_idx, 
+ input_image.timestamp()); + if (codec_specific.codecSpecific.VP8.temporalIdx != kNoTemporalIdx) { + encoded_images_[encoder_idx].SetTemporalIndex( + codec_specific.codecSpecific.VP8.temporalIdx); + } + break; + } + } + encoded_images_[encoder_idx].SetTimestamp(input_image.timestamp()); + encoded_images_[encoder_idx].SetColorSpace(input_image.color_space()); + encoded_images_[encoder_idx].SetRetransmissionAllowed( + retransmission_allowed); + + if (send_stream_[stream_idx]) { + if (encoded_images_[encoder_idx].size() > 0) { + TRACE_COUNTER_ID1("webrtc", "EncodedFrameSize", encoder_idx, + encoded_images_[encoder_idx].size()); + encoded_images_[encoder_idx]._encodedHeight = + codec_.simulcastStream[stream_idx].height; + encoded_images_[encoder_idx]._encodedWidth = + codec_.simulcastStream[stream_idx].width; + int qp_128 = -1; + libvpx_->codec_control(&encoders_[encoder_idx], VP8E_GET_LAST_QUANTIZER, + &qp_128); + encoded_images_[encoder_idx].qp_ = qp_128; + encoded_images_[encoder_idx].SetAtTargetQuality( + qp_128 <= variable_framerate_experiment_.steady_state_qp); + encoded_complete_callback_->OnEncodedImage(encoded_images_[encoder_idx], + &codec_specific); + const size_t steady_state_size = SteadyStateSize( + stream_idx, codec_specific.codecSpecific.VP8.temporalIdx); + if (qp_128 > variable_framerate_experiment_.steady_state_qp || + encoded_images_[encoder_idx].size() > steady_state_size) { + num_steady_state_frames_ = 0; + } else { + ++num_steady_state_frames_; + } + } else if (!frame_buffer_controller_->SupportsEncoderFrameDropping( + stream_idx)) { + result = WEBRTC_VIDEO_CODEC_TARGET_BITRATE_OVERSHOOT; + if (encoded_images_[encoder_idx].size() == 0) { + // Dropped frame that will be re-encoded. 
+ frame_buffer_controller_->OnFrameDropped(stream_idx, + input_image.timestamp()); + } + } + } + } + return result; +} + +VideoEncoder::EncoderInfo LibvpxVp8Encoder::GetEncoderInfo() const { + EncoderInfo info; + info.supports_native_handle = false; + info.implementation_name = "libvpx"; + info.has_trusted_rate_controller = + rate_control_settings_.LibvpxVp8TrustedRateController(); + info.is_hardware_accelerated = false; + info.supports_simulcast = true; + if (!resolution_bitrate_limits_.empty()) { + info.resolution_bitrate_limits = resolution_bitrate_limits_; + } + if (encoder_info_override_.requested_resolution_alignment()) { + info.requested_resolution_alignment = + *encoder_info_override_.requested_resolution_alignment(); + info.apply_alignment_to_all_simulcast_layers = + encoder_info_override_.apply_alignment_to_all_simulcast_layers(); + } + if (!encoder_info_override_.resolution_bitrate_limits().empty()) { + info.resolution_bitrate_limits = + encoder_info_override_.resolution_bitrate_limits(); + } + + const bool enable_scaling = + num_active_streams_ == 1 && + (vpx_configs_.empty() || vpx_configs_[0].rc_dropframe_thresh > 0) && + codec_.VP8().automaticResizeOn; + + info.scaling_settings = enable_scaling + ? VideoEncoder::ScalingSettings( + kLowVp8QpThreshold, kHighVp8QpThreshold) + : VideoEncoder::ScalingSettings::kOff; + if (rate_control_settings_.LibvpxVp8MinPixels()) { + info.scaling_settings.min_pixels_per_frame = + rate_control_settings_.LibvpxVp8MinPixels().value(); + } + info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420, + VideoFrameBuffer::Type::kNV12}; + + if (inited_) { + // `encoder_idx` is libvpx index where 0 is highest resolution. + // `si` is simulcast index, where 0 is lowest resolution. 
+ for (size_t si = 0, encoder_idx = encoders_.size() - 1; + si < encoders_.size(); ++si, --encoder_idx) { + info.fps_allocation[si].clear(); + if ((codec_.numberOfSimulcastStreams > si && + !codec_.simulcastStream[si].active) || + (si == 0 && SimulcastUtility::IsConferenceModeScreenshare(codec_))) { + // No defined frame rate fractions if not active or if using + // ScreenshareLayers, leave vector empty and continue; + continue; + } + if (vpx_configs_[encoder_idx].ts_number_layers <= 1) { + info.fps_allocation[si].push_back(EncoderInfo::kMaxFramerateFraction); + } else { + for (size_t ti = 0; ti < vpx_configs_[encoder_idx].ts_number_layers; + ++ti) { + RTC_DCHECK_GT(vpx_configs_[encoder_idx].ts_rate_decimator[ti], 0); + info.fps_allocation[si].push_back(rtc::saturated_cast<uint8_t>( + EncoderInfo::kMaxFramerateFraction / + vpx_configs_[encoder_idx].ts_rate_decimator[ti] + + 0.5)); + } + } + } + } + + return info; +} + +int LibvpxVp8Encoder::RegisterEncodeCompleteCallback( + EncodedImageCallback* callback) { + encoded_complete_callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +void LibvpxVp8Encoder::MaybeUpdatePixelFormat(vpx_img_fmt fmt) { + RTC_DCHECK(!raw_images_.empty()); + if (raw_images_[0].fmt == fmt) { + RTC_DCHECK(std::all_of( + std::next(raw_images_.begin()), raw_images_.end(), + [fmt](const vpx_image_t& raw_img) { return raw_img.fmt == fmt; })) + << "Not all raw images had the right format!"; + return; + } + RTC_LOG(LS_INFO) << "Updating vp8 encoder pixel format to " + << (fmt == VPX_IMG_FMT_NV12 ? "NV12" : "I420"); + for (size_t i = 0; i < raw_images_.size(); ++i) { + vpx_image_t& img = raw_images_[i]; + auto d_w = img.d_w; + auto d_h = img.d_h; + libvpx_->img_free(&img); + // First image is wrapping the input frame, the rest are allocated. 
+ if (i == 0) { + libvpx_->img_wrap(&img, fmt, d_w, d_h, 1, NULL); + } else { + libvpx_->img_alloc(&img, fmt, d_w, d_h, kVp832ByteAlign); + } + } +} + +std::vector<rtc::scoped_refptr<VideoFrameBuffer>> +LibvpxVp8Encoder::PrepareBuffers(rtc::scoped_refptr<VideoFrameBuffer> buffer) { + RTC_DCHECK_EQ(buffer->width(), raw_images_[0].d_w); + RTC_DCHECK_EQ(buffer->height(), raw_images_[0].d_h); + absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats> + supported_formats = {VideoFrameBuffer::Type::kI420, + VideoFrameBuffer::Type::kNV12}; + + rtc::scoped_refptr<VideoFrameBuffer> mapped_buffer; + if (buffer->type() != VideoFrameBuffer::Type::kNative) { + // `buffer` is already mapped. + mapped_buffer = buffer; + } else { + // Attempt to map to one of the supported formats. + mapped_buffer = buffer->GetMappedFrameBuffer(supported_formats); + } + if (!mapped_buffer || + (absl::c_find(supported_formats, mapped_buffer->type()) == + supported_formats.end() && + mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) { + // Unknown pixel format or unable to map, convert to I420 and prepare that + // buffer instead to ensure Scale() is safe to use. + auto converted_buffer = buffer->ToI420(); + if (!converted_buffer) { + RTC_LOG(LS_ERROR) << "Failed to convert " + << VideoFrameBufferTypeToString(buffer->type()) + << " image to I420. Can't encode frame."; + return {}; + } + RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 || + converted_buffer->type() == VideoFrameBuffer::Type::kI420A); + + // Because `buffer` had to be converted, use `converted_buffer` instead... + buffer = mapped_buffer = converted_buffer; + } + + // Maybe update pixel format. 
+ absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats> + mapped_type = {mapped_buffer->type()}; + switch (mapped_buffer->type()) { + case VideoFrameBuffer::Type::kI420: + case VideoFrameBuffer::Type::kI420A: + MaybeUpdatePixelFormat(VPX_IMG_FMT_I420); + break; + case VideoFrameBuffer::Type::kNV12: + MaybeUpdatePixelFormat(VPX_IMG_FMT_NV12); + break; + default: + RTC_DCHECK_NOTREACHED(); + } + + // Prepare `raw_images_` from `mapped_buffer` and, if simulcast, scaled + // versions of `buffer`. + std::vector<rtc::scoped_refptr<VideoFrameBuffer>> prepared_buffers; + SetRawImagePlanes(&raw_images_[0], mapped_buffer.get()); + prepared_buffers.push_back(mapped_buffer); + for (size_t i = 1; i < encoders_.size(); ++i) { + // Native buffers should implement optimized scaling and is the preferred + // buffer to scale. But if the buffer isn't native, it should be cheaper to + // scale from the previously prepared buffer which is smaller than `buffer`. + VideoFrameBuffer* buffer_to_scale = + buffer->type() == VideoFrameBuffer::Type::kNative + ? buffer.get() + : prepared_buffers.back().get(); + + auto scaled_buffer = + buffer_to_scale->Scale(raw_images_[i].d_w, raw_images_[i].d_h); + if (scaled_buffer->type() == VideoFrameBuffer::Type::kNative) { + auto mapped_scaled_buffer = + scaled_buffer->GetMappedFrameBuffer(mapped_type); + RTC_DCHECK(mapped_scaled_buffer) << "Unable to map the scaled buffer."; + if (!mapped_scaled_buffer) { + RTC_LOG(LS_ERROR) << "Failed to map scaled " + << VideoFrameBufferTypeToString(scaled_buffer->type()) + << " image to " + << VideoFrameBufferTypeToString(mapped_buffer->type()) + << ". 
Can't encode frame."; + return {}; + } + scaled_buffer = mapped_scaled_buffer; + } + if (!IsCompatibleVideoFrameBufferType(scaled_buffer->type(), + mapped_buffer->type())) { + RTC_LOG(LS_ERROR) << "When scaling " + << VideoFrameBufferTypeToString(buffer_to_scale->type()) + << ", the image was unexpectedly converted to " + << VideoFrameBufferTypeToString(scaled_buffer->type()) + << " instead of " + << VideoFrameBufferTypeToString(mapped_buffer->type()) + << ". Can't encode frame."; + RTC_DCHECK_NOTREACHED() + << "Scaled buffer type " + << VideoFrameBufferTypeToString(scaled_buffer->type()) + << " is not compatible with mapped buffer type " + << VideoFrameBufferTypeToString(mapped_buffer->type()); + return {}; + } + SetRawImagePlanes(&raw_images_[i], scaled_buffer.get()); + prepared_buffers.push_back(scaled_buffer); + } + return prepared_buffers; +} + +// static +LibvpxVp8Encoder::VariableFramerateExperiment +LibvpxVp8Encoder::ParseVariableFramerateConfig(std::string group_name) { + FieldTrialFlag disabled = FieldTrialFlag("Disabled"); + FieldTrialParameter<double> framerate_limit("min_fps", 5.0); + FieldTrialParameter<int> qp("min_qp", 15); + FieldTrialParameter<int> undershoot_percentage("undershoot", 30); + ParseFieldTrial({&disabled, &framerate_limit, &qp, &undershoot_percentage}, + field_trial::FindFullName(group_name)); + VariableFramerateExperiment config; + config.enabled = !disabled.Get(); + config.framerate_limit = framerate_limit.Get(); + config.steady_state_qp = qp.Get(); + config.steady_state_undershoot_percentage = undershoot_percentage.Get(); + + return config; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h b/third_party/libwebrtc/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h new file mode 100644 index 0000000000..643758753d --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2018 The WebRTC 
project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_VP8_LIBVPX_VP8_ENCODER_H_ +#define MODULES_VIDEO_CODING_CODECS_VP8_LIBVPX_VP8_ENCODER_H_ + +#include <memory> +#include <string> +#include <vector> + +#include "api/fec_controller_override.h" +#include "api/video/encoded_image.h" +#include "api/video/video_frame.h" +#include "api/video_codecs/video_encoder.h" +#include "api/video_codecs/vp8_frame_buffer_controller.h" +#include "api/video_codecs/vp8_frame_config.h" +#include "modules/video_coding/codecs/interface/libvpx_interface.h" +#include "modules/video_coding/codecs/vp8/include/vp8.h" +#include "modules/video_coding/include/video_codec_interface.h" +#include "modules/video_coding/utility/framerate_controller_deprecated.h" +#include "rtc_base/experiments/cpu_speed_experiment.h" +#include "rtc_base/experiments/encoder_info_settings.h" +#include "rtc_base/experiments/rate_control_settings.h" +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" + +namespace webrtc { + +class LibvpxVp8Encoder : public VideoEncoder { + public: + LibvpxVp8Encoder(std::unique_ptr<LibvpxInterface> interface, + VP8Encoder::Settings settings); + ~LibvpxVp8Encoder() override; + + int Release() override; + + void SetFecControllerOverride( + FecControllerOverride* fec_controller_override) override; + + int InitEncode(const VideoCodec* codec_settings, + const VideoEncoder::Settings& settings) override; + + int Encode(const VideoFrame& input_image, + const std::vector<VideoFrameType>* frame_types) override; + + int RegisterEncodeCompleteCallback(EncodedImageCallback* callback) override; + + void SetRates(const RateControlParameters& 
parameters) override; + + void OnPacketLossRateUpdate(float packet_loss_rate) override; + + void OnRttUpdate(int64_t rtt_ms) override; + + void OnLossNotification(const LossNotification& loss_notification) override; + + EncoderInfo GetEncoderInfo() const override; + + static vpx_enc_frame_flags_t EncodeFlags(const Vp8FrameConfig& references); + + private: + // Get the cpu_speed setting for encoder based on resolution and/or platform. + int GetCpuSpeed(int width, int height); + + // Determine number of encoder threads to use. + int NumberOfThreads(int width, int height, int number_of_cores); + + // Call encoder initialize function and set control settings. + int InitAndSetControlSettings(); + + void PopulateCodecSpecific(CodecSpecificInfo* codec_specific, + const vpx_codec_cx_pkt& pkt, + int stream_idx, + int encoder_idx, + uint32_t timestamp); + + int GetEncodedPartitions(const VideoFrame& input_image, + bool retransmission_allowed); + + // Set the stream state for stream `stream_idx`. + void SetStreamState(bool send_stream, int stream_idx); + + uint32_t MaxIntraTarget(uint32_t optimal_buffer_size); + + uint32_t FrameDropThreshold(size_t spatial_idx) const; + + size_t SteadyStateSize(int sid, int tid); + + bool UpdateVpxConfiguration(size_t stream_index); + + void MaybeUpdatePixelFormat(vpx_img_fmt fmt); + // Prepares `raw_image_` to reference image data of `buffer`, or of mapped or + // scaled versions of `buffer`. Returns a list of buffers that got referenced + // as a result, allowing the caller to keep references to them until after + // encoding has finished. On failure to convert the buffer, an empty list is + // returned. 
+ std::vector<rtc::scoped_refptr<VideoFrameBuffer>> PrepareBuffers( + rtc::scoped_refptr<VideoFrameBuffer> buffer); + + const std::unique_ptr<LibvpxInterface> libvpx_; + + const CpuSpeedExperiment experimental_cpu_speed_config_arm_; + const RateControlSettings rate_control_settings_; + + EncodedImageCallback* encoded_complete_callback_ = nullptr; + VideoCodec codec_; + bool inited_ = false; + int64_t timestamp_ = 0; + int qp_max_ = 56; + int cpu_speed_default_ = -6; + int number_of_cores_ = 0; + uint32_t rc_max_intra_target_ = 0; + int num_active_streams_ = 0; + const std::unique_ptr<Vp8FrameBufferControllerFactory> + frame_buffer_controller_factory_; + std::unique_ptr<Vp8FrameBufferController> frame_buffer_controller_; + const std::vector<VideoEncoder::ResolutionBitrateLimits> + resolution_bitrate_limits_; + std::vector<bool> key_frame_request_; + std::vector<bool> send_stream_; + std::vector<int> cpu_speed_; + std::vector<vpx_image_t> raw_images_; + std::vector<EncodedImage> encoded_images_; + std::vector<vpx_codec_ctx_t> encoders_; + std::vector<vpx_codec_enc_cfg_t> vpx_configs_; + std::vector<Vp8EncoderConfig> config_overrides_; + std::vector<vpx_rational_t> downsampling_factors_; + + // Variable frame-rate screencast related fields and methods. + const struct VariableFramerateExperiment { + bool enabled = false; + // Framerate is limited to this value in steady state. + float framerate_limit = 5.0; + // This qp or below is considered a steady state. + int steady_state_qp = 15; + // Frames of at least this percentage below ideal for configured bitrate are + // considered in a steady state. 
+ int steady_state_undershoot_percentage = 30; + } variable_framerate_experiment_; + static VariableFramerateExperiment ParseVariableFramerateConfig( + std::string group_name); + FramerateControllerDeprecated framerate_controller_; + int num_steady_state_frames_ = 0; + + FecControllerOverride* fec_controller_override_ = nullptr; + + const LibvpxVp8EncoderInfoSettings encoder_info_override_; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_VP8_LIBVPX_VP8_ENCODER_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp8/libvpx_vp8_simulcast_test.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp8/libvpx_vp8_simulcast_test.cc new file mode 100644 index 0000000000..67c9110b3c --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/libvpx_vp8_simulcast_test.cc @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <memory> + +#include "api/test/create_simulcast_test_fixture.h" +#include "api/test/simulcast_test_fixture.h" +#include "api/test/video/function_video_decoder_factory.h" +#include "api/test/video/function_video_encoder_factory.h" +#include "modules/video_coding/codecs/vp8/include/vp8.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { + +namespace { +std::unique_ptr<SimulcastTestFixture> CreateSpecificSimulcastTestFixture() { + std::unique_ptr<VideoEncoderFactory> encoder_factory = + std::make_unique<FunctionVideoEncoderFactory>( + []() { return VP8Encoder::Create(); }); + std::unique_ptr<VideoDecoderFactory> decoder_factory = + std::make_unique<FunctionVideoDecoderFactory>( + []() { return VP8Decoder::Create(); }); + return CreateSimulcastTestFixture(std::move(encoder_factory), + std::move(decoder_factory), + SdpVideoFormat("VP8")); +} +} // namespace + +TEST(LibvpxVp8SimulcastTest, TestKeyFrameRequestsOnAllStreams) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestKeyFrameRequestsOnAllStreams(); +} + +TEST(LibvpxVp8SimulcastTest, TestPaddingAllStreams) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestPaddingAllStreams(); +} + +TEST(LibvpxVp8SimulcastTest, TestPaddingTwoStreams) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestPaddingTwoStreams(); +} + +TEST(LibvpxVp8SimulcastTest, TestPaddingTwoStreamsOneMaxedOut) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestPaddingTwoStreamsOneMaxedOut(); +} + +TEST(LibvpxVp8SimulcastTest, TestPaddingOneStream) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestPaddingOneStream(); +} + +TEST(LibvpxVp8SimulcastTest, TestPaddingOneStreamTwoMaxedOut) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestPaddingOneStreamTwoMaxedOut(); +} + +TEST(LibvpxVp8SimulcastTest, TestSendAllStreams) { + auto fixture = CreateSpecificSimulcastTestFixture(); + 
fixture->TestSendAllStreams(); +} + +TEST(LibvpxVp8SimulcastTest, TestDisablingStreams) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestDisablingStreams(); +} + +TEST(LibvpxVp8SimulcastTest, TestActiveStreams) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestActiveStreams(); +} + +TEST(LibvpxVp8SimulcastTest, TestSwitchingToOneStream) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestSwitchingToOneStream(); +} + +TEST(LibvpxVp8SimulcastTest, TestSwitchingToOneOddStream) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestSwitchingToOneOddStream(); +} + +TEST(LibvpxVp8SimulcastTest, TestSwitchingToOneSmallStream) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestSwitchingToOneSmallStream(); +} + +TEST(LibvpxVp8SimulcastTest, TestSpatioTemporalLayers333PatternEncoder) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestSpatioTemporalLayers333PatternEncoder(); +} + +TEST(LibvpxVp8SimulcastTest, TestStrideEncodeDecode) { + auto fixture = CreateSpecificSimulcastTestFixture(); + fixture->TestStrideEncodeDecode(); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp8/screenshare_layers.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp8/screenshare_layers.cc new file mode 100644 index 0000000000..71db0b22c2 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/screenshare_layers.cc @@ -0,0 +1,624 @@ +/* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/vp8/screenshare_layers.h" + +#include <stdlib.h> + +#include <algorithm> +#include <memory> + +#include "modules/video_coding/include/video_codec_interface.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/time_utils.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { +namespace { +using BufferFlags = Vp8FrameConfig::BufferFlags; + +constexpr BufferFlags kNone = Vp8FrameConfig::BufferFlags::kNone; +constexpr BufferFlags kReference = Vp8FrameConfig::BufferFlags::kReference; +constexpr BufferFlags kUpdate = Vp8FrameConfig::BufferFlags::kUpdate; +constexpr BufferFlags kReferenceAndUpdate = + Vp8FrameConfig::BufferFlags::kReferenceAndUpdate; + +constexpr int kOneSecond90Khz = 90000; +constexpr int kMinTimeBetweenSyncs = kOneSecond90Khz * 2; +constexpr int kMaxTimeBetweenSyncs = kOneSecond90Khz * 4; +constexpr int kQpDeltaThresholdForSync = 8; +constexpr int kMinBitrateKbpsForQpBoost = 500; +constexpr auto kSwitch = DecodeTargetIndication::kSwitch; +} // namespace + +const double ScreenshareLayers::kMaxTL0FpsReduction = 2.5; +const double ScreenshareLayers::kAcceptableTargetOvershoot = 2.0; + +constexpr int ScreenshareLayers::kMaxNumTemporalLayers; + +// Always emit a frame with certain interval, even if bitrate targets have +// been exceeded. This prevents needless keyframe requests. +const int ScreenshareLayers::kMaxFrameIntervalMs = 2750; + +ScreenshareLayers::ScreenshareLayers(int num_temporal_layers) + : number_of_temporal_layers_( + std::min(kMaxNumTemporalLayers, num_temporal_layers)), + active_layer_(-1), + last_timestamp_(-1), + last_sync_timestamp_(-1), + last_emitted_tl0_timestamp_(-1), + last_frame_time_ms_(-1), + max_debt_bytes_(0), + encode_framerate_(1000.0f, 1000.0f), // 1 second window, second scale. 
+ bitrate_updated_(false), + checker_(TemporalLayersChecker::CreateTemporalLayersChecker( + Vp8TemporalLayersType::kBitrateDynamic, + num_temporal_layers)) { + RTC_CHECK_GT(number_of_temporal_layers_, 0); + RTC_CHECK_LE(number_of_temporal_layers_, kMaxNumTemporalLayers); +} + +ScreenshareLayers::~ScreenshareLayers() { + UpdateHistograms(); +} + +void ScreenshareLayers::SetQpLimits(size_t stream_index, + int min_qp, + int max_qp) { + RTC_DCHECK_LT(stream_index, StreamCount()); + // 0 < min_qp <= max_qp + RTC_DCHECK_LT(0, min_qp); + RTC_DCHECK_LE(min_qp, max_qp); + + RTC_DCHECK_EQ(min_qp_.has_value(), max_qp_.has_value()); + if (!min_qp_.has_value()) { + min_qp_ = min_qp; + max_qp_ = max_qp; + } else { + RTC_DCHECK_EQ(min_qp, min_qp_.value()); + RTC_DCHECK_EQ(max_qp, max_qp_.value()); + } +} + +size_t ScreenshareLayers::StreamCount() const { + return 1; +} + +bool ScreenshareLayers::SupportsEncoderFrameDropping( + size_t stream_index) const { + RTC_DCHECK_LT(stream_index, StreamCount()); + // Frame dropping is handled internally by this class. + return false; +} + +Vp8FrameConfig ScreenshareLayers::NextFrameConfig(size_t stream_index, + uint32_t timestamp) { + RTC_DCHECK_LT(stream_index, StreamCount()); + + auto it = pending_frame_configs_.find(timestamp); + if (it != pending_frame_configs_.end()) { + // Drop and re-encode, reuse the previous config. + return it->second.frame_config; + } + + if (number_of_temporal_layers_ <= 1) { + // No flags needed for 1 layer screenshare. + // TODO(pbos): Consider updating only last, and not all buffers. 
+ DependencyInfo dependency_info{ + "S", {kReferenceAndUpdate, kReferenceAndUpdate, kReferenceAndUpdate}}; + pending_frame_configs_[timestamp] = dependency_info; + return dependency_info.frame_config; + } + + const int64_t now_ms = rtc::TimeMillis(); + + int64_t unwrapped_timestamp = time_wrap_handler_.Unwrap(timestamp); + int64_t ts_diff; + if (last_timestamp_ == -1) { + ts_diff = kOneSecond90Khz / capture_framerate_.value_or(*target_framerate_); + } else { + ts_diff = unwrapped_timestamp - last_timestamp_; + } + + if (target_framerate_) { + // If input frame rate exceeds target frame rate, either over a one second + // averaging window, or if frame interval is below 90% of desired value, + // drop frame. + if (encode_framerate_.Rate(now_ms).value_or(0) > *target_framerate_) + return Vp8FrameConfig(kNone, kNone, kNone); + + // Primarily check if frame interval is too short using frame timestamps, + // as if they are correct they won't be affected by queuing in webrtc. + const int64_t expected_frame_interval_90khz = + kOneSecond90Khz / *target_framerate_; + if (last_timestamp_ != -1 && ts_diff > 0) { + if (ts_diff < 85 * expected_frame_interval_90khz / 100) { + return Vp8FrameConfig(kNone, kNone, kNone); + } + } else { + // Timestamps looks off, use realtime clock here instead. + const int64_t expected_frame_interval_ms = 1000 / *target_framerate_; + if (last_frame_time_ms_ != -1 && + now_ms - last_frame_time_ms_ < + (85 * expected_frame_interval_ms) / 100) { + return Vp8FrameConfig(kNone, kNone, kNone); + } + } + } + + if (stats_.first_frame_time_ms_ == -1) + stats_.first_frame_time_ms_ = now_ms; + + // Make sure both frame droppers leak out bits. 
+ layers_[0].UpdateDebt(ts_diff / 90); + layers_[1].UpdateDebt(ts_diff / 90); + // Store the unwrapped value: `ts_diff` is computed against the unwrapped + // timestamp, so keeping the raw 32-bit value here would make the difference + // jump by 2^32 once the RTP timestamp wraps around. + last_timestamp_ = unwrapped_timestamp; + last_frame_time_ms_ = now_ms; + + TemporalLayerState layer_state = TemporalLayerState::kDrop; + + if (active_layer_ == -1 || + layers_[active_layer_].state != TemporalLayer::State::kDropped) { + if (last_emitted_tl0_timestamp_ != -1 && + (unwrapped_timestamp - last_emitted_tl0_timestamp_) / 90 > + kMaxFrameIntervalMs) { + // Too long time has passed since the last frame was emitted, cancel + // enough debt to allow a single frame. + layers_[0].debt_bytes_ = max_debt_bytes_ - 1; + } + if (layers_[0].debt_bytes_ > max_debt_bytes_) { + // Must drop TL0, encode TL1 instead. + if (layers_[1].debt_bytes_ > max_debt_bytes_) { + // Must drop both TL0 and TL1. + active_layer_ = -1; + } else { + active_layer_ = 1; + } + } else { + active_layer_ = 0; + } + } + + switch (active_layer_) { + case 0: + layer_state = TemporalLayerState::kTl0; + last_emitted_tl0_timestamp_ = unwrapped_timestamp; + break; + case 1: + if (layers_[1].state != TemporalLayer::State::kDropped) { + if (TimeToSync(unwrapped_timestamp) || + layers_[1].state == TemporalLayer::State::kKeyFrame) { + last_sync_timestamp_ = unwrapped_timestamp; + layer_state = TemporalLayerState::kTl1Sync; + } else { + layer_state = TemporalLayerState::kTl1; + } + } else { + layer_state = last_sync_timestamp_ == unwrapped_timestamp + ? TemporalLayerState::kTl1Sync + : TemporalLayerState::kTl1; + } + break; + case -1: + layer_state = TemporalLayerState::kDrop; + ++stats_.num_dropped_frames_; + break; + default: + RTC_DCHECK_NOTREACHED(); + } + + DependencyInfo dependency_info; + // TODO(pbos): Consider referencing but not updating the 'alt' buffer for all + // layers. + switch (layer_state) { + case TemporalLayerState::kDrop: + dependency_info = {"", {kNone, kNone, kNone}}; + break; + case TemporalLayerState::kTl0: + // TL0 only references and updates 'last'.
+ dependency_info = {"SS", {kReferenceAndUpdate, kNone, kNone}}; + dependency_info.frame_config.packetizer_temporal_idx = 0; + break; + case TemporalLayerState::kTl1: + // TL1 references both 'last' and 'golden' but only updates 'golden'. + dependency_info = {"-R", {kReference, kReferenceAndUpdate, kNone}}; + dependency_info.frame_config.packetizer_temporal_idx = 1; + break; + case TemporalLayerState::kTl1Sync: + // Predict from only TL0 to allow participants to switch to the high + // bitrate stream. Updates 'golden' so that TL1 can continue to refer to + // and update 'golden' from this point on. + dependency_info = {"-S", {kReference, kUpdate, kNone}}; + dependency_info.frame_config.packetizer_temporal_idx = 1; + dependency_info.frame_config.layer_sync = true; + break; + } + + pending_frame_configs_[timestamp] = dependency_info; + return dependency_info.frame_config; +} + +void ScreenshareLayers::OnRatesUpdated( + size_t stream_index, + const std::vector<uint32_t>& bitrates_bps, + int framerate_fps) { + RTC_DCHECK_LT(stream_index, StreamCount()); + RTC_DCHECK_GT(framerate_fps, 0); + RTC_DCHECK_GE(bitrates_bps.size(), 1); + RTC_DCHECK_LE(bitrates_bps.size(), 2); + + // `bitrates_bps` uses individual rates per layer, but we want to use the + // accumulated rate here. + uint32_t tl0_kbps = bitrates_bps[0] / 1000; + uint32_t tl1_kbps = tl0_kbps; + if (bitrates_bps.size() > 1) { + tl1_kbps += bitrates_bps[1] / 1000; + } + + if (!target_framerate_) { + // First OnRatesUpdated() is called during construction, with the + // configured targets as parameters. 
+ target_framerate_ = framerate_fps; + capture_framerate_ = target_framerate_; + bitrate_updated_ = true; + } else { + if ((capture_framerate_ && + framerate_fps != static_cast<int>(*capture_framerate_)) || + (tl0_kbps != layers_[0].target_rate_kbps_) || + (tl1_kbps != layers_[1].target_rate_kbps_)) { + bitrate_updated_ = true; + } + + if (framerate_fps < 0) { + capture_framerate_.reset(); + } else { + capture_framerate_ = framerate_fps; + } + } + + layers_[0].target_rate_kbps_ = tl0_kbps; + layers_[1].target_rate_kbps_ = tl1_kbps; +} + +void ScreenshareLayers::OnEncodeDone(size_t stream_index, + uint32_t rtp_timestamp, + size_t size_bytes, + bool is_keyframe, + int qp, + CodecSpecificInfo* info) { + RTC_DCHECK_LT(stream_index, StreamCount()); + + if (size_bytes == 0) { + RTC_LOG(LS_WARNING) << "Empty frame; treating as dropped."; + OnFrameDropped(stream_index, rtp_timestamp); + return; + } + + absl::optional<DependencyInfo> dependency_info; + auto it = pending_frame_configs_.find(rtp_timestamp); + if (it != pending_frame_configs_.end()) { + dependency_info = it->second; + pending_frame_configs_.erase(it); + + if (checker_) { + RTC_DCHECK(checker_->CheckTemporalConfig(is_keyframe, + dependency_info->frame_config)); + } + } + + CodecSpecificInfoVP8& vp8_info = info->codecSpecific.VP8; + GenericFrameInfo& generic_frame_info = info->generic_frame_info.emplace(); + + if (number_of_temporal_layers_ == 1) { + vp8_info.temporalIdx = kNoTemporalIdx; + vp8_info.layerSync = false; + generic_frame_info.temporal_id = 0; + generic_frame_info.decode_target_indications = {kSwitch}; + generic_frame_info.encoder_buffers.emplace_back( + 0, /*referenced=*/!is_keyframe, /*updated=*/true); + } else { + int64_t unwrapped_timestamp = time_wrap_handler_.Unwrap(rtp_timestamp); + if (dependency_info) { + vp8_info.temporalIdx = + dependency_info->frame_config.packetizer_temporal_idx; + vp8_info.layerSync = dependency_info->frame_config.layer_sync; + generic_frame_info.temporal_id = 
vp8_info.temporalIdx; + generic_frame_info.decode_target_indications = + dependency_info->decode_target_indications; + } else { + RTC_DCHECK(is_keyframe); + } + + if (is_keyframe) { + vp8_info.temporalIdx = 0; + last_sync_timestamp_ = unwrapped_timestamp; + vp8_info.layerSync = true; + layers_[0].state = TemporalLayer::State::kKeyFrame; + layers_[1].state = TemporalLayer::State::kKeyFrame; + active_layer_ = 1; + info->template_structure = + GetTemplateStructure(number_of_temporal_layers_); + generic_frame_info.temporal_id = vp8_info.temporalIdx; + generic_frame_info.decode_target_indications = {kSwitch, kSwitch}; + } else if (active_layer_ >= 0 && layers_[active_layer_].state == + TemporalLayer::State::kKeyFrame) { + layers_[active_layer_].state = TemporalLayer::State::kNormal; + } + + vp8_info.useExplicitDependencies = true; + RTC_DCHECK_EQ(vp8_info.referencedBuffersCount, 0u); + RTC_DCHECK_EQ(vp8_info.updatedBuffersCount, 0u); + + // Note that `dependency_info` is not dereferenced if `is_keyframe`, + // meaning it's never dereferenced if the optional may be unset.
+ for (int i = 0; i < static_cast<int>(Vp8FrameConfig::Buffer::kCount); ++i) { + bool references = false; + bool updates = is_keyframe; + if (!is_keyframe && dependency_info->frame_config.References( + static_cast<Vp8FrameConfig::Buffer>(i))) { + RTC_DCHECK_LT(vp8_info.referencedBuffersCount, + arraysize(CodecSpecificInfoVP8::referencedBuffers)); + references = true; + vp8_info.referencedBuffers[vp8_info.referencedBuffersCount++] = i; + } + + if (is_keyframe || dependency_info->frame_config.Updates( + static_cast<Vp8FrameConfig::Buffer>(i))) { + RTC_DCHECK_LT(vp8_info.updatedBuffersCount, + arraysize(CodecSpecificInfoVP8::updatedBuffers)); + updates = true; + vp8_info.updatedBuffers[vp8_info.updatedBuffersCount++] = i; + } + + if (references || updates) + generic_frame_info.encoder_buffers.emplace_back(i, references, updates); + } + } + + encode_framerate_.Update(1, rtc::TimeMillis()); + + if (number_of_temporal_layers_ == 1) + return; + + RTC_DCHECK_NE(-1, active_layer_); + if (layers_[active_layer_].state == TemporalLayer::State::kDropped) { + layers_[active_layer_].state = TemporalLayer::State::kQualityBoost; + } + + if (qp != -1) + layers_[active_layer_].last_qp = qp; + + if (active_layer_ == 0) { + layers_[0].debt_bytes_ += size_bytes; + layers_[1].debt_bytes_ += size_bytes; + ++stats_.num_tl0_frames_; + stats_.tl0_target_bitrate_sum_ += layers_[0].target_rate_kbps_; + stats_.tl0_qp_sum_ += qp; + } else if (active_layer_ == 1) { + layers_[1].debt_bytes_ += size_bytes; + ++stats_.num_tl1_frames_; + stats_.tl1_target_bitrate_sum_ += layers_[1].target_rate_kbps_; + stats_.tl1_qp_sum_ += qp; + } +} + +// Records that the encoder dropped the frame for `rtp_timestamp`: counts an +// overshoot and marks the currently active temporal layer as dropped. +void ScreenshareLayers::OnFrameDropped(size_t stream_index, + uint32_t rtp_timestamp) { + RTC_DCHECK_LT(stream_index, StreamCount()); + ++stats_.num_overshoots_; + // `active_layer_` is -1 when no layer has been selected (it is never set in + // single-layer mode); indexing `layers_` with it would write out of bounds. + if (active_layer_ < 0) + return; + layers_[active_layer_].state = TemporalLayer::State::kDropped; +} + +void ScreenshareLayers::OnPacketLossRateUpdate(float packet_loss_rate) {} + +void ScreenshareLayers::OnRttUpdate(int64_t rtt_ms) {} + +void ScreenshareLayers::OnLossNotification( +
const VideoEncoder::LossNotification& loss_notification) {} + +FrameDependencyStructure ScreenshareLayers::GetTemplateStructure( + int num_layers) const { + RTC_CHECK_LT(num_layers, 3); + RTC_CHECK_GT(num_layers, 0); + + FrameDependencyStructure template_structure; + template_structure.num_decode_targets = num_layers; + + switch (num_layers) { + case 1: { + template_structure.templates.resize(2); + template_structure.templates[0].T(0).Dtis("S"); + template_structure.templates[1].T(0).Dtis("S").FrameDiffs({1}); + return template_structure; + } + case 2: { + template_structure.templates.resize(3); + template_structure.templates[0].T(0).Dtis("SS"); + template_structure.templates[1].T(0).Dtis("SS").FrameDiffs({1}); + template_structure.templates[2].T(1).Dtis("-S").FrameDiffs({1}); + return template_structure; + } + default: + RTC_DCHECK_NOTREACHED(); + // To make the compiler happy! + return template_structure; + } +} + +bool ScreenshareLayers::TimeToSync(int64_t timestamp) const { + RTC_DCHECK_EQ(1, active_layer_); + RTC_DCHECK_NE(-1, layers_[0].last_qp); + if (layers_[1].last_qp == -1) { + // First frame in TL1 should only depend on TL0 since there are no + // previous frames in TL1. + return true; + } + + RTC_DCHECK_NE(-1, last_sync_timestamp_); + int64_t timestamp_diff = timestamp - last_sync_timestamp_; + if (timestamp_diff > kMaxTimeBetweenSyncs) { + // After a certain time, force a sync frame. + return true; + } else if (timestamp_diff < kMinTimeBetweenSyncs) { + // If too soon from previous sync frame, don't issue a new one. + return false; + } + // Issue a sync frame if difference in quality between TL0 and TL1 isn't too + // large. + if (layers_[0].last_qp - layers_[1].last_qp < kQpDeltaThresholdForSync) + return true; + return false; +} + +uint32_t ScreenshareLayers::GetCodecTargetBitrateKbps() const { + uint32_t target_bitrate_kbps = layers_[0].target_rate_kbps_; + + if (number_of_temporal_layers_ > 1) { + // Calculate a codec target bitrate. 
This may be higher than TL0, gaining + // quality at the expense of frame rate at TL0. Constraints: + // - TL0 frame rate no less than framerate / kMaxTL0FpsReduction. + // - Target rate * kAcceptableTargetOvershoot should not exceed TL1 rate. + target_bitrate_kbps = + std::min(layers_[0].target_rate_kbps_ * kMaxTL0FpsReduction, + layers_[1].target_rate_kbps_ / kAcceptableTargetOvershoot); + } + + return std::max(layers_[0].target_rate_kbps_, target_bitrate_kbps); +} + +Vp8EncoderConfig ScreenshareLayers::UpdateConfiguration(size_t stream_index) { + RTC_DCHECK_LT(stream_index, StreamCount()); + RTC_DCHECK(min_qp_.has_value()); + RTC_DCHECK(max_qp_.has_value()); + + const uint32_t target_bitrate_kbps = GetCodecTargetBitrateKbps(); + + // TODO(sprang): We _really_ need to make an overhaul of this class. :( + // If we're dropping frames in order to meet a target framerate, adjust the + // bitrate assigned to the encoder so the total average bitrate is correct. + float encoder_config_bitrate_kbps = target_bitrate_kbps; + if (target_framerate_ && capture_framerate_ && + *target_framerate_ < *capture_framerate_) { + encoder_config_bitrate_kbps *= + static_cast<float>(*capture_framerate_) / *target_framerate_; + } + + if (bitrate_updated_ || + encoder_config_.rc_target_bitrate != + absl::make_optional(encoder_config_bitrate_kbps)) { + encoder_config_.rc_target_bitrate = encoder_config_bitrate_kbps; + + // Don't reconfigure qp limits during quality boost frames. + if (active_layer_ == -1 || + layers_[active_layer_].state != TemporalLayer::State::kQualityBoost) { + const int min_qp = min_qp_.value(); + const int max_qp = max_qp_.value(); + + // After a dropped frame, a frame with max qp will be encoded and the + // quality will then ramp up from there. To boost the speed of recovery, + // encode the next frame with lower max qp, if there is sufficient + // bandwidth to do so without causing excessive delay. 
+ // TL0 is the most important to improve since the errors in this layer + // will propagate to TL1. + // Currently, reduce max qp by 20% for TL0 and 15% for TL1. + if (layers_[1].target_rate_kbps_ >= kMinBitrateKbpsForQpBoost) { + layers_[0].enhanced_max_qp = min_qp + (((max_qp - min_qp) * 80) / 100); + layers_[1].enhanced_max_qp = min_qp + (((max_qp - min_qp) * 85) / 100); + } else { + layers_[0].enhanced_max_qp = -1; + layers_[1].enhanced_max_qp = -1; + } + } + + if (capture_framerate_) { + int avg_frame_size = + (target_bitrate_kbps * 1000) / (8 * *capture_framerate_); + // Allow max debt to be the size of a single optimal frame. + // TODO(sprang): Determine if this needs to be adjusted by some factor. + // (Lower values may cause more frame drops, higher may lead to queuing + // delays.) + max_debt_bytes_ = avg_frame_size; + } + + bitrate_updated_ = false; + } + + // Don't try to update boosts state if not active yet. + if (active_layer_ == -1) + return encoder_config_; + + if (number_of_temporal_layers_ <= 1) + return encoder_config_; + + // If layer is in the quality boost state (following a dropped frame), update + // the configuration with the adjusted (lower) qp and set the state back to + // normal. + unsigned int adjusted_max_qp = max_qp_.value(); // Set the normal max qp. + if (layers_[active_layer_].state == TemporalLayer::State::kQualityBoost) { + if (layers_[active_layer_].enhanced_max_qp != -1) { + // Bitrate is high enough for quality boost, update max qp. + adjusted_max_qp = layers_[active_layer_].enhanced_max_qp; + } + // Regardless of qp, reset the boost state for the next frame. 
+ layers_[active_layer_].state = TemporalLayer::State::kNormal; + } + encoder_config_.rc_max_quantizer = adjusted_max_qp; + + return encoder_config_; +} + +// Leaks `delta_ms` worth of bits at the layer's target rate out of the +// accumulated debt, never letting the debt go below zero. +void ScreenshareLayers::TemporalLayer::UpdateDebt(int64_t delta_ms) { + // kbps * ms / 8 = bytes. Keep the result in 64 bits: narrowing it to uint32_t + // wraps for long gaps and would leave stale debt behind. + const int64_t debt_reduction_bytes = + static_cast<int64_t>(target_rate_kbps_) * delta_ms / 8; + if (debt_reduction_bytes <= 0) { + // No time has passed (or timestamps went backwards): nothing leaks out. + return; + } + if (debt_reduction_bytes >= static_cast<int64_t>(debt_bytes_)) { + debt_bytes_ = 0; + } else { + debt_bytes_ -= static_cast<uint32_t>(debt_reduction_bytes); + } +} + +void ScreenshareLayers::UpdateHistograms() { + if (stats_.first_frame_time_ms_ == -1) + return; + int64_t duration_sec = + (rtc::TimeMillis() - stats_.first_frame_time_ms_ + 500) / 1000; + if (duration_sec >= metrics::kMinRunTimeInSeconds) { + RTC_HISTOGRAM_COUNTS_10000( + "WebRTC.Video.Screenshare.Layer0.FrameRate", + (stats_.num_tl0_frames_ + (duration_sec / 2)) / duration_sec); + RTC_HISTOGRAM_COUNTS_10000( + "WebRTC.Video.Screenshare.Layer1.FrameRate", + (stats_.num_tl1_frames_ + (duration_sec / 2)) / duration_sec); + int total_frames = stats_.num_tl0_frames_ + stats_.num_tl1_frames_; + RTC_HISTOGRAM_COUNTS_10000( + "WebRTC.Video.Screenshare.FramesPerDrop", + (stats_.num_dropped_frames_ == 0 + ? 0 + : total_frames / stats_.num_dropped_frames_)); + RTC_HISTOGRAM_COUNTS_10000( + "WebRTC.Video.Screenshare.FramesPerOvershoot", + (stats_.num_overshoots_ == 0 ?
0 + : total_frames / stats_.num_overshoots_)); + if (stats_.num_tl0_frames_ > 0) { + RTC_HISTOGRAM_COUNTS_10000("WebRTC.Video.Screenshare.Layer0.Qp", + stats_.tl0_qp_sum_ / stats_.num_tl0_frames_); + RTC_HISTOGRAM_COUNTS_10000( + "WebRTC.Video.Screenshare.Layer0.TargetBitrate", + stats_.tl0_target_bitrate_sum_ / stats_.num_tl0_frames_); + } + if (stats_.num_tl1_frames_ > 0) { + RTC_HISTOGRAM_COUNTS_10000("WebRTC.Video.Screenshare.Layer1.Qp", + stats_.tl1_qp_sum_ / stats_.num_tl1_frames_); + RTC_HISTOGRAM_COUNTS_10000( + "WebRTC.Video.Screenshare.Layer1.TargetBitrate", + stats_.tl1_target_bitrate_sum_ / stats_.num_tl1_frames_); + } + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp8/screenshare_layers.h b/third_party/libwebrtc/modules/video_coding/codecs/vp8/screenshare_layers.h new file mode 100644 index 0000000000..39477f12f1 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/screenshare_layers.h @@ -0,0 +1,164 @@ +/* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#ifndef MODULES_VIDEO_CODING_CODECS_VP8_SCREENSHARE_LAYERS_H_ +#define MODULES_VIDEO_CODING_CODECS_VP8_SCREENSHARE_LAYERS_H_ + +#include <map> +#include <memory> +#include <utility> +#include <vector> + +#include "api/video_codecs/vp8_frame_config.h" +#include "api/video_codecs/vp8_temporal_layers.h" +#include "modules/video_coding/codecs/vp8/include/temporal_layers_checker.h" +#include "modules/video_coding/include/video_codec_interface.h" +#include "modules/video_coding/utility/frame_dropper.h" +#include "rtc_base/rate_statistics.h" +#include "rtc_base/time_utils.h" + +namespace webrtc { + +struct CodecSpecificInfoVP8; +class Clock; + +class ScreenshareLayers final : public Vp8FrameBufferController { + public: + static const double kMaxTL0FpsReduction; + static const double kAcceptableTargetOvershoot; + static const int kMaxFrameIntervalMs; + + explicit ScreenshareLayers(int num_temporal_layers); + ~ScreenshareLayers() override; + + void SetQpLimits(size_t stream_index, int min_qp, int max_qp) override; + + size_t StreamCount() const override; + + bool SupportsEncoderFrameDropping(size_t stream_index) const override; + + // Returns the recommended VP8 encode flags needed. May refresh the decoder + // and/or update the reference buffers. + Vp8FrameConfig NextFrameConfig(size_t stream_index, + uint32_t rtp_timestamp) override; + + // New target bitrate, per temporal layer. 
+ void OnRatesUpdated(size_t stream_index, + const std::vector<uint32_t>& bitrates_bps, + int framerate_fps) override; + + Vp8EncoderConfig UpdateConfiguration(size_t stream_index) override; + + void OnEncodeDone(size_t stream_index, + uint32_t rtp_timestamp, + size_t size_bytes, + bool is_keyframe, + int qp, + CodecSpecificInfo* info) override; + + void OnFrameDropped(size_t stream_index, uint32_t rtp_timestamp) override; + + void OnPacketLossRateUpdate(float packet_loss_rate) override; + + void OnRttUpdate(int64_t rtt_ms) override; + + void OnLossNotification( + const VideoEncoder::LossNotification& loss_notification) override; + + private: + enum class TemporalLayerState : int { kDrop, kTl0, kTl1, kTl1Sync }; + + struct DependencyInfo { + DependencyInfo() = default; + DependencyInfo(absl::string_view indication_symbols, + Vp8FrameConfig frame_config) + : decode_target_indications( + webrtc_impl::StringToDecodeTargetIndications(indication_symbols)), + frame_config(frame_config) {} + + absl::InlinedVector<DecodeTargetIndication, 10> decode_target_indications; + Vp8FrameConfig frame_config; + }; + + bool TimeToSync(int64_t timestamp) const; + uint32_t GetCodecTargetBitrateKbps() const; + + const int number_of_temporal_layers_; + + // TODO(eladalon/sprang): These should be made into const-int set in the ctor. + absl::optional<int> min_qp_; + absl::optional<int> max_qp_; + + int active_layer_; + int64_t last_timestamp_; + int64_t last_sync_timestamp_; + int64_t last_emitted_tl0_timestamp_; + int64_t last_frame_time_ms_; + rtc::TimestampWrapAroundHandler time_wrap_handler_; + uint32_t max_debt_bytes_; + + std::map<uint32_t, DependencyInfo> pending_frame_configs_; + + // Configured max framerate. + absl::optional<uint32_t> target_framerate_; + // Incoming framerate from capturer. + absl::optional<uint32_t> capture_framerate_; + + // Tracks what framerate we actually encode, and drops frames on overshoot. 
+ RateStatistics encode_framerate_; + bool bitrate_updated_; + + static constexpr int kMaxNumTemporalLayers = 2; + struct TemporalLayer { + TemporalLayer() + : state(State::kNormal), + enhanced_max_qp(-1), + last_qp(-1), + debt_bytes_(0), + target_rate_kbps_(0) {} + + enum class State { + kNormal, + kDropped, + kReencoded, + kQualityBoost, + kKeyFrame + } state; + + int enhanced_max_qp; + int last_qp; + uint32_t debt_bytes_; + uint32_t target_rate_kbps_; + + void UpdateDebt(int64_t delta_ms); + } layers_[kMaxNumTemporalLayers]; + + void UpdateHistograms(); + FrameDependencyStructure GetTemplateStructure(int num_layers) const; + + // Data for histogram statistics. + struct Stats { + int64_t first_frame_time_ms_ = -1; + int64_t num_tl0_frames_ = 0; + int64_t num_tl1_frames_ = 0; + int64_t num_dropped_frames_ = 0; + int64_t num_overshoots_ = 0; + int64_t tl0_qp_sum_ = 0; + int64_t tl1_qp_sum_ = 0; + int64_t tl0_target_bitrate_sum_ = 0; + int64_t tl1_target_bitrate_sum_ = 0; + } stats_; + + Vp8EncoderConfig encoder_config_; + + // Optional utility used to verify reference validity. + std::unique_ptr<TemporalLayersChecker> checker_; +}; +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_VP8_SCREENSHARE_LAYERS_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp8/screenshare_layers_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp8/screenshare_layers_unittest.cc new file mode 100644 index 0000000000..e5b3bd4fdf --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/screenshare_layers_unittest.cc @@ -0,0 +1,788 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/codecs/vp8/screenshare_layers.h" + +#include <stdlib.h> +#include <string.h> + +#include <cstdint> +#include <memory> +#include <vector> + +#include "api/video_codecs/vp8_frame_config.h" +#include "modules/video_coding/codecs/interface/common_constants.h" +#include "modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h" +#include "modules/video_coding/include/video_codec_interface.h" +#include "rtc_base/checks.h" +#include "rtc_base/fake_clock.h" +#include "system_wrappers/include/metrics.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "vpx/vp8cx.h" + +using ::testing::_; +using ::testing::ElementsAre; +using ::testing::NiceMock; + +namespace webrtc { +namespace { +// 5 frames per second at 90 kHz. +const uint32_t kTimestampDelta5Fps = 90000 / 5; +const int kDefaultQp = 54; +const int kDefaultTl0BitrateKbps = 200; +const int kDefaultTl1BitrateKbps = 2000; +const int kFrameRate = 5; +const int kSyncPeriodSeconds = 2; +const int kMaxSyncPeriodSeconds = 4; + +// Expected flags for corresponding temporal layers. 
+const int kTl0Flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; +const int kTl1Flags = + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; +const int kTl1SyncFlags = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; +const std::vector<uint32_t> kDefault2TlBitratesBps = { + kDefaultTl0BitrateKbps * 1000, + (kDefaultTl1BitrateKbps - kDefaultTl0BitrateKbps) * 1000}; + +} // namespace + +class ScreenshareLayerTest : public ::testing::Test { + protected: + ScreenshareLayerTest() + : min_qp_(2), + max_qp_(kDefaultQp), + frame_size_(-1), + timestamp_(90), + config_updated_(false) {} + virtual ~ScreenshareLayerTest() {} + + void SetUp() override { + layers_.reset(new ScreenshareLayers(2)); + cfg_ = ConfigureBitrates(); + } + + int EncodeFrame(bool base_sync, CodecSpecificInfo* info = nullptr) { + CodecSpecificInfo ignored_info; + if (!info) { + info = &ignored_info; + } + + int flags = ConfigureFrame(base_sync); + if (flags != -1) + layers_->OnEncodeDone(0, timestamp_, frame_size_, base_sync, kDefaultQp, + info); + return flags; + } + + int ConfigureFrame(bool key_frame) { + tl_config_ = NextFrameConfig(0, timestamp_); + EXPECT_EQ(0, tl_config_.encoder_layer_id) + << "ScreenshareLayers always encodes using the bitrate allocator for " + "layer 0, but may reference different buffers and packetize " + "differently."; + if (tl_config_.drop_frame) { + return -1; + } + const uint32_t prev_rc_target_bitrate = cfg_.rc_target_bitrate.value_or(-1); + const uint32_t prev_rc_max_quantizer = cfg_.rc_max_quantizer.value_or(-1); + + cfg_ = layers_->UpdateConfiguration(0); + + config_updated_ = + cfg_.temporal_layer_config.has_value() || + (cfg_.rc_target_bitrate.has_value() && + cfg_.rc_target_bitrate.value() != prev_rc_target_bitrate) || + (cfg_.rc_max_quantizer.has_value() && + cfg_.rc_max_quantizer.value() != prev_rc_max_quantizer) || + cfg_.g_error_resilient.has_value(); + 
+ int flags = LibvpxVp8Encoder::EncodeFlags(tl_config_); + EXPECT_NE(-1, frame_size_); + return flags; + } + + Vp8FrameConfig NextFrameConfig(size_t stream_index, uint32_t timestamp) { + int64_t timestamp_ms = timestamp / 90; + clock_.AdvanceTime(TimeDelta::Millis(timestamp_ms - rtc::TimeMillis())); + return layers_->NextFrameConfig(stream_index, timestamp); + } + + int FrameSizeForBitrate(int bitrate_kbps) { + return ((bitrate_kbps * 1000) / 8) / kFrameRate; + } + + Vp8EncoderConfig ConfigureBitrates() { + layers_->SetQpLimits(0, min_qp_, max_qp_); + layers_->OnRatesUpdated(0, kDefault2TlBitratesBps, kFrameRate); + const Vp8EncoderConfig vp8_cfg = layers_->UpdateConfiguration(0); + EXPECT_TRUE(vp8_cfg.rc_target_bitrate.has_value()); + frame_size_ = FrameSizeForBitrate(vp8_cfg.rc_target_bitrate.value()); + return vp8_cfg; + } + + void WithQpLimits(int min_qp, int max_qp) { + min_qp_ = min_qp; + max_qp_ = max_qp; + } + + // Runs a few initial frames and makes sure we have seen frames on both + // temporal layers, including sync and non-sync frames. + bool RunGracePeriod() { + bool got_tl0 = false; + bool got_tl1 = false; + bool got_tl1_sync = false; + for (int i = 0; i < 10; ++i) { + CodecSpecificInfo info; + EXPECT_NE(-1, EncodeFrame(false, &info)); + timestamp_ += kTimestampDelta5Fps; + if (info.codecSpecific.VP8.temporalIdx == 0) { + got_tl0 = true; + } else if (info.codecSpecific.VP8.layerSync) { + got_tl1_sync = true; + } else { + got_tl1 = true; + } + if (got_tl0 && got_tl1 && got_tl1_sync) + return true; + } + return false; + } + + // Adds frames until we get one in the specified temporal layer. The last + // FrameEncoded() call will be omitted and needs to be done by the caller. + // Returns the flags for the last frame. + int SkipUntilTl(int layer) { + return SkipUntilTlAndSync(layer, absl::nullopt); + } + + // Same as SkipUntilTl, but also waits until the sync bit condition is met. 
+ int SkipUntilTlAndSync(int layer, absl::optional<bool> sync) { + int flags = 0; + const int kMaxFramesToSkip = + 1 + (sync.value_or(false) ? kMaxSyncPeriodSeconds : 1) * kFrameRate; + for (int i = 0; i < kMaxFramesToSkip; ++i) { + flags = ConfigureFrame(false); + if (tl_config_.packetizer_temporal_idx != layer || + (sync && *sync != tl_config_.layer_sync)) { + if (flags != -1) { + // If flags do not request a frame drop, report some default values + // for frame size etc. + CodecSpecificInfo info; + layers_->OnEncodeDone(0, timestamp_, frame_size_, false, kDefaultQp, + &info); + } + timestamp_ += kTimestampDelta5Fps; + } else { + // Found frame from sought after layer. + return flags; + } + } + ADD_FAILURE() << "Did not get a frame of TL" << layer << " in time."; + return -1; + } + + int min_qp_; + uint32_t max_qp_; + int frame_size_; + rtc::ScopedFakeClock clock_; + std::unique_ptr<ScreenshareLayers> layers_; + + uint32_t timestamp_; + Vp8FrameConfig tl_config_; + Vp8EncoderConfig cfg_; + bool config_updated_; + + CodecSpecificInfo* IgnoredCodecSpecificInfo() { + ignored_codec_specific_info_ = std::make_unique<CodecSpecificInfo>(); + return ignored_codec_specific_info_.get(); + } + + private: + std::unique_ptr<CodecSpecificInfo> ignored_codec_specific_info_; +}; + +TEST_F(ScreenshareLayerTest, 1Layer) { + layers_.reset(new ScreenshareLayers(1)); + ConfigureBitrates(); + // One layer screenshare should not use the frame dropper as all frames will + // belong to the base layer. 
+ const int kSingleLayerFlags = 0; + auto info = std::make_unique<CodecSpecificInfo>(); + int flags = EncodeFrame(/*base_sync=*/false, info.get()); + timestamp_ += kTimestampDelta5Fps; + EXPECT_EQ(static_cast<uint8_t>(kNoTemporalIdx), + info->codecSpecific.VP8.temporalIdx); + EXPECT_FALSE(info->codecSpecific.VP8.layerSync); + EXPECT_EQ(info->generic_frame_info->temporal_id, 0); + + info = std::make_unique<CodecSpecificInfo>(); + flags = EncodeFrame(/*base_sync=*/false, info.get()); + EXPECT_EQ(kSingleLayerFlags, flags); + EXPECT_EQ(static_cast<uint8_t>(kNoTemporalIdx), + info->codecSpecific.VP8.temporalIdx); + EXPECT_FALSE(info->codecSpecific.VP8.layerSync); + EXPECT_EQ(info->generic_frame_info->temporal_id, 0); +} + +TEST_F(ScreenshareLayerTest, 2LayersPeriodicSync) { + std::vector<int> sync_times; + const int kNumFrames = kSyncPeriodSeconds * kFrameRate * 2 - 1; + for (int i = 0; i < kNumFrames; ++i) { + CodecSpecificInfo info; + EncodeFrame(false, &info); + timestamp_ += kTimestampDelta5Fps; + if (info.codecSpecific.VP8.temporalIdx == 1 && + info.codecSpecific.VP8.layerSync) { + sync_times.push_back(timestamp_); + } + } + + ASSERT_EQ(2u, sync_times.size()); + EXPECT_GE(sync_times[1] - sync_times[0], 90000 * kSyncPeriodSeconds); +} + +TEST_F(ScreenshareLayerTest, 2LayersSyncAfterTimeout) { + std::vector<int> sync_times; + const int kNumFrames = kMaxSyncPeriodSeconds * kFrameRate * 2 - 1; + for (int i = 0; i < kNumFrames; ++i) { + CodecSpecificInfo info; + + tl_config_ = NextFrameConfig(0, timestamp_); + cfg_ = layers_->UpdateConfiguration(0); + + // Simulate TL1 being at least 8 qp steps better. 
+ if (tl_config_.packetizer_temporal_idx == 0) { + layers_->OnEncodeDone(0, timestamp_, frame_size_, false, kDefaultQp, + &info); + } else { + layers_->OnEncodeDone(0, timestamp_, frame_size_, false, kDefaultQp - 8, + &info); + } + + if (info.codecSpecific.VP8.temporalIdx == 1 && + info.codecSpecific.VP8.layerSync) + sync_times.push_back(timestamp_); + + timestamp_ += kTimestampDelta5Fps; + } + + ASSERT_EQ(2u, sync_times.size()); + EXPECT_GE(sync_times[1] - sync_times[0], 90000 * kMaxSyncPeriodSeconds); +} + +TEST_F(ScreenshareLayerTest, 2LayersSyncAfterSimilarQP) { + std::vector<int> sync_times; + + const int kNumFrames = (kSyncPeriodSeconds + + ((kMaxSyncPeriodSeconds - kSyncPeriodSeconds) / 2)) * + kFrameRate; + for (int i = 0; i < kNumFrames; ++i) { + CodecSpecificInfo info; + + ConfigureFrame(false); + + // Simulate TL1 being at least 8 qp steps better. + if (tl_config_.packetizer_temporal_idx == 0) { + layers_->OnEncodeDone(0, timestamp_, frame_size_, false, kDefaultQp, + &info); + } else { + layers_->OnEncodeDone(0, timestamp_, frame_size_, false, kDefaultQp - 8, + &info); + } + + if (info.codecSpecific.VP8.temporalIdx == 1 && + info.codecSpecific.VP8.layerSync) + sync_times.push_back(timestamp_); + + timestamp_ += kTimestampDelta5Fps; + } + + ASSERT_EQ(1u, sync_times.size()); + + bool bumped_tl0_quality = false; + for (int i = 0; i < 3; ++i) { + CodecSpecificInfo info; + + int flags = ConfigureFrame(false); + layers_->OnEncodeDone(0, timestamp_, frame_size_, false, kDefaultQp - 8, + &info); + if (info.codecSpecific.VP8.temporalIdx == 0) { + // Bump TL0 to same quality as TL1. + bumped_tl0_quality = true; + } else { + if (bumped_tl0_quality) { + EXPECT_TRUE(info.codecSpecific.VP8.layerSync); + EXPECT_EQ(kTl1SyncFlags, flags); + return; + } + } + timestamp_ += kTimestampDelta5Fps; + } + ADD_FAILURE() << "No TL1 frame arrived within time limit."; +} + +TEST_F(ScreenshareLayerTest, 2LayersToggling) { + EXPECT_TRUE(RunGracePeriod()); + + // Insert 50 frames. 
2/5 should be TL0. + int tl0_frames = 0; + int tl1_frames = 0; + for (int i = 0; i < 50; ++i) { + CodecSpecificInfo info; + EncodeFrame(/*base_sync=*/false, &info); + EXPECT_EQ(info.codecSpecific.VP8.temporalIdx, + info.generic_frame_info->temporal_id); + timestamp_ += kTimestampDelta5Fps; + switch (info.codecSpecific.VP8.temporalIdx) { + case 0: + ++tl0_frames; + break; + case 1: + ++tl1_frames; + break; + default: + abort(); + } + } + EXPECT_EQ(20, tl0_frames); + EXPECT_EQ(30, tl1_frames); +} + +TEST_F(ScreenshareLayerTest, AllFitsLayer0) { + frame_size_ = FrameSizeForBitrate(kDefaultTl0BitrateKbps); + + // Insert 50 frames, small enough that all fits in TL0. + for (int i = 0; i < 50; ++i) { + CodecSpecificInfo info; + int flags = EncodeFrame(false, &info); + timestamp_ += kTimestampDelta5Fps; + EXPECT_EQ(kTl0Flags, flags); + EXPECT_EQ(0, info.codecSpecific.VP8.temporalIdx); + } +} + +TEST_F(ScreenshareLayerTest, TooHighBitrate) { + frame_size_ = 2 * FrameSizeForBitrate(kDefaultTl1BitrateKbps); + + // Insert 100 frames. Half should be dropped. 
+ int tl0_frames = 0; + int tl1_frames = 0; + int dropped_frames = 0; + for (int i = 0; i < 100; ++i) { + CodecSpecificInfo info; + int flags = EncodeFrame(false, &info); + timestamp_ += kTimestampDelta5Fps; + if (flags == -1) { + ++dropped_frames; + } else { + switch (info.codecSpecific.VP8.temporalIdx) { + case 0: + ++tl0_frames; + break; + case 1: + ++tl1_frames; + break; + default: + ADD_FAILURE() << "Unexpected temporal id"; + } + } + } + + EXPECT_NEAR(50, tl0_frames + tl1_frames, 1); + EXPECT_NEAR(50, dropped_frames, 1); +} + +TEST_F(ScreenshareLayerTest, TargetBitrateCappedByTL0) { + const int kTl0_kbps = 100; + const int kTl1_kbps = 1000; + const std::vector<uint32_t> layer_rates = {kTl0_kbps * 1000, + (kTl1_kbps - kTl0_kbps) * 1000}; + layers_->OnRatesUpdated(0, layer_rates, kFrameRate); + cfg_ = layers_->UpdateConfiguration(0); + + EXPECT_EQ(static_cast<unsigned int>( + ScreenshareLayers::kMaxTL0FpsReduction * kTl0_kbps + 0.5), + cfg_.rc_target_bitrate); +} + +TEST_F(ScreenshareLayerTest, TargetBitrateCappedByTL1) { + const int kTl0_kbps = 100; + const int kTl1_kbps = 450; + const std::vector<uint32_t> layer_rates = {kTl0_kbps * 1000, + (kTl1_kbps - kTl0_kbps) * 1000}; + layers_->OnRatesUpdated(0, layer_rates, kFrameRate); + cfg_ = layers_->UpdateConfiguration(0); + + EXPECT_EQ(static_cast<unsigned int>( + kTl1_kbps / ScreenshareLayers::kAcceptableTargetOvershoot), + cfg_.rc_target_bitrate); +} + +TEST_F(ScreenshareLayerTest, TargetBitrateBelowTL0) { + const int kTl0_kbps = 100; + const std::vector<uint32_t> layer_rates = {kTl0_kbps * 1000}; + layers_->OnRatesUpdated(0, layer_rates, kFrameRate); + cfg_ = layers_->UpdateConfiguration(0); + + EXPECT_EQ(static_cast<uint32_t>(kTl0_kbps), cfg_.rc_target_bitrate); +} + +TEST_F(ScreenshareLayerTest, EncoderDrop) { + EXPECT_TRUE(RunGracePeriod()); + SkipUntilTl(0); + + // Size 0 indicates dropped frame. 
+ layers_->OnEncodeDone(0, timestamp_, 0, false, 0, IgnoredCodecSpecificInfo()); + + // Re-encode frame (so don't advance timestamp). + int flags = EncodeFrame(false); + timestamp_ += kTimestampDelta5Fps; + EXPECT_FALSE(config_updated_); + EXPECT_EQ(kTl0Flags, flags); + + // Next frame should have boosted quality... + SkipUntilTl(0); + EXPECT_TRUE(config_updated_); + EXPECT_LT(cfg_.rc_max_quantizer, static_cast<unsigned int>(kDefaultQp)); + layers_->OnEncodeDone(0, timestamp_, frame_size_, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + timestamp_ += kTimestampDelta5Fps; + + // ...then back to standard setup. + SkipUntilTl(0); + layers_->OnEncodeDone(0, timestamp_, frame_size_, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + timestamp_ += kTimestampDelta5Fps; + EXPECT_EQ(cfg_.rc_max_quantizer, static_cast<unsigned int>(kDefaultQp)); + + // Next drop in TL1. + SkipUntilTl(1); + layers_->OnEncodeDone(0, timestamp_, 0, false, 0, IgnoredCodecSpecificInfo()); + + // Re-encode frame (so don't advance timestamp). + flags = EncodeFrame(false); + timestamp_ += kTimestampDelta5Fps; + EXPECT_FALSE(config_updated_); + EXPECT_EQ(kTl1Flags, flags); + + // Next frame should have boosted QP. + SkipUntilTl(1); + EXPECT_TRUE(config_updated_); + EXPECT_LT(cfg_.rc_max_quantizer, static_cast<unsigned int>(kDefaultQp)); + layers_->OnEncodeDone(0, timestamp_, frame_size_, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + timestamp_ += kTimestampDelta5Fps; + + // ...and back to normal. 
+ SkipUntilTl(1); + EXPECT_EQ(cfg_.rc_max_quantizer, static_cast<unsigned int>(kDefaultQp)); + layers_->OnEncodeDone(0, timestamp_, frame_size_, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + timestamp_ += kTimestampDelta5Fps; +} + +TEST_F(ScreenshareLayerTest, RespectsMaxIntervalBetweenFrames) { + const int kLowBitrateKbps = 50; + const int kLargeFrameSizeBytes = 100000; + const uint32_t kStartTimestamp = 1234; + + const std::vector<uint32_t> layer_rates = {kLowBitrateKbps * 1000}; + layers_->OnRatesUpdated(0, layer_rates, kFrameRate); + cfg_ = layers_->UpdateConfiguration(0); + + EXPECT_EQ(kTl0Flags, + LibvpxVp8Encoder::EncodeFlags(NextFrameConfig(0, kStartTimestamp))); + layers_->OnEncodeDone(0, kStartTimestamp, kLargeFrameSizeBytes, false, + kDefaultQp, IgnoredCodecSpecificInfo()); + + const uint32_t kTwoSecondsLater = + kStartTimestamp + (ScreenshareLayers::kMaxFrameIntervalMs * 90); + + // Sanity check, repayment time should exceed kMaxFrameIntervalMs. + ASSERT_GT(kStartTimestamp + 90 * (kLargeFrameSizeBytes * 8) / kLowBitrateKbps, + kStartTimestamp + (ScreenshareLayers::kMaxFrameIntervalMs * 90)); + + // Expect drop one frame interval before the two second timeout. If we try + // any later, the frame will be dropped anyway by the frame rate throttling + // logic. + EXPECT_TRUE( + NextFrameConfig(0, kTwoSecondsLater - kTimestampDelta5Fps).drop_frame); + + // More than two seconds has passed since last frame, one should be emitted + // even if bitrate target is then exceeded. 
+ EXPECT_EQ(kTl0Flags, LibvpxVp8Encoder::EncodeFlags( + NextFrameConfig(0, kTwoSecondsLater + 90))); +} + +TEST_F(ScreenshareLayerTest, UpdatesHistograms) { + metrics::Reset(); + bool trigger_drop = false; + bool dropped_frame = false; + bool overshoot = false; + const int kTl0Qp = 35; + const int kTl1Qp = 30; + for (int64_t timestamp = 0; + timestamp < kTimestampDelta5Fps * 5 * metrics::kMinRunTimeInSeconds; + timestamp += kTimestampDelta5Fps) { + tl_config_ = NextFrameConfig(0, timestamp); + if (tl_config_.drop_frame) { + dropped_frame = true; + continue; + } + int flags = LibvpxVp8Encoder::EncodeFlags(tl_config_); + if (flags != -1) + cfg_ = layers_->UpdateConfiguration(0); + + if (timestamp >= kTimestampDelta5Fps * 5 && !overshoot && flags != -1) { + // Simulate one overshoot. + layers_->OnEncodeDone(0, timestamp, 0, false, 0, nullptr); + overshoot = true; + } + + if (flags == kTl0Flags) { + if (timestamp >= kTimestampDelta5Fps * 20 && !trigger_drop) { + // Simulate a too large frame, to cause frame drop. + layers_->OnEncodeDone(0, timestamp, frame_size_ * 10, false, kTl0Qp, + IgnoredCodecSpecificInfo()); + trigger_drop = true; + } else { + layers_->OnEncodeDone(0, timestamp, frame_size_, false, kTl0Qp, + IgnoredCodecSpecificInfo()); + } + } else if (flags == kTl1Flags || flags == kTl1SyncFlags) { + layers_->OnEncodeDone(0, timestamp, frame_size_, false, kTl1Qp, + IgnoredCodecSpecificInfo()); + } else if (flags == -1) { + dropped_frame = true; + } else { + RTC_DCHECK_NOTREACHED() << "Unexpected flags"; + } + clock_.AdvanceTime(TimeDelta::Millis(1000 / 5)); + } + + EXPECT_TRUE(overshoot); + EXPECT_TRUE(dropped_frame); + + layers_.reset(); // Histograms are reported on destruction. 
+ + EXPECT_METRIC_EQ( + 1, metrics::NumSamples("WebRTC.Video.Screenshare.Layer0.FrameRate")); + EXPECT_METRIC_EQ( + 1, metrics::NumSamples("WebRTC.Video.Screenshare.Layer1.FrameRate")); + EXPECT_METRIC_EQ( + 1, metrics::NumSamples("WebRTC.Video.Screenshare.FramesPerDrop")); + EXPECT_METRIC_EQ( + 1, metrics::NumSamples("WebRTC.Video.Screenshare.FramesPerOvershoot")); + EXPECT_METRIC_EQ(1, + metrics::NumSamples("WebRTC.Video.Screenshare.Layer0.Qp")); + EXPECT_METRIC_EQ(1, + metrics::NumSamples("WebRTC.Video.Screenshare.Layer1.Qp")); + EXPECT_METRIC_EQ( + 1, metrics::NumSamples("WebRTC.Video.Screenshare.Layer0.TargetBitrate")); + EXPECT_METRIC_EQ( + 1, metrics::NumSamples("WebRTC.Video.Screenshare.Layer1.TargetBitrate")); + + EXPECT_METRIC_GT( + metrics::MinSample("WebRTC.Video.Screenshare.Layer0.FrameRate"), 1); + EXPECT_METRIC_GT( + metrics::MinSample("WebRTC.Video.Screenshare.Layer1.FrameRate"), 1); + EXPECT_METRIC_GT(metrics::MinSample("WebRTC.Video.Screenshare.FramesPerDrop"), + 1); + EXPECT_METRIC_GT( + metrics::MinSample("WebRTC.Video.Screenshare.FramesPerOvershoot"), 1); + EXPECT_METRIC_EQ( + 1, metrics::NumEvents("WebRTC.Video.Screenshare.Layer0.Qp", kTl0Qp)); + EXPECT_METRIC_EQ( + 1, metrics::NumEvents("WebRTC.Video.Screenshare.Layer1.Qp", kTl1Qp)); + EXPECT_METRIC_EQ( + 1, metrics::NumEvents("WebRTC.Video.Screenshare.Layer0.TargetBitrate", + kDefaultTl0BitrateKbps)); + EXPECT_METRIC_EQ( + 1, metrics::NumEvents("WebRTC.Video.Screenshare.Layer1.TargetBitrate", + kDefaultTl1BitrateKbps)); +} + +TEST_F(ScreenshareLayerTest, RespectsConfiguredFramerate) { + int64_t kTestSpanMs = 2000; + int64_t kFrameIntervalsMs = 1000 / kFrameRate; + + uint32_t timestamp = 1234; + int num_input_frames = 0; + int num_discarded_frames = 0; + + // Send at regular rate - no drops expected. 
+ for (int64_t i = 0; i < kTestSpanMs; i += kFrameIntervalsMs) { + if (NextFrameConfig(0, timestamp).drop_frame) { + ++num_discarded_frames; + } else { + size_t frame_size_bytes = kDefaultTl0BitrateKbps * kFrameIntervalsMs / 8; + layers_->OnEncodeDone(0, timestamp, frame_size_bytes, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + } + timestamp += kFrameIntervalsMs * 90; + clock_.AdvanceTime(TimeDelta::Millis(kFrameIntervalsMs)); + + ++num_input_frames; + } + EXPECT_EQ(0, num_discarded_frames); + + // Send at twice the configured rate - drop every other frame. + num_input_frames = 0; + num_discarded_frames = 0; + for (int64_t i = 0; i < kTestSpanMs; i += kFrameIntervalsMs / 2) { + if (NextFrameConfig(0, timestamp).drop_frame) { + ++num_discarded_frames; + } else { + size_t frame_size_bytes = kDefaultTl0BitrateKbps * kFrameIntervalsMs / 8; + layers_->OnEncodeDone(0, timestamp, frame_size_bytes, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + } + timestamp += kFrameIntervalsMs * 90 / 2; + clock_.AdvanceTime(TimeDelta::Millis(kFrameIntervalsMs)); + ++num_input_frames; + } + + // Allow for some rounding errors in the measurements. + EXPECT_NEAR(num_discarded_frames, num_input_frames / 2, 2); +} + +TEST_F(ScreenshareLayerTest, 2LayersSyncAtOvershootDrop) { + // Run grace period so we have existing frames in both TL0 and Tl1. + EXPECT_TRUE(RunGracePeriod()); + + // Move ahead until we have a sync frame in TL1. + EXPECT_EQ(kTl1SyncFlags, SkipUntilTlAndSync(1, true)); + ASSERT_TRUE(tl_config_.layer_sync); + + // Simulate overshoot of this frame. 
+ layers_->OnEncodeDone(0, timestamp_, 0, false, 0, nullptr); + + cfg_ = layers_->UpdateConfiguration(0); + EXPECT_EQ(kTl1SyncFlags, LibvpxVp8Encoder::EncodeFlags(tl_config_)); + + CodecSpecificInfo new_info; + layers_->OnEncodeDone(0, timestamp_, frame_size_, false, kDefaultQp, + &new_info); + EXPECT_TRUE(new_info.codecSpecific.VP8.layerSync); +} + +TEST_F(ScreenshareLayerTest, DropOnTooShortFrameInterval) { + // Run grace period so we have existing frames in both TL0 and Tl1. + EXPECT_TRUE(RunGracePeriod()); + + // Add a large gap, so there's plenty of room in the rate tracker. + timestamp_ += kTimestampDelta5Fps * 3; + EXPECT_FALSE(NextFrameConfig(0, timestamp_).drop_frame); + layers_->OnEncodeDone(0, timestamp_, frame_size_, false, kDefaultQp, + IgnoredCodecSpecificInfo()); + + // Frame interval below 85% of desired time is not allowed, try inserting + // frame just before this limit. + const int64_t kMinFrameInterval = (kTimestampDelta5Fps * 85) / 100; + timestamp_ += kMinFrameInterval - 90; + EXPECT_TRUE(NextFrameConfig(0, timestamp_).drop_frame); + + // Try again at the limit, now it should pass. + timestamp_ += 90; + EXPECT_FALSE(NextFrameConfig(0, timestamp_).drop_frame); +} + +TEST_F(ScreenshareLayerTest, AdjustsBitrateWhenDroppingFrames) { + const uint32_t kTimestampDelta10Fps = kTimestampDelta5Fps / 2; + const int kNumFrames = 30; + ASSERT_TRUE(cfg_.rc_target_bitrate.has_value()); + const uint32_t default_bitrate = cfg_.rc_target_bitrate.value(); + layers_->OnRatesUpdated(0, kDefault2TlBitratesBps, 10); + + int num_dropped_frames = 0; + for (int i = 0; i < kNumFrames; ++i) { + if (EncodeFrame(false) == -1) + ++num_dropped_frames; + timestamp_ += kTimestampDelta10Fps; + } + cfg_ = layers_->UpdateConfiguration(0); + + EXPECT_EQ(num_dropped_frames, kNumFrames / 2); + EXPECT_EQ(cfg_.rc_target_bitrate, default_bitrate * 2); +} + +TEST_F(ScreenshareLayerTest, UpdatesConfigurationAfterRateChange) { + // Set initial rate again, no need to update configuration. 
+ layers_->OnRatesUpdated(0, kDefault2TlBitratesBps, kFrameRate); + cfg_ = layers_->UpdateConfiguration(0); + + // Rate changed, now update config. + std::vector<uint32_t> bitrates = kDefault2TlBitratesBps; + bitrates[1] -= 100000; + layers_->OnRatesUpdated(0, bitrates, 5); + cfg_ = layers_->UpdateConfiguration(0); + + // Changed rate, but then set changed rate again before trying to update + // configuration, update should still apply. + bitrates[1] -= 100000; + layers_->OnRatesUpdated(0, bitrates, 5); + layers_->OnRatesUpdated(0, bitrates, 5); + cfg_ = layers_->UpdateConfiguration(0); +} + +TEST_F(ScreenshareLayerTest, MaxQpRestoredAfterDoubleDrop) { + // Run grace period so we have existing frames in both TL0 and Tl1. + EXPECT_TRUE(RunGracePeriod()); + + // Move ahead until we have a sync frame in TL1. + EXPECT_EQ(kTl1SyncFlags, SkipUntilTlAndSync(1, true)); + ASSERT_TRUE(tl_config_.layer_sync); + + // Simulate overshoot of this frame. + layers_->OnEncodeDone(0, timestamp_, 0, false, -1, nullptr); + + // Simulate re-encoded frame. + layers_->OnEncodeDone(0, timestamp_, 1, false, max_qp_, + IgnoredCodecSpecificInfo()); + + // Next frame, expect boosted quality. + // Slightly alter bitrate between each frame. + std::vector<uint32_t> kDefault2TlBitratesBpsAlt = kDefault2TlBitratesBps; + kDefault2TlBitratesBpsAlt[1] += 4000; + layers_->OnRatesUpdated(0, kDefault2TlBitratesBpsAlt, kFrameRate); + EXPECT_EQ(kTl1Flags, SkipUntilTlAndSync(1, false)); + EXPECT_TRUE(config_updated_); + EXPECT_LT(cfg_.rc_max_quantizer, max_qp_); + ASSERT_TRUE(cfg_.rc_max_quantizer.has_value()); + const uint32_t adjusted_qp = cfg_.rc_max_quantizer.value(); + + // Simulate overshoot of this frame. + layers_->OnEncodeDone(0, timestamp_, 0, false, -1, nullptr); + + // Simulate re-encoded frame. + layers_->OnEncodeDone(0, timestamp_, frame_size_, false, max_qp_, + IgnoredCodecSpecificInfo()); + + // A third frame, expect boosted quality. 
+ layers_->OnRatesUpdated(0, kDefault2TlBitratesBps, kFrameRate); + EXPECT_EQ(kTl1Flags, SkipUntilTlAndSync(1, false)); + EXPECT_TRUE(config_updated_); + EXPECT_LT(cfg_.rc_max_quantizer, max_qp_); + EXPECT_EQ(adjusted_qp, cfg_.rc_max_quantizer); + + // Frame encoded. + layers_->OnEncodeDone(0, timestamp_, frame_size_, false, max_qp_, + IgnoredCodecSpecificInfo()); + + // A fourth frame, max qp should be restored. + layers_->OnRatesUpdated(0, kDefault2TlBitratesBpsAlt, kFrameRate); + EXPECT_EQ(kTl1Flags, SkipUntilTlAndSync(1, false)); + EXPECT_EQ(cfg_.rc_max_quantizer, max_qp_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp8/temporal_layers.h b/third_party/libwebrtc/modules/video_coding/codecs/vp8/temporal_layers.h new file mode 100644 index 0000000000..9576fb27be --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/temporal_layers.h @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_VP8_TEMPORAL_LAYERS_H_ +#define MODULES_VIDEO_CODING_CODECS_VP8_TEMPORAL_LAYERS_H_ + +// TODO(webrtc:9012) Remove this file when downstream projects have updated. 
+#include "api/video_codecs/vp8_temporal_layers.h" + +#endif // MODULES_VIDEO_CODING_CODECS_VP8_TEMPORAL_LAYERS_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp8/temporal_layers_checker.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp8/temporal_layers_checker.cc new file mode 100644 index 0000000000..5aebd2c526 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/temporal_layers_checker.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/codecs/vp8/include/temporal_layers_checker.h" + +#include <memory> + +#include "modules/video_coding/codecs/interface/common_constants.h" +#include "modules/video_coding/codecs/vp8/default_temporal_layers.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +std::unique_ptr<TemporalLayersChecker> +TemporalLayersChecker::CreateTemporalLayersChecker(Vp8TemporalLayersType type, + int num_temporal_layers) { + switch (type) { + case Vp8TemporalLayersType::kFixedPattern: + return std::make_unique<DefaultTemporalLayersChecker>( + num_temporal_layers); + case Vp8TemporalLayersType::kBitrateDynamic: + // Conference mode temporal layering for screen content in base stream. 
+ return std::make_unique<TemporalLayersChecker>(num_temporal_layers); + } + RTC_CHECK_NOTREACHED(); +} + +TemporalLayersChecker::TemporalLayersChecker(int num_temporal_layers) + : num_temporal_layers_(num_temporal_layers), + sequence_number_(0), + last_sync_sequence_number_(0), + last_tl0_sequence_number_(0) {} + +bool TemporalLayersChecker::CheckAndUpdateBufferState( + BufferState* state, + bool* need_sync, + bool frame_is_keyframe, + uint8_t temporal_layer, + Vp8FrameConfig::BufferFlags flags, + uint32_t sequence_number, + uint32_t* lowest_sequence_referenced) { + if (flags & Vp8FrameConfig::BufferFlags::kReference) { + if (state->temporal_layer > 0 && !state->is_keyframe) { + *need_sync = false; + } + if (!state->is_keyframe && !frame_is_keyframe && + state->sequence_number < *lowest_sequence_referenced) { + *lowest_sequence_referenced = state->sequence_number; + } + if (!frame_is_keyframe && !state->is_keyframe && + state->temporal_layer > temporal_layer) { + RTC_LOG(LS_ERROR) << "Frame is referencing higher temporal layer."; + return false; + } + } + if ((flags & Vp8FrameConfig::BufferFlags::kUpdate)) { + state->temporal_layer = temporal_layer; + state->sequence_number = sequence_number; + state->is_keyframe = frame_is_keyframe; + } + if (frame_is_keyframe) + state->is_keyframe = true; + return true; +} + +bool TemporalLayersChecker::CheckTemporalConfig( + bool frame_is_keyframe, + const Vp8FrameConfig& frame_config) { + if (frame_config.drop_frame || + frame_config.packetizer_temporal_idx == kNoTemporalIdx) { + return true; + } + ++sequence_number_; + if (frame_config.packetizer_temporal_idx >= num_temporal_layers_ || + (frame_config.packetizer_temporal_idx == kNoTemporalIdx && + num_temporal_layers_ > 1)) { + RTC_LOG(LS_ERROR) << "Incorrect temporal layer set for frame: " + << frame_config.packetizer_temporal_idx + << " num_temporal_layers: " << num_temporal_layers_; + return false; + } + + uint32_t lowest_sequence_referenced = sequence_number_; + bool 
need_sync = frame_config.packetizer_temporal_idx > 0 && + frame_config.packetizer_temporal_idx != kNoTemporalIdx; + + if (!CheckAndUpdateBufferState( + &last_, &need_sync, frame_is_keyframe, + frame_config.packetizer_temporal_idx, frame_config.last_buffer_flags, + sequence_number_, &lowest_sequence_referenced)) { + RTC_LOG(LS_ERROR) << "Error in the Last buffer"; + return false; + } + if (!CheckAndUpdateBufferState(&golden_, &need_sync, frame_is_keyframe, + frame_config.packetizer_temporal_idx, + frame_config.golden_buffer_flags, + sequence_number_, + &lowest_sequence_referenced)) { + RTC_LOG(LS_ERROR) << "Error in the Golden buffer"; + return false; + } + if (!CheckAndUpdateBufferState( + &arf_, &need_sync, frame_is_keyframe, + frame_config.packetizer_temporal_idx, frame_config.arf_buffer_flags, + sequence_number_, &lowest_sequence_referenced)) { + RTC_LOG(LS_ERROR) << "Error in the Arf buffer"; + return false; + } + + if (lowest_sequence_referenced < last_sync_sequence_number_ && + !frame_is_keyframe) { + RTC_LOG(LS_ERROR) << "Reference past the last sync frame. Referenced " + << lowest_sequence_referenced << ", but sync was at " + << last_sync_sequence_number_; + return false; + } + + if (frame_config.packetizer_temporal_idx == 0) { + last_tl0_sequence_number_ = sequence_number_; + } + + if (frame_is_keyframe) { + last_sync_sequence_number_ = sequence_number_; + } + + if (need_sync) { + last_sync_sequence_number_ = last_tl0_sequence_number_; + } + + // Ignore sync flag on key-frames as it really doesn't matter. + if (need_sync != frame_config.layer_sync && !frame_is_keyframe) { + RTC_LOG(LS_ERROR) << "Sync bit is set incorrectly on a frame. 
Expected: " + << need_sync << " Actual: " << frame_config.layer_sync; + return false; + } + return true; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc new file mode 100644 index 0000000000..8cf761742e --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc @@ -0,0 +1,913 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdio.h> + +#include <memory> + +#include "api/test/create_frame_generator.h" +#include "api/test/frame_generator_interface.h" +#include "api/test/mock_video_decoder.h" +#include "api/test/mock_video_encoder.h" +#include "api/video_codecs/video_encoder.h" +#include "api/video_codecs/vp8_temporal_layers.h" +#include "common_video/libyuv/include/webrtc_libyuv.h" +#include "common_video/test/utilities.h" +#include "modules/video_coding/codecs/interface/mock_libvpx_interface.h" +#include "modules/video_coding/codecs/test/video_codec_unittest.h" +#include "modules/video_coding/codecs/vp8/include/vp8.h" +#include "modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h" +#include "modules/video_coding/utility/vp8_header_parser.h" +#include "rtc_base/time_utils.h" +#include "test/field_trial.h" +#include "test/mappable_native_buffer.h" +#include "test/video_codec_settings.h" + +namespace webrtc { + +using ::testing::_; +using ::testing::AllOf; +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; +using ::testing::Field; +using ::testing::Invoke; +using ::testing::NiceMock; 
+using ::testing::Return; +using EncoderInfo = webrtc::VideoEncoder::EncoderInfo; +using FramerateFractions = + absl::InlinedVector<uint8_t, webrtc::kMaxTemporalStreams>; + +namespace { +constexpr uint32_t kLegacyScreenshareTl0BitrateKbps = 200; +constexpr uint32_t kLegacyScreenshareTl1BitrateKbps = 1000; +constexpr uint32_t kInitialTimestampRtp = 123; +constexpr int64_t kTestNtpTimeMs = 456; +constexpr int64_t kInitialTimestampMs = 789; +constexpr int kNumCores = 1; +constexpr size_t kMaxPayloadSize = 1440; +constexpr int kWidth = 172; +constexpr int kHeight = 144; +constexpr float kFramerateFps = 30; + +const VideoEncoder::Capabilities kCapabilities(false); +const VideoEncoder::Settings kSettings(kCapabilities, + kNumCores, + kMaxPayloadSize); +} // namespace + +class TestVp8Impl : public VideoCodecUnitTest { + protected: + std::unique_ptr<VideoEncoder> CreateEncoder() override { + return VP8Encoder::Create(); + } + + std::unique_ptr<VideoDecoder> CreateDecoder() override { + return VP8Decoder::Create(); + } + + void ModifyCodecSettings(VideoCodec* codec_settings) override { + webrtc::test::CodecSettings(kVideoCodecVP8, codec_settings); + codec_settings->width = kWidth; + codec_settings->height = kHeight; + codec_settings->SetVideoEncoderComplexity( + VideoCodecComplexity::kComplexityNormal); + } + + void EncodeAndWaitForFrame(const VideoFrame& input_frame, + EncodedImage* encoded_frame, + CodecSpecificInfo* codec_specific_info, + bool keyframe = false) { + std::vector<VideoFrameType> frame_types; + if (keyframe) { + frame_types.emplace_back(VideoFrameType::kVideoFrameKey); + } else { + frame_types.emplace_back(VideoFrameType::kVideoFrameDelta); + } + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(input_frame, &frame_types)); + ASSERT_TRUE(WaitForEncodedFrame(encoded_frame, codec_specific_info)); + VerifyQpParser(*encoded_frame); + EXPECT_EQ(kVideoCodecVP8, codec_specific_info->codecType); + EXPECT_EQ(0, encoded_frame->SpatialIndex()); + } + + void 
EncodeAndExpectFrameWith(const VideoFrame& input_frame, + uint8_t temporal_idx, + bool keyframe = false) { + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + EncodeAndWaitForFrame(input_frame, &encoded_frame, &codec_specific_info, + keyframe); + EXPECT_EQ(temporal_idx, codec_specific_info.codecSpecific.VP8.temporalIdx); + } + + void VerifyQpParser(const EncodedImage& encoded_frame) const { + int qp; + EXPECT_GT(encoded_frame.size(), 0u); + ASSERT_TRUE(vp8::GetQp(encoded_frame.data(), encoded_frame.size(), &qp)); + EXPECT_EQ(encoded_frame.qp_, qp) << "Encoder QP != parsed bitstream QP."; + } +}; + +TEST_F(TestVp8Impl, ErrorResilienceDisabledForNoTemporalLayers) { + codec_settings_.simulcastStream[0].numberOfTemporalLayers = 1; + + auto* const vpx = new NiceMock<MockLibvpxInterface>(); + LibvpxVp8Encoder encoder((std::unique_ptr<LibvpxInterface>(vpx)), + VP8Encoder::Settings()); + EXPECT_CALL(*vpx, + codec_enc_init( + _, _, Field(&vpx_codec_enc_cfg_t::g_error_resilient, 0), _)); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder.InitEncode(&codec_settings_, kSettings)); +} + +TEST_F(TestVp8Impl, DefaultErrorResilienceEnabledForTemporalLayers) { + codec_settings_.simulcastStream[0].numberOfTemporalLayers = 2; + codec_settings_.VP8()->numberOfTemporalLayers = 2; + + auto* const vpx = new NiceMock<MockLibvpxInterface>(); + LibvpxVp8Encoder encoder((std::unique_ptr<LibvpxInterface>(vpx)), + VP8Encoder::Settings()); + EXPECT_CALL(*vpx, + codec_enc_init(_, _, + Field(&vpx_codec_enc_cfg_t::g_error_resilient, + VPX_ERROR_RESILIENT_DEFAULT), + _)); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder.InitEncode(&codec_settings_, kSettings)); +} + +TEST_F(TestVp8Impl, + PartitionErrorResilienceEnabledForTemporalLayersWithFieldTrial) { + test::ScopedFieldTrials field_trials( + "WebRTC-VP8-ForcePartitionResilience/Enabled/"); + codec_settings_.simulcastStream[0].numberOfTemporalLayers = 2; + codec_settings_.VP8()->numberOfTemporalLayers = 2; + + auto* const vpx = new 
NiceMock<MockLibvpxInterface>(); + LibvpxVp8Encoder encoder((std::unique_ptr<LibvpxInterface>(vpx)), + VP8Encoder::Settings()); + EXPECT_CALL(*vpx, + codec_enc_init(_, _, + Field(&vpx_codec_enc_cfg_t::g_error_resilient, + VPX_ERROR_RESILIENT_PARTITIONS), + _)); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder.InitEncode(&codec_settings_, kSettings)); +} + +TEST_F(TestVp8Impl, SetRates) { + codec_settings_.SetFrameDropEnabled(true); + auto* const vpx = new NiceMock<MockLibvpxInterface>(); + LibvpxVp8Encoder encoder((std::unique_ptr<LibvpxInterface>(vpx)), + VP8Encoder::Settings()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder.InitEncode(&codec_settings_, + VideoEncoder::Settings(kCapabilities, 1, 1000))); + + const uint32_t kBitrateBps = 300000; + VideoBitrateAllocation bitrate_allocation; + bitrate_allocation.SetBitrate(0, 0, kBitrateBps); + EXPECT_CALL( + *vpx, + codec_enc_config_set( + _, AllOf(Field(&vpx_codec_enc_cfg_t::rc_target_bitrate, + kBitrateBps / 1000), + Field(&vpx_codec_enc_cfg_t::rc_undershoot_pct, 100u), + Field(&vpx_codec_enc_cfg_t::rc_overshoot_pct, 15u), + Field(&vpx_codec_enc_cfg_t::rc_buf_sz, 1000u), + Field(&vpx_codec_enc_cfg_t::rc_buf_optimal_sz, 600u), + Field(&vpx_codec_enc_cfg_t::rc_dropframe_thresh, 30u)))) + .WillOnce(Return(VPX_CODEC_OK)); + encoder.SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, static_cast<double>(codec_settings_.maxFramerate))); +} + +TEST_F(TestVp8Impl, EncodeFrameAndRelease) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + EncodeAndWaitForFrame(NextInputFrame(), &encoded_frame, &codec_specific_info); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_UNINITIALIZED, + encoder_->Encode(NextInputFrame(), nullptr)); +} + +TEST_F(TestVp8Impl, EncodeNv12FrameSimulcast) { + 
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + input_frame_generator_ = test::CreateSquareFrameGenerator( + kWidth, kHeight, test::FrameGeneratorInterface::OutputType::kNV12, + absl::nullopt); + EncodeAndWaitForFrame(NextInputFrame(), &encoded_frame, &codec_specific_info); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_UNINITIALIZED, + encoder_->Encode(NextInputFrame(), nullptr)); +} + +TEST_F(TestVp8Impl, EncodeI420FrameAfterNv12Frame) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + input_frame_generator_ = test::CreateSquareFrameGenerator( + kWidth, kHeight, test::FrameGeneratorInterface::OutputType::kNV12, + absl::nullopt); + EncodeAndWaitForFrame(NextInputFrame(), &encoded_frame, &codec_specific_info); + input_frame_generator_ = test::CreateSquareFrameGenerator( + kWidth, kHeight, test::FrameGeneratorInterface::OutputType::kI420, + absl::nullopt); + EncodeAndWaitForFrame(NextInputFrame(), &encoded_frame, &codec_specific_info); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_UNINITIALIZED, + encoder_->Encode(NextInputFrame(), nullptr)); +} + +TEST_F(TestVp8Impl, Configure) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Release()); + EXPECT_TRUE(decoder_->Configure({})); +} + +TEST_F(TestVp8Impl, OnEncodedImageReportsInfo) { + VideoFrame input_frame = NextInputFrame(); + input_frame.set_timestamp(kInitialTimestampRtp); + input_frame.set_timestamp_us(kInitialTimestampMs * + rtc::kNumMicrosecsPerMillisec); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + EncodeAndWaitForFrame(input_frame, &encoded_frame, 
&codec_specific_info); + + EXPECT_EQ(kInitialTimestampRtp, encoded_frame.Timestamp()); + EXPECT_EQ(kWidth, static_cast<int>(encoded_frame._encodedWidth)); + EXPECT_EQ(kHeight, static_cast<int>(encoded_frame._encodedHeight)); +} + +TEST_F(TestVp8Impl, + EncoderFillsResolutionInCodecAgnosticSectionOfCodecSpecificInfo) { + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + EncodeAndWaitForFrame(NextInputFrame(), &encoded_frame, &codec_specific_info); + + ASSERT_TRUE(codec_specific_info.template_structure); + EXPECT_THAT(codec_specific_info.template_structure->resolutions, + ElementsAre(RenderResolution(kWidth, kHeight))); +} + +TEST_F(TestVp8Impl, DecodedQpEqualsEncodedQp) { + VideoFrame input_frame = NextInputFrame(); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + EncodeAndWaitForFrame(input_frame, &encoded_frame, &codec_specific_info); + + // First frame should be a key frame. + encoded_frame._frameType = VideoFrameType::kVideoFrameKey; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, -1)); + std::unique_ptr<VideoFrame> decoded_frame; + absl::optional<uint8_t> decoded_qp; + ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp)); + ASSERT_TRUE(decoded_frame); + ASSERT_TRUE(decoded_qp); + EXPECT_GT(I420PSNR(&input_frame, decoded_frame.get()), 36); + EXPECT_EQ(encoded_frame.qp_, *decoded_qp); +} + +TEST_F(TestVp8Impl, ChecksSimulcastSettings) { + codec_settings_.numberOfSimulcastStreams = 2; + // Resolutions are not in ascending order, temporal layers do not match. 
+ codec_settings_.simulcastStream[0] = {.width = kWidth, + .height = kHeight, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 2, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + codec_settings_.simulcastStream[1] = {.width = kWidth / 2, + .height = kHeight / 2, + .maxFramerate = 30, + .numberOfTemporalLayers = 3, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED, + encoder_->InitEncode(&codec_settings_, kSettings)); + codec_settings_.numberOfSimulcastStreams = 3; + // Resolutions are not in ascending order. + codec_settings_.simulcastStream[0] = {.width = kWidth / 2, + .height = kHeight / 2, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + codec_settings_.simulcastStream[1] = {.width = kWidth / 2 - 1, + .height = kHeight / 2 - 1, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + codec_settings_.simulcastStream[2] = {.width = kWidth, + .height = kHeight, + .maxFramerate = 30, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED, + encoder_->InitEncode(&codec_settings_, kSettings)); + // Resolutions are not in ascending order. 
+ codec_settings_.simulcastStream[0] = {.width = kWidth, + .height = kHeight, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + codec_settings_.simulcastStream[1] = {.width = kWidth, + .height = kHeight, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + codec_settings_.simulcastStream[2] = {.width = kWidth - 1, + .height = kHeight - 1, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED, + encoder_->InitEncode(&codec_settings_, kSettings)); + // Temporal layers do not match. + codec_settings_.simulcastStream[0] = {.width = kWidth / 4, + .height = kHeight / 4, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + codec_settings_.simulcastStream[1] = {.width = kWidth / 2, + .height = kHeight / 2, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 2, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + codec_settings_.simulcastStream[2] = {.width = kWidth, + .height = kHeight, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 3, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED, + encoder_->InitEncode(&codec_settings_, kSettings)); + // Resolutions do not match codec config. 
+ codec_settings_.simulcastStream[0] = {.width = kWidth / 4 + 1, + .height = kHeight / 4 + 1, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + codec_settings_.simulcastStream[1] = {.width = kWidth / 2 + 2, + .height = kHeight / 2 + 2, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + codec_settings_.simulcastStream[2] = {.width = kWidth + 4, + .height = kHeight + 4, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED, + encoder_->InitEncode(&codec_settings_, kSettings)); + // Everything fine: scaling by 2, top resolution matches video, temporal + // settings are the same for all layers. + codec_settings_.simulcastStream[0] = {.width = kWidth / 4, + .height = kHeight / 4, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + codec_settings_.simulcastStream[1] = {.width = kWidth / 2, + .height = kHeight / 2, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + codec_settings_.simulcastStream[2] = {.width = kWidth, + .height = kHeight, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + // Everything fine: custom scaling, top resolution matches video, temporal + // settings are the same for all layers. 
+ codec_settings_.simulcastStream[0] = {.width = kWidth / 4, + .height = kHeight / 4, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + codec_settings_.simulcastStream[1] = {.width = kWidth, + .height = kHeight, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + codec_settings_.simulcastStream[2] = {.width = kWidth, + .height = kHeight, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80}; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); +} + +#if defined(WEBRTC_ANDROID) +#define MAYBE_AlignedStrideEncodeDecode DISABLED_AlignedStrideEncodeDecode +#else +#define MAYBE_AlignedStrideEncodeDecode AlignedStrideEncodeDecode +#endif +TEST_F(TestVp8Impl, MAYBE_AlignedStrideEncodeDecode) { + VideoFrame input_frame = NextInputFrame(); + input_frame.set_timestamp(kInitialTimestampRtp); + input_frame.set_timestamp_us(kInitialTimestampMs * + rtc::kNumMicrosecsPerMillisec); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + EncodeAndWaitForFrame(input_frame, &encoded_frame, &codec_specific_info); + + // First frame should be a key frame. + encoded_frame._frameType = VideoFrameType::kVideoFrameKey; + encoded_frame.ntp_time_ms_ = kTestNtpTimeMs; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, -1)); + + std::unique_ptr<VideoFrame> decoded_frame; + absl::optional<uint8_t> decoded_qp; + ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp)); + ASSERT_TRUE(decoded_frame); + // Compute PSNR on all planes (faster than SSIM). 
+ EXPECT_GT(I420PSNR(&input_frame, decoded_frame.get()), 36); + EXPECT_EQ(kInitialTimestampRtp, decoded_frame->timestamp()); +} + +TEST_F(TestVp8Impl, EncoderWith2TemporalLayers) { + codec_settings_.VP8()->numberOfTemporalLayers = 2; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + // Temporal layer 0. + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + EncodeAndWaitForFrame(NextInputFrame(), &encoded_frame, &codec_specific_info); + + EXPECT_EQ(0, codec_specific_info.codecSpecific.VP8.temporalIdx); + // Temporal layer 1. + EncodeAndExpectFrameWith(NextInputFrame(), 1); + // Temporal layer 0. + EncodeAndExpectFrameWith(NextInputFrame(), 0); + // Temporal layer 1. + EncodeAndExpectFrameWith(NextInputFrame(), 1); +} + +TEST_F(TestVp8Impl, ScalingDisabledIfAutomaticResizeOff) { + codec_settings_.VP8()->automaticResizeOn = false; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + VideoEncoder::ScalingSettings settings = + encoder_->GetEncoderInfo().scaling_settings; + EXPECT_FALSE(settings.thresholds.has_value()); +} + +TEST_F(TestVp8Impl, ScalingEnabledIfAutomaticResizeOn) { + codec_settings_.SetFrameDropEnabled(true); + codec_settings_.VP8()->automaticResizeOn = true; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + VideoEncoder::ScalingSettings settings = + encoder_->GetEncoderInfo().scaling_settings; + EXPECT_TRUE(settings.thresholds.has_value()); + EXPECT_EQ(kDefaultMinPixelsPerFrame, settings.min_pixels_per_frame); +} + +TEST_F(TestVp8Impl, DontDropKeyframes) { + // Set very high resolution to trigger overuse more easily. + const int kScreenWidth = 1920; + const int kScreenHeight = 1080; + + codec_settings_.width = kScreenWidth; + codec_settings_.height = kScreenHeight; + + // Screensharing has the internal frame dropper off, and instead per frame + // asks ScreenshareLayers to decide if it should be dropped or not. 
+ codec_settings_.SetFrameDropEnabled(false); + codec_settings_.mode = VideoCodecMode::kScreensharing; + // ScreenshareLayers triggers on 2 temporal layers and 1000kbps max bitrate. + codec_settings_.VP8()->numberOfTemporalLayers = 2; + codec_settings_.maxBitrate = 1000; + + // Reset the frame generator with large number of squares, leading to lots of + // details and high probability of overshoot. + input_frame_generator_ = test::CreateSquareFrameGenerator( + codec_settings_.width, codec_settings_.height, + test::FrameGeneratorInterface::OutputType::kI420, + /* num_squares = */ absl::optional<int>(300)); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + VideoBitrateAllocation bitrate_allocation; + // Bitrate only enough for TL0. + bitrate_allocation.SetBitrate(0, 0, 200000); + encoder_->SetRates( + VideoEncoder::RateControlParameters(bitrate_allocation, 5.0)); + + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + EncodeAndWaitForFrame(NextInputFrame(), &encoded_frame, &codec_specific_info, + true); + EncodeAndExpectFrameWith(NextInputFrame(), 0, true); + EncodeAndExpectFrameWith(NextInputFrame(), 0, true); + EncodeAndExpectFrameWith(NextInputFrame(), 0, true); +} + +TEST_F(TestVp8Impl, KeepsTimestampOnReencode) { + auto* const vpx = new NiceMock<MockLibvpxInterface>(); + LibvpxVp8Encoder encoder((std::unique_ptr<LibvpxInterface>(vpx)), + VP8Encoder::Settings()); + + // Settings needed to trigger ScreenshareLayers usage, which is required for + // overshoot-drop-reencode logic. 
+ codec_settings_.maxBitrate = 1000; + codec_settings_.mode = VideoCodecMode::kScreensharing; + codec_settings_.VP8()->numberOfTemporalLayers = 2; + codec_settings_.legacy_conference_mode = true; + + EXPECT_CALL(*vpx, img_wrap(_, _, _, _, _, _)) + .WillOnce(Invoke([](vpx_image_t* img, vpx_img_fmt_t fmt, unsigned int d_w, + unsigned int d_h, unsigned int stride_align, + unsigned char* img_data) { + img->fmt = fmt; + img->d_w = d_w; + img->d_h = d_h; + img->img_data = img_data; + return img; + })); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder.InitEncode(&codec_settings_, + VideoEncoder::Settings(kCapabilities, 1, 1000))); + MockEncodedImageCallback callback; + encoder.RegisterEncodeCompleteCallback(&callback); + + // Simulate overshoot drop, re-encode: encode function will be called twice + // with the same parameters. codec_get_cx_data() will by default return no + // image data and be interpreted as drop. + EXPECT_CALL(*vpx, codec_encode(_, _, /* pts = */ 0, _, _, _)) + .Times(2) + .WillRepeatedly(Return(vpx_codec_err_t::VPX_CODEC_OK)); + + auto delta_frame = + std::vector<VideoFrameType>{VideoFrameType::kVideoFrameDelta}; + encoder.Encode(NextInputFrame(), &delta_frame); +} + +TEST(LibvpxVp8EncoderTest, GetEncoderInfoReturnsStaticInformation) { + auto* const vpx = new NiceMock<MockLibvpxInterface>(); + LibvpxVp8Encoder encoder((std::unique_ptr<LibvpxInterface>(vpx)), + VP8Encoder::Settings()); + + const auto info = encoder.GetEncoderInfo(); + + EXPECT_FALSE(info.supports_native_handle); + EXPECT_FALSE(info.is_hardware_accelerated); + EXPECT_TRUE(info.supports_simulcast); + EXPECT_EQ(info.implementation_name, "libvpx"); + EXPECT_EQ(info.requested_resolution_alignment, 1); + EXPECT_THAT(info.preferred_pixel_formats, + testing::UnorderedElementsAre(VideoFrameBuffer::Type::kNV12, + VideoFrameBuffer::Type::kI420)); +} + +TEST(LibvpxVp8EncoderTest, RequestedResolutionAlignmentFromFieldTrial) { + test::ScopedFieldTrials field_trials( + 
"WebRTC-VP8-GetEncoderInfoOverride/" + "requested_resolution_alignment:10/"); + + auto* const vpx = new NiceMock<MockLibvpxInterface>(); + LibvpxVp8Encoder encoder((std::unique_ptr<LibvpxInterface>(vpx)), + VP8Encoder::Settings()); + + EXPECT_EQ(encoder.GetEncoderInfo().requested_resolution_alignment, 10); + EXPECT_FALSE( + encoder.GetEncoderInfo().apply_alignment_to_all_simulcast_layers); + EXPECT_TRUE(encoder.GetEncoderInfo().resolution_bitrate_limits.empty()); +} + +TEST(LibvpxVp8EncoderTest, ResolutionBitrateLimitsFromFieldTrial) { + test::ScopedFieldTrials field_trials( + "WebRTC-VP8-GetEncoderInfoOverride/" + "frame_size_pixels:123|456|789," + "min_start_bitrate_bps:11000|22000|33000," + "min_bitrate_bps:44000|55000|66000," + "max_bitrate_bps:77000|88000|99000/"); + + auto* const vpx = new NiceMock<MockLibvpxInterface>(); + LibvpxVp8Encoder encoder((std::unique_ptr<LibvpxInterface>(vpx)), + VP8Encoder::Settings()); + + EXPECT_THAT( + encoder.GetEncoderInfo().resolution_bitrate_limits, + ::testing::ElementsAre( + VideoEncoder::ResolutionBitrateLimits{123, 11000, 44000, 77000}, + VideoEncoder::ResolutionBitrateLimits{456, 22000, 55000, 88000}, + VideoEncoder::ResolutionBitrateLimits{789, 33000, 66000, 99000})); +} + +TEST(LibvpxVp8EncoderTest, + GetEncoderInfoReturnsEmptyResolutionBitrateLimitsByDefault) { + auto* const vpx = new NiceMock<MockLibvpxInterface>(); + LibvpxVp8Encoder encoder((std::unique_ptr<LibvpxInterface>(vpx)), + VP8Encoder::Settings()); + + const auto info = encoder.GetEncoderInfo(); + + EXPECT_TRUE(info.resolution_bitrate_limits.empty()); +} + +TEST(LibvpxVp8EncoderTest, + GetEncoderInfoReturnsResolutionBitrateLimitsAsConfigured) { + std::vector<VideoEncoder::ResolutionBitrateLimits> resolution_bitrate_limits = + {VideoEncoder::ResolutionBitrateLimits(/*frame_size_pixels=*/640 * 360, + /*min_start_bitrate_bps=*/300, + /*min_bitrate_bps=*/100, + /*max_bitrate_bps=*/1000), + VideoEncoder::ResolutionBitrateLimits(320 * 180, 100, 30, 500)}; + 
VP8Encoder::Settings settings; + settings.resolution_bitrate_limits = resolution_bitrate_limits; + + auto* const vpx = new NiceMock<MockLibvpxInterface>(); + LibvpxVp8Encoder encoder((std::unique_ptr<LibvpxInterface>(vpx)), + std::move(settings)); + + const auto info = encoder.GetEncoderInfo(); + + EXPECT_EQ(info.resolution_bitrate_limits, resolution_bitrate_limits); +} + +TEST_F(TestVp8Impl, GetEncoderInfoFpsAllocationNoLayers) { + FramerateFractions expected_fps_allocation[kMaxSpatialLayers] = { + FramerateFractions(1, EncoderInfo::kMaxFramerateFraction)}; + + EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation, + ::testing::ElementsAreArray(expected_fps_allocation)); +} + +TEST_F(TestVp8Impl, GetEncoderInfoFpsAllocationTwoTemporalLayers) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + codec_settings_.numberOfSimulcastStreams = 1; + codec_settings_.simulcastStream[0].active = true; + codec_settings_.simulcastStream[0].targetBitrate = 100; + codec_settings_.simulcastStream[0].maxBitrate = 100; + codec_settings_.simulcastStream[0].numberOfTemporalLayers = 2; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + FramerateFractions expected_fps_allocation[kMaxSpatialLayers]; + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction / 2); + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction); + + EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation, + ::testing::ElementsAreArray(expected_fps_allocation)); +} + +TEST_F(TestVp8Impl, GetEncoderInfoFpsAllocationThreeTemporalLayers) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + codec_settings_.numberOfSimulcastStreams = 1; + codec_settings_.simulcastStream[0].active = true; + codec_settings_.simulcastStream[0].targetBitrate = 100; + codec_settings_.simulcastStream[0].maxBitrate = 100; + codec_settings_.simulcastStream[0].numberOfTemporalLayers = 3; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + 
encoder_->InitEncode(&codec_settings_, kSettings)); + + FramerateFractions expected_fps_allocation[kMaxSpatialLayers]; + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction / 4); + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction / 2); + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction); + + EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation, + ::testing::ElementsAreArray(expected_fps_allocation)); +} + +TEST_F(TestVp8Impl, GetEncoderInfoFpsAllocationScreenshareLayers) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + codec_settings_.numberOfSimulcastStreams = 1; + codec_settings_.mode = VideoCodecMode::kScreensharing; + codec_settings_.simulcastStream[0].active = true; + codec_settings_.simulcastStream[0].minBitrate = 30; + codec_settings_.simulcastStream[0].targetBitrate = + kLegacyScreenshareTl0BitrateKbps; + codec_settings_.simulcastStream[0].maxBitrate = + kLegacyScreenshareTl1BitrateKbps; + codec_settings_.simulcastStream[0].numberOfTemporalLayers = 2; + codec_settings_.legacy_conference_mode = true; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + // Expect empty vector, since this mode doesn't have a fixed framerate. + FramerateFractions expected_fps_allocation[kMaxSpatialLayers]; + EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation, + ::testing::ElementsAreArray(expected_fps_allocation)); +} + +TEST_F(TestVp8Impl, GetEncoderInfoFpsAllocationSimulcastVideo) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + + // Set up three simulcast streams with three temporal layers each. 
+ codec_settings_.numberOfSimulcastStreams = 3; + for (int i = 0; i < codec_settings_.numberOfSimulcastStreams; ++i) { + codec_settings_.simulcastStream[i].active = true; + codec_settings_.simulcastStream[i].minBitrate = 30; + codec_settings_.simulcastStream[i].targetBitrate = 30; + codec_settings_.simulcastStream[i].maxBitrate = 30; + codec_settings_.simulcastStream[i].numberOfTemporalLayers = 3; + codec_settings_.simulcastStream[i].width = + codec_settings_.width >> + (codec_settings_.numberOfSimulcastStreams - i - 1); + codec_settings_.simulcastStream[i].height = + codec_settings_.height >> + (codec_settings_.numberOfSimulcastStreams - i - 1); + } + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + FramerateFractions expected_fps_allocation[kMaxSpatialLayers]; + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction / 4); + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction / 2); + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction); + expected_fps_allocation[1] = expected_fps_allocation[0]; + expected_fps_allocation[2] = expected_fps_allocation[0]; + EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation, + ::testing::ElementsAreArray(expected_fps_allocation)); + + // Release encoder and re-init without temporal layers. + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + + // Sanity check fps allocation when not inited. 
+ FramerateFractions default_fps_fraction[kMaxSpatialLayers]; + default_fps_fraction[0].push_back(EncoderInfo::kMaxFramerateFraction); + EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation, + ::testing::ElementsAreArray(default_fps_fraction)); + + for (int i = 0; i < codec_settings_.numberOfSimulcastStreams; ++i) { + codec_settings_.simulcastStream[i].numberOfTemporalLayers = 1; + } + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + for (size_t i = 0; i < 3; ++i) { + expected_fps_allocation[i].clear(); + expected_fps_allocation[i].push_back(EncoderInfo::kMaxFramerateFraction); + } + EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation, + ::testing::ElementsAreArray(expected_fps_allocation)); +} + +class TestVp8ImplForPixelFormat + : public TestVp8Impl, + public ::testing::WithParamInterface<VideoFrameBuffer::Type> { + public: + TestVp8ImplForPixelFormat() : TestVp8Impl(), mappable_type_(GetParam()) {} + + protected: + VideoFrameBuffer::Type mappable_type_; +}; + +TEST_P(TestVp8ImplForPixelFormat, EncodeNativeFrameSimulcast) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + + // Configure simulcast. 
+ codec_settings_.numberOfSimulcastStreams = 3; + codec_settings_.simulcastStream[0] = {.width = kWidth / 4, + .height = kHeight / 4, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80, + .active = true}; + codec_settings_.simulcastStream[1] = {.width = kWidth / 2, + .height = kHeight / 2, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80, + .active = true}; + codec_settings_.simulcastStream[2] = {.width = kWidth, + .height = kHeight, + .maxFramerate = kFramerateFps, + .numberOfTemporalLayers = 1, + .maxBitrate = 4000, + .targetBitrate = 3000, + .minBitrate = 2000, + .qpMax = 80, + .active = true}; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + // Create a zero-conversion NV12 frame (calling ToI420 on it crashes). + VideoFrame input_frame = + test::CreateMappableNativeFrame(1, mappable_type_, kWidth, kHeight); + + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + EncodeAndWaitForFrame(input_frame, &encoded_frame, &codec_specific_info); + + // After encoding, we expect one mapping per simulcast layer. 
+ rtc::scoped_refptr<test::MappableNativeBuffer> mappable_buffer = + test::GetMappableNativeBufferFromVideoFrame(input_frame); + std::vector<rtc::scoped_refptr<VideoFrameBuffer>> mapped_buffers = + mappable_buffer->GetMappedFramedBuffers(); + ASSERT_EQ(mapped_buffers.size(), 3u); + EXPECT_EQ(mapped_buffers[0]->type(), mappable_type_); + EXPECT_EQ(mapped_buffers[0]->width(), kWidth); + EXPECT_EQ(mapped_buffers[0]->height(), kHeight); + EXPECT_EQ(mapped_buffers[1]->type(), mappable_type_); + EXPECT_EQ(mapped_buffers[1]->width(), kWidth / 2); + EXPECT_EQ(mapped_buffers[1]->height(), kHeight / 2); + EXPECT_EQ(mapped_buffers[2]->type(), mappable_type_); + EXPECT_EQ(mapped_buffers[2]->width(), kWidth / 4); + EXPECT_EQ(mapped_buffers[2]->height(), kHeight / 4); + EXPECT_FALSE(mappable_buffer->DidConvertToI420()); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); +} + +INSTANTIATE_TEST_SUITE_P(All, + TestVp8ImplForPixelFormat, + ::testing::Values(VideoFrameBuffer::Type::kI420, + VideoFrameBuffer::Type::kNV12)); + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp8/vp8_scalability.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp8/vp8_scalability.cc new file mode 100644 index 0000000000..9c7495ddf7 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/vp8_scalability.cc @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/vp8/vp8_scalability.h" + +namespace webrtc { + +bool VP8SupportsScalabilityMode(ScalabilityMode scalability_mode) { + for (const auto& entry : kVP8SupportedScalabilityModes) { + if (entry == scalability_mode) { + return true; + } + } + return false; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp8/vp8_scalability.h b/third_party/libwebrtc/modules/video_coding/codecs/vp8/vp8_scalability.h new file mode 100644 index 0000000000..923f159118 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp8/vp8_scalability.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_VIDEO_CODING_CODECS_VP8_VP8_SCALABILITY_H_ +#define MODULES_VIDEO_CODING_CODECS_VP8_VP8_SCALABILITY_H_ + +#include "api/video_codecs/scalability_mode.h" + +namespace webrtc { + +inline constexpr ScalabilityMode kVP8SupportedScalabilityModes[] = { + ScalabilityMode::kL1T1, ScalabilityMode::kL1T2, ScalabilityMode::kL1T3}; +bool VP8SupportsScalabilityMode(ScalabilityMode scalability_mode); + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_VP8_VP8_SCALABILITY_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/DEPS b/third_party/libwebrtc/modules/video_coding/codecs/vp9/DEPS new file mode 100644 index 0000000000..cc5cd70142 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/DEPS @@ -0,0 +1,3 @@ +include_rules = [ + "+media/base", +] diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9.h new file mode 100644 index 0000000000..79d403ded3 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ * + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_H_ +#define MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_H_ + +#include <memory> +#include <vector> + +#include "api/video_codecs/scalability_mode.h" +#include "api/video_codecs/sdp_video_format.h" +#include "media/base/codec.h" +#include "modules/video_coding/include/video_codec_interface.h" + +namespace webrtc { + +// Returns a vector with all supported internal VP9 profiles that we can +// negotiate in SDP, in order of preference. +std::vector<SdpVideoFormat> SupportedVP9Codecs( + bool add_scalability_modes = false); + +// Returns a vector with all supported internal VP9 decode profiles in order of +// preference. These will be available for receive-only connections. +std::vector<SdpVideoFormat> SupportedVP9DecoderCodecs(); + +class VP9Encoder : public VideoEncoder { + public: + // Deprecated. Returns default implementation using VP9 Profile 0. + // TODO(emircan): Remove once this is no longer used. + static std::unique_ptr<VP9Encoder> Create(); + // Parses VP9 Profile from `codec` and returns the appropriate implementation. + static std::unique_ptr<VP9Encoder> Create(const cricket::VideoCodec& codec); + static bool SupportsScalabilityMode(ScalabilityMode scalability_mode); + + ~VP9Encoder() override {} +}; + +class VP9Decoder : public VideoDecoder { + public: + static std::unique_ptr<VP9Decoder> Create(); + + ~VP9Decoder() override {} +}; +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9_globals.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9_globals.h new file mode 100644 index 0000000000..e6f644ec11 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/include/vp9_globals.h @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This file contains codec dependent definitions that are needed in +// order to compile the WebRTC codebase, even if this codec is not used. + +#ifndef MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_GLOBALS_H_ +#define MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_GLOBALS_H_ + +#include <stdint.h> + +#include "modules/video_coding/codecs/interface/common_constants.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +const int16_t kMaxOneBytePictureId = 0x7F; // 7 bits +const int16_t kMaxTwoBytePictureId = 0x7FFF; // 15 bits +const uint8_t kNoSpatialIdx = 0xFF; +const uint8_t kNoGofIdx = 0xFF; +const uint8_t kNumVp9Buffers = 8; +const size_t kMaxVp9RefPics = 3; +const size_t kMaxVp9FramesInGof = 0xFF; // 8 bits +const size_t kMaxVp9NumberOfSpatialLayers = 8; + +const size_t kMinVp9SpatialLayerLongSideLength = 240; +const size_t kMinVp9SpatialLayerShortSideLength = 135; + +enum TemporalStructureMode { + kTemporalStructureMode1, // 1 temporal layer structure - i.e., IPPP... + kTemporalStructureMode2, // 2 temporal layers 01... + kTemporalStructureMode3, // 3 temporal layers 0212... + kTemporalStructureMode4 // 3 temporal layers 02120212... 
+}; + +struct GofInfoVP9 { + void SetGofInfoVP9(TemporalStructureMode tm) { + switch (tm) { + case kTemporalStructureMode1: + num_frames_in_gof = 1; + temporal_idx[0] = 0; + temporal_up_switch[0] = true; + num_ref_pics[0] = 1; + pid_diff[0][0] = 1; + break; + case kTemporalStructureMode2: + num_frames_in_gof = 2; + temporal_idx[0] = 0; + temporal_up_switch[0] = true; + num_ref_pics[0] = 1; + pid_diff[0][0] = 2; + + temporal_idx[1] = 1; + temporal_up_switch[1] = true; + num_ref_pics[1] = 1; + pid_diff[1][0] = 1; + break; + case kTemporalStructureMode3: + num_frames_in_gof = 4; + temporal_idx[0] = 0; + temporal_up_switch[0] = true; + num_ref_pics[0] = 1; + pid_diff[0][0] = 4; + + temporal_idx[1] = 2; + temporal_up_switch[1] = true; + num_ref_pics[1] = 1; + pid_diff[1][0] = 1; + + temporal_idx[2] = 1; + temporal_up_switch[2] = true; + num_ref_pics[2] = 1; + pid_diff[2][0] = 2; + + temporal_idx[3] = 2; + temporal_up_switch[3] = true; + num_ref_pics[3] = 1; + pid_diff[3][0] = 1; + break; + case kTemporalStructureMode4: + num_frames_in_gof = 8; + temporal_idx[0] = 0; + temporal_up_switch[0] = true; + num_ref_pics[0] = 1; + pid_diff[0][0] = 4; + + temporal_idx[1] = 2; + temporal_up_switch[1] = true; + num_ref_pics[1] = 1; + pid_diff[1][0] = 1; + + temporal_idx[2] = 1; + temporal_up_switch[2] = false; + num_ref_pics[2] = 1; + pid_diff[2][0] = 2; + + temporal_idx[3] = 2; + temporal_up_switch[3] = true; + num_ref_pics[3] = 2; + pid_diff[3][0] = 1; + pid_diff[3][1] = 2; + + temporal_idx[4] = 0; + temporal_up_switch[4] = false; + num_ref_pics[4] = 1; + pid_diff[4][0] = 4; + + temporal_idx[5] = 2; + temporal_up_switch[5] = true; + num_ref_pics[5] = 2; + pid_diff[5][0] = 1; + pid_diff[5][1] = 2; + + temporal_idx[6] = 1; + temporal_up_switch[6] = false; + num_ref_pics[6] = 2; + pid_diff[6][0] = 2; + pid_diff[6][1] = 4; + + temporal_idx[7] = 2; + temporal_up_switch[7] = true; + num_ref_pics[7] = 2; + pid_diff[7][0] = 1; + pid_diff[7][1] = 2; + break; + default: + 
RTC_DCHECK_NOTREACHED(); + } + } + + void CopyGofInfoVP9(const GofInfoVP9& src) { + num_frames_in_gof = src.num_frames_in_gof; + for (size_t i = 0; i < num_frames_in_gof; ++i) { + temporal_idx[i] = src.temporal_idx[i]; + temporal_up_switch[i] = src.temporal_up_switch[i]; + num_ref_pics[i] = src.num_ref_pics[i]; + for (uint8_t r = 0; r < num_ref_pics[i]; ++r) { + pid_diff[i][r] = src.pid_diff[i][r]; + } + } + } + + size_t num_frames_in_gof; + uint8_t temporal_idx[kMaxVp9FramesInGof]; + bool temporal_up_switch[kMaxVp9FramesInGof]; + uint8_t num_ref_pics[kMaxVp9FramesInGof]; + uint8_t pid_diff[kMaxVp9FramesInGof][kMaxVp9RefPics]; + uint16_t pid_start; +}; + +struct RTPVideoHeaderVP9 { + void InitRTPVideoHeaderVP9() { + inter_pic_predicted = false; + flexible_mode = false; + beginning_of_frame = false; + end_of_frame = false; + ss_data_available = false; + non_ref_for_inter_layer_pred = false; + picture_id = kNoPictureId; + max_picture_id = kMaxTwoBytePictureId; + tl0_pic_idx = kNoTl0PicIdx; + temporal_idx = kNoTemporalIdx; + spatial_idx = kNoSpatialIdx; + temporal_up_switch = false; + inter_layer_predicted = false; + gof_idx = kNoGofIdx; + num_ref_pics = 0; + num_spatial_layers = 1; + first_active_layer = 0; + end_of_picture = true; + } + + bool inter_pic_predicted; // This layer frame is dependent on previously + // coded frame(s). + bool flexible_mode; // This frame is in flexible mode. + bool beginning_of_frame; // True if this packet is the first in a VP9 layer + // frame. + bool end_of_frame; // True if this packet is the last in a VP9 layer frame. + bool ss_data_available; // True if SS data is available in this payload + // descriptor. + bool non_ref_for_inter_layer_pred; // True for frame which is not used as + // reference for inter-layer prediction. + int16_t picture_id; // PictureID index, 15 bits; + // kNoPictureId if PictureID does not exist. 
+ int16_t max_picture_id; // Maximum picture ID index; either 0x7F or 0x7FFF; + int16_t tl0_pic_idx; // TL0PIC_IDX, 8 bits; + // kNoTl0PicIdx means no value provided. + uint8_t temporal_idx; // Temporal layer index, or kNoTemporalIdx. + uint8_t spatial_idx; // Spatial layer index, or kNoSpatialIdx. + bool temporal_up_switch; // True if upswitch to higher frame rate is possible + // meaning subsequent higher temporal layer pictures + // will not depend on any picture before the current + // picture (in coding order) with temporal layer ID + // greater than `temporal_idx` of this frame. + bool inter_layer_predicted; // Frame is dependent on directly lower spatial + // layer frame. + + uint8_t gof_idx; // Index to predefined temporal frame info in SS data. + + uint8_t num_ref_pics; // Number of reference pictures used by this layer + // frame. + uint8_t pid_diff[kMaxVp9RefPics]; // P_DIFF signaled to derive the PictureID + // of the reference pictures. + int16_t ref_picture_id[kMaxVp9RefPics]; // PictureID of reference pictures. + + // SS data. + size_t num_spatial_layers; // Always populated. + size_t first_active_layer; // Not sent on wire, used to adjust ss data. + bool spatial_layer_resolution_present; + uint16_t width[kMaxVp9NumberOfSpatialLayers]; + uint16_t height[kMaxVp9NumberOfSpatialLayers]; + GofInfoVP9 gof; + + bool end_of_picture; // This frame is the last frame in picture. +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_VP9_INCLUDE_VP9_GLOBALS_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc new file mode 100644 index 0000000000..0e39cc638a --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc @@ -0,0 +1,419 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#ifdef RTC_ENABLE_VP9 + +#include "modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h" + +#include <algorithm> + +#include "absl/strings/match.h" +#include "api/transport/field_trial_based_config.h" +#include "api/video/color_space.h" +#include "api/video/i010_buffer.h" +#include "common_video/include/video_frame_buffer.h" +#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "libyuv/include/libyuv/convert.h" +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" + +namespace webrtc { +namespace { + +// Helper function for extracting the VP9 color space. +ColorSpace ExtractVP9ColorSpace(vpx_color_space_t space_t, + vpx_color_range_t range_t, + unsigned int bit_depth) { + ColorSpace::PrimaryID primaries = ColorSpace::PrimaryID::kUnspecified; + ColorSpace::TransferID transfer = ColorSpace::TransferID::kUnspecified; + ColorSpace::MatrixID matrix = ColorSpace::MatrixID::kUnspecified; + switch (space_t) { + case VPX_CS_BT_601: + case VPX_CS_SMPTE_170: + primaries = ColorSpace::PrimaryID::kSMPTE170M; + transfer = ColorSpace::TransferID::kSMPTE170M; + matrix = ColorSpace::MatrixID::kSMPTE170M; + break; + case VPX_CS_SMPTE_240: + primaries = ColorSpace::PrimaryID::kSMPTE240M; + transfer = ColorSpace::TransferID::kSMPTE240M; + matrix = ColorSpace::MatrixID::kSMPTE240M; + break; + case VPX_CS_BT_709: + primaries = ColorSpace::PrimaryID::kBT709; + transfer = ColorSpace::TransferID::kBT709; + matrix = ColorSpace::MatrixID::kBT709; + break; + case VPX_CS_BT_2020: + primaries = ColorSpace::PrimaryID::kBT2020; + switch (bit_depth) { + case 8: + transfer = 
ColorSpace::TransferID::kBT709; + break; + case 10: + transfer = ColorSpace::TransferID::kBT2020_10; + break; + default: + RTC_DCHECK_NOTREACHED(); + break; + } + matrix = ColorSpace::MatrixID::kBT2020_NCL; + break; + case VPX_CS_SRGB: + primaries = ColorSpace::PrimaryID::kBT709; + transfer = ColorSpace::TransferID::kIEC61966_2_1; + matrix = ColorSpace::MatrixID::kBT709; + break; + default: + break; + } + + ColorSpace::RangeID range = ColorSpace::RangeID::kInvalid; + switch (range_t) { + case VPX_CR_STUDIO_RANGE: + range = ColorSpace::RangeID::kLimited; + break; + case VPX_CR_FULL_RANGE: + range = ColorSpace::RangeID::kFull; + break; + default: + break; + } + return ColorSpace(primaries, transfer, matrix, range); +} + +} // namespace + +LibvpxVp9Decoder::LibvpxVp9Decoder() + : LibvpxVp9Decoder(FieldTrialBasedConfig()) {} +LibvpxVp9Decoder::LibvpxVp9Decoder(const FieldTrialsView& trials) + : decode_complete_callback_(nullptr), + inited_(false), + decoder_(nullptr), + key_frame_required_(true), + preferred_output_format_( + absl::StartsWith(trials.Lookup("WebRTC-NV12Decode"), "Enabled") + ? VideoFrameBuffer::Type::kNV12 + : VideoFrameBuffer::Type::kI420) {} + +LibvpxVp9Decoder::~LibvpxVp9Decoder() { + inited_ = true; // in order to do the actual release + Release(); + int num_buffers_in_use = libvpx_buffer_pool_.GetNumBuffersInUse(); + if (num_buffers_in_use > 0) { + // The frame buffers are reference counted and frames are exposed after + // decoding. There may be valid usage cases where previous frames are still + // referenced after ~LibvpxVp9Decoder that is not a leak. 
+ RTC_LOG(LS_INFO) << num_buffers_in_use + << " Vp9FrameBuffers are still " + "referenced during ~LibvpxVp9Decoder."; + } +} + +bool LibvpxVp9Decoder::Configure(const Settings& settings) { + if (Release() < 0) { + return false; + } + + if (decoder_ == nullptr) { + decoder_ = new vpx_codec_ctx_t; + memset(decoder_, 0, sizeof(*decoder_)); + } + vpx_codec_dec_cfg_t cfg; + memset(&cfg, 0, sizeof(cfg)); + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + // We focus on webrtc fuzzing here, not libvpx itself. Use single thread for + // fuzzing, because: + // - libvpx's VP9 single thread decoder is more fuzzer friendly. It detects + // errors earlier than the multi-threaded version. + // - Make peak CPU usage under control (not depending on input) + cfg.threads = 1; +#else + const RenderResolution& resolution = settings.max_render_resolution(); + if (!resolution.Valid()) { + // Postpone configuring number of threads until resolution is known. + cfg.threads = 1; + } else { + // We want to use multithreading when decoding high resolution videos. But + // not too many in order to avoid overhead when many streams are decoded + // concurrently. + // Set 2 threads as target for 1280x720 pixel count, and then scale up + // linearly from there - but cap at physical core count. + // For common resolutions this results in: + // 1 for 360p + // 2 for 720p + // 4 for 1080p + // 8 for 1440p + // 18 for 4K + int num_threads = std::max( + 1, 2 * resolution.Width() * resolution.Height() / (1280 * 720)); + cfg.threads = std::min(settings.number_of_cores(), num_threads); + } +#endif + + current_settings_ = settings; + + vpx_codec_flags_t flags = 0; + if (vpx_codec_dec_init(decoder_, vpx_codec_vp9_dx(), &cfg, flags)) { + return false; + } + + if (!libvpx_buffer_pool_.InitializeVpxUsePool(decoder_)) { + return false; + } + + inited_ = true; + // Always start with a complete key frame. 
+ key_frame_required_ = true; + if (absl::optional<int> buffer_pool_size = settings.buffer_pool_size()) { + if (!libvpx_buffer_pool_.Resize(*buffer_pool_size) || + !output_buffer_pool_.Resize(*buffer_pool_size)) { + return false; + } + } + + vpx_codec_err_t status = + vpx_codec_control(decoder_, VP9D_SET_LOOP_FILTER_OPT, 1); + if (status != VPX_CODEC_OK) { + RTC_LOG(LS_ERROR) << "Failed to enable VP9D_SET_LOOP_FILTER_OPT. " + << vpx_codec_error(decoder_); + return false; + } + + return true; +} + +int LibvpxVp9Decoder::Decode(const EncodedImage& input_image, + bool missing_frames, + int64_t /*render_time_ms*/) { + if (!inited_) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (decode_complete_callback_ == nullptr) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + if (input_image._frameType == VideoFrameType::kVideoFrameKey) { + absl::optional<Vp9UncompressedHeader> frame_info = + ParseUncompressedVp9Header( + rtc::MakeArrayView(input_image.data(), input_image.size())); + if (frame_info) { + RenderResolution frame_resolution(frame_info->frame_width, + frame_info->frame_height); + if (frame_resolution != current_settings_.max_render_resolution()) { + // Resolution has changed, tear down and re-init a new decoder in + // order to get correct sizing. + Release(); + current_settings_.set_max_render_resolution(frame_resolution); + if (!Configure(current_settings_)) { + RTC_LOG(LS_WARNING) << "Failed to re-init decoder."; + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + } + } else { + RTC_LOG(LS_WARNING) << "Failed to parse VP9 header from key-frame."; + } + } + + // Always start with a complete key frame. + if (key_frame_required_) { + if (input_image._frameType != VideoFrameType::kVideoFrameKey) + return WEBRTC_VIDEO_CODEC_ERROR; + key_frame_required_ = false; + } + vpx_codec_iter_t iter = nullptr; + vpx_image_t* img; + const uint8_t* buffer = input_image.data(); + if (input_image.size() == 0) { + buffer = nullptr; // Triggers full frame concealment. 
+ } + // During decode libvpx may get and release buffers from + // `libvpx_buffer_pool_`. In practice libvpx keeps a few (~3-4) buffers alive + // at a time. + if (vpx_codec_decode(decoder_, buffer, + static_cast<unsigned int>(input_image.size()), 0, + VPX_DL_REALTIME)) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + // `img->fb_priv` contains the image data, a reference counted Vp9FrameBuffer. + // It may be released by libvpx during future vpx_codec_decode or + // vpx_codec_destroy calls. + img = vpx_codec_get_frame(decoder_, &iter); + int qp; + vpx_codec_err_t vpx_ret = + vpx_codec_control(decoder_, VPXD_GET_LAST_QUANTIZER, &qp); + RTC_DCHECK_EQ(vpx_ret, VPX_CODEC_OK); + int ret = + ReturnFrame(img, input_image.Timestamp(), qp, input_image.ColorSpace()); + if (ret != 0) { + return ret; + } + return WEBRTC_VIDEO_CODEC_OK; +} + +int LibvpxVp9Decoder::ReturnFrame( + const vpx_image_t* img, + uint32_t timestamp, + int qp, + const webrtc::ColorSpace* explicit_color_space) { + if (img == nullptr) { + // Decoder OK and nullptr image => No show frame. + return WEBRTC_VIDEO_CODEC_NO_OUTPUT; + } + + // This buffer contains all of `img`'s image data, a reference counted + // Vp9FrameBuffer. (libvpx is done with the buffers after a few + // vpx_codec_decode calls or vpx_codec_destroy). + rtc::scoped_refptr<Vp9FrameBufferPool::Vp9FrameBuffer> img_buffer( + static_cast<Vp9FrameBufferPool::Vp9FrameBuffer*>(img->fb_priv)); + + // The buffer can be used directly by the VideoFrame (without copy) by + // using a Wrapped*Buffer. + rtc::scoped_refptr<VideoFrameBuffer> img_wrapped_buffer; + switch (img->fmt) { + case VPX_IMG_FMT_I420: + if (preferred_output_format_ == VideoFrameBuffer::Type::kNV12) { + rtc::scoped_refptr<NV12Buffer> nv12_buffer = + output_buffer_pool_.CreateNV12Buffer(img->d_w, img->d_h); + if (!nv12_buffer.get()) { + // Buffer pool is full. 
+ return WEBRTC_VIDEO_CODEC_NO_OUTPUT; + } + img_wrapped_buffer = nv12_buffer; + libyuv::I420ToNV12(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y], + img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U], + img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V], + nv12_buffer->MutableDataY(), nv12_buffer->StrideY(), + nv12_buffer->MutableDataUV(), + nv12_buffer->StrideUV(), img->d_w, img->d_h); + // No holding onto img_buffer as it's no longer needed and can be + // reused. + } else { + img_wrapped_buffer = WrapI420Buffer( + img->d_w, img->d_h, img->planes[VPX_PLANE_Y], + img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U], + img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V], + img->stride[VPX_PLANE_V], + // WrappedI420Buffer's mechanism for allowing the release of its + // frame buffer is through a callback function. This is where we + // should release `img_buffer`. + [img_buffer] {}); + } + break; + case VPX_IMG_FMT_I422: + img_wrapped_buffer = WrapI422Buffer( + img->d_w, img->d_h, img->planes[VPX_PLANE_Y], + img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U], + img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V], + img->stride[VPX_PLANE_V], + // WrappedI422Buffer's mechanism for allowing the release of its + // frame buffer is through a callback function. This is where we + // should release `img_buffer`. + [img_buffer] {}); + break; + case VPX_IMG_FMT_I444: + img_wrapped_buffer = WrapI444Buffer( + img->d_w, img->d_h, img->planes[VPX_PLANE_Y], + img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U], + img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V], + img->stride[VPX_PLANE_V], + // WrappedI444Buffer's mechanism for allowing the release of its + // frame buffer is through a callback function. This is where we + // should release `img_buffer`. 
+ [img_buffer] {}); + break; + case VPX_IMG_FMT_I42016: + img_wrapped_buffer = WrapI010Buffer( + img->d_w, img->d_h, + reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_Y]), + img->stride[VPX_PLANE_Y] / 2, + reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_U]), + img->stride[VPX_PLANE_U] / 2, + reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_V]), + img->stride[VPX_PLANE_V] / 2, [img_buffer] {}); + break; + case VPX_IMG_FMT_I42216: + img_wrapped_buffer = WrapI210Buffer( + img->d_w, img->d_h, + reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_Y]), + img->stride[VPX_PLANE_Y] / 2, + reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_U]), + img->stride[VPX_PLANE_U] / 2, + reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_V]), + img->stride[VPX_PLANE_V] / 2, [img_buffer] {}); + break; + default: + RTC_LOG(LS_ERROR) << "Unsupported pixel format produced by the decoder: " + << static_cast<int>(img->fmt); + return WEBRTC_VIDEO_CODEC_NO_OUTPUT; + } + + auto builder = VideoFrame::Builder() + .set_video_frame_buffer(img_wrapped_buffer) + .set_timestamp_rtp(timestamp); + if (explicit_color_space) { + builder.set_color_space(*explicit_color_space); + } else { + builder.set_color_space( + ExtractVP9ColorSpace(img->cs, img->range, img->bit_depth)); + } + VideoFrame decoded_image = builder.build(); + + decode_complete_callback_->Decoded(decoded_image, absl::nullopt, qp); + return WEBRTC_VIDEO_CODEC_OK; +} + +int LibvpxVp9Decoder::RegisterDecodeCompleteCallback( + DecodedImageCallback* callback) { + decode_complete_callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +int LibvpxVp9Decoder::Release() { + int ret_val = WEBRTC_VIDEO_CODEC_OK; + + if (decoder_ != nullptr) { + if (inited_) { + // When a codec is destroyed libvpx will release any buffers of + // `libvpx_buffer_pool_` it is currently using. 
+ if (vpx_codec_destroy(decoder_)) { + ret_val = WEBRTC_VIDEO_CODEC_MEMORY; + } + } + delete decoder_; + decoder_ = nullptr; + } + // Releases buffers from the pool. Any buffers not in use are deleted. Buffers + // still referenced externally are deleted once fully released, not returning + // to the pool. + libvpx_buffer_pool_.ClearPool(); + output_buffer_pool_.Release(); + inited_ = false; + return ret_val; +} + +VideoDecoder::DecoderInfo LibvpxVp9Decoder::GetDecoderInfo() const { + DecoderInfo info; + info.implementation_name = "libvpx"; + info.is_hardware_accelerated = false; + return info; +} + +const char* LibvpxVp9Decoder::ImplementationName() const { + return "libvpx"; +} + +} // namespace webrtc + +#endif // RTC_ENABLE_VP9 diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h new file mode 100644 index 0000000000..a680441f73 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ * + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_DECODER_H_ +#define MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_DECODER_H_ + +#ifdef RTC_ENABLE_VP9 + +#include "api/field_trials_view.h" +#include "api/video_codecs/video_decoder.h" +#include "common_video/include/video_frame_buffer_pool.h" +#include "modules/video_coding/codecs/vp9/include/vp9.h" +#include "modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h" +#include "vpx/vp8cx.h" + +namespace webrtc { + +class LibvpxVp9Decoder : public VP9Decoder { + public: + LibvpxVp9Decoder(); + explicit LibvpxVp9Decoder(const FieldTrialsView& trials); + + virtual ~LibvpxVp9Decoder(); + + bool Configure(const Settings& settings) override; + + int Decode(const EncodedImage& input_image, + bool missing_frames, + int64_t /*render_time_ms*/) override; + + int RegisterDecodeCompleteCallback(DecodedImageCallback* callback) override; + + int Release() override; + + DecoderInfo GetDecoderInfo() const override; + const char* ImplementationName() const override; + + private: + int ReturnFrame(const vpx_image_t* img, + uint32_t timestamp, + int qp, + const webrtc::ColorSpace* explicit_color_space); + + // Memory pool used to share buffers between libvpx and webrtc. + Vp9FrameBufferPool libvpx_buffer_pool_; + // Buffer pool used to allocate additionally needed NV12 buffers. + VideoFrameBufferPool output_buffer_pool_; + DecodedImageCallback* decode_complete_callback_; + bool inited_; + vpx_codec_ctx_t* decoder_; + bool key_frame_required_; + Settings current_settings_; + + // Decoder should produce this format if possible. 
+ const VideoFrameBuffer::Type preferred_output_format_; +}; +} // namespace webrtc + +#endif // RTC_ENABLE_VP9 + +#endif // MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_DECODER_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc new file mode 100644 index 0000000000..fea11b5051 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc @@ -0,0 +1,2183 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#ifdef RTC_ENABLE_VP9 + +#include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h" + +#include <algorithm> +#include <limits> +#include <utility> +#include <vector> + +#include "absl/algorithm/container.h" +#include "absl/memory/memory.h" +#include "absl/strings/match.h" +#include "api/video/color_space.h" +#include "api/video/i010_buffer.h" +#include "common_video/include/video_frame_buffer.h" +#include "common_video/libyuv/include/webrtc_libyuv.h" +#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" +#include "modules/video_coding/svc/create_scalability_structure.h" +#include "modules/video_coding/svc/scalability_mode_util.h" +#include "modules/video_coding/svc/scalable_video_controller.h" +#include "modules/video_coding/svc/scalable_video_controller_no_layering.h" +#include "modules/video_coding/svc/svc_rate_allocator.h" +#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h" +#include "rtc_base/checks.h" +#include "rtc_base/experiments/field_trial_list.h" +#include "rtc_base/experiments/field_trial_parser.h" +#include 
"rtc_base/experiments/rate_control_settings.h" +#include "rtc_base/logging.h" +#include "rtc_base/strings/string_builder.h" +#include "rtc_base/time_utils.h" +#include "rtc_base/trace_event.h" +#include "libyuv/include/libyuv/convert.h" +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" + +namespace webrtc { + +namespace { +// Maps from gof_idx to encoder internal reference frame buffer index. These +// maps work for 1,2 and 3 temporal layers with GOF length of 1,2 and 4 frames. +uint8_t kRefBufIdx[4] = {0, 0, 0, 1}; +uint8_t kUpdBufIdx[4] = {0, 0, 1, 0}; + +// Maximum allowed PID difference for different per-layer frame-rate case. +const int kMaxAllowedPidDiff = 30; + +// TODO(ilink): Tune these thresholds further. +// Selected using ConferenceMotion_1280_720_50.yuv clip. +// No toggling observed on any link capacity from 100-2000kbps. +// HD was reached consistently when link capacity was 1500kbps. +// Set resolutions are a bit more conservative than svc_config.cc sets, e.g. 
+constexpr int kLowVp9QpThreshold = 149; +constexpr int kHighVp9QpThreshold = 205; + +std::pair<size_t, size_t> GetActiveLayers( + const VideoBitrateAllocation& allocation) { + for (size_t sl_idx = 0; sl_idx < kMaxSpatialLayers; ++sl_idx) { + if (allocation.GetSpatialLayerSum(sl_idx) > 0) { + size_t last_layer = sl_idx + 1; + while (last_layer < kMaxSpatialLayers && + allocation.GetSpatialLayerSum(last_layer) > 0) { + ++last_layer; + } + return std::make_pair(sl_idx, last_layer); + } + } + return {0, 0}; +} + +std::unique_ptr<ScalableVideoController> CreateVp9ScalabilityStructure( + const VideoCodec& codec) { + int num_spatial_layers = codec.VP9().numberOfSpatialLayers; + int num_temporal_layers = + std::max(1, int{codec.VP9().numberOfTemporalLayers}); + if (num_spatial_layers == 1 && num_temporal_layers == 1) { + return std::make_unique<ScalableVideoControllerNoLayering>(); + } + + char name[20]; + rtc::SimpleStringBuilder ss(name); + if (codec.mode == VideoCodecMode::kScreensharing) { + // TODO(bugs.webrtc.org/11999): Compose names of the structures when they + // are implemented. + return nullptr; + } else if (codec.VP9().interLayerPred == InterLayerPredMode::kOn || + num_spatial_layers == 1) { + ss << "L" << num_spatial_layers << "T" << num_temporal_layers; + } else if (codec.VP9().interLayerPred == InterLayerPredMode::kOnKeyPic) { + ss << "L" << num_spatial_layers << "T" << num_temporal_layers << "_KEY"; + } else { + RTC_DCHECK_EQ(codec.VP9().interLayerPred, InterLayerPredMode::kOff); + ss << "S" << num_spatial_layers << "T" << num_temporal_layers; + } + + // Check spatial ratio. + if (num_spatial_layers > 1 && codec.spatialLayers[0].targetBitrate > 0) { + if (codec.width != codec.spatialLayers[num_spatial_layers - 1].width || + codec.height != codec.spatialLayers[num_spatial_layers - 1].height) { + RTC_LOG(LS_WARNING) + << "Top layer resolution expected to match overall resolution"; + return nullptr; + } + // Check if the ratio is one of the supported. 
+ int numerator; + int denominator; + if (codec.spatialLayers[1].width == 2 * codec.spatialLayers[0].width) { + numerator = 1; + denominator = 2; + // no suffix for 1:2 ratio. + } else if (2 * codec.spatialLayers[1].width == + 3 * codec.spatialLayers[0].width) { + numerator = 2; + denominator = 3; + ss << "h"; + } else { + RTC_LOG(LS_WARNING) << "Unsupported scalability ratio " + << codec.spatialLayers[0].width << ":" + << codec.spatialLayers[1].width; + return nullptr; + } + // Validate ratio is consistent for all spatial layer transitions. + for (int sid = 1; sid < num_spatial_layers; ++sid) { + if (codec.spatialLayers[sid].width * numerator != + codec.spatialLayers[sid - 1].width * denominator || + codec.spatialLayers[sid].height * numerator != + codec.spatialLayers[sid - 1].height * denominator) { + RTC_LOG(LS_WARNING) << "Inconsistent scalability ratio " << numerator + << ":" << denominator; + return nullptr; + } + } + } + + absl::optional<ScalabilityMode> scalability_mode = + ScalabilityModeFromString(name); + if (!scalability_mode.has_value()) { + RTC_LOG(LS_WARNING) << "Invalid scalability mode " << name; + return nullptr; + } + auto scalability_structure_controller = + CreateScalabilityStructure(*scalability_mode); + if (scalability_structure_controller == nullptr) { + RTC_LOG(LS_WARNING) << "Unsupported scalability structure " << name; + } else { + RTC_LOG(LS_INFO) << "Created scalability structure " << name; + } + return scalability_structure_controller; +} + +vpx_svc_ref_frame_config_t Vp9References( + rtc::ArrayView<const ScalableVideoController::LayerFrameConfig> layers) { + vpx_svc_ref_frame_config_t ref_config = {}; + for (const ScalableVideoController::LayerFrameConfig& layer_frame : layers) { + const auto& buffers = layer_frame.Buffers(); + RTC_DCHECK_LE(buffers.size(), 3); + int sid = layer_frame.SpatialId(); + if (!buffers.empty()) { + ref_config.lst_fb_idx[sid] = buffers[0].id; + ref_config.reference_last[sid] = buffers[0].referenced; + if 
(buffers[0].updated) { + ref_config.update_buffer_slot[sid] |= (1 << buffers[0].id); + } + } + if (buffers.size() > 1) { + ref_config.gld_fb_idx[sid] = buffers[1].id; + ref_config.reference_golden[sid] = buffers[1].referenced; + if (buffers[1].updated) { + ref_config.update_buffer_slot[sid] |= (1 << buffers[1].id); + } + } + if (buffers.size() > 2) { + ref_config.alt_fb_idx[sid] = buffers[2].id; + ref_config.reference_alt_ref[sid] = buffers[2].referenced; + if (buffers[2].updated) { + ref_config.update_buffer_slot[sid] |= (1 << buffers[2].id); + } + } + } + // TODO(bugs.webrtc.org/11999): Fill ref_config.duration + return ref_config; +} + +bool AllowDenoising() { + // Do not enable the denoiser on ARM since optimization is pending. + // Denoiser is on by default on other platforms. +#if !defined(WEBRTC_ARCH_ARM) && !defined(WEBRTC_ARCH_ARM64) && \ + !defined(ANDROID) + return true; +#else + return false; +#endif +} + +} // namespace + +void LibvpxVp9Encoder::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt, + void* user_data) { + LibvpxVp9Encoder* enc = static_cast<LibvpxVp9Encoder*>(user_data); + enc->GetEncodedLayerFrame(pkt); +} + +LibvpxVp9Encoder::LibvpxVp9Encoder(const cricket::VideoCodec& codec, + std::unique_ptr<LibvpxInterface> interface, + const FieldTrialsView& trials) + : libvpx_(std::move(interface)), + encoded_image_(), + encoded_complete_callback_(nullptr), + profile_( + ParseSdpForVP9Profile(codec.params).value_or(VP9Profile::kProfile0)), + inited_(false), + timestamp_(0), + rc_max_intra_target_(0), + encoder_(nullptr), + config_(nullptr), + raw_(nullptr), + input_image_(nullptr), + force_key_frame_(true), + pics_since_key_(0), + num_temporal_layers_(0), + num_spatial_layers_(0), + num_active_spatial_layers_(0), + first_active_layer_(0), + layer_deactivation_requires_key_frame_(absl::StartsWith( + trials.Lookup("WebRTC-Vp9IssueKeyFrameOnLayerDeactivation"), + "Enabled")), + is_svc_(false), + inter_layer_pred_(InterLayerPredMode::kOn), + 
external_ref_control_(false), // Set in InitEncode because of tests. + trusted_rate_controller_( + RateControlSettings::ParseFromKeyValueConfig(&trials) + .LibvpxVp9TrustedRateController()), + layer_buffering_(false), + full_superframe_drop_(true), + first_frame_in_picture_(true), + ss_info_needed_(false), + force_all_active_layers_(false), + num_cores_(0), + is_flexible_mode_(false), + variable_framerate_experiment_(ParseVariableFramerateConfig(trials)), + variable_framerate_controller_( + variable_framerate_experiment_.framerate_limit), + quality_scaler_experiment_(ParseQualityScalerConfig(trials)), + external_ref_ctrl_( + !absl::StartsWith(trials.Lookup("WebRTC-Vp9ExternalRefCtrl"), + "Disabled")), + performance_flags_(ParsePerformanceFlagsFromTrials(trials)), + num_steady_state_frames_(0), + config_changed_(true) { + codec_ = {}; + memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t)); +} + +LibvpxVp9Encoder::~LibvpxVp9Encoder() { + Release(); +} + +void LibvpxVp9Encoder::SetFecControllerOverride(FecControllerOverride*) { + // Ignored. +} + +int LibvpxVp9Encoder::Release() { + int ret_val = WEBRTC_VIDEO_CODEC_OK; + + if (encoder_ != nullptr) { + if (inited_) { + if (libvpx_->codec_destroy(encoder_)) { + ret_val = WEBRTC_VIDEO_CODEC_MEMORY; + } + } + delete encoder_; + encoder_ = nullptr; + } + if (config_ != nullptr) { + delete config_; + config_ = nullptr; + } + if (raw_ != nullptr) { + libvpx_->img_free(raw_); + raw_ = nullptr; + } + inited_ = false; + return ret_val; +} + +bool LibvpxVp9Encoder::ExplicitlyConfiguredSpatialLayers() const { + // We check target_bitrate_bps of the 0th layer to see if the spatial layers + // (i.e. bitrates) were explicitly configured. 
+ return codec_.spatialLayers[0].targetBitrate > 0; +} + +bool LibvpxVp9Encoder::SetSvcRates( + const VideoBitrateAllocation& bitrate_allocation) { + std::pair<size_t, size_t> current_layers = + GetActiveLayers(current_bitrate_allocation_); + std::pair<size_t, size_t> new_layers = GetActiveLayers(bitrate_allocation); + + const bool layer_activation_requires_key_frame = + inter_layer_pred_ == InterLayerPredMode::kOff || + inter_layer_pred_ == InterLayerPredMode::kOnKeyPic; + const bool lower_layers_enabled = new_layers.first < current_layers.first; + const bool higher_layers_enabled = new_layers.second > current_layers.second; + const bool disabled_layers = new_layers.first > current_layers.first || + new_layers.second < current_layers.second; + + if (lower_layers_enabled || + (higher_layers_enabled && layer_activation_requires_key_frame) || + (disabled_layers && layer_deactivation_requires_key_frame_)) { + force_key_frame_ = true; + } + + if (current_layers != new_layers) { + ss_info_needed_ = true; + } + + config_->rc_target_bitrate = bitrate_allocation.get_sum_kbps(); + + if (ExplicitlyConfiguredSpatialLayers()) { + for (size_t sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) { + const bool was_layer_active = (config_->ss_target_bitrate[sl_idx] > 0); + config_->ss_target_bitrate[sl_idx] = + bitrate_allocation.GetSpatialLayerSum(sl_idx) / 1000; + + for (size_t tl_idx = 0; tl_idx < num_temporal_layers_; ++tl_idx) { + config_->layer_target_bitrate[sl_idx * num_temporal_layers_ + tl_idx] = + bitrate_allocation.GetTemporalLayerSum(sl_idx, tl_idx) / 1000; + } + + if (!was_layer_active) { + // Reset frame rate controller if layer is resumed after pause. 
+ framerate_controller_[sl_idx].Reset(); + } + + framerate_controller_[sl_idx].SetTargetRate( + codec_.spatialLayers[sl_idx].maxFramerate); + } + } else { + float rate_ratio[VPX_MAX_LAYERS] = {0}; + float total = 0; + for (int i = 0; i < num_spatial_layers_; ++i) { + if (svc_params_.scaling_factor_num[i] <= 0 || + svc_params_.scaling_factor_den[i] <= 0) { + RTC_LOG(LS_ERROR) << "Scaling factors not specified!"; + return false; + } + rate_ratio[i] = static_cast<float>(svc_params_.scaling_factor_num[i]) / + svc_params_.scaling_factor_den[i]; + total += rate_ratio[i]; + } + + for (int i = 0; i < num_spatial_layers_; ++i) { + RTC_CHECK_GT(total, 0); + config_->ss_target_bitrate[i] = static_cast<unsigned int>( + config_->rc_target_bitrate * rate_ratio[i] / total); + if (num_temporal_layers_ == 1) { + config_->layer_target_bitrate[i] = config_->ss_target_bitrate[i]; + } else if (num_temporal_layers_ == 2) { + config_->layer_target_bitrate[i * num_temporal_layers_] = + config_->ss_target_bitrate[i] * 2 / 3; + config_->layer_target_bitrate[i * num_temporal_layers_ + 1] = + config_->ss_target_bitrate[i]; + } else if (num_temporal_layers_ == 3) { + config_->layer_target_bitrate[i * num_temporal_layers_] = + config_->ss_target_bitrate[i] / 2; + config_->layer_target_bitrate[i * num_temporal_layers_ + 1] = + config_->layer_target_bitrate[i * num_temporal_layers_] + + (config_->ss_target_bitrate[i] / 4); + config_->layer_target_bitrate[i * num_temporal_layers_ + 2] = + config_->ss_target_bitrate[i]; + } else { + RTC_LOG(LS_ERROR) << "Unsupported number of temporal layers: " + << num_temporal_layers_; + return false; + } + + framerate_controller_[i].SetTargetRate(codec_.maxFramerate); + } + } + + num_active_spatial_layers_ = 0; + first_active_layer_ = 0; + bool seen_active_layer = false; + bool expect_no_more_active_layers = false; + for (int i = 0; i < num_spatial_layers_; ++i) { + if (config_->ss_target_bitrate[i] > 0) { + RTC_DCHECK(!expect_no_more_active_layers) << "Only 
middle layer is " + "deactivated."; + if (!seen_active_layer) { + first_active_layer_ = i; + } + num_active_spatial_layers_ = i + 1; + seen_active_layer = true; + } else { + expect_no_more_active_layers = seen_active_layer; + } + } + + if (seen_active_layer && performance_flags_.use_per_layer_speed) { + bool denoiser_on = + AllowDenoising() && codec_.VP9()->denoisingOn && + performance_flags_by_spatial_index_[num_active_spatial_layers_ - 1] + .allow_denoising; + libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY, + denoiser_on ? 1 : 0); + } + + if (higher_layers_enabled && !force_key_frame_) { + // Prohibit drop of all layers for the next frame, so newly enabled + // layer would have a valid spatial reference. + for (size_t i = 0; i < num_spatial_layers_; ++i) { + svc_drop_frame_.framedrop_thresh[i] = 0; + } + force_all_active_layers_ = true; + } + + if (svc_controller_) { + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + // Bitrates in `layer_target_bitrate` are accumulated for each temporal + // layer but in `VideoBitrateAllocation` they should be separated. 
+ int previous_bitrate_kbps = 0; + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + int accumulated_bitrate_kbps = + config_->layer_target_bitrate[sid * num_temporal_layers_ + tid]; + int single_layer_bitrate_kbps = + accumulated_bitrate_kbps - previous_bitrate_kbps; + RTC_DCHECK_GE(single_layer_bitrate_kbps, 0); + current_bitrate_allocation_.SetBitrate( + sid, tid, single_layer_bitrate_kbps * 1'000); + previous_bitrate_kbps = accumulated_bitrate_kbps; + } + } + svc_controller_->OnRatesUpdated(current_bitrate_allocation_); + } else { + current_bitrate_allocation_ = bitrate_allocation; + } + config_changed_ = true; + return true; +} + +void LibvpxVp9Encoder::DisableSpatialLayer(int sid) { + RTC_DCHECK_LT(sid, num_spatial_layers_); + if (config_->ss_target_bitrate[sid] == 0) { + return; + } + config_->ss_target_bitrate[sid] = 0; + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + config_->layer_target_bitrate[sid * num_temporal_layers_ + tid] = 0; + } + config_changed_ = true; +} + +void LibvpxVp9Encoder::EnableSpatialLayer(int sid) { + RTC_DCHECK_LT(sid, num_spatial_layers_); + if (config_->ss_target_bitrate[sid] > 0) { + return; + } + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + config_->layer_target_bitrate[sid * num_temporal_layers_ + tid] = + current_bitrate_allocation_.GetTemporalLayerSum(sid, tid) / 1000; + } + config_->ss_target_bitrate[sid] = + current_bitrate_allocation_.GetSpatialLayerSum(sid) / 1000; + RTC_DCHECK_GT(config_->ss_target_bitrate[sid], 0); + config_changed_ = true; +} + +void LibvpxVp9Encoder::SetActiveSpatialLayers() { + // Svc controller may decide to skip a frame at certain spatial layer even + // when bitrate for it is non-zero, however libvpx uses configured bitrate as + // a signal which layers should be produced. 
+ RTC_DCHECK(svc_controller_); + RTC_DCHECK(!layer_frames_.empty()); + RTC_DCHECK(absl::c_is_sorted( + layer_frames_, [](const ScalableVideoController::LayerFrameConfig& lhs, + const ScalableVideoController::LayerFrameConfig& rhs) { + return lhs.SpatialId() < rhs.SpatialId(); + })); + + auto frame_it = layer_frames_.begin(); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (frame_it != layer_frames_.end() && frame_it->SpatialId() == sid) { + EnableSpatialLayer(sid); + ++frame_it; + } else { + DisableSpatialLayer(sid); + } + } +} + +void LibvpxVp9Encoder::SetRates(const RateControlParameters& parameters) { + if (!inited_) { + RTC_LOG(LS_WARNING) << "SetRates() called while uninitialized."; + return; + } + if (encoder_->err) { + RTC_LOG(LS_WARNING) << "Encoder in error state: " << encoder_->err; + return; + } + if (parameters.framerate_fps < 1.0) { + RTC_LOG(LS_WARNING) << "Unsupported framerate: " + << parameters.framerate_fps; + return; + } + + codec_.maxFramerate = static_cast<uint32_t>(parameters.framerate_fps + 0.5); + + bool res = SetSvcRates(parameters.bitrate); + RTC_DCHECK(res) << "Failed to set new bitrate allocation"; + config_changed_ = true; +} + +// TODO(eladalon): s/inst/codec_settings/g. +int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst, + const Settings& settings) { + if (inst == nullptr) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->maxFramerate < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + // Allow zero to represent an unspecified maxBitRate + if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->width < 1 || inst->height < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (settings.number_of_cores < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->VP9().numberOfTemporalLayers > 3) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + // libvpx probably does not support more than 3 spatial layers. 
+ if (inst->VP9().numberOfSpatialLayers > 3) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + absl::optional<vpx_img_fmt_t> previous_img_fmt = + raw_ ? absl::make_optional<vpx_img_fmt_t>(raw_->fmt) : absl::nullopt; + + int ret_val = Release(); + if (ret_val < 0) { + return ret_val; + } + if (encoder_ == nullptr) { + encoder_ = new vpx_codec_ctx_t; + memset(encoder_, 0, sizeof(*encoder_)); + } + if (config_ == nullptr) { + config_ = new vpx_codec_enc_cfg_t; + memset(config_, 0, sizeof(*config_)); + } + timestamp_ = 0; + if (&codec_ != inst) { + codec_ = *inst; + } + memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t)); + + force_key_frame_ = true; + pics_since_key_ = 0; + num_cores_ = settings.number_of_cores; + + absl::optional<ScalabilityMode> scalability_mode = inst->GetScalabilityMode(); + if (scalability_mode.has_value()) { + // Use settings from `ScalabilityMode` identifier. + RTC_LOG(LS_INFO) << "Create scalability structure " + << ScalabilityModeToString(*scalability_mode); + svc_controller_ = CreateScalabilityStructure(*scalability_mode); + if (!svc_controller_) { + RTC_LOG(LS_WARNING) << "Failed to create scalability structure."; + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + ScalableVideoController::StreamLayersConfig info = + svc_controller_->StreamConfig(); + num_spatial_layers_ = info.num_spatial_layers; + num_temporal_layers_ = info.num_temporal_layers; + inter_layer_pred_ = ScalabilityModeToInterLayerPredMode(*scalability_mode); + } else { + num_spatial_layers_ = inst->VP9().numberOfSpatialLayers; + RTC_DCHECK_GT(num_spatial_layers_, 0); + num_temporal_layers_ = inst->VP9().numberOfTemporalLayers; + if (num_temporal_layers_ == 0) { + num_temporal_layers_ = 1; + } + inter_layer_pred_ = inst->VP9().interLayerPred; + svc_controller_ = CreateVp9ScalabilityStructure(*inst); + } + + framerate_controller_ = std::vector<FramerateControllerDeprecated>( + num_spatial_layers_, FramerateControllerDeprecated(codec_.maxFramerate)); + + is_svc_ = 
(num_spatial_layers_ > 1 || num_temporal_layers_ > 1); + + // Populate encoder configuration with default values. + if (libvpx_->codec_enc_config_default(vpx_codec_vp9_cx(), config_, 0)) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + + vpx_img_fmt img_fmt = VPX_IMG_FMT_NONE; + unsigned int bits_for_storage = 8; + switch (profile_) { + case VP9Profile::kProfile0: + img_fmt = previous_img_fmt.value_or(VPX_IMG_FMT_I420); + bits_for_storage = 8; + config_->g_bit_depth = VPX_BITS_8; + config_->g_profile = 0; + config_->g_input_bit_depth = 8; + break; + case VP9Profile::kProfile1: + // Encoding of profile 1 is not implemented. It would require extended + // support for I444, I422, and I440 buffers. + RTC_DCHECK_NOTREACHED(); + break; + case VP9Profile::kProfile2: + img_fmt = VPX_IMG_FMT_I42016; + bits_for_storage = 16; + config_->g_bit_depth = VPX_BITS_10; + config_->g_profile = 2; + config_->g_input_bit_depth = 10; + break; + case VP9Profile::kProfile3: + // Encoding of profile 3 is not implemented. + RTC_DCHECK_NOTREACHED(); + break; + } + + // Creating a wrapper to the image - setting image data to nullptr. Actual + // pointer will be set in encode. Setting align to 1, as it is meaningless + // (actual memory is not allocated). + raw_ = libvpx_->img_wrap(nullptr, img_fmt, codec_.width, codec_.height, 1, + nullptr); + raw_->bit_depth = bits_for_storage; + + config_->g_w = codec_.width; + config_->g_h = codec_.height; + config_->rc_target_bitrate = inst->startBitrate; // in kbit/s + config_->g_error_resilient = is_svc_ ? VPX_ERROR_RESILIENT_DEFAULT : 0; + // Setting the time base of the codec. + config_->g_timebase.num = 1; + config_->g_timebase.den = 90000; + config_->g_lag_in_frames = 0; // 0- no frame lagging + config_->g_threads = 1; + // Rate control settings. + config_->rc_dropframe_thresh = inst->GetFrameDropEnabled() ? 
30 : 0; + config_->rc_end_usage = VPX_CBR; + config_->g_pass = VPX_RC_ONE_PASS; + config_->rc_min_quantizer = + codec_.mode == VideoCodecMode::kScreensharing ? 8 : 2; + config_->rc_max_quantizer = 52; + config_->rc_undershoot_pct = 50; + config_->rc_overshoot_pct = 50; + config_->rc_buf_initial_sz = 500; + config_->rc_buf_optimal_sz = 600; + config_->rc_buf_sz = 1000; + // Set the maximum target size of any key-frame. + rc_max_intra_target_ = MaxIntraTarget(config_->rc_buf_optimal_sz); + // Key-frame interval is enforced manually by this wrapper. + config_->kf_mode = VPX_KF_DISABLED; + // TODO(webm:1592): work-around for libvpx issue, as it can still + // put some key-frames at will even in VPX_KF_DISABLED kf_mode. + config_->kf_max_dist = inst->VP9().keyFrameInterval; + config_->kf_min_dist = config_->kf_max_dist; + if (quality_scaler_experiment_.enabled) { + // In that experiment webrtc wide quality scaler is used instead of libvpx + // internal scaler. + config_->rc_resize_allowed = 0; + } else { + config_->rc_resize_allowed = inst->VP9().automaticResizeOn ? 1 : 0; + } + // Determine number of threads based on the image size and #cores. + config_->g_threads = + NumberOfThreads(config_->g_w, config_->g_h, settings.number_of_cores); + + is_flexible_mode_ = inst->VP9().flexibleMode; + + if (num_spatial_layers_ > 1 && + codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) { + RTC_LOG(LS_ERROR) << "Flexible mode is required for screenshare with " + "several spatial layers"; + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + // External reference control is required for different frame rate on spatial + // layers because libvpx generates rtp incompatible references in this case. 
+ external_ref_control_ = external_ref_ctrl_ || + (num_spatial_layers_ > 1 && + codec_.mode == VideoCodecMode::kScreensharing) || + inter_layer_pred_ == InterLayerPredMode::kOn; + + if (num_temporal_layers_ == 1) { + gof_.SetGofInfoVP9(kTemporalStructureMode1); + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING; + config_->ts_number_layers = 1; + config_->ts_rate_decimator[0] = 1; + config_->ts_periodicity = 1; + config_->ts_layer_id[0] = 0; + } else if (num_temporal_layers_ == 2) { + gof_.SetGofInfoVP9(kTemporalStructureMode2); + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101; + config_->ts_number_layers = 2; + config_->ts_rate_decimator[0] = 2; + config_->ts_rate_decimator[1] = 1; + config_->ts_periodicity = 2; + config_->ts_layer_id[0] = 0; + config_->ts_layer_id[1] = 1; + } else if (num_temporal_layers_ == 3) { + gof_.SetGofInfoVP9(kTemporalStructureMode3); + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212; + config_->ts_number_layers = 3; + config_->ts_rate_decimator[0] = 4; + config_->ts_rate_decimator[1] = 2; + config_->ts_rate_decimator[2] = 1; + config_->ts_periodicity = 4; + config_->ts_layer_id[0] = 0; + config_->ts_layer_id[1] = 2; + config_->ts_layer_id[2] = 1; + config_->ts_layer_id[3] = 2; + } else { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + if (external_ref_control_) { + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; + if (num_temporal_layers_ > 1 && num_spatial_layers_ > 1 && + codec_.mode == VideoCodecMode::kScreensharing) { + // External reference control for several temporal layers with different + // frame rates on spatial layers is not implemented yet. 
+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + } + ref_buf_ = {}; + + return InitAndSetControlSettings(inst); +} + +int LibvpxVp9Encoder::NumberOfThreads(int width, + int height, + int number_of_cores) { + // Keep the number of encoder threads equal to the possible number of column + // tiles, which is (1, 2, 4, 8). See comments below for VP9E_SET_TILE_COLUMNS. + if (width * height >= 1280 * 720 && number_of_cores > 4) { + return 4; + } else if (width * height >= 640 * 360 && number_of_cores > 2) { + return 2; + } else { +// Use 2 threads for low res on ARM. +#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || \ + defined(WEBRTC_ANDROID) + if (width * height >= 320 * 180 && number_of_cores > 2) { + return 2; + } +#endif + // 1 thread less than VGA. + return 1; + } +} + +int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) { + // Set QP-min/max per spatial and temporal layer. + int tot_num_layers = num_spatial_layers_ * num_temporal_layers_; + for (int i = 0; i < tot_num_layers; ++i) { + svc_params_.max_quantizers[i] = config_->rc_max_quantizer; + svc_params_.min_quantizers[i] = config_->rc_min_quantizer; + } + config_->ss_number_layers = num_spatial_layers_; + if (svc_controller_) { + auto stream_config = svc_controller_->StreamConfig(); + for (int i = 0; i < stream_config.num_spatial_layers; ++i) { + svc_params_.scaling_factor_num[i] = stream_config.scaling_factor_num[i]; + svc_params_.scaling_factor_den[i] = stream_config.scaling_factor_den[i]; + } + } else if (ExplicitlyConfiguredSpatialLayers()) { + for (int i = 0; i < num_spatial_layers_; ++i) { + const auto& layer = codec_.spatialLayers[i]; + RTC_CHECK_GT(layer.width, 0); + const int scale_factor = codec_.width / layer.width; + RTC_DCHECK_GT(scale_factor, 0); + + // Ensure scaler factor is integer. + if (scale_factor * layer.width != codec_.width) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + // Ensure scale factor is the same in both dimensions. 
+ if (scale_factor * layer.height != codec_.height) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + // Ensure scale factor is power of two. + const bool is_pow_of_two = (scale_factor & (scale_factor - 1)) == 0; + if (!is_pow_of_two) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + svc_params_.scaling_factor_num[i] = 1; + svc_params_.scaling_factor_den[i] = scale_factor; + + RTC_DCHECK_GT(codec_.spatialLayers[i].maxFramerate, 0); + RTC_DCHECK_LE(codec_.spatialLayers[i].maxFramerate, codec_.maxFramerate); + if (i > 0) { + // Frame rate of high spatial layer is supposed to be equal or higher + // than frame rate of low spatial layer. + RTC_DCHECK_GE(codec_.spatialLayers[i].maxFramerate, + codec_.spatialLayers[i - 1].maxFramerate); + } + } + } else { + int scaling_factor_num = 256; + for (int i = num_spatial_layers_ - 1; i >= 0; --i) { + // 1:2 scaling in each dimension. + svc_params_.scaling_factor_num[i] = scaling_factor_num; + svc_params_.scaling_factor_den[i] = 256; + } + } + + UpdatePerformanceFlags(); + RTC_DCHECK_EQ(performance_flags_by_spatial_index_.size(), + static_cast<size_t>(num_spatial_layers_)); + + SvcRateAllocator init_allocator(codec_); + current_bitrate_allocation_ = + init_allocator.Allocate(VideoBitrateAllocationParameters( + inst->startBitrate * 1000, inst->maxFramerate)); + if (!SetSvcRates(current_bitrate_allocation_)) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + const vpx_codec_err_t rv = libvpx_->codec_enc_init( + encoder_, vpx_codec_vp9_cx(), config_, + config_->g_bit_depth == VPX_BITS_8 ? 
0 : VPX_CODEC_USE_HIGHBITDEPTH); + if (rv != VPX_CODEC_OK) { + RTC_LOG(LS_ERROR) << "Init error: " << libvpx_->codec_err_to_string(rv); + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + if (performance_flags_.use_per_layer_speed) { + for (int si = 0; si < num_spatial_layers_; ++si) { + svc_params_.speed_per_layer[si] = + performance_flags_by_spatial_index_[si].base_layer_speed; + svc_params_.loopfilter_ctrl[si] = + performance_flags_by_spatial_index_[si].deblock_mode; + } + bool denoiser_on = + AllowDenoising() && inst->VP9().denoisingOn && + performance_flags_by_spatial_index_[num_spatial_layers_ - 1] + .allow_denoising; + libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY, + denoiser_on ? 1 : 0); + } + + libvpx_->codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT, + rc_max_intra_target_); + libvpx_->codec_control(encoder_, VP9E_SET_AQ_MODE, + inst->VP9().adaptiveQpMode ? 3 : 0); + + libvpx_->codec_control(encoder_, VP9E_SET_FRAME_PARALLEL_DECODING, 0); + libvpx_->codec_control(encoder_, VP9E_SET_SVC_GF_TEMPORAL_REF, 0); + + if (is_svc_) { + libvpx_->codec_control(encoder_, VP9E_SET_SVC, 1); + libvpx_->codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_); + } + if (!is_svc_ || !performance_flags_.use_per_layer_speed) { + libvpx_->codec_control( + encoder_, VP8E_SET_CPUUSED, + performance_flags_by_spatial_index_.rbegin()->base_layer_speed); + } + + if (num_spatial_layers_ > 1) { + switch (inter_layer_pred_) { + case InterLayerPredMode::kOn: + libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 0); + break; + case InterLayerPredMode::kOff: + libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 1); + break; + case InterLayerPredMode::kOnKeyPic: + libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 2); + break; + default: + RTC_DCHECK_NOTREACHED(); + } + + memset(&svc_drop_frame_, 0, sizeof(svc_drop_frame_)); + const bool reverse_constrained_drop_mode = + inter_layer_pred_ == InterLayerPredMode::kOn && + 
codec_.mode == VideoCodecMode::kScreensharing && + num_spatial_layers_ > 1; + if (reverse_constrained_drop_mode) { + // Screenshare dropping mode: drop a layer only together with all lower + // layers. This ensures that drops on lower layers won't reduce frame-rate + // for higher layers and reference structure is RTP-compatible. + svc_drop_frame_.framedrop_mode = CONSTRAINED_FROM_ABOVE_DROP; + svc_drop_frame_.max_consec_drop = 5; + for (size_t i = 0; i < num_spatial_layers_; ++i) { + svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh; + } + // No buffering is needed because the highest layer is always present in + // all frames in CONSTRAINED_FROM_ABOVE drop mode. + layer_buffering_ = false; + } else { + // Configure encoder to drop entire superframe whenever it needs to drop + // a layer. This mode is preferred over per-layer dropping which causes + // quality flickering and is not compatible with RTP non-flexible mode. + svc_drop_frame_.framedrop_mode = + full_superframe_drop_ ? FULL_SUPERFRAME_DROP : CONSTRAINED_LAYER_DROP; + // Buffering is needed only for constrained layer drop, as it's not clear + // which frame is the last. + layer_buffering_ = !full_superframe_drop_; + svc_drop_frame_.max_consec_drop = std::numeric_limits<int>::max(); + for (size_t i = 0; i < num_spatial_layers_; ++i) { + svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh; + } + } + libvpx_->codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER, + &svc_drop_frame_); + } + + // Register callback for getting each spatial layer. + vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = { + LibvpxVp9Encoder::EncoderOutputCodedPacketCallback, + reinterpret_cast<void*>(this)}; + libvpx_->codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK, + reinterpret_cast<void*>(&cbp)); + + // Control function to set the number of column tiles in encoding a frame, in + // log2 unit: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile columns. 
+ // The number tile columns will be capped by the encoder based on image size + // (minimum width of tile column is 256 pixels, maximum is 4096). + libvpx_->codec_control(encoder_, VP9E_SET_TILE_COLUMNS, + static_cast<int>((config_->g_threads >> 1))); + + // Turn on row-based multithreading. + libvpx_->codec_control(encoder_, VP9E_SET_ROW_MT, 1); + + if (AllowDenoising() && !performance_flags_.use_per_layer_speed) { + libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY, + inst->VP9().denoisingOn ? 1 : 0); + } + + if (codec_.mode == VideoCodecMode::kScreensharing) { + // Adjust internal parameters to screen content. + libvpx_->codec_control(encoder_, VP9E_SET_TUNE_CONTENT, 1); + } + // Enable encoder skip of static/low content blocks. + libvpx_->codec_control(encoder_, VP8E_SET_STATIC_THRESHOLD, 1); + inited_ = true; + config_changed_ = true; + return WEBRTC_VIDEO_CODEC_OK; +} + +uint32_t LibvpxVp9Encoder::MaxIntraTarget(uint32_t optimal_buffer_size) { + // Set max to the optimal buffer level (normalized by target BR), + // and scaled by a scale_par. + // Max target size = scale_par * optimal_buffer_size * targetBR[Kbps]. + // This value is presented in percentage of perFrameBw: + // perFrameBw = targetBR[Kbps] * 1000 / framerate. + // The target in % is as follows: + float scale_par = 0.5; + uint32_t target_pct = + optimal_buffer_size * scale_par * codec_.maxFramerate / 10; + // Don't go below 3 times the per frame bandwidth. + const uint32_t min_intra_size = 300; + return (target_pct < min_intra_size) ? min_intra_size : target_pct; +} + +int LibvpxVp9Encoder::Encode(const VideoFrame& input_image, + const std::vector<VideoFrameType>* frame_types) { + if (!inited_) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (encoded_complete_callback_ == nullptr) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (num_active_spatial_layers_ == 0) { + // All spatial layers are disabled, return without encoding anything. 
+ return WEBRTC_VIDEO_CODEC_OK; + } + + // We only support one stream at the moment. + if (frame_types && !frame_types->empty()) { + if ((*frame_types)[0] == VideoFrameType::kVideoFrameKey) { + force_key_frame_ = true; + } + } + + if (pics_since_key_ + 1 == + static_cast<size_t>(codec_.VP9()->keyFrameInterval)) { + force_key_frame_ = true; + } + + if (svc_controller_) { + layer_frames_ = svc_controller_->NextFrameConfig(force_key_frame_); + if (layer_frames_.empty()) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + if (layer_frames_.front().IsKeyframe()) { + force_key_frame_ = true; + } + } + + vpx_svc_layer_id_t layer_id = {0}; + if (!force_key_frame_) { + const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof; + layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx]; + + if (codec_.mode == VideoCodecMode::kScreensharing) { + const uint32_t frame_timestamp_ms = + 1000 * input_image.timestamp() / kVideoPayloadTypeFrequency; + + // To ensure that several rate-limiters with different limits don't + // interfere, they must be queried in order of increasing limit. + + bool use_steady_state_limiter = + variable_framerate_experiment_.enabled && + input_image.update_rect().IsEmpty() && + num_steady_state_frames_ >= + variable_framerate_experiment_.frames_before_steady_state; + + // Need to check all frame limiters, even if lower layers are disabled, + // because variable frame-rate limiter should be checked after the first + // layer. It's easier to overwrite active layers after, then check all + // cases. + for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) { + const float layer_fps = + framerate_controller_[layer_id.spatial_layer_id].GetTargetRate(); + // Use steady state rate-limiter at the correct place. 
+ if (use_steady_state_limiter && + layer_fps > variable_framerate_experiment_.framerate_limit - 1e-9) { + if (variable_framerate_controller_.DropFrame(frame_timestamp_ms)) { + layer_id.spatial_layer_id = num_active_spatial_layers_; + } + // Break always: if rate limiter triggered frame drop, no need to + // continue; otherwise, the rate is less than the next limiters. + break; + } + if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) { + ++layer_id.spatial_layer_id; + } else { + break; + } + } + + if (use_steady_state_limiter && + layer_id.spatial_layer_id < num_active_spatial_layers_) { + variable_framerate_controller_.AddFrame(frame_timestamp_ms); + } + } + + if (force_all_active_layers_) { + layer_id.spatial_layer_id = first_active_layer_; + force_all_active_layers_ = false; + } + + RTC_DCHECK_LE(layer_id.spatial_layer_id, num_active_spatial_layers_); + if (layer_id.spatial_layer_id >= num_active_spatial_layers_) { + // Drop entire picture. + return WEBRTC_VIDEO_CODEC_OK; + } + } + + // Need to set temporal layer id on ALL layers, even disabled ones. + // Otherwise libvpx might produce frames on a disabled layer: + // http://crbug.com/1051476 + for (int sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) { + layer_id.temporal_layer_id_per_spatial[sl_idx] = layer_id.temporal_layer_id; + } + + if (layer_id.spatial_layer_id < first_active_layer_) { + layer_id.spatial_layer_id = first_active_layer_; + } + + if (svc_controller_) { + layer_id.spatial_layer_id = layer_frames_.front().SpatialId(); + layer_id.temporal_layer_id = layer_frames_.front().TemporalId(); + for (const auto& layer : layer_frames_) { + layer_id.temporal_layer_id_per_spatial[layer.SpatialId()] = + layer.TemporalId(); + } + SetActiveSpatialLayers(); + } + + if (is_svc_ && performance_flags_.use_per_layer_speed) { + // Update speed settings that might depend on temporal index. 
+ bool speed_updated = false; + for (int sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) { + const int target_speed = + layer_id.temporal_layer_id_per_spatial[sl_idx] == 0 + ? performance_flags_by_spatial_index_[sl_idx].base_layer_speed + : performance_flags_by_spatial_index_[sl_idx].high_layer_speed; + if (svc_params_.speed_per_layer[sl_idx] != target_speed) { + svc_params_.speed_per_layer[sl_idx] = target_speed; + speed_updated = true; + } + } + if (speed_updated) { + libvpx_->codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_); + } + } + + libvpx_->codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id); + + if (num_spatial_layers_ > 1) { + // Update frame dropping settings as they may change on per-frame basis. + libvpx_->codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER, + &svc_drop_frame_); + } + + if (config_changed_) { + if (libvpx_->codec_enc_config_set(encoder_, config_)) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + + if (!performance_flags_.use_per_layer_speed) { + // Not setting individual speeds per layer, find the highest active + // resolution instead and base the speed on that. 
+ for (int i = num_spatial_layers_ - 1; i >= 0; --i) { + if (config_->ss_target_bitrate[i] > 0) { + int width = (svc_params_.scaling_factor_num[i] * config_->g_w) / + svc_params_.scaling_factor_den[i]; + int height = (svc_params_.scaling_factor_num[i] * config_->g_h) / + svc_params_.scaling_factor_den[i]; + int speed = + std::prev(performance_flags_.settings_by_resolution.lower_bound( + width * height)) + ->second.base_layer_speed; + libvpx_->codec_control(encoder_, VP8E_SET_CPUUSED, speed); + break; + } + } + } + config_changed_ = false; + } + + if (input_image.width() != codec_.width || + input_image.height() != codec_.height) { + int ret = UpdateCodecFrameSize(input_image); + if (ret < 0) { + return ret; + } + } + + RTC_DCHECK_EQ(input_image.width(), raw_->d_w); + RTC_DCHECK_EQ(input_image.height(), raw_->d_h); + + // Set input image for use in the callback. + // This was necessary since you need some information from input_image. + // You can save only the necessary information (such as timestamp) instead of + // doing this. + input_image_ = &input_image; + + // In case we need to map the buffer, `mapped_buffer` is used to keep it alive + // through reference counting until after encoding has finished. + rtc::scoped_refptr<const VideoFrameBuffer> mapped_buffer; + const I010BufferInterface* i010_buffer; + rtc::scoped_refptr<const I010BufferInterface> i010_copy; + switch (profile_) { + case VP9Profile::kProfile0: { + mapped_buffer = + PrepareBufferForProfile0(input_image.video_frame_buffer()); + if (!mapped_buffer) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + break; + } + case VP9Profile::kProfile1: { + RTC_DCHECK_NOTREACHED(); + break; + } + case VP9Profile::kProfile2: { + // We can inject kI010 frames directly for encode. All other formats + // should be converted to it. 
+ switch (input_image.video_frame_buffer()->type()) { + case VideoFrameBuffer::Type::kI010: { + i010_buffer = input_image.video_frame_buffer()->GetI010(); + break; + } + default: { + auto i420_buffer = input_image.video_frame_buffer()->ToI420(); + if (!i420_buffer) { + RTC_LOG(LS_ERROR) << "Failed to convert " + << VideoFrameBufferTypeToString( + input_image.video_frame_buffer()->type()) + << " image to I420. Can't encode frame."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + i010_copy = I010Buffer::Copy(*i420_buffer); + i010_buffer = i010_copy.get(); + } + } + raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>( + reinterpret_cast<const uint8_t*>(i010_buffer->DataY())); + raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>( + reinterpret_cast<const uint8_t*>(i010_buffer->DataU())); + raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>( + reinterpret_cast<const uint8_t*>(i010_buffer->DataV())); + raw_->stride[VPX_PLANE_Y] = i010_buffer->StrideY() * 2; + raw_->stride[VPX_PLANE_U] = i010_buffer->StrideU() * 2; + raw_->stride[VPX_PLANE_V] = i010_buffer->StrideV() * 2; + break; + } + case VP9Profile::kProfile3: { + RTC_DCHECK_NOTREACHED(); + break; + } + } + + vpx_enc_frame_flags_t flags = 0; + if (force_key_frame_) { + flags = VPX_EFLAG_FORCE_KF; + } + + if (svc_controller_) { + vpx_svc_ref_frame_config_t ref_config = Vp9References(layer_frames_); + libvpx_->codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG, + &ref_config); + } else if (external_ref_control_) { + vpx_svc_ref_frame_config_t ref_config = + SetReferences(force_key_frame_, layer_id.spatial_layer_id); + + if (VideoCodecMode::kScreensharing == codec_.mode) { + for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) { + ref_config.duration[sl_idx] = static_cast<int64_t>( + 90000 / (std::min(static_cast<float>(codec_.maxFramerate), + framerate_controller_[sl_idx].GetTargetRate()))); + } + } + + libvpx_->codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG, + &ref_config); + } + + 
first_frame_in_picture_ = true; + + // TODO(ssilkin): Frame duration should be specified per spatial layer + // since their frame rate can be different. For now calculate frame duration + // based on target frame rate of the highest spatial layer, which frame rate + // is supposed to be equal or higher than frame rate of low spatial layers. + // Also, timestamp should represent actual time passed since previous frame + // (not 'expected' time). Then rate controller can drain buffer more + // accurately. + RTC_DCHECK_GE(framerate_controller_.size(), num_active_spatial_layers_); + float target_framerate_fps = + (codec_.mode == VideoCodecMode::kScreensharing) + ? std::min(static_cast<float>(codec_.maxFramerate), + framerate_controller_[num_active_spatial_layers_ - 1] + .GetTargetRate()) + : codec_.maxFramerate; + uint32_t duration = static_cast<uint32_t>(90000 / target_framerate_fps); + const vpx_codec_err_t rv = libvpx_->codec_encode( + encoder_, raw_, timestamp_, duration, flags, VPX_DL_REALTIME); + if (rv != VPX_CODEC_OK) { + RTC_LOG(LS_ERROR) << "Encoding error: " << libvpx_->codec_err_to_string(rv) + << "\n" + "Details: " + << libvpx_->codec_error(encoder_) << "\n" + << libvpx_->codec_error_detail(encoder_); + return WEBRTC_VIDEO_CODEC_ERROR; + } + timestamp_ += duration; + + if (layer_buffering_) { + const bool end_of_picture = true; + DeliverBufferedFrame(end_of_picture); + } + + return WEBRTC_VIDEO_CODEC_OK; +} + +int LibvpxVp9Encoder::UpdateCodecFrameSize( + const VideoFrame& input_image) { + RTC_LOG(LS_INFO) << "Reconfiging VP from " << + codec_.width << "x" << codec_.height << " to " << + input_image.width() << "x" << input_image.height(); + // Preserve latest bitrate/framerate setting + // TODO: Mozilla - see below, we need to save more state here. 
+ //uint32_t old_bitrate_kbit = config_->rc_target_bitrate; + //uint32_t old_framerate = codec_.maxFramerate; + + codec_.width = input_image.width(); + codec_.height = input_image.height(); + + vpx_img_free(raw_); + raw_ = vpx_img_wrap(NULL, VPX_IMG_FMT_I420, codec_.width, codec_.height, + 1, NULL); + // Update encoder context for new frame size. + config_->g_w = codec_.width; + config_->g_h = codec_.height; + + // Determine number of threads based on the image size and #cores. + config_->g_threads = NumberOfThreads(codec_.width, codec_.height, + num_cores_); + + // NOTE: We would like to do this the same way vp8 does it + // (with vpx_codec_enc_config_set()), but that causes asserts + // in AQ 3 (cyclic); and in AQ 0 it works, but on a resize to smaller + // than 1/2 x 1/2 original it asserts in convolve(). Given these + // bugs in trying to do it the "right" way, we basically re-do + // the initialization. + vpx_codec_destroy(encoder_); // clean up old state + int result = InitAndSetControlSettings(&codec_); + if (result == WEBRTC_VIDEO_CODEC_OK) { + // TODO: Mozilla rates have become much more complicated, we need to store + // more state or find another way of doing this. 
+ //return SetRates(old_bitrate_kbit, old_framerate); + RTC_CHECK(false); + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + return result; +} + +bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, + absl::optional<int>* spatial_idx, + absl::optional<int>* temporal_idx, + const vpx_codec_cx_pkt& pkt) { + RTC_CHECK(codec_specific != nullptr); + codec_specific->codecType = kVideoCodecVP9; + CodecSpecificInfoVP9* vp9_info = &(codec_specific->codecSpecific.VP9); + + vp9_info->first_frame_in_picture = first_frame_in_picture_; + vp9_info->flexible_mode = is_flexible_mode_; + + if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) { + pics_since_key_ = 0; + } else if (first_frame_in_picture_) { + ++pics_since_key_; + } + + vpx_svc_layer_id_t layer_id = {0}; + libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); + + // Can't have keyframe with non-zero temporal layer. + RTC_DCHECK(pics_since_key_ != 0 || layer_id.temporal_layer_id == 0); + + RTC_CHECK_GT(num_temporal_layers_, 0); + RTC_CHECK_GT(num_active_spatial_layers_, 0); + if (num_temporal_layers_ == 1) { + RTC_CHECK_EQ(layer_id.temporal_layer_id, 0); + vp9_info->temporal_idx = kNoTemporalIdx; + *temporal_idx = absl::nullopt; + } else { + vp9_info->temporal_idx = layer_id.temporal_layer_id; + *temporal_idx = layer_id.temporal_layer_id; + } + if (num_active_spatial_layers_ == 1) { + RTC_CHECK_EQ(layer_id.spatial_layer_id, 0); + *spatial_idx = absl::nullopt; + } else { + *spatial_idx = layer_id.spatial_layer_id; + } + + const bool is_key_pic = (pics_since_key_ == 0); + const bool is_inter_layer_pred_allowed = + (inter_layer_pred_ == InterLayerPredMode::kOn || + (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic)); + + // Always set inter_layer_predicted to true on high layer frame if inter-layer + // prediction (ILP) is allowed even if encoder didn't actually use it. 
+ // Setting inter_layer_predicted to false would allow receiver to decode high + // layer frame without decoding low layer frame. If that would happen (e.g. + // if low layer frame is lost) then receiver won't be able to decode next high + // layer frame which uses ILP. + vp9_info->inter_layer_predicted = + first_frame_in_picture_ ? false : is_inter_layer_pred_allowed; + + // Mark all low spatial layer frames as references (not just frames of + // active low spatial layers) if inter-layer prediction is enabled since + // these frames are indirect references of high spatial layer, which can + // later be enabled without key frame. + vp9_info->non_ref_for_inter_layer_pred = + !is_inter_layer_pred_allowed || + layer_id.spatial_layer_id + 1 == num_spatial_layers_; + + // Always populate this, so that the packetizer can properly set the marker + // bit. + vp9_info->num_spatial_layers = num_active_spatial_layers_; + vp9_info->first_active_layer = first_active_layer_; + + vp9_info->num_ref_pics = 0; + FillReferenceIndices(pkt, pics_since_key_, vp9_info->inter_layer_predicted, + vp9_info); + if (vp9_info->flexible_mode) { + vp9_info->gof_idx = kNoGofIdx; + if (!svc_controller_) { + if (num_temporal_layers_ == 1) { + vp9_info->temporal_up_switch = true; + } else { + // In flexible mode with > 1 temporal layer but no SVC controller we + // can't techincally determine if a frame is an upswitch point, use + // gof-based data as proxy for now. + // TODO(sprang): Remove once SVC controller is the only choice. 
+ vp9_info->gof_idx = + static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof); + vp9_info->temporal_up_switch = + gof_.temporal_up_switch[vp9_info->gof_idx]; + } + } + } else { + vp9_info->gof_idx = + static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof); + vp9_info->temporal_up_switch = gof_.temporal_up_switch[vp9_info->gof_idx]; + RTC_DCHECK(vp9_info->num_ref_pics == gof_.num_ref_pics[vp9_info->gof_idx] || + vp9_info->num_ref_pics == 0); + } + + vp9_info->inter_pic_predicted = (!is_key_pic && vp9_info->num_ref_pics > 0); + + // Write SS on key frame of independently coded spatial layers and on base + // temporal/spatial layer frame if number of layers changed without issuing + // of key picture (inter-layer prediction is enabled). + const bool is_key_frame = is_key_pic && !vp9_info->inter_layer_predicted; + if (is_key_frame || (ss_info_needed_ && layer_id.temporal_layer_id == 0 && + layer_id.spatial_layer_id == first_active_layer_)) { + vp9_info->ss_data_available = true; + vp9_info->spatial_layer_resolution_present = true; + // Signal disabled layers. + for (size_t i = 0; i < first_active_layer_; ++i) { + vp9_info->width[i] = 0; + vp9_info->height[i] = 0; + } + for (size_t i = first_active_layer_; i < num_active_spatial_layers_; ++i) { + vp9_info->width[i] = codec_.width * svc_params_.scaling_factor_num[i] / + svc_params_.scaling_factor_den[i]; + vp9_info->height[i] = codec_.height * svc_params_.scaling_factor_num[i] / + svc_params_.scaling_factor_den[i]; + } + if (vp9_info->flexible_mode) { + vp9_info->gof.num_frames_in_gof = 0; + } else { + vp9_info->gof.CopyGofInfoVP9(gof_); + } + + ss_info_needed_ = false; + } else { + vp9_info->ss_data_available = false; + } + + first_frame_in_picture_ = false; + + // Populate codec-agnostic section in the codec specific structure. 
+ if (svc_controller_) { + auto it = absl::c_find_if( + layer_frames_, + [&](const ScalableVideoController::LayerFrameConfig& config) { + return config.SpatialId() == layer_id.spatial_layer_id; + }); + if (it == layer_frames_.end()) { + RTC_LOG(LS_ERROR) << "Encoder produced a frame for layer S" + << layer_id.spatial_layer_id << "T" + << layer_id.temporal_layer_id + << " that wasn't requested."; + return false; + } + codec_specific->generic_frame_info = svc_controller_->OnEncodeDone(*it); + if (is_key_frame) { + codec_specific->template_structure = + svc_controller_->DependencyStructure(); + auto& resolutions = codec_specific->template_structure->resolutions; + resolutions.resize(num_spatial_layers_); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + resolutions[sid] = RenderResolution( + /*width=*/codec_.width * svc_params_.scaling_factor_num[sid] / + svc_params_.scaling_factor_den[sid], + /*height=*/codec_.height * svc_params_.scaling_factor_num[sid] / + svc_params_.scaling_factor_den[sid]); + } + } + if (is_flexible_mode_) { + // Populate data for legacy temporal-upswitch state. + // We can switch up to a higher temporal layer only if all temporal layers + // higher than this (within the current spatial layer) are switch points. + vp9_info->temporal_up_switch = true; + for (int i = layer_id.temporal_layer_id + 1; i < num_temporal_layers_; + ++i) { + // Assumes decode targets are always ordered first by spatial then by + // temporal id. 
+ size_t dti_index = + (layer_id.spatial_layer_id * num_temporal_layers_) + i; + vp9_info->temporal_up_switch &= + (codec_specific->generic_frame_info + ->decode_target_indications[dti_index] == + DecodeTargetIndication::kSwitch); + } + } + } + return true; +} + +void LibvpxVp9Encoder::FillReferenceIndices(const vpx_codec_cx_pkt& pkt, + const size_t pic_num, + const bool inter_layer_predicted, + CodecSpecificInfoVP9* vp9_info) { + vpx_svc_layer_id_t layer_id = {0}; + libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); + + const bool is_key_frame = + (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false; + + std::vector<RefFrameBuffer> ref_buf_list; + + if (is_svc_) { + vpx_svc_ref_frame_config_t enc_layer_conf = {{0}}; + libvpx_->codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, + &enc_layer_conf); + int ref_buf_flags = 0; + + if (enc_layer_conf.reference_last[layer_id.spatial_layer_id]) { + const size_t fb_idx = + enc_layer_conf.lst_fb_idx[layer_id.spatial_layer_id]; + RTC_DCHECK_LT(fb_idx, ref_buf_.size()); + if (std::find(ref_buf_list.begin(), ref_buf_list.end(), + ref_buf_[fb_idx]) == ref_buf_list.end()) { + ref_buf_list.push_back(ref_buf_[fb_idx]); + ref_buf_flags |= 1 << fb_idx; + } + } + + if (enc_layer_conf.reference_alt_ref[layer_id.spatial_layer_id]) { + const size_t fb_idx = + enc_layer_conf.alt_fb_idx[layer_id.spatial_layer_id]; + RTC_DCHECK_LT(fb_idx, ref_buf_.size()); + if (std::find(ref_buf_list.begin(), ref_buf_list.end(), + ref_buf_[fb_idx]) == ref_buf_list.end()) { + ref_buf_list.push_back(ref_buf_[fb_idx]); + ref_buf_flags |= 1 << fb_idx; + } + } + + if (enc_layer_conf.reference_golden[layer_id.spatial_layer_id]) { + const size_t fb_idx = + enc_layer_conf.gld_fb_idx[layer_id.spatial_layer_id]; + RTC_DCHECK_LT(fb_idx, ref_buf_.size()); + if (std::find(ref_buf_list.begin(), ref_buf_list.end(), + ref_buf_[fb_idx]) == ref_buf_list.end()) { + ref_buf_list.push_back(ref_buf_[fb_idx]); + ref_buf_flags |= 1 << fb_idx; + } + } + + 
RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl " + << layer_id.spatial_layer_id << " tl " + << layer_id.temporal_layer_id << " refered buffers " + << (ref_buf_flags & (1 << 0) ? 1 : 0) + << (ref_buf_flags & (1 << 1) ? 1 : 0) + << (ref_buf_flags & (1 << 2) ? 1 : 0) + << (ref_buf_flags & (1 << 3) ? 1 : 0) + << (ref_buf_flags & (1 << 4) ? 1 : 0) + << (ref_buf_flags & (1 << 5) ? 1 : 0) + << (ref_buf_flags & (1 << 6) ? 1 : 0) + << (ref_buf_flags & (1 << 7) ? 1 : 0); + + } else if (!is_key_frame) { + RTC_DCHECK_EQ(num_spatial_layers_, 1); + RTC_DCHECK_EQ(num_temporal_layers_, 1); + // In non-SVC mode encoder doesn't provide reference list. Assume each frame + // refers previous one, which is stored in buffer 0. + ref_buf_list.push_back(ref_buf_[0]); + } + + std::vector<size_t> ref_pid_list; + + vp9_info->num_ref_pics = 0; + for (const RefFrameBuffer& ref_buf : ref_buf_list) { + RTC_DCHECK_LE(ref_buf.pic_num, pic_num); + if (ref_buf.pic_num < pic_num) { + if (inter_layer_pred_ != InterLayerPredMode::kOn) { + // RTP spec limits temporal prediction to the same spatial layer. + // It is safe to ignore this requirement if inter-layer prediction is + // enabled for all frames when all base frames are relayed to receiver. + RTC_DCHECK_EQ(ref_buf.spatial_layer_id, layer_id.spatial_layer_id); + } else { + RTC_DCHECK_LE(ref_buf.spatial_layer_id, layer_id.spatial_layer_id); + } + RTC_DCHECK_LE(ref_buf.temporal_layer_id, layer_id.temporal_layer_id); + + // Encoder may reference several spatial layers on the same previous + // frame in case if some spatial layers are skipped on the current frame. + // We shouldn't put duplicate references as it may break some old + // clients and isn't RTP compatible. 
+ if (std::find(ref_pid_list.begin(), ref_pid_list.end(), + ref_buf.pic_num) != ref_pid_list.end()) { + continue; + } + ref_pid_list.push_back(ref_buf.pic_num); + + const size_t p_diff = pic_num - ref_buf.pic_num; + RTC_DCHECK_LE(p_diff, 127UL); + + vp9_info->p_diff[vp9_info->num_ref_pics] = static_cast<uint8_t>(p_diff); + ++vp9_info->num_ref_pics; + } else { + RTC_DCHECK(inter_layer_predicted); + // RTP spec only allows to use previous spatial layer for inter-layer + // prediction. + RTC_DCHECK_EQ(ref_buf.spatial_layer_id + 1, layer_id.spatial_layer_id); + } + } +} + +void LibvpxVp9Encoder::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt, + const size_t pic_num) { + vpx_svc_layer_id_t layer_id = {0}; + libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); + + RefFrameBuffer frame_buf = {.pic_num = pic_num, + .spatial_layer_id = layer_id.spatial_layer_id, + .temporal_layer_id = layer_id.temporal_layer_id}; + + if (is_svc_) { + vpx_svc_ref_frame_config_t enc_layer_conf = {{0}}; + libvpx_->codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, + &enc_layer_conf); + const int update_buffer_slot = + enc_layer_conf.update_buffer_slot[layer_id.spatial_layer_id]; + + for (size_t i = 0; i < ref_buf_.size(); ++i) { + if (update_buffer_slot & (1 << i)) { + ref_buf_[i] = frame_buf; + } + } + + RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl " + << layer_id.spatial_layer_id << " tl " + << layer_id.temporal_layer_id << " updated buffers " + << (update_buffer_slot & (1 << 0) ? 1 : 0) + << (update_buffer_slot & (1 << 1) ? 1 : 0) + << (update_buffer_slot & (1 << 2) ? 1 : 0) + << (update_buffer_slot & (1 << 3) ? 1 : 0) + << (update_buffer_slot & (1 << 4) ? 1 : 0) + << (update_buffer_slot & (1 << 5) ? 1 : 0) + << (update_buffer_slot & (1 << 6) ? 1 : 0) + << (update_buffer_slot & (1 << 7) ? 1 : 0); + } else { + RTC_DCHECK_EQ(num_spatial_layers_, 1); + RTC_DCHECK_EQ(num_temporal_layers_, 1); + // In non-svc mode encoder doesn't provide reference list. 
Assume each frame + // is reference and stored in buffer 0. + ref_buf_[0] = frame_buf; + } +} + +vpx_svc_ref_frame_config_t LibvpxVp9Encoder::SetReferences( + bool is_key_pic, + int first_active_spatial_layer_id) { + // kRefBufIdx, kUpdBufIdx need to be updated to support longer GOFs. + RTC_DCHECK_LE(gof_.num_frames_in_gof, 4); + + vpx_svc_ref_frame_config_t ref_config; + memset(&ref_config, 0, sizeof(ref_config)); + + const size_t num_temporal_refs = std::max(1, num_temporal_layers_ - 1); + const bool is_inter_layer_pred_allowed = + inter_layer_pred_ == InterLayerPredMode::kOn || + (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic); + absl::optional<int> last_updated_buf_idx; + + // Put temporal reference to LAST and spatial reference to GOLDEN. Update + // frame buffer (i.e. store encoded frame) if current frame is a temporal + // reference (i.e. it belongs to a low temporal layer) or it is a spatial + // reference. In later case, always store spatial reference in the last + // reference frame buffer. + // For the case of 3 temporal and 3 spatial layers we need 6 frame buffers + // for temporal references plus 1 buffer for spatial reference. 7 buffers + // in total. + + for (int sl_idx = first_active_spatial_layer_id; + sl_idx < num_active_spatial_layers_; ++sl_idx) { + const size_t curr_pic_num = is_key_pic ? 0 : pics_since_key_ + 1; + const size_t gof_idx = curr_pic_num % gof_.num_frames_in_gof; + + if (!is_key_pic) { + // Set up temporal reference. + const int buf_idx = sl_idx * num_temporal_refs + kRefBufIdx[gof_idx]; + + // Last reference frame buffer is reserved for spatial reference. It is + // not supposed to be used for temporal prediction. + RTC_DCHECK_LT(buf_idx, kNumVp9Buffers - 1); + + const int pid_diff = curr_pic_num - ref_buf_[buf_idx].pic_num; + // Incorrect spatial layer may be in the buffer due to a key-frame. 
+ const bool same_spatial_layer = + ref_buf_[buf_idx].spatial_layer_id == sl_idx; + bool correct_pid = false; + if (is_flexible_mode_) { + correct_pid = pid_diff > 0 && pid_diff < kMaxAllowedPidDiff; + } else { + // Below code assumes single temporal referecence. + RTC_DCHECK_EQ(gof_.num_ref_pics[gof_idx], 1); + correct_pid = pid_diff == gof_.pid_diff[gof_idx][0]; + } + + if (same_spatial_layer && correct_pid) { + ref_config.lst_fb_idx[sl_idx] = buf_idx; + ref_config.reference_last[sl_idx] = 1; + } else { + // This reference doesn't match with one specified by GOF. This can + // only happen if spatial layer is enabled dynamically without key + // frame. Spatial prediction is supposed to be enabled in this case. + RTC_DCHECK(is_inter_layer_pred_allowed && + sl_idx > first_active_spatial_layer_id); + } + } + + if (is_inter_layer_pred_allowed && sl_idx > first_active_spatial_layer_id) { + // Set up spatial reference. + RTC_DCHECK(last_updated_buf_idx); + ref_config.gld_fb_idx[sl_idx] = *last_updated_buf_idx; + ref_config.reference_golden[sl_idx] = 1; + } else { + RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 || + sl_idx == first_active_spatial_layer_id || + inter_layer_pred_ == InterLayerPredMode::kOff); + } + + last_updated_buf_idx.reset(); + + if (gof_.temporal_idx[gof_idx] < num_temporal_layers_ - 1 || + num_temporal_layers_ == 1) { + last_updated_buf_idx = sl_idx * num_temporal_refs + kUpdBufIdx[gof_idx]; + + // Ensure last frame buffer is not used for temporal prediction (it is + // reserved for spatial reference). 
+ RTC_DCHECK_LT(*last_updated_buf_idx, kNumVp9Buffers - 1); + } else if (is_inter_layer_pred_allowed) { + last_updated_buf_idx = kNumVp9Buffers - 1; + } + + if (last_updated_buf_idx) { + ref_config.update_buffer_slot[sl_idx] = 1 << *last_updated_buf_idx; + } + } + + return ref_config; +} + +void LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) { + RTC_DCHECK_EQ(pkt->kind, VPX_CODEC_CX_FRAME_PKT); + + if (pkt->data.frame.sz == 0) { + // Ignore dropped frame. + return; + } + + vpx_svc_layer_id_t layer_id = {0}; + libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); + + if (layer_buffering_) { + // Deliver buffered low spatial layer frame. + const bool end_of_picture = false; + DeliverBufferedFrame(end_of_picture); + } + + encoded_image_.SetEncodedData(EncodedImageBuffer::Create( + static_cast<const uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz)); + + codec_specific_ = {}; + absl::optional<int> spatial_index; + absl::optional<int> temporal_index; + if (!PopulateCodecSpecific(&codec_specific_, &spatial_index, &temporal_index, + *pkt)) { + // Drop the frame. + encoded_image_.set_size(0); + return; + } + encoded_image_.SetSpatialIndex(spatial_index); + encoded_image_.SetTemporalIndex(temporal_index); + + const bool is_key_frame = + ((pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false) && + !codec_specific_.codecSpecific.VP9.inter_layer_predicted; + + // Ensure encoder issued key frame on request. + RTC_DCHECK(is_key_frame || !force_key_frame_); + + // Check if encoded frame is a key frame. 
+ encoded_image_._frameType = VideoFrameType::kVideoFrameDelta; + if (is_key_frame) { + encoded_image_._frameType = VideoFrameType::kVideoFrameKey; + force_key_frame_ = false; + } + + UpdateReferenceBuffers(*pkt, pics_since_key_); + + TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_.size()); + encoded_image_.SetTimestamp(input_image_->timestamp()); + encoded_image_.SetColorSpace(input_image_->color_space()); + encoded_image_._encodedHeight = + pkt->data.frame.height[layer_id.spatial_layer_id]; + encoded_image_._encodedWidth = + pkt->data.frame.width[layer_id.spatial_layer_id]; + int qp = -1; + libvpx_->codec_control(encoder_, VP8E_GET_LAST_QUANTIZER, &qp); + encoded_image_.qp_ = qp; + + if (!layer_buffering_) { + const bool end_of_picture = encoded_image_.SpatialIndex().value_or(0) + 1 == + num_active_spatial_layers_; + DeliverBufferedFrame(end_of_picture); + } +} + +void LibvpxVp9Encoder::DeliverBufferedFrame(bool end_of_picture) { + if (encoded_image_.size() > 0) { + if (num_spatial_layers_ > 1) { + // Restore frame dropping settings, as dropping may be temporary forbidden + // due to dynamically enabled layers. + for (size_t i = 0; i < num_spatial_layers_; ++i) { + svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh; + } + } + + codec_specific_.end_of_picture = end_of_picture; + + encoded_complete_callback_->OnEncodedImage(encoded_image_, + &codec_specific_); + + if (codec_.mode == VideoCodecMode::kScreensharing) { + const uint8_t spatial_idx = encoded_image_.SpatialIndex().value_or(0); + const uint32_t frame_timestamp_ms = + 1000 * encoded_image_.Timestamp() / kVideoPayloadTypeFrequency; + framerate_controller_[spatial_idx].AddFrame(frame_timestamp_ms); + + const size_t steady_state_size = SteadyStateSize( + spatial_idx, codec_specific_.codecSpecific.VP9.temporal_idx); + + // Only frames on spatial layers, which may be limited in a steady state + // are considered for steady state detection. 
+ if (framerate_controller_[spatial_idx].GetTargetRate() > + variable_framerate_experiment_.framerate_limit + 1e-9) { + if (encoded_image_.qp_ <= + variable_framerate_experiment_.steady_state_qp && + encoded_image_.size() <= steady_state_size) { + ++num_steady_state_frames_; + } else { + num_steady_state_frames_ = 0; + } + } + } + encoded_image_.set_size(0); + } +} + +int LibvpxVp9Encoder::RegisterEncodeCompleteCallback( + EncodedImageCallback* callback) { + encoded_complete_callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +VideoEncoder::EncoderInfo LibvpxVp9Encoder::GetEncoderInfo() const { + EncoderInfo info; + info.supports_native_handle = false; + info.implementation_name = "libvpx"; + if (quality_scaler_experiment_.enabled && inited_ && + codec_.VP9().automaticResizeOn) { + info.scaling_settings = VideoEncoder::ScalingSettings( + quality_scaler_experiment_.low_qp, quality_scaler_experiment_.high_qp); + } else { + info.scaling_settings = VideoEncoder::ScalingSettings::kOff; + } + info.has_trusted_rate_controller = trusted_rate_controller_; + info.is_hardware_accelerated = false; + if (inited_) { + // Find the max configured fps of any active spatial layer. + float max_fps = 0.0; + for (size_t si = 0; si < num_spatial_layers_; ++si) { + if (codec_.spatialLayers[si].active && + codec_.spatialLayers[si].maxFramerate > max_fps) { + max_fps = codec_.spatialLayers[si].maxFramerate; + } + } + + for (size_t si = 0; si < num_spatial_layers_; ++si) { + info.fps_allocation[si].clear(); + if (!codec_.spatialLayers[si].active) { + continue; + } + + // This spatial layer may already use a fraction of the total frame rate. + const float sl_fps_fraction = + codec_.spatialLayers[si].maxFramerate / max_fps; + for (size_t ti = 0; ti < num_temporal_layers_; ++ti) { + const uint32_t decimator = + num_temporal_layers_ <= 1 ? 
1 : config_->ts_rate_decimator[ti]; + RTC_DCHECK_GT(decimator, 0); + info.fps_allocation[si].push_back( + rtc::saturated_cast<uint8_t>(EncoderInfo::kMaxFramerateFraction * + (sl_fps_fraction / decimator))); + } + } + if (profile_ == VP9Profile::kProfile0) { + info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420, + VideoFrameBuffer::Type::kNV12}; + } + } + if (!encoder_info_override_.resolution_bitrate_limits().empty()) { + info.resolution_bitrate_limits = + encoder_info_override_.resolution_bitrate_limits(); + } + return info; +} + +size_t LibvpxVp9Encoder::SteadyStateSize(int sid, int tid) { + const size_t bitrate_bps = current_bitrate_allocation_.GetBitrate( + sid, tid == kNoTemporalIdx ? 0 : tid); + const float fps = (codec_.mode == VideoCodecMode::kScreensharing) + ? std::min(static_cast<float>(codec_.maxFramerate), + framerate_controller_[sid].GetTargetRate()) + : codec_.maxFramerate; + return static_cast<size_t>( + bitrate_bps / (8 * fps) * + (100 - + variable_framerate_experiment_.steady_state_undershoot_percentage) / + 100 + + 0.5); +} + +// static +LibvpxVp9Encoder::VariableFramerateExperiment +LibvpxVp9Encoder::ParseVariableFramerateConfig(const FieldTrialsView& trials) { + FieldTrialFlag enabled = FieldTrialFlag("Enabled"); + FieldTrialParameter<double> framerate_limit("min_fps", 5.0); + FieldTrialParameter<int> qp("min_qp", 32); + FieldTrialParameter<int> undershoot_percentage("undershoot", 30); + FieldTrialParameter<int> frames_before_steady_state( + "frames_before_steady_state", 5); + ParseFieldTrial({&enabled, &framerate_limit, &qp, &undershoot_percentage, + &frames_before_steady_state}, + trials.Lookup("WebRTC-VP9VariableFramerateScreenshare")); + VariableFramerateExperiment config; + config.enabled = enabled.Get(); + config.framerate_limit = framerate_limit.Get(); + config.steady_state_qp = qp.Get(); + config.steady_state_undershoot_percentage = undershoot_percentage.Get(); + config.frames_before_steady_state = 
frames_before_steady_state.Get(); + + return config; +} + +// static +LibvpxVp9Encoder::QualityScalerExperiment +LibvpxVp9Encoder::ParseQualityScalerConfig(const FieldTrialsView& trials) { + FieldTrialFlag disabled = FieldTrialFlag("Disabled"); + FieldTrialParameter<int> low_qp("low_qp", kLowVp9QpThreshold); + FieldTrialParameter<int> high_qp("hihg_qp", kHighVp9QpThreshold); + ParseFieldTrial({&disabled, &low_qp, &high_qp}, + trials.Lookup("WebRTC-VP9QualityScaler")); + QualityScalerExperiment config; + config.enabled = !disabled.Get(); + RTC_LOG(LS_INFO) << "Webrtc quality scaler for vp9 is " + << (config.enabled ? "enabled." : "disabled"); + config.low_qp = low_qp.Get(); + config.high_qp = high_qp.Get(); + + return config; +} + +void LibvpxVp9Encoder::UpdatePerformanceFlags() { + flat_map<int, PerformanceFlags::ParameterSet> params_by_resolution; + if (codec_.GetVideoEncoderComplexity() == + VideoCodecComplexity::kComplexityLow) { + // For low tier devices, always use speed 9. Only disable upper + // layer deblocking below QCIF. 
+ params_by_resolution[0] = {.base_layer_speed = 9, + .high_layer_speed = 9, + .deblock_mode = 1, + .allow_denoising = true}; + params_by_resolution[352 * 288] = {.base_layer_speed = 9, + .high_layer_speed = 9, + .deblock_mode = 0, + .allow_denoising = true}; + } else { + params_by_resolution = performance_flags_.settings_by_resolution; + } + + const auto find_speed = [&](int min_pixel_count) { + RTC_DCHECK(!params_by_resolution.empty()); + auto it = params_by_resolution.upper_bound(min_pixel_count); + return std::prev(it)->second; + }; + performance_flags_by_spatial_index_.clear(); + + if (is_svc_) { + for (int si = 0; si < num_spatial_layers_; ++si) { + performance_flags_by_spatial_index_.push_back(find_speed( + codec_.spatialLayers[si].width * codec_.spatialLayers[si].height)); + } + } else { + performance_flags_by_spatial_index_.push_back( + find_speed(codec_.width * codec_.height)); + } +} + +// static +LibvpxVp9Encoder::PerformanceFlags +LibvpxVp9Encoder::ParsePerformanceFlagsFromTrials( + const FieldTrialsView& trials) { + struct Params : public PerformanceFlags::ParameterSet { + int min_pixel_count = 0; + }; + + FieldTrialStructList<Params> trials_list( + {FieldTrialStructMember("min_pixel_count", + [](Params* p) { return &p->min_pixel_count; }), + FieldTrialStructMember("high_layer_speed", + [](Params* p) { return &p->high_layer_speed; }), + FieldTrialStructMember("base_layer_speed", + [](Params* p) { return &p->base_layer_speed; }), + FieldTrialStructMember("deblock_mode", + [](Params* p) { return &p->deblock_mode; }), + FieldTrialStructMember("denoiser", + [](Params* p) { return &p->allow_denoising; })}, + {}); + + FieldTrialFlag per_layer_speed("use_per_layer_speed"); + + ParseFieldTrial({&trials_list, &per_layer_speed}, + trials.Lookup("WebRTC-VP9-PerformanceFlags")); + + PerformanceFlags flags; + flags.use_per_layer_speed = per_layer_speed.Get(); + + constexpr int kMinSpeed = 1; + constexpr int kMaxSpeed = 9; + for (auto& f : trials_list.Get()) { + if 
(f.base_layer_speed < kMinSpeed || f.base_layer_speed > kMaxSpeed || + f.high_layer_speed < kMinSpeed || f.high_layer_speed > kMaxSpeed || + f.deblock_mode < 0 || f.deblock_mode > 2) { + RTC_LOG(LS_WARNING) << "Ignoring invalid performance flags: " + << "min_pixel_count = " << f.min_pixel_count + << ", high_layer_speed = " << f.high_layer_speed + << ", base_layer_speed = " << f.base_layer_speed + << ", deblock_mode = " << f.deblock_mode; + continue; + } + flags.settings_by_resolution[f.min_pixel_count] = f; + } + + if (flags.settings_by_resolution.empty()) { + return GetDefaultPerformanceFlags(); + } + + return flags; +} + +// static +LibvpxVp9Encoder::PerformanceFlags +LibvpxVp9Encoder::GetDefaultPerformanceFlags() { + PerformanceFlags flags; + flags.use_per_layer_speed = true; +#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || defined(ANDROID) + // Speed 8 on all layers for all resolutions. + flags.settings_by_resolution[0] = {.base_layer_speed = 8, + .high_layer_speed = 8, + .deblock_mode = 0, + .allow_denoising = true}; +#else + + // For smaller resolutions, use lower speed setting for the temporal base + // layer (get some coding gain at the cost of increased encoding complexity). + // Set encoder Speed 5 for TL0, encoder Speed 8 for upper temporal layers, and + // disable deblocking for upper-most temporal layers. + flags.settings_by_resolution[0] = {.base_layer_speed = 5, + .high_layer_speed = 8, + .deblock_mode = 1, + .allow_denoising = true}; + + // Use speed 7 for QCIF and above. + // Set encoder Speed 7 for TL0, encoder Speed 8 for upper temporal layers, and + // enable deblocking for all temporal layers. + flags.settings_by_resolution[352 * 288] = {.base_layer_speed = 7, + .high_layer_speed = 8, + .deblock_mode = 0, + .allow_denoising = true}; + + // For very high resolution (1080p and up), turn the speed all the way up + // since this is very CPU intensive. 
Also disable denoising to save CPU, at + // these resolutions denoising appear less effective and hopefully you also + // have a less noisy video source at this point. + flags.settings_by_resolution[1920 * 1080] = {.base_layer_speed = 9, + .high_layer_speed = 9, + .deblock_mode = 0, + .allow_denoising = false}; + +#endif + return flags; +} + +void LibvpxVp9Encoder::MaybeRewrapRawWithFormat(const vpx_img_fmt fmt) { + if (!raw_) { + raw_ = libvpx_->img_wrap(nullptr, fmt, codec_.width, codec_.height, 1, + nullptr); + } else if (raw_->fmt != fmt) { + RTC_LOG(LS_INFO) << "Switching VP9 encoder pixel format to " + << (fmt == VPX_IMG_FMT_NV12 ? "NV12" : "I420"); + libvpx_->img_free(raw_); + raw_ = libvpx_->img_wrap(nullptr, fmt, codec_.width, codec_.height, 1, + nullptr); + } + // else no-op since the image is already in the right format. +} + +rtc::scoped_refptr<VideoFrameBuffer> LibvpxVp9Encoder::PrepareBufferForProfile0( + rtc::scoped_refptr<VideoFrameBuffer> buffer) { + absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats> + supported_formats = {VideoFrameBuffer::Type::kI420, + VideoFrameBuffer::Type::kNV12}; + + rtc::scoped_refptr<VideoFrameBuffer> mapped_buffer; + if (buffer->type() != VideoFrameBuffer::Type::kNative) { + // `buffer` is already mapped. + mapped_buffer = buffer; + } else { + // Attempt to map to one of the supported formats. + mapped_buffer = buffer->GetMappedFrameBuffer(supported_formats); + } + if (!mapped_buffer || + (absl::c_find(supported_formats, mapped_buffer->type()) == + supported_formats.end() && + mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) { + // Unknown pixel format or unable to map, convert to I420 and prepare that + // buffer instead to ensure Scale() is safe to use. + auto converted_buffer = buffer->ToI420(); + if (!converted_buffer) { + RTC_LOG(LS_ERROR) << "Failed to convert " + << VideoFrameBufferTypeToString(buffer->type()) + << " image to I420. 
Can't encode frame."; + return {}; + } + RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 || + converted_buffer->type() == VideoFrameBuffer::Type::kI420A); + + // Because `buffer` had to be converted, use `converted_buffer` instead. + buffer = mapped_buffer = converted_buffer; + } + + // Prepare `raw_` from `mapped_buffer`. + switch (mapped_buffer->type()) { + case VideoFrameBuffer::Type::kI420: + case VideoFrameBuffer::Type::kI420A: { + MaybeRewrapRawWithFormat(VPX_IMG_FMT_I420); + const I420BufferInterface* i420_buffer = mapped_buffer->GetI420(); + RTC_DCHECK(i420_buffer); + raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(i420_buffer->DataY()); + raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(i420_buffer->DataU()); + raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(i420_buffer->DataV()); + raw_->stride[VPX_PLANE_Y] = i420_buffer->StrideY(); + raw_->stride[VPX_PLANE_U] = i420_buffer->StrideU(); + raw_->stride[VPX_PLANE_V] = i420_buffer->StrideV(); + break; + } + case VideoFrameBuffer::Type::kNV12: { + MaybeRewrapRawWithFormat(VPX_IMG_FMT_NV12); + const NV12BufferInterface* nv12_buffer = mapped_buffer->GetNV12(); + RTC_DCHECK(nv12_buffer); + raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(nv12_buffer->DataY()); + raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(nv12_buffer->DataUV()); + raw_->planes[VPX_PLANE_V] = raw_->planes[VPX_PLANE_U] + 1; + raw_->stride[VPX_PLANE_Y] = nv12_buffer->StrideY(); + raw_->stride[VPX_PLANE_U] = nv12_buffer->StrideUV(); + raw_->stride[VPX_PLANE_V] = nv12_buffer->StrideUV(); + break; + } + default: + RTC_DCHECK_NOTREACHED(); + } + return mapped_buffer; +} + +} // namespace webrtc + +#endif // RTC_ENABLE_VP9 diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h new file mode 100644 index 0000000000..427e721c1b --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h 
@@ -0,0 +1,249 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#ifndef MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_ENCODER_H_ +#define MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_ENCODER_H_ + +#ifdef RTC_ENABLE_VP9 + +#include <array> +#include <memory> +#include <vector> + +#include "api/fec_controller_override.h" +#include "api/field_trials_view.h" +#include "api/video_codecs/video_encoder.h" +#include "api/video_codecs/vp9_profile.h" +#include "common_video/include/video_frame_buffer_pool.h" +#include "modules/video_coding/codecs/interface/libvpx_interface.h" +#include "modules/video_coding/codecs/vp9/include/vp9.h" +#include "modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h" +#include "modules/video_coding/svc/scalable_video_controller.h" +#include "modules/video_coding/utility/framerate_controller_deprecated.h" +#include "rtc_base/containers/flat_map.h" +#include "rtc_base/experiments/encoder_info_settings.h" +#include "vpx/vp8cx.h" + +namespace webrtc { + +class LibvpxVp9Encoder : public VP9Encoder { + public: + LibvpxVp9Encoder(const cricket::VideoCodec& codec, + std::unique_ptr<LibvpxInterface> interface, + const FieldTrialsView& trials); + + ~LibvpxVp9Encoder() override; + + void SetFecControllerOverride( + FecControllerOverride* fec_controller_override) override; + + int Release() override; + + int InitEncode(const VideoCodec* codec_settings, + const Settings& settings) override; + + int Encode(const VideoFrame& input_image, + const std::vector<VideoFrameType>* frame_types) override; + + int RegisterEncodeCompleteCallback(EncodedImageCallback* callback) override; + + void 
SetRates(const RateControlParameters& parameters) override; + + EncoderInfo GetEncoderInfo() const override; + + private: + // Determine number of encoder threads to use. + int NumberOfThreads(int width, int height, int number_of_cores); + + // Call the encoder initialization function and set control settings. + int InitAndSetControlSettings(const VideoCodec* inst); + + // Update frame size for codec. + int UpdateCodecFrameSize(const VideoFrame& input_image); + + bool PopulateCodecSpecific(CodecSpecificInfo* codec_specific, + absl::optional<int>* spatial_idx, + absl::optional<int>* temporal_idx, + const vpx_codec_cx_pkt& pkt); + void FillReferenceIndices(const vpx_codec_cx_pkt& pkt, + size_t pic_num, + bool inter_layer_predicted, + CodecSpecificInfoVP9* vp9_info); + void UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt, size_t pic_num); + vpx_svc_ref_frame_config_t SetReferences(bool is_key_pic, + int first_active_spatial_layer_id); + + bool ExplicitlyConfiguredSpatialLayers() const; + bool SetSvcRates(const VideoBitrateAllocation& bitrate_allocation); + + // Configures which spatial layers libvpx should encode according to + // configuration provided by svc_controller_. + void EnableSpatialLayer(int sid); + void DisableSpatialLayer(int sid); + void SetActiveSpatialLayers(); + + void GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt); + + // Callback function for outputting packets per spatial layer. 
+ static void EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt, + void* user_data); + + void DeliverBufferedFrame(bool end_of_picture); + + bool DropFrame(uint8_t spatial_idx, uint32_t rtp_timestamp); + + // Determine maximum target for Intra frames. + // + // Input: + // - optimal_buffer_size : Optimal buffer size + // Return Value : Max target size for Intra frames represented as + // percentage of the per frame bandwidth + uint32_t MaxIntraTarget(uint32_t optimal_buffer_size); + + size_t SteadyStateSize(int sid, int tid); + + void MaybeRewrapRawWithFormat(vpx_img_fmt fmt); + // Prepares `raw_` to reference image data of `buffer`, or of mapped or scaled + // versions of `buffer`. Returns the buffer that got referenced as a result, + // allowing the caller to keep a reference to it until after encoding has + // finished. On failure to convert the buffer, null is returned. + rtc::scoped_refptr<VideoFrameBuffer> PrepareBufferForProfile0( + rtc::scoped_refptr<VideoFrameBuffer> buffer); + + const std::unique_ptr<LibvpxInterface> libvpx_; + EncodedImage encoded_image_; + CodecSpecificInfo codec_specific_; + EncodedImageCallback* encoded_complete_callback_; + VideoCodec codec_; + const VP9Profile profile_; + bool inited_; + int64_t timestamp_; + uint32_t rc_max_intra_target_; + vpx_codec_ctx_t* encoder_; + vpx_codec_enc_cfg_t* config_; + vpx_image_t* raw_; + vpx_svc_extra_cfg_t svc_params_; + const VideoFrame* input_image_; + GofInfoVP9 gof_; // Contains each frame's temporal information for + // non-flexible mode. 
+ bool force_key_frame_; + size_t pics_since_key_; + uint8_t num_temporal_layers_; + uint8_t num_spatial_layers_; // Number of configured SLs + uint8_t num_active_spatial_layers_; // Number of actively encoded SLs + uint8_t first_active_layer_; + bool layer_deactivation_requires_key_frame_; + bool is_svc_; + InterLayerPredMode inter_layer_pred_; + bool external_ref_control_; + const bool trusted_rate_controller_; + bool layer_buffering_; + const bool full_superframe_drop_; + vpx_svc_frame_drop_t svc_drop_frame_; + bool first_frame_in_picture_; + VideoBitrateAllocation current_bitrate_allocation_; + bool ss_info_needed_; + bool force_all_active_layers_; + uint8_t num_cores_; + + std::unique_ptr<ScalableVideoController> svc_controller_; + std::vector<FramerateControllerDeprecated> framerate_controller_; + + // Used for flexible mode. + bool is_flexible_mode_; + struct RefFrameBuffer { + bool operator==(const RefFrameBuffer& o) const { + return pic_num == o.pic_num && spatial_layer_id == o.spatial_layer_id && + temporal_layer_id == o.temporal_layer_id; + } + + size_t pic_num = 0; + int spatial_layer_id = 0; + int temporal_layer_id = 0; + }; + std::array<RefFrameBuffer, kNumVp9Buffers> ref_buf_; + std::vector<ScalableVideoController::LayerFrameConfig> layer_frames_; + + // Variable frame-rate related fields and methods. + const struct VariableFramerateExperiment { + bool enabled; + // Framerate is limited to this value in steady state. + float framerate_limit; + // This qp or below is considered a steady state. + int steady_state_qp; + // Frames of at least this percentage below ideal for configured bitrate are + // considered in a steady state. + int steady_state_undershoot_percentage; + // Number of consecutive frames with good QP and size required to detect + // the steady state. 
+ int frames_before_steady_state; + } variable_framerate_experiment_; + static VariableFramerateExperiment ParseVariableFramerateConfig( + const FieldTrialsView& trials); + FramerateControllerDeprecated variable_framerate_controller_; + + const struct QualityScalerExperiment { + int low_qp; + int high_qp; + bool enabled; + } quality_scaler_experiment_; + static QualityScalerExperiment ParseQualityScalerConfig( + const FieldTrialsView& trials); + const bool external_ref_ctrl_; + + // Flags that can affect speed vs quality tradeoff, and are configurable per + // resolution ranges. + struct PerformanceFlags { + // If false, a lookup will be made in `settings_by_resolution` based on the + // highest currently active resolution, and the overall speed is then set + // to the `base_layer_speed` matching that entry. + // If true, each active resolution will have its speed and deblock_mode set + // based on its resolution, and the high layer speed configured for non + // base temporal layer frames. + bool use_per_layer_speed = false; + + struct ParameterSet { + int base_layer_speed = -1; // Speed setting for TL0. + int high_layer_speed = -1; // Speed setting for TL1-TL3. + // 0 = deblock all temporal layers (TL) + // 1 = disable deblock for top-most TL + // 2 = disable deblock for all TLs + int deblock_mode = 0; + bool allow_denoising = true; + }; + // Map from min pixel count to settings for that resolution and above. + // E.g. if you want some settings A if below wvga (640x360) and some other + // setting B at wvga and above, you'd use map {{0, A}, {230400, B}}. + flat_map<int, ParameterSet> settings_by_resolution; + }; + // Performance flags, ordered by `min_pixel_count`. + const PerformanceFlags performance_flags_; + // Caching of `speed_configs_`, where index i maps to the resolution as + // specified in `codec_.spatialLayers[i]`. 
+ std::vector<PerformanceFlags::ParameterSet> + performance_flags_by_spatial_index_; + void UpdatePerformanceFlags(); + static PerformanceFlags ParsePerformanceFlagsFromTrials( + const FieldTrialsView& trials); + static PerformanceFlags GetDefaultPerformanceFlags(); + + int num_steady_state_frames_; + // Only set config when this flag is set. + bool config_changed_; + + const LibvpxVp9EncoderInfoSettings encoder_info_override_; +}; + +} // namespace webrtc + +#endif // RTC_ENABLE_VP9 + +#endif // MODULES_VIDEO_CODING_CODECS_VP9_LIBVPX_VP9_ENCODER_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.cc new file mode 100644 index 0000000000..77eee3dbf5 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.cc @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/vp9/svc_config.h" + +#include <algorithm> +#include <cmath> +#include <memory> +#include <vector> + +#include "media/base/video_common.h" +#include "modules/video_coding/codecs/vp9/include/vp9_globals.h" +#include "modules/video_coding/svc/create_scalability_structure.h" +#include "modules/video_coding/svc/scalability_mode_util.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +namespace { +const size_t kMinVp9SvcBitrateKbps = 30; + +const size_t kMaxNumLayersForScreenSharing = 3; +const float kMaxScreenSharingLayerFramerateFps[] = {5.0, 10.0, 30.0}; +const size_t kMinScreenSharingLayerBitrateKbps[] = {30, 200, 500}; +const size_t kTargetScreenSharingLayerBitrateKbps[] = {150, 350, 950}; +const size_t kMaxScreenSharingLayerBitrateKbps[] = {250, 500, 950}; + +// Returns the number of spatial layers that fit the given resolution (>= 1). +size_t GetLimitedNumSpatialLayers(size_t width, size_t height) { + const bool is_landscape = width >= height; + const size_t min_width = is_landscape ? kMinVp9SpatialLayerLongSideLength + : kMinVp9SpatialLayerShortSideLength; + const size_t min_height = is_landscape ? 
kMinVp9SpatialLayerShortSideLength + : kMinVp9SpatialLayerLongSideLength; + const size_t num_layers_fit_horz = static_cast<size_t>( + std::floor(1 + std::max(0.0f, std::log2(1.0f * width / min_width)))); + const size_t num_layers_fit_vert = static_cast<size_t>( + std::floor(1 + std::max(0.0f, std::log2(1.0f * height / min_height)))); + return std::min(num_layers_fit_horz, num_layers_fit_vert); +} +} // namespace + +std::vector<SpatialLayer> ConfigureSvcScreenSharing(size_t input_width, + size_t input_height, + float max_framerate_fps, + size_t num_spatial_layers) { + num_spatial_layers = + std::min(num_spatial_layers, kMaxNumLayersForScreenSharing); + std::vector<SpatialLayer> spatial_layers; + + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + SpatialLayer spatial_layer = {0}; + spatial_layer.width = input_width; + spatial_layer.height = input_height; + spatial_layer.maxFramerate = + std::min(kMaxScreenSharingLayerFramerateFps[sl_idx], max_framerate_fps); + spatial_layer.numberOfTemporalLayers = 1; + spatial_layer.minBitrate = + static_cast<int>(kMinScreenSharingLayerBitrateKbps[sl_idx]); + spatial_layer.maxBitrate = + static_cast<int>(kMaxScreenSharingLayerBitrateKbps[sl_idx]); + spatial_layer.targetBitrate = + static_cast<int>(kTargetScreenSharingLayerBitrateKbps[sl_idx]); + spatial_layer.active = true; + spatial_layers.push_back(spatial_layer); + } + + return spatial_layers; +} + +std::vector<SpatialLayer> ConfigureSvcNormalVideo( + size_t input_width, + size_t input_height, + float max_framerate_fps, + size_t first_active_layer, + size_t num_spatial_layers, + size_t num_temporal_layers, + absl::optional<ScalableVideoController::StreamLayersConfig> config) { + RTC_DCHECK_LT(first_active_layer, num_spatial_layers); + + // Limit number of layers for given resolution. 
+ size_t limited_num_spatial_layers = + GetLimitedNumSpatialLayers(input_width, input_height); + if (limited_num_spatial_layers < num_spatial_layers) { + RTC_LOG(LS_WARNING) << "Reducing number of spatial layers from " + << num_spatial_layers << " to " + << limited_num_spatial_layers + << " due to low input resolution."; + num_spatial_layers = limited_num_spatial_layers; + } + + // First active layer must be configured. + num_spatial_layers = std::max(num_spatial_layers, first_active_layer + 1); + + // Round the input size down so that every layer's size divides evenly. + int required_divisiblity = 1 << (num_spatial_layers - first_active_layer - 1); + if (config) { + required_divisiblity = 1; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + required_divisiblity = cricket::LeastCommonMultiple( + required_divisiblity, config->scaling_factor_den[sl_idx]); + } + } + input_width = input_width - input_width % required_divisiblity; + input_height = input_height - input_height % required_divisiblity; + + std::vector<SpatialLayer> spatial_layers; + for (size_t sl_idx = first_active_layer; sl_idx < num_spatial_layers; + ++sl_idx) { + SpatialLayer spatial_layer = {0}; + spatial_layer.width = input_width >> (num_spatial_layers - sl_idx - 1); + spatial_layer.height = input_height >> (num_spatial_layers - sl_idx - 1); + spatial_layer.maxFramerate = max_framerate_fps; + spatial_layer.numberOfTemporalLayers = num_temporal_layers; + spatial_layer.active = true; + + if (config) { + spatial_layer.width = input_width * config->scaling_factor_num[sl_idx] / + config->scaling_factor_den[sl_idx]; + spatial_layer.height = input_height * config->scaling_factor_num[sl_idx] / + config->scaling_factor_den[sl_idx]; + } + + // minBitrate and maxBitrate formulas were derived from + // subjective-quality data to determine bit rates below which video + // quality is unacceptable and above which additional bits do not provide + // benefit. The formulas express rate in units of kbps. 
+ + // TODO(ssilkin): Add to the comment PSNR/SSIM we get at encoding certain + // video to min/max bitrate specified by those formulas. + const size_t num_pixels = spatial_layer.width * spatial_layer.height; + int min_bitrate = + static_cast<int>((600. * std::sqrt(num_pixels) - 95000.) / 1000.); + min_bitrate = std::max(min_bitrate, 0); + spatial_layer.minBitrate = + std::max(static_cast<size_t>(min_bitrate), kMinVp9SvcBitrateKbps); + spatial_layer.maxBitrate = + static_cast<int>((1.6 * num_pixels + 50 * 1000) / 1000); + spatial_layer.targetBitrate = + (spatial_layer.minBitrate + spatial_layer.maxBitrate) / 2; + spatial_layers.push_back(spatial_layer); + } + + // A workaround for the situation when single HD layer is left with minBitrate + // about 500kbps. This would mean that there will always be at least 500kbps + // allocated to video regardless of how low the actual BWE is. + // Also, boost maxBitrate for the first layer to account for lost ability to + // predict from previous layers. + if (first_active_layer > 0) { + spatial_layers[0].minBitrate = kMinVp9SvcBitrateKbps; + // TODO(ilnik): tune this value or come up with a different formula to + // ensure that all singlecast configurations look good and not too much + // bitrate is added. + spatial_layers[0].maxBitrate *= 1.1; + } + + return spatial_layers; +} + +// Uses scalability mode to configure spatial layers. 
+std::vector<SpatialLayer> GetVp9SvcConfig(VideoCodec& codec) { + RTC_DCHECK_EQ(codec.codecType, kVideoCodecVP9); + + absl::optional<ScalabilityMode> scalability_mode = codec.GetScalabilityMode(); + RTC_DCHECK(scalability_mode.has_value()); + + absl::optional<ScalableVideoController::StreamLayersConfig> info = + ScalabilityStructureConfig(*scalability_mode); + if (!info.has_value()) { + RTC_LOG(LS_WARNING) << "Failed to create structure " + << ScalabilityModeToString(*scalability_mode); + return {}; + } + + if (static_cast<int>(GetLimitedNumSpatialLayers(codec.width, codec.height)) < + info->num_spatial_layers) { + // Too few layers fit: unset the mode for now, keep its inter-layer pred. + // TODO(bugs.webrtc.org/11607): Use a lower scalability mode once all lower + // modes are supported. + codec.UnsetScalabilityMode(); + codec.VP9()->interLayerPred = + ScalabilityModeToInterLayerPredMode(*scalability_mode); + } + + // TODO(bugs.webrtc.org/11607): Add support for screensharing. + std::vector<SpatialLayer> spatial_layers = + GetSvcConfig(codec.width, codec.height, codec.maxFramerate, + /*first_active_layer=*/0, info->num_spatial_layers, + info->num_temporal_layers, /*is_screen_sharing=*/false, + codec.GetScalabilityMode() ? info : absl::nullopt); + RTC_DCHECK(!spatial_layers.empty()); + + // With a single spatial layer, use the codec-level bitrate limits instead. 
+ if (info->num_spatial_layers == 1) { + spatial_layers.back().minBitrate = codec.minBitrate; + spatial_layers.back().targetBitrate = codec.maxBitrate; + spatial_layers.back().maxBitrate = codec.maxBitrate; + } + + return spatial_layers; +} + +std::vector<SpatialLayer> GetSvcConfig( + size_t input_width, + size_t input_height, + float max_framerate_fps, + size_t first_active_layer, + size_t num_spatial_layers, + size_t num_temporal_layers, + bool is_screen_sharing, + absl::optional<ScalableVideoController::StreamLayersConfig> config) { + RTC_DCHECK_GT(input_width, 0); + RTC_DCHECK_GT(input_height, 0); + RTC_DCHECK_GT(num_spatial_layers, 0); + RTC_DCHECK_GT(num_temporal_layers, 0); + + if (is_screen_sharing) { + return ConfigureSvcScreenSharing(input_width, input_height, + max_framerate_fps, num_spatial_layers); + } else { + return ConfigureSvcNormalVideo(input_width, input_height, max_framerate_fps, + first_active_layer, num_spatial_layers, + num_temporal_layers, config); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.h new file mode 100644 index 0000000000..adeaf0f161 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_VIDEO_CODING_CODECS_VP9_SVC_CONFIG_H_ +#define MODULES_VIDEO_CODING_CODECS_VP9_SVC_CONFIG_H_ + +#include <stddef.h> + +#include <vector> + +#include "api/video_codecs/spatial_layer.h" +#include "api/video_codecs/video_codec.h" +#include "modules/video_coding/svc/scalable_video_controller.h" + +namespace webrtc { + +// Configures spatial layers from the scalability mode set on `video_codec`. +std::vector<SpatialLayer> GetVp9SvcConfig(VideoCodec& video_codec); + +std::vector<SpatialLayer> GetSvcConfig( + size_t input_width, + size_t input_height, + float max_framerate_fps, + size_t first_active_layer, + size_t num_spatial_layers, + size_t num_temporal_layers, + bool is_screen_sharing, + absl::optional<ScalableVideoController::StreamLayersConfig> config = + absl::nullopt); + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_VP9_SVC_CONFIG_H_ diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config_unittest.cc new file mode 100644 index 0000000000..4de3c5b2a6 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/svc_config_unittest.cc @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/codecs/vp9/svc_config.h" + +#include <cstddef> +#include <vector> + +#include "modules/video_coding/codecs/vp9/include/vp9_globals.h" +#include "test/gmock.h" +#include "test/gtest.h" + +using ::testing::ElementsAre; +using ::testing::Field; + +namespace webrtc { +TEST(SvcConfig, NumSpatialLayers) { + const size_t max_num_spatial_layers = 6; + const size_t first_active_layer = 0; + const size_t num_spatial_layers = 2; + + std::vector<SpatialLayer> spatial_layers = GetSvcConfig( + kMinVp9SpatialLayerLongSideLength << (num_spatial_layers - 1), + kMinVp9SpatialLayerShortSideLength << (num_spatial_layers - 1), 30, + first_active_layer, max_num_spatial_layers, 1, false); + + EXPECT_EQ(spatial_layers.size(), num_spatial_layers); +} + +TEST(SvcConfig, NumSpatialLayersPortrait) { + const size_t max_num_spatial_layers = 6; + const size_t first_active_layer = 0; + const size_t num_spatial_layers = 2; + + std::vector<SpatialLayer> spatial_layers = GetSvcConfig( + kMinVp9SpatialLayerShortSideLength << (num_spatial_layers - 1), + kMinVp9SpatialLayerLongSideLength << (num_spatial_layers - 1), 30, + first_active_layer, max_num_spatial_layers, 1, false); + + EXPECT_EQ(spatial_layers.size(), num_spatial_layers); +} + +TEST(SvcConfig, NumSpatialLayersWithScalabilityMode) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 960; + codec.height = 540; + codec.SetScalabilityMode(ScalabilityMode::kL3T3_KEY); + + std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135), + Field(&SpatialLayer::height, 270), + Field(&SpatialLayer::height, 540))); + EXPECT_THAT(spatial_layers, + ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 3), + Field(&SpatialLayer::numberOfTemporalLayers, 3), + Field(&SpatialLayer::numberOfTemporalLayers, 3))); + EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL3T3_KEY); +} + +TEST(SvcConfig, 
NumSpatialLayersLimitedWithScalabilityMode) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 480; + codec.height = 270; + codec.SetScalabilityMode(ScalabilityMode::kL3T3_KEY); + + // Scalability mode reset, configuration should be in accordance to L2T3_KEY. + std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135), + Field(&SpatialLayer::height, 270))); + EXPECT_THAT(spatial_layers, + ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 3), + Field(&SpatialLayer::numberOfTemporalLayers, 3))); + EXPECT_EQ(codec.VP9()->interLayerPred, InterLayerPredMode::kOnKeyPic); + EXPECT_EQ(codec.GetScalabilityMode(), absl::nullopt); +} + +TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityModePortrait) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 270; + codec.height = 480; + codec.SetScalabilityMode(ScalabilityMode::kL3T1); + + // Scalability mode reset, configuration should be in accordance to L2T1. 
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 135), + Field(&SpatialLayer::width, 270))); + EXPECT_THAT(spatial_layers, + ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1), + Field(&SpatialLayer::numberOfTemporalLayers, 1))); + EXPECT_EQ(codec.VP9()->interLayerPred, InterLayerPredMode::kOn); + EXPECT_EQ(codec.GetScalabilityMode(), absl::nullopt); +} + +TEST(SvcConfig, NumSpatialLayersWithScalabilityModeResolutionRatio1_5) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 270; + codec.height = 480; + codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1 + + std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 180), + Field(&SpatialLayer::width, 270))); + EXPECT_THAT(spatial_layers, + ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1), + Field(&SpatialLayer::numberOfTemporalLayers, 1))); + EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL2T1h); +} + +TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityModeResolutionRatio1_5) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 320; + codec.height = 180; + codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1 + + // Scalability mode reset, configuration should be in accordance to L1T1. 
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 320))); + EXPECT_THAT(spatial_layers, + ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1))); + EXPECT_EQ(codec.VP9()->interLayerPred, InterLayerPredMode::kOn); + EXPECT_EQ(codec.GetScalabilityMode(), absl::nullopt); +} + +TEST(SvcConfig, AlwaysSendsAtLeastOneLayer) { + const size_t max_num_spatial_layers = 6; + const size_t first_active_layer = 5; + + std::vector<SpatialLayer> spatial_layers = GetSvcConfig( + kMinVp9SpatialLayerLongSideLength, kMinVp9SpatialLayerShortSideLength, 30, + first_active_layer, max_num_spatial_layers, 1, false); + EXPECT_EQ(spatial_layers.size(), 1u); + EXPECT_EQ(spatial_layers.back().width, kMinVp9SpatialLayerLongSideLength); +} + +TEST(SvcConfig, AlwaysSendsAtLeastOneLayerPortrait) { + const size_t max_num_spatial_layers = 6; + const size_t first_active_layer = 5; + + std::vector<SpatialLayer> spatial_layers = GetSvcConfig( + kMinVp9SpatialLayerShortSideLength, kMinVp9SpatialLayerLongSideLength, 30, + first_active_layer, max_num_spatial_layers, 1, false); + EXPECT_EQ(spatial_layers.size(), 1u); + EXPECT_EQ(spatial_layers.back().width, kMinVp9SpatialLayerShortSideLength); +} + +TEST(SvcConfig, EnforcesMinimalRequiredParity) { + const size_t max_num_spatial_layers = 3; + const size_t kOddSize = 1023; + + std::vector<SpatialLayer> spatial_layers = + GetSvcConfig(kOddSize, kOddSize, 30, + /*first_active_layer=*/1, max_num_spatial_layers, 1, false); + // Since there are 2 layers total (1, 2), divisibility by 2 is required. + EXPECT_EQ(spatial_layers.back().width, kOddSize - 1); + EXPECT_EQ(spatial_layers.back().height, kOddSize - 1); + + spatial_layers = + GetSvcConfig(kOddSize, kOddSize, 30, + /*first_active_layer=*/0, max_num_spatial_layers, 1, false); + // Since there are 3 layers total (0, 1, 2), divisibility by 4 is required. 
+ EXPECT_EQ(spatial_layers.back().width, kOddSize - 3); + EXPECT_EQ(spatial_layers.back().height, kOddSize - 3); + + spatial_layers = + GetSvcConfig(kOddSize, kOddSize, 30, + /*first_active_layer=*/2, max_num_spatial_layers, 1, false); + // Since there is only 1 layer active (2), divisibility by 1 is required. + EXPECT_EQ(spatial_layers.back().width, kOddSize); + EXPECT_EQ(spatial_layers.back().height, kOddSize); +} + +TEST(SvcConfig, EnforcesMinimalRequiredParityWithScalabilityMode) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 1023; + codec.height = 1023; + codec.SetScalabilityMode(ScalabilityMode::kL3T1); + + std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, // Divisibility by 4 required. + ElementsAre(Field(&SpatialLayer::width, 255), + Field(&SpatialLayer::width, 510), + Field(&SpatialLayer::width, 1020))); + + codec.SetScalabilityMode(ScalabilityMode::kL2T1); + spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, // Divisibility by 2 required. + ElementsAre(Field(&SpatialLayer::width, 511), + Field(&SpatialLayer::width, 1022))); + + codec.SetScalabilityMode(ScalabilityMode::kL1T1); + spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, // Divisibility by 1 required. + ElementsAre(Field(&SpatialLayer::width, 1023))); +} + +TEST(SvcConfig, EnforcesMinimalRequiredParityWithScalabilityModeResRatio1_5) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 1280; + codec.height = 1280; + codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1 + + std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, // Divisibility by 3 required. 
+ ElementsAre(Field(&SpatialLayer::width, 852), + Field(&SpatialLayer::width, 1278))); +} + +TEST(SvcConfig, SkipsInactiveLayers) { + const size_t num_spatial_layers = 4; + const size_t first_active_layer = 2; + + std::vector<SpatialLayer> spatial_layers = GetSvcConfig( + kMinVp9SpatialLayerLongSideLength << (num_spatial_layers - 1), + kMinVp9SpatialLayerShortSideLength << (num_spatial_layers - 1), 30, + first_active_layer, num_spatial_layers, 1, false); + EXPECT_EQ(spatial_layers.size(), 2u); + EXPECT_EQ(spatial_layers.back().width, + kMinVp9SpatialLayerLongSideLength << (num_spatial_layers - 1)); +} + +TEST(SvcConfig, BitrateThresholds) { + const size_t first_active_layer = 0; + const size_t num_spatial_layers = 3; + std::vector<SpatialLayer> spatial_layers = GetSvcConfig( + kMinVp9SpatialLayerLongSideLength << (num_spatial_layers - 1), + kMinVp9SpatialLayerShortSideLength << (num_spatial_layers - 1), 30, + first_active_layer, num_spatial_layers, 1, false); + + EXPECT_EQ(spatial_layers.size(), num_spatial_layers); + + for (const SpatialLayer& layer : spatial_layers) { + EXPECT_LE(layer.minBitrate, layer.maxBitrate); + EXPECT_LE(layer.minBitrate, layer.targetBitrate); + EXPECT_LE(layer.targetBitrate, layer.maxBitrate); + } +} + +TEST(SvcConfig, BitrateThresholdsWithScalabilityMode) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 960; + codec.height = 540; + codec.SetScalabilityMode(ScalabilityMode::kS3T3); + + std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135), + Field(&SpatialLayer::height, 270), + Field(&SpatialLayer::height, 540))); + + for (const SpatialLayer& layer : spatial_layers) { + EXPECT_LE(layer.minBitrate, layer.maxBitrate); + EXPECT_LE(layer.minBitrate, layer.targetBitrate); + EXPECT_LE(layer.targetBitrate, layer.maxBitrate); + } +} + +TEST(SvcConfig, ScreenSharing) { + std::vector<SpatialLayer> spatial_layers = + GetSvcConfig(1920, 
1080, 30, 1, 3, 3, true); + + EXPECT_EQ(spatial_layers.size(), 3UL); + + for (size_t i = 0; i < 3; ++i) { + const SpatialLayer& layer = spatial_layers[i]; + EXPECT_EQ(layer.width, 1920); + EXPECT_EQ(layer.height, 1080); + EXPECT_EQ(layer.maxFramerate, (i < 1) ? 5 : (i < 2 ? 10 : 30)); + EXPECT_EQ(layer.numberOfTemporalLayers, 1); + EXPECT_LE(layer.minBitrate, layer.maxBitrate); + EXPECT_LE(layer.minBitrate, layer.targetBitrate); + EXPECT_LE(layer.targetBitrate, layer.maxBitrate); + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc new file mode 100644 index 0000000000..b6293a342e --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc @@ -0,0 +1,2446 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "absl/memory/memory.h" +#include "api/test/create_frame_generator.h" +#include "api/test/frame_generator_interface.h" +#include "api/test/mock_video_encoder.h" +#include "api/video/color_space.h" +#include "api/video/i420_buffer.h" +#include "api/video_codecs/video_encoder.h" +#include "api/video_codecs/vp9_profile.h" +#include "common_video/libyuv/include/webrtc_libyuv.h" +#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" +#include "modules/video_coding/codecs/interface/libvpx_interface.h" +#include "modules/video_coding/codecs/interface/mock_libvpx_interface.h" +#include "modules/video_coding/codecs/test/encoded_video_frame_producer.h" +#include "modules/video_coding/codecs/test/video_codec_unittest.h" +#include "modules/video_coding/codecs/vp9/include/vp9.h" +#include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h" +#include "modules/video_coding/codecs/vp9/svc_config.h" +#include "rtc_base/strings/string_builder.h" +#include "test/explicit_key_value_config.h" +#include "test/field_trial.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/mappable_native_buffer.h" +#include "test/video_codec_settings.h" + +namespace webrtc { +namespace { + +using ::testing::_; +using ::testing::A; +using ::testing::AllOf; +using ::testing::An; +using ::testing::AnyNumber; +using ::testing::ByRef; +using ::testing::DoAll; +using ::testing::Each; +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; +using ::testing::Field; +using ::testing::IsEmpty; +using ::testing::Mock; +using ::testing::NiceMock; +using ::testing::Return; +using ::testing::SafeMatcherCast; +using ::testing::SaveArgPointee; +using ::testing::SetArgPointee; +using ::testing::SizeIs; +using ::testing::TypedEq; +using ::testing::UnorderedElementsAreArray; +using ::testing::WithArg; +using EncoderInfo = webrtc::VideoEncoder::EncoderInfo; +using FramerateFractions = + absl::InlinedVector<uint8_t, webrtc::kMaxTemporalStreams>; + +constexpr size_t kWidth = 
1280; +constexpr size_t kHeight = 720; + +const VideoEncoder::Capabilities kCapabilities(false); +const VideoEncoder::Settings kSettings(kCapabilities, + /*number_of_cores=*/1, + /*max_payload_size=*/0); + +VideoCodec DefaultCodecSettings() { + VideoCodec codec_settings; + webrtc::test::CodecSettings(kVideoCodecVP9, &codec_settings); + codec_settings.width = kWidth; + codec_settings.height = kHeight; + codec_settings.VP9()->numberOfTemporalLayers = 1; + codec_settings.VP9()->numberOfSpatialLayers = 1; + return codec_settings; +} + +void ConfigureSvc(VideoCodec& codec_settings, + int num_spatial_layers, + int num_temporal_layers = 1) { + codec_settings.VP9()->numberOfSpatialLayers = num_spatial_layers; + codec_settings.VP9()->numberOfTemporalLayers = num_temporal_layers; + codec_settings.SetFrameDropEnabled(false); + + std::vector<SpatialLayer> layers = GetSvcConfig( + codec_settings.width, codec_settings.height, codec_settings.maxFramerate, + /*first_active_layer=*/0, num_spatial_layers, num_temporal_layers, false); + for (size_t i = 0; i < layers.size(); ++i) { + codec_settings.spatialLayers[i] = layers[i]; + } +} + +} // namespace + +class TestVp9Impl : public VideoCodecUnitTest { + protected: + std::unique_ptr<VideoEncoder> CreateEncoder() override { + return VP9Encoder::Create(); + } + + std::unique_ptr<VideoDecoder> CreateDecoder() override { + return VP9Decoder::Create(); + } + + void ModifyCodecSettings(VideoCodec* codec_settings) override { + webrtc::test::CodecSettings(kVideoCodecVP9, codec_settings); + codec_settings->width = kWidth; + codec_settings->height = kHeight; + codec_settings->VP9()->numberOfTemporalLayers = 1; + codec_settings->VP9()->numberOfSpatialLayers = 1; + } +}; + +class TestVp9ImplForPixelFormat + : public TestVp9Impl, + public ::testing::WithParamInterface< + test::FrameGeneratorInterface::OutputType> { + protected: + void SetUp() override { + input_frame_generator_ = test::CreateSquareFrameGenerator( + kWidth, kHeight, GetParam(), 
absl::optional<int>()); + TestVp9Impl::SetUp(); + } +}; + +// Disabled on ios as flake, see https://crbug.com/webrtc/7057 +#if defined(WEBRTC_IOS) +TEST_P(TestVp9ImplForPixelFormat, DISABLED_EncodeDecode) { +#else +TEST_P(TestVp9ImplForPixelFormat, EncodeDecode) { +#endif + VideoFrame input_frame = NextInputFrame(); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + // First frame should be a key frame. + encoded_frame._frameType = VideoFrameType::kVideoFrameKey; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0)); + std::unique_ptr<VideoFrame> decoded_frame; + absl::optional<uint8_t> decoded_qp; + ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp)); + ASSERT_TRUE(decoded_frame); + EXPECT_GT(I420PSNR(&input_frame, decoded_frame.get()), 36); + + const ColorSpace color_space = *decoded_frame->color_space(); + EXPECT_EQ(ColorSpace::PrimaryID::kUnspecified, color_space.primaries()); + EXPECT_EQ(ColorSpace::TransferID::kUnspecified, color_space.transfer()); + EXPECT_EQ(ColorSpace::MatrixID::kUnspecified, color_space.matrix()); + EXPECT_EQ(ColorSpace::RangeID::kLimited, color_space.range()); + EXPECT_EQ(ColorSpace::ChromaSiting::kUnspecified, + color_space.chroma_siting_horizontal()); + EXPECT_EQ(ColorSpace::ChromaSiting::kUnspecified, + color_space.chroma_siting_vertical()); +} + +TEST_P(TestVp9ImplForPixelFormat, EncodeNativeBuffer) { + VideoFrame input_frame = NextInputFrame(); + // Replace the input frame with a fake native buffer of the same size and + // underlying pixel format. Do not allow ToI420() for non-I420 buffers, + // ensuring zero-conversion. 
+ input_frame = test::CreateMappableNativeFrame( + input_frame.ntp_time_ms(), input_frame.video_frame_buffer()->type(), + input_frame.width(), input_frame.height()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + + // After encoding, we would expect a single mapping to have happened. + rtc::scoped_refptr<test::MappableNativeBuffer> mappable_buffer = + test::GetMappableNativeBufferFromVideoFrame(input_frame); + std::vector<rtc::scoped_refptr<VideoFrameBuffer>> mapped_buffers = + mappable_buffer->GetMappedFramedBuffers(); + ASSERT_EQ(mapped_buffers.size(), 1u); + EXPECT_EQ(mapped_buffers[0]->type(), mappable_buffer->mappable_type()); + EXPECT_EQ(mapped_buffers[0]->width(), input_frame.width()); + EXPECT_EQ(mapped_buffers[0]->height(), input_frame.height()); + EXPECT_FALSE(mappable_buffer->DidConvertToI420()); +} + +TEST_P(TestVp9ImplForPixelFormat, DecodedColorSpaceFromBitstream) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + + // Encoded frame without explicit color space information. + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0)); + std::unique_ptr<VideoFrame> decoded_frame; + absl::optional<uint8_t> decoded_qp; + ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp)); + ASSERT_TRUE(decoded_frame); + // Color space present from encoded bitstream. + ASSERT_TRUE(decoded_frame->color_space()); + // No HDR metadata present. 
+ EXPECT_FALSE(decoded_frame->color_space()->hdr_metadata()); +} + +TEST_P(TestVp9ImplForPixelFormat, DecodedQpEqualsEncodedQp) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + // First frame should be a key frame. + encoded_frame._frameType = VideoFrameType::kVideoFrameKey; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0)); + std::unique_ptr<VideoFrame> decoded_frame; + absl::optional<uint8_t> decoded_qp; + ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp)); + ASSERT_TRUE(decoded_frame); + ASSERT_TRUE(decoded_qp); + EXPECT_EQ(encoded_frame.qp_, *decoded_qp); +} + +TEST_F(TestVp9Impl, SwitchInputPixelFormatsWithoutReconfigure) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + + // Change the input frame type from I420 to NV12, encoding should still work. + input_frame_generator_ = test::CreateSquareFrameGenerator( + kWidth, kHeight, test::FrameGeneratorInterface::OutputType::kNV12, + absl::optional<int>()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + + // Flipping back to I420, encoding should still work. 
+ input_frame_generator_ = test::CreateSquareFrameGenerator( + kWidth, kHeight, test::FrameGeneratorInterface::OutputType::kI420, + absl::optional<int>()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); +} + +TEST(Vp9ImplTest, ParserQpEqualsEncodedQp) { + // Encodes a single frame and checks that the QP parsed from the bitstream + // by vp9::GetQp() equals the QP reported on the encoded image. + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + // Report an initialization failure directly instead of only indirectly + // through the frame-count assertion below. + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(1) + .SetResolution({kWidth, kHeight}) + .Encode(); + ASSERT_THAT(frames, SizeIs(1)); + const auto& encoded_frame = frames.front().encoded_image; + int qp = 0; + ASSERT_TRUE(vp9::GetQp(encoded_frame.data(), encoded_frame.size(), &qp)); + EXPECT_EQ(encoded_frame.qp_, qp); +} + +TEST(Vp9ImplTest, EncodeAttachesTemplateStructureWithSvcController) { + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(2) + .SetResolution({kWidth, kHeight}) + .Encode(); + + ASSERT_THAT(frames, SizeIs(2)); + EXPECT_TRUE(frames[0].codec_specific_info.template_structure); + EXPECT_TRUE(frames[0].codec_specific_info.generic_frame_info); + + EXPECT_FALSE(frames[1].codec_specific_info.template_structure); + EXPECT_TRUE(frames[1].codec_specific_info.generic_frame_info); +} + +TEST(Vp9ImplTest, EncoderWith2TemporalLayers) { + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.VP9()->numberOfTemporalLayers = 2; + // Tl0PidIdx is only used in non-flexible mode. 
+ codec_settings.VP9()->flexibleMode = false; + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(4) + .SetResolution({kWidth, kHeight}) + .Encode(); + + ASSERT_THAT(frames, SizeIs(4)); + EXPECT_EQ(frames[0].codec_specific_info.codecSpecific.VP9.temporal_idx, 0); + EXPECT_EQ(frames[1].codec_specific_info.codecSpecific.VP9.temporal_idx, 1); + EXPECT_EQ(frames[2].codec_specific_info.codecSpecific.VP9.temporal_idx, 0); + EXPECT_EQ(frames[3].codec_specific_info.codecSpecific.VP9.temporal_idx, 1); +} + +TEST(Vp9ImplTest, EncodeTemporalLayersWithSvcController) { + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.VP9()->numberOfTemporalLayers = 2; + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(4) + .SetResolution({kWidth, kHeight}) + .Encode(); + + ASSERT_THAT(frames, SizeIs(4)); + EXPECT_EQ(frames[0].codec_specific_info.codecSpecific.VP9.temporal_idx, 0); + EXPECT_EQ(frames[1].codec_specific_info.codecSpecific.VP9.temporal_idx, 1); + EXPECT_EQ(frames[2].codec_specific_info.codecSpecific.VP9.temporal_idx, 0); + EXPECT_EQ(frames[3].codec_specific_info.codecSpecific.VP9.temporal_idx, 1); + // Verify codec agnostic part + ASSERT_TRUE(frames[0].codec_specific_info.generic_frame_info); + ASSERT_TRUE(frames[1].codec_specific_info.generic_frame_info); + ASSERT_TRUE(frames[2].codec_specific_info.generic_frame_info); + ASSERT_TRUE(frames[3].codec_specific_info.generic_frame_info); + EXPECT_EQ(frames[0].codec_specific_info.generic_frame_info->temporal_id, 0); + EXPECT_EQ(frames[1].codec_specific_info.generic_frame_info->temporal_id, 1); + 
EXPECT_EQ(frames[2].codec_specific_info.generic_frame_info->temporal_id, 0); + EXPECT_EQ(frames[3].codec_specific_info.generic_frame_info->temporal_id, 1); +} + +TEST(Vp9ImplTest, EncoderWith2SpatialLayers) { + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.VP9()->numberOfSpatialLayers = 2; + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(1) + .SetResolution({kWidth, kHeight}) + .Encode(); + + ASSERT_THAT(frames, SizeIs(2)); + EXPECT_EQ(frames[0].encoded_image.SpatialIndex(), 0); + EXPECT_EQ(frames[1].encoded_image.SpatialIndex(), 1); +} + +TEST(Vp9ImplTest, EncodeSpatialLayersWithSvcController) { + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.VP9()->numberOfSpatialLayers = 2; + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(2) + .SetResolution({kWidth, kHeight}) + .Encode(); + + ASSERT_THAT(frames, SizeIs(4)); + EXPECT_EQ(frames[0].encoded_image.SpatialIndex(), 0); + EXPECT_EQ(frames[1].encoded_image.SpatialIndex(), 1); + EXPECT_EQ(frames[2].encoded_image.SpatialIndex(), 0); + EXPECT_EQ(frames[3].encoded_image.SpatialIndex(), 1); + // Verify codec agnostic part + ASSERT_TRUE(frames[0].codec_specific_info.generic_frame_info); + ASSERT_TRUE(frames[1].codec_specific_info.generic_frame_info); + ASSERT_TRUE(frames[2].codec_specific_info.generic_frame_info); + ASSERT_TRUE(frames[3].codec_specific_info.generic_frame_info); + EXPECT_EQ(frames[0].codec_specific_info.generic_frame_info->spatial_id, 0); + EXPECT_EQ(frames[1].codec_specific_info.generic_frame_info->spatial_id, 1); + 
EXPECT_EQ(frames[2].codec_specific_info.generic_frame_info->spatial_id, 0); + EXPECT_EQ(frames[3].codec_specific_info.generic_frame_info->spatial_id, 1); +} + +TEST_F(TestVp9Impl, EncoderExplicitLayering) { + // Override default settings. + codec_settings_.VP9()->numberOfTemporalLayers = 1; + codec_settings_.VP9()->numberOfSpatialLayers = 2; + + codec_settings_.width = 960; + codec_settings_.height = 540; + codec_settings_.spatialLayers[0].minBitrate = 200; + codec_settings_.spatialLayers[0].maxBitrate = 500; + codec_settings_.spatialLayers[0].targetBitrate = + (codec_settings_.spatialLayers[0].minBitrate + + codec_settings_.spatialLayers[0].maxBitrate) / + 2; + codec_settings_.spatialLayers[0].active = true; + + codec_settings_.spatialLayers[1].minBitrate = 400; + codec_settings_.spatialLayers[1].maxBitrate = 1500; + codec_settings_.spatialLayers[1].targetBitrate = + (codec_settings_.spatialLayers[1].minBitrate + + codec_settings_.spatialLayers[1].maxBitrate) / + 2; + codec_settings_.spatialLayers[1].active = true; + + // Layer 0 is half the codec resolution in both dimensions; layer 1 is the + // full codec resolution. + codec_settings_.spatialLayers[0].width = codec_settings_.width / 2; + codec_settings_.spatialLayers[0].height = codec_settings_.height / 2; + codec_settings_.spatialLayers[0].maxFramerate = codec_settings_.maxFramerate; + codec_settings_.spatialLayers[1].width = codec_settings_.width; + codec_settings_.spatialLayers[1].height = codec_settings_.height; + codec_settings_.spatialLayers[1].maxFramerate = codec_settings_.maxFramerate; + + // This explicit two-layer configuration must be accepted. + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + // Ensure it fails if scaling factors in horz/vert dimensions are different. 
+ codec_settings_.spatialLayers[0].width = codec_settings_.width; + codec_settings_.spatialLayers[0].height = codec_settings_.height / 2; + codec_settings_.spatialLayers[1].width = codec_settings_.width; + codec_settings_.spatialLayers[1].height = codec_settings_.height; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_PARAMETER, + encoder_->InitEncode(&codec_settings_, kSettings)); + + // Ensure it fails if scaling factor is not power of two. + codec_settings_.spatialLayers[0].width = codec_settings_.width / 3; + codec_settings_.spatialLayers[0].height = codec_settings_.height / 3; + codec_settings_.spatialLayers[1].width = codec_settings_.width; + codec_settings_.spatialLayers[1].height = codec_settings_.height; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_PARAMETER, + encoder_->InitEncode(&codec_settings_, kSettings)); +} + +TEST_F(TestVp9Impl, EnableDisableSpatialLayers) { + // Configure encoder to produce N spatial layers. Encode frames of layer 0 + // then enable layer 1 and encode more frames and so on until layer N-1. + // Then disable layers one by one in the same way. + // Note: bit rate allocation is high to avoid frame dropping due to rate + // control, the encoder should always produce a frame. A dropped + // frame indicates a problem and the test will fail. + const size_t num_spatial_layers = 3; + const size_t num_frames_to_encode = 5; + + ConfigureSvc(codec_settings_, num_spatial_layers); + codec_settings_.SetFrameDropEnabled(true); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + // Allocate high bit rate to avoid frame dropping due to rate control. 
+ bitrate_allocation.SetBitrate( + sl_idx, 0, + codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 * 2); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) { + SetWaitForEncodedFramesThreshold(sl_idx + 1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available, + frame_num == 0); + } + } + + for (size_t i = 0; i < num_spatial_layers - 1; ++i) { + const size_t sl_idx = num_spatial_layers - i - 1; + bitrate_allocation.SetBitrate(sl_idx, 0, 0); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) { + SetWaitForEncodedFramesThreshold(sl_idx); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available, + frame_num == 0); + } + } +} + +TEST(Vp9ImplTest, EnableDisableSpatialLayersWithSvcController) { + const int num_spatial_layers = 3; + // Configure encoder to produce 3 spatial layers. Encode frames of layer 0 + // then enable layer 1 and encode more frames and so on. + // Then disable layers one by one in the same way. + // Note: bit rate allocation is high to avoid frame dropping due to rate + // control, the encoder should always produce a frame. A dropped + // frame indicates a problem and the test will fail. 
+ std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + ConfigureSvc(codec_settings, num_spatial_layers); + codec_settings.SetFrameDropEnabled(true); + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + EncodedVideoFrameProducer producer(*encoder); + producer.SetResolution({kWidth, kHeight}); + + // Encode a key frame to validate all other frames are delta frames. + std::vector<EncodedVideoFrameProducer::EncodedFrame> frames = + producer.SetNumInputFrames(1).Encode(); + ASSERT_THAT(frames, Not(IsEmpty())); + EXPECT_TRUE(frames[0].codec_specific_info.template_structure); + + const size_t num_frames_to_encode = 5; + + VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + // Allocate high bit rate to avoid frame dropping due to rate control. + bitrate_allocation.SetBitrate( + sl_idx, 0, + codec_settings.spatialLayers[sl_idx].targetBitrate * 1000 * 2); + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + + frames = producer.SetNumInputFrames(num_frames_to_encode).Encode(); + // With (sl_idx+1) spatial layers expect (sl_idx+1) frames per input frame. + ASSERT_THAT(frames, SizeIs(num_frames_to_encode * (sl_idx + 1))); + for (size_t i = 0; i < frames.size(); ++i) { + EXPECT_TRUE(frames[i].codec_specific_info.generic_frame_info); + EXPECT_FALSE(frames[i].codec_specific_info.template_structure); + } + } + + for (int sl_idx = num_spatial_layers - 1; sl_idx > 0; --sl_idx) { + bitrate_allocation.SetBitrate(sl_idx, 0, 0); + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + + frames = producer.SetNumInputFrames(num_frames_to_encode).Encode(); + // With `sl_idx` spatial layer disabled, there are `sl_idx` spatial layers + // left. 
+ ASSERT_THAT(frames, SizeIs(num_frames_to_encode * sl_idx)); + for (size_t i = 0; i < frames.size(); ++i) { + EXPECT_TRUE(frames[i].codec_specific_info.generic_frame_info); + EXPECT_FALSE(frames[i].codec_specific_info.template_structure); + } + } +} + +MATCHER_P2(GenericLayerIs, spatial_id, temporal_id, "") { + if (arg.codec_specific_info.generic_frame_info == absl::nullopt) { + *result_listener << " miss generic_frame_info"; + return false; + } + const auto& layer = *arg.codec_specific_info.generic_frame_info; + if (layer.spatial_id != spatial_id || layer.temporal_id != temporal_id) { + *result_listener << " frame from layer (" << layer.spatial_id << ", " + << layer.temporal_id << ")"; + return false; + } + return true; +} + +TEST(Vp9ImplTest, SpatialUpswitchNotAtGOFBoundary) { + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + ConfigureSvc(codec_settings, /*num_spatial_layers=*/3, + /*num_temporal_layers=*/3); + codec_settings.SetFrameDropEnabled(true); + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + EncodedVideoFrameProducer producer(*encoder); + producer.SetResolution({kWidth, kHeight}); + + // Disable all but spatial_layer = 0; + VideoBitrateAllocation bitrate_allocation; + int layer_bitrate_bps = codec_settings.spatialLayers[0].targetBitrate * 1000; + bitrate_allocation.SetBitrate(0, 0, layer_bitrate_bps); + bitrate_allocation.SetBitrate(0, 1, layer_bitrate_bps); + bitrate_allocation.SetBitrate(0, 2, layer_bitrate_bps); + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + EXPECT_THAT(producer.SetNumInputFrames(3).Encode(), + ElementsAre(GenericLayerIs(0, 0), GenericLayerIs(0, 2), + GenericLayerIs(0, 1))); + + // Upswitch to spatial_layer = 1 + layer_bitrate_bps = codec_settings.spatialLayers[1].targetBitrate * 1000; + bitrate_allocation.SetBitrate(1, 0, layer_bitrate_bps); + 
bitrate_allocation.SetBitrate(1, 1, layer_bitrate_bps); + bitrate_allocation.SetBitrate(1, 2, layer_bitrate_bps); + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + // Expect upswitch doesn't happen immediately since there is no S1 frame that + // S1T2 frame can reference. + EXPECT_THAT(producer.SetNumInputFrames(1).Encode(), + ElementsAre(GenericLayerIs(0, 2))); + // Expect spatial upswitch happens now, at T0 frame. + EXPECT_THAT(producer.SetNumInputFrames(1).Encode(), + ElementsAre(GenericLayerIs(0, 0), GenericLayerIs(1, 0))); +} +// TODO(bugs.webrtc.org/13442) Enable once a forward fix has landed in WebRTC. +TEST_F(TestVp9Impl, DISABLED_DisableEnableBaseLayerTriggersKeyFrame) { + // Configure encoder to produce N spatial layers. Encode frames for all + // layers. Then disable all but the last layer. Then reenable all back again. + test::ScopedFieldTrials override_field_trials( + "WebRTC-Vp9ExternalRefCtrl/Enabled/"); + const size_t num_spatial_layers = 3; + const size_t num_temporal_layers = 3; + // Must not be multiple of temporal period to exercise all code paths. + const size_t num_frames_to_encode = 5; + + ConfigureSvc(codec_settings_, num_spatial_layers, num_temporal_layers); + codec_settings_.SetFrameDropEnabled(false); + codec_settings_.VP9()->flexibleMode = false; + codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOnKeyPic; + codec_settings_.mode = VideoCodecMode::kRealtimeVideo; + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) { + // Allocate high bit rate to avoid frame dropping due to rate control. 
+ bitrate_allocation.SetBitrate( + sl_idx, tl_idx, + codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 * 2); + } + } + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) { + SetWaitForEncodedFramesThreshold(num_spatial_layers); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available, + frame_num == 0); + } + + // Disable all but top layer. + for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) { + for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) { + bitrate_allocation.SetBitrate(sl_idx, tl_idx, 0); + } + } + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + // Set once a frame carrying SS data has been observed after the switch. + bool seen_ss_data = false; + for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) { + SetWaitForEncodedFramesThreshold(1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + // SS data is expected on the first base temporal layer frame after + // switching, and on no frame after that. + if (seen_ss_data) { + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available, + false); + } else { + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available, + codec_specific_info[0].codecSpecific.VP9.temporal_idx == 0); + seen_ss_data |= + codec_specific_info[0].codecSpecific.VP9.ss_data_available; + } + // No key-frames generated for disabling layers. 
+ EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta); + EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2); + } + EXPECT_TRUE(seen_ss_data); + + // Force key-frame. + std::vector<VideoFrameType> frame_types = {VideoFrameType::kVideoFrameKey}; + SetWaitForEncodedFramesThreshold(1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), &frame_types)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + // Key-frame should be produced. + EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameKey); + EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2); + + // Encode some more frames. + for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) { + SetWaitForEncodedFramesThreshold(1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta); + EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2); + } + + // Enable the second layer back. + // Allocate high bit rate to avoid frame dropping due to rate control. 
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) { + // Spatial layer 1 gets twice its own target bitrate, the same value the + // initial allocation in this test used for it. + bitrate_allocation.SetBitrate( + 1, tl_idx, codec_settings_.spatialLayers[1].targetBitrate * 1000 * 2); + } + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) { + SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + ASSERT_EQ(encoded_frame.size(), 2u); + // SS available immediately after switching on. + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available, + frame_num == 0); + // Keyframe should be generated when enabling lower layers. + const VideoFrameType expected_type = frame_num == 0 + ? VideoFrameType::kVideoFrameKey + : VideoFrameType::kVideoFrameDelta; + EXPECT_EQ(encoded_frame[0]._frameType, expected_type); + EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 1); + EXPECT_EQ(encoded_frame[1].SpatialIndex().value_or(-1), 2); + } + + // Enable the first layer back. + // Allocate high bit rate to avoid frame dropping due to rate control. 
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) { + // Spatial layer 0 gets twice its own target bitrate, the same value the + // initial allocation in this test used for it. + bitrate_allocation.SetBitrate( + 0, tl_idx, codec_settings_.spatialLayers[0].targetBitrate * 1000 * 2); + } + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) { + SetWaitForEncodedFramesThreshold(num_spatial_layers); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + ASSERT_EQ(encoded_frame.size(), 3u); + // SS available immediately after switching on. + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available, + frame_num == 0); + // Keyframe should be generated when enabling lower layers. + const VideoFrameType expected_type = frame_num == 0 + ? VideoFrameType::kVideoFrameKey + : VideoFrameType::kVideoFrameDelta; + EXPECT_EQ(encoded_frame[0]._frameType, expected_type); + } +} +// TODO(bugs.webrtc.org/13442) Enable once a forward fix has landed in WebRTC. +TEST(Vp9ImplTest, + DISABLED_DisableEnableBaseLayerWithSvcControllerTriggersKeyFrame) { + // Configure encoder to produce N spatial layers. Encode frames for all + // layers. Then disable all but the last layer. Then reenable all back again. + const size_t num_spatial_layers = 3; + const size_t num_temporal_layers = 3; + // Must not be multiple of temporal period to exercise all code paths. 
+ const size_t num_frames_to_encode = 5; + + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + ConfigureSvc(codec_settings, num_spatial_layers, num_temporal_layers); + codec_settings.SetFrameDropEnabled(false); + codec_settings.VP9()->flexibleMode = false; + codec_settings.VP9()->interLayerPred = InterLayerPredMode::kOnKeyPic; + codec_settings.mode = VideoCodecMode::kRealtimeVideo; + + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) { + // Allocate high bit rate to avoid frame dropping due to rate control. + bitrate_allocation.SetBitrate( + sl_idx, tl_idx, + codec_settings.spatialLayers[sl_idx].targetBitrate * 1000 * 2); + } + } + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + + EncodedVideoFrameProducer producer(*encoder); + producer.SetResolution({kWidth, kHeight}); + + std::vector<EncodedVideoFrameProducer::EncodedFrame> frames = + producer.SetNumInputFrames(num_frames_to_encode).Encode(); + ASSERT_THAT(frames, SizeIs(num_frames_to_encode * num_spatial_layers)); + + // Disable all but top spatial layer. + for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) { + for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) { + bitrate_allocation.SetBitrate(sl_idx, tl_idx, 0); + } + } + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + + frames = producer.SetNumInputFrames(num_frames_to_encode).Encode(); + EXPECT_THAT(frames, SizeIs(num_frames_to_encode)); + for (const auto& frame : frames) { + // Expect no key-frames generated. 
+ EXPECT_FALSE(frame.codec_specific_info.template_structure); + ASSERT_TRUE(frame.codec_specific_info.generic_frame_info); + EXPECT_EQ(frame.codec_specific_info.generic_frame_info->spatial_id, 2); + } + + frames = producer.ForceKeyFrame().SetNumInputFrames(1).Encode(); + ASSERT_THAT(frames, SizeIs(1)); + // Key-frame should be produced. + EXPECT_EQ(frames[0].encoded_image._frameType, VideoFrameType::kVideoFrameKey); + ASSERT_TRUE(frames[0].codec_specific_info.template_structure); + ASSERT_TRUE(frames[0].codec_specific_info.generic_frame_info); + EXPECT_EQ(frames[0].codec_specific_info.generic_frame_info->spatial_id, 2); + + frames = producer.SetNumInputFrames(num_frames_to_encode).Encode(); + ASSERT_THAT(frames, SizeIs(num_frames_to_encode)); + for (const auto& frame : frames) { + EXPECT_EQ(frame.encoded_image._frameType, VideoFrameType::kVideoFrameDelta); + EXPECT_FALSE(frame.codec_specific_info.template_structure); + ASSERT_TRUE(frame.codec_specific_info.generic_frame_info); + EXPECT_EQ(frame.codec_specific_info.generic_frame_info->spatial_id, 2); + } + + // Enable the second layer back. + // Allocate high bit rate to avoid frame dropping due to rate control. 
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) { + bitrate_allocation.SetBitrate( + 1, tl_idx, codec_settings.spatialLayers[0].targetBitrate * 1000 * 2); + } + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + + frames = producer.SetNumInputFrames(num_frames_to_encode).Encode(); + ASSERT_THAT(frames, SizeIs(num_frames_to_encode * 2)); + EXPECT_EQ(frames[0].encoded_image._frameType, VideoFrameType::kVideoFrameKey); + EXPECT_TRUE(frames[0].codec_specific_info.template_structure); + ASSERT_TRUE(frames[0].codec_specific_info.generic_frame_info); + EXPECT_EQ(frames[0].codec_specific_info.generic_frame_info->spatial_id, 1); + for (size_t i = 1; i < frames.size(); ++i) { + EXPECT_EQ(frames[i].encoded_image._frameType, + VideoFrameType::kVideoFrameDelta); + EXPECT_FALSE(frames[i].codec_specific_info.template_structure); + ASSERT_TRUE(frames[i].codec_specific_info.generic_frame_info); + EXPECT_EQ(frames[i].codec_specific_info.generic_frame_info->spatial_id, + 1 + static_cast<int>(i % 2)); + } + + // Enable the first layer back. + // Allocate high bit rate to avoid frame dropping due to rate control. 
+ for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) { + bitrate_allocation.SetBitrate( + 0, tl_idx, codec_settings.spatialLayers[1].targetBitrate * 1000 * 2); + } + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + + frames = producer.SetNumInputFrames(num_frames_to_encode).Encode(); + ASSERT_THAT(frames, SizeIs(num_frames_to_encode * 3)); + EXPECT_TRUE(frames[0].codec_specific_info.template_structure); + ASSERT_TRUE(frames[0].codec_specific_info.generic_frame_info); + EXPECT_EQ(frames[0].codec_specific_info.generic_frame_info->spatial_id, 0); + for (size_t i = 1; i < frames.size(); ++i) { + EXPECT_FALSE(frames[i].codec_specific_info.template_structure); + ASSERT_TRUE(frames[i].codec_specific_info.generic_frame_info); + EXPECT_EQ(frames[i].codec_specific_info.generic_frame_info->spatial_id, + static_cast<int>(i % 3)); + } +} + +TEST_F(TestVp9Impl, DisableEnableBaseLayerTriggersKeyFrameForScreenshare) { + // Configure encoder to produce N spatial layers. Encode frames for all + // layers. Then disable all but the last layer. Then reenable all back again. + const size_t num_spatial_layers = 3; + const size_t num_frames_to_encode = 5; + + ConfigureSvc(codec_settings_, num_spatial_layers); + codec_settings_.SetFrameDropEnabled(false); + codec_settings_.mode = VideoCodecMode::kScreensharing; + codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn; + codec_settings_.VP9()->flexibleMode = true; + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + // Allocate high bit rate to avoid frame dropping due to rate control. 
+ bitrate_allocation.SetBitrate( + sl_idx, 0, + codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 * 2); + } + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) { + SetWaitForEncodedFramesThreshold(num_spatial_layers); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available, + frame_num == 0); + } + + // Disable all but top layer. + for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) { + bitrate_allocation.SetBitrate(sl_idx, 0, 0); + } + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) { + SetWaitForEncodedFramesThreshold(1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + // SS available immediatly after switching off. + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available, + frame_num == 0); + // No key-frames generated for disabling layers. + EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta); + EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2); + } + + // Force key-frame. 
+ std::vector<VideoFrameType> frame_types = {VideoFrameType::kVideoFrameKey}; + SetWaitForEncodedFramesThreshold(1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), &frame_types)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + // Key-frame should be produced. + EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameKey); + + // Enable the second layer back. + // Allocate high bit rate to avoid frame dropping due to rate control. + bitrate_allocation.SetBitrate( + 1, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000 * 2); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) { + SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + ASSERT_EQ(encoded_frame.size(), 2u); + // SS available immediatly after switching on. + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available, + frame_num == 0); + // Keyframe should be generated when enabling lower layers. + const VideoFrameType expected_type = frame_num == 0 + ? VideoFrameType::kVideoFrameKey + : VideoFrameType::kVideoFrameDelta; + EXPECT_EQ(encoded_frame[0]._frameType, expected_type); + EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 1); + EXPECT_EQ(encoded_frame[1].SpatialIndex().value_or(-1), 2); + } + + // Enable the first layer back. + // Allocate high bit rate to avoid frame dropping due to rate control. 
+ bitrate_allocation.SetBitrate( + 0, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 * 2); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) { + SetWaitForEncodedFramesThreshold(num_spatial_layers); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + ASSERT_EQ(encoded_frame.size(), 3u); + // SS available immediatly after switching on. + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available, + frame_num == 0); + // Keyframe should be generated when enabling lower layers. + const VideoFrameType expected_type = frame_num == 0 + ? VideoFrameType::kVideoFrameKey + : VideoFrameType::kVideoFrameDelta; + EXPECT_EQ(encoded_frame[0]._frameType, expected_type); + } +} + +TEST_F(TestVp9Impl, EndOfPicture) { + const size_t num_spatial_layers = 2; + ConfigureSvc(codec_settings_, num_spatial_layers); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + // Encode both base and upper layers. Check that end-of-superframe flag is + // set on upper layer frame but not on base layer frame. 
+ VideoBitrateAllocation bitrate_allocation; + bitrate_allocation.SetBitrate( + 0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000); + bitrate_allocation.SetBitrate( + 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + + std::vector<EncodedImage> frames; + std::vector<CodecSpecificInfo> codec_specific; + ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific)); + EXPECT_FALSE(codec_specific[0].end_of_picture); + EXPECT_TRUE(codec_specific[1].end_of_picture); + + // Encode only base layer. Check that end-of-superframe flag is + // set on base layer frame. + bitrate_allocation.SetBitrate(1, 0, 0); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + SetWaitForEncodedFramesThreshold(1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + + ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific)); + EXPECT_FALSE(frames[0].SpatialIndex()); + EXPECT_TRUE(codec_specific[0].end_of_picture); +} + +TEST_F(TestVp9Impl, InterLayerPred) { + const size_t num_spatial_layers = 2; + ConfigureSvc(codec_settings_, num_spatial_layers); + codec_settings_.SetFrameDropEnabled(false); + + VideoBitrateAllocation bitrate_allocation; + for (size_t i = 0; i < num_spatial_layers; ++i) { + bitrate_allocation.SetBitrate( + i, 0, codec_settings_.spatialLayers[i].targetBitrate * 1000); + } + + const std::vector<InterLayerPredMode> inter_layer_pred_modes = { + InterLayerPredMode::kOff, InterLayerPredMode::kOn, + InterLayerPredMode::kOnKeyPic}; + + for (const InterLayerPredMode inter_layer_pred : inter_layer_pred_modes) { + codec_settings_.VP9()->interLayerPred = 
inter_layer_pred; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + + std::vector<EncodedImage> frames; + std::vector<CodecSpecificInfo> codec_specific; + ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific)); + + // Key frame. + ASSERT_EQ(frames[0].SpatialIndex(), 0); + ASSERT_FALSE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted); + EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_layer_predicted); + EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred, + inter_layer_pred == InterLayerPredMode::kOff); + EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.ss_data_available); + + ASSERT_EQ(frames[1].SpatialIndex(), 1); + ASSERT_FALSE(codec_specific[1].codecSpecific.VP9.inter_pic_predicted); + EXPECT_EQ(codec_specific[1].codecSpecific.VP9.inter_layer_predicted, + inter_layer_pred == InterLayerPredMode::kOn || + inter_layer_pred == InterLayerPredMode::kOnKeyPic); + EXPECT_EQ(codec_specific[1].codecSpecific.VP9.ss_data_available, + inter_layer_pred == InterLayerPredMode::kOff); + EXPECT_TRUE( + codec_specific[1].codecSpecific.VP9.non_ref_for_inter_layer_pred); + + // Delta frame. 
+ SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific)); + + ASSERT_EQ(frames[0].SpatialIndex(), 0); + ASSERT_TRUE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted); + EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_layer_predicted); + EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred, + inter_layer_pred != InterLayerPredMode::kOn); + EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.ss_data_available); + + ASSERT_EQ(frames[1].SpatialIndex(), 1); + ASSERT_TRUE(codec_specific[1].codecSpecific.VP9.inter_pic_predicted); + EXPECT_EQ(codec_specific[1].codecSpecific.VP9.inter_layer_predicted, + inter_layer_pred == InterLayerPredMode::kOn); + EXPECT_TRUE( + codec_specific[1].codecSpecific.VP9.non_ref_for_inter_layer_pred); + EXPECT_FALSE(codec_specific[1].codecSpecific.VP9.ss_data_available); + } +} + +TEST_F(TestVp9Impl, + EnablingUpperLayerTriggersKeyFrameIfInterLayerPredIsDisabled) { + const size_t num_spatial_layers = 3; + const size_t num_frames_to_encode = 2; + + ConfigureSvc(codec_settings_, num_spatial_layers); + codec_settings_.SetFrameDropEnabled(false); + + const std::vector<InterLayerPredMode> inter_layer_pred_modes = { + InterLayerPredMode::kOff, InterLayerPredMode::kOn, + InterLayerPredMode::kOnKeyPic}; + + for (const InterLayerPredMode inter_layer_pred : inter_layer_pred_modes) { + codec_settings_.VP9()->interLayerPred = inter_layer_pred; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + bitrate_allocation.SetBitrate( + sl_idx, 0, + codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + for (size_t frame_num = 0; frame_num 
< num_frames_to_encode; + ++frame_num) { + SetWaitForEncodedFramesThreshold(sl_idx + 1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + + const bool is_first_upper_layer_frame = (sl_idx > 0 && frame_num == 0); + if (is_first_upper_layer_frame) { + if (inter_layer_pred == InterLayerPredMode::kOn) { + EXPECT_EQ(encoded_frame[0]._frameType, + VideoFrameType::kVideoFrameDelta); + } else { + EXPECT_EQ(encoded_frame[0]._frameType, + VideoFrameType::kVideoFrameKey); + } + } else if (sl_idx == 0 && frame_num == 0) { + EXPECT_EQ(encoded_frame[0]._frameType, + VideoFrameType::kVideoFrameKey); + } else { + for (size_t i = 0; i <= sl_idx; ++i) { + EXPECT_EQ(encoded_frame[i]._frameType, + VideoFrameType::kVideoFrameDelta); + } + } + } + } + } +} + +TEST_F(TestVp9Impl, + EnablingUpperLayerUnsetsInterPicPredictedInInterlayerPredModeOn) { + const size_t num_spatial_layers = 3; + const size_t num_frames_to_encode = 2; + + ConfigureSvc(codec_settings_, num_spatial_layers); + codec_settings_.SetFrameDropEnabled(false); + codec_settings_.VP9()->flexibleMode = false; + + const std::vector<InterLayerPredMode> inter_layer_pred_modes = { + InterLayerPredMode::kOff, InterLayerPredMode::kOn, + InterLayerPredMode::kOnKeyPic}; + + for (const InterLayerPredMode inter_layer_pred : inter_layer_pred_modes) { + codec_settings_.VP9()->interLayerPred = inter_layer_pred; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + bitrate_allocation.SetBitrate( + sl_idx, 0, + codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + for 
(size_t frame_num = 0; frame_num < num_frames_to_encode; + ++frame_num) { + SetWaitForEncodedFramesThreshold(sl_idx + 1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + + ASSERT_EQ(codec_specific_info.size(), sl_idx + 1); + + for (size_t i = 0; i <= sl_idx; ++i) { + const bool is_keyframe = + encoded_frame[0]._frameType == VideoFrameType::kVideoFrameKey; + const bool is_first_upper_layer_frame = + (i == sl_idx && frame_num == 0); + // Interframe references are there, unless it's a keyframe, + // or it's a first activated frame in a upper layer + const bool expect_no_references = + is_keyframe || (is_first_upper_layer_frame && + inter_layer_pred == InterLayerPredMode::kOn); + EXPECT_EQ( + codec_specific_info[i].codecSpecific.VP9.inter_pic_predicted, + !expect_no_references); + } + } + } + } +} + +TEST_F(TestVp9Impl, EnablingDisablingUpperLayerInTheSameGof) { + const size_t num_spatial_layers = 2; + const size_t num_temporal_layers = 2; + + ConfigureSvc(codec_settings_, num_spatial_layers, num_temporal_layers); + codec_settings_.SetFrameDropEnabled(false); + codec_settings_.VP9()->flexibleMode = false; + + codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + VideoBitrateAllocation bitrate_allocation; + + // Enable both spatial and both temporal layers. 
+ bitrate_allocation.SetBitrate( + 0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2); + bitrate_allocation.SetBitrate( + 0, 1, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2); + bitrate_allocation.SetBitrate( + 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2); + bitrate_allocation.SetBitrate( + 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + + // Encode 3 frames. + for (int i = 0; i < 3; ++i) { + SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + ASSERT_EQ(codec_specific_info.size(), 2u); + } + + // Disable SL1 layer. + bitrate_allocation.SetBitrate(1, 0, 0); + bitrate_allocation.SetBitrate(1, 1, 0); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + // Encode 1 frame. + SetWaitForEncodedFramesThreshold(1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + ASSERT_EQ(codec_specific_info.size(), 1u); + EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 1); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted, true); + + // Enable SL1 layer. + bitrate_allocation.SetBitrate( + 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2); + bitrate_allocation.SetBitrate( + 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + // Encode 1 frame. 
+ SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + ASSERT_EQ(codec_specific_info.size(), 2u); + EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted, true); + EXPECT_EQ(codec_specific_info[1].codecSpecific.VP9.inter_pic_predicted, true); +} + +TEST_F(TestVp9Impl, EnablingDisablingUpperLayerAccrossGof) { + const size_t num_spatial_layers = 2; + const size_t num_temporal_layers = 2; + + ConfigureSvc(codec_settings_, num_spatial_layers, num_temporal_layers); + codec_settings_.SetFrameDropEnabled(false); + codec_settings_.VP9()->flexibleMode = false; + + codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + VideoBitrateAllocation bitrate_allocation; + + // Enable both spatial and both temporal layers. + bitrate_allocation.SetBitrate( + 0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2); + bitrate_allocation.SetBitrate( + 0, 1, codec_settings_.spatialLayers[0].targetBitrate * 1000 / 2); + bitrate_allocation.SetBitrate( + 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2); + bitrate_allocation.SetBitrate( + 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + std::vector<EncodedImage> encoded_frame; + std::vector<CodecSpecificInfo> codec_specific_info; + + // Encode 3 frames. 
+ for (int i = 0; i < 3; ++i) { + SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + ASSERT_EQ(codec_specific_info.size(), 2u); + } + + // Disable SL1 layer. + bitrate_allocation.SetBitrate(1, 0, 0); + bitrate_allocation.SetBitrate(1, 1, 0); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + // Encode 11 frames. More than Gof length 2, and odd to end at TL1 frame. + for (int i = 0; i < 11; ++i) { + SetWaitForEncodedFramesThreshold(1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + ASSERT_EQ(codec_specific_info.size(), 1u); + EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 1 - i % 2); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted, + true); + } + + // Enable SL1 layer. + bitrate_allocation.SetBitrate( + 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2); + bitrate_allocation.SetBitrate( + 1, 1, codec_settings_.spatialLayers[1].targetBitrate * 1000 / 2); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + // Encode 1 frame. 
+ SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + ASSERT_EQ(codec_specific_info.size(), 2u); + EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.inter_pic_predicted, true); + EXPECT_EQ(codec_specific_info[1].codecSpecific.VP9.inter_pic_predicted, + false); +} + +TEST_F(TestVp9Impl, EnablingNewLayerInScreenshareForcesAllLayersWithSS) { + const size_t num_spatial_layers = 3; + // Chosen by hand, the 2nd frame is dropped with configured per-layer max + // framerate. + const size_t num_frames_to_encode_before_drop = 1; + + codec_settings_.maxFramerate = 30; + ConfigureSvc(codec_settings_, num_spatial_layers); + codec_settings_.spatialLayers[0].maxFramerate = 5.0; + // use 30 for the SL 1 instead of 10, so even if SL 0 frame is dropped due to + // framerate capping we would still get back at least a middle layer. It + // simplifies the test. + codec_settings_.spatialLayers[1].maxFramerate = 30.0; + codec_settings_.spatialLayers[2].maxFramerate = 30.0; + codec_settings_.SetFrameDropEnabled(false); + codec_settings_.mode = VideoCodecMode::kScreensharing; + codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn; + codec_settings_.VP9()->flexibleMode = true; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + // Enable all but the last layer. 
+ VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) { + bitrate_allocation.SetBitrate( + sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000); + } + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + // Encode enough frames to force drop due to framerate capping. + for (size_t frame_num = 0; frame_num < num_frames_to_encode_before_drop; + ++frame_num) { + SetWaitForEncodedFramesThreshold(num_spatial_layers - 1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frames; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + } + + // Enable the last layer. + bitrate_allocation.SetBitrate( + num_spatial_layers - 1, 0, + codec_settings_.spatialLayers[num_spatial_layers - 1].targetBitrate * + 1000); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + // All layers are encoded, even though frame dropping should happen. + SetWaitForEncodedFramesThreshold(num_spatial_layers); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + // Now all 3 layers should be encoded. + std::vector<EncodedImage> encoded_frames; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + EXPECT_EQ(encoded_frames.size(), 3u); + // Scalability structure has to be triggered. + EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available); +} + +TEST_F(TestVp9Impl, ScreenshareFrameDropping) { + const int num_spatial_layers = 3; + const int num_frames_to_detect_drops = 2; + + codec_settings_.maxFramerate = 30; + ConfigureSvc(codec_settings_, num_spatial_layers); + // use 30 for the SL0 and SL1 because it simplifies the test. 
+ codec_settings_.spatialLayers[0].maxFramerate = 30.0; + codec_settings_.spatialLayers[1].maxFramerate = 30.0; + codec_settings_.spatialLayers[2].maxFramerate = 30.0; + codec_settings_.SetFrameDropEnabled(true); + codec_settings_.mode = VideoCodecMode::kScreensharing; + codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn; + codec_settings_.VP9()->flexibleMode = true; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + // Enable all but the last layer. + VideoBitrateAllocation bitrate_allocation; + // Very low bitrate for the lowest spatial layer to ensure rate-control drops. + bitrate_allocation.SetBitrate(0, 0, 1000); + bitrate_allocation.SetBitrate( + 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000); + // Disable highest layer. + bitrate_allocation.SetBitrate(2, 0, 0); + + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + bool frame_dropped = false; + // Encode enough frames to force drop due to rate-control. + for (size_t frame_num = 0; frame_num < num_frames_to_detect_drops; + ++frame_num) { + SetWaitForEncodedFramesThreshold(1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frames; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + EXPECT_LE(encoded_frames.size(), 2u); + EXPECT_GE(encoded_frames.size(), 1u); + if (encoded_frames.size() == 1) { + frame_dropped = true; + // Dropped frame is on the SL0. + EXPECT_EQ(encoded_frames[0].SpatialIndex(), 1); + } + } + EXPECT_TRUE(frame_dropped); + + // Enable the last layer. 
+ bitrate_allocation.SetBitrate( + 2, 0, codec_settings_.spatialLayers[2].targetBitrate * 1000); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + SetWaitForEncodedFramesThreshold(1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frames; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + // No drop allowed. + EXPECT_EQ(encoded_frames.size(), 3u); + + // Verify that frame-dropping is re-enabled back. + frame_dropped = false; + // Encode enough frames to force drop due to rate-control. + for (size_t frame_num = 0; frame_num < num_frames_to_detect_drops; + ++frame_num) { + SetWaitForEncodedFramesThreshold(1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frames; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + EXPECT_LE(encoded_frames.size(), 3u); + EXPECT_GE(encoded_frames.size(), 2u); + if (encoded_frames.size() == 2) { + frame_dropped = true; + // Dropped frame is on the SL0. + EXPECT_EQ(encoded_frames[0].SpatialIndex(), 1); + EXPECT_EQ(encoded_frames[1].SpatialIndex(), 2); + } + } + EXPECT_TRUE(frame_dropped); +} + +TEST_F(TestVp9Impl, RemovingLayerIsNotDelayedInScreenshareAndAddsSsInfo) { + const size_t num_spatial_layers = 3; + // Chosen by hand, the 2nd frame is dropped with configured per-layer max + // framerate. + const size_t num_frames_to_encode_before_drop = 1; + // Chosen by hand, exactly 5 frames are dropped for input fps=30 and max + // framerate = 5. 
+ const size_t num_dropped_frames = 5; + + codec_settings_.maxFramerate = 30; + ConfigureSvc(codec_settings_, num_spatial_layers); + codec_settings_.spatialLayers[0].maxFramerate = 5.0; + // use 30 for the SL 1 instead of 5, so even if SL 0 frame is dropped due to + // framerate capping we would still get back at least a middle layer. It + // simplifies the test. + codec_settings_.spatialLayers[1].maxFramerate = 30.0; + codec_settings_.spatialLayers[2].maxFramerate = 30.0; + codec_settings_.SetFrameDropEnabled(false); + codec_settings_.mode = VideoCodecMode::kScreensharing; + codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn; + codec_settings_.VP9()->flexibleMode = true; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + // All layers are enabled from the start. + VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + bitrate_allocation.SetBitrate( + sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000); + } + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + // Encode enough frames to force drop due to framerate capping. + for (size_t frame_num = 0; frame_num < num_frames_to_encode_before_drop; + ++frame_num) { + SetWaitForEncodedFramesThreshold(num_spatial_layers); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frames; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + } + + // Now the first layer should not have frames in it. + for (size_t frame_num = 0; frame_num < num_dropped_frames - 2; ++frame_num) { + SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + // First layer is dropped due to frame rate cap. The last layer should not + // be enabled yet. 
+ std::vector<EncodedImage> encoded_frames; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + // First layer is skipped. + EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 1); + } + + // Disable the last layer. + bitrate_allocation.SetBitrate(num_spatial_layers - 1, 0, 0); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + // Still expected to drop first layer. Last layer has to be disable also. + for (size_t frame_num = num_dropped_frames - 2; + frame_num < num_dropped_frames; ++frame_num) { + // Expect back one frame. + SetWaitForEncodedFramesThreshold(1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + // First layer is dropped due to frame rate cap. The last layer should not + // be enabled yet. + std::vector<EncodedImage> encoded_frames; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + // First layer is skipped. + EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 1); + // No SS data on non-base spatial layer. + EXPECT_FALSE(codec_specific_info[0].codecSpecific.VP9.ss_data_available); + } + + SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + std::vector<EncodedImage> encoded_frames; + std::vector<CodecSpecificInfo> codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + // First layer is not skipped now. + EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 0); + // SS data should be present. 
+ EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available); +} + +TEST_F(TestVp9Impl, DisableNewLayerInVideoDelaysSsInfoTillTL0) { + const size_t num_spatial_layers = 3; + const size_t num_temporal_layers = 2; + // Chosen by hand, the 2nd frame is dropped with configured per-layer max + // framerate. + ConfigureSvc(codec_settings_, num_spatial_layers, num_temporal_layers); + codec_settings_.SetFrameDropEnabled(false); + codec_settings_.mode = VideoCodecMode::kRealtimeVideo; + codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOnKeyPic; + codec_settings_.VP9()->flexibleMode = false; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + // Enable all the layers. + VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) { + bitrate_allocation.SetBitrate( + sl_idx, tl_idx, + codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 / + num_temporal_layers); + } + } + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + std::vector<EncodedImage> encoded_frames; + std::vector<CodecSpecificInfo> codec_specific_info; + + // Encode one TL0 frame + SetWaitForEncodedFramesThreshold(num_spatial_layers); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0u); + + // Disable the last layer. + for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) { + bitrate_allocation.SetBitrate(num_spatial_layers - 1, tl_idx, 0); + } + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + // Next is TL1 frame. The last layer is disabled immediately, but SS structure + // is not provided here. 
+ SetWaitForEncodedFramesThreshold(num_spatial_layers - 1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 1u); + EXPECT_FALSE(codec_specific_info[0].codecSpecific.VP9.ss_data_available); + + // Next is TL0 frame, which should have delayed SS structure. + SetWaitForEncodedFramesThreshold(num_spatial_layers - 1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0u); + EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available); + EXPECT_TRUE(codec_specific_info[0] + .codecSpecific.VP9.spatial_layer_resolution_present); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.num_spatial_layers, + num_spatial_layers - 1); +} + +TEST_F(TestVp9Impl, + LowLayerMarkedAsRefIfHighLayerNotEncodedAndInterLayerPredIsEnabled) { + ConfigureSvc(codec_settings_, 3); + codec_settings_.SetFrameDropEnabled(false); + codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn; + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + VideoBitrateAllocation bitrate_allocation; + bitrate_allocation.SetBitrate( + 0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_info)); + EXPECT_TRUE(codec_info.codecSpecific.VP9.ss_data_available); + EXPECT_FALSE(codec_info.codecSpecific.VP9.non_ref_for_inter_layer_pred); +} + +TEST_F(TestVp9Impl, 
ScalabilityStructureIsAvailableInFlexibleMode) { + codec_settings_.VP9()->flexibleMode = true; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + EXPECT_TRUE(codec_specific_info.codecSpecific.VP9.ss_data_available); +} + +TEST_F(TestVp9Impl, Profile0PreferredPixelFormats) { + EXPECT_THAT(encoder_->GetEncoderInfo().preferred_pixel_formats, + testing::UnorderedElementsAre(VideoFrameBuffer::Type::kNV12, + VideoFrameBuffer::Type::kI420)); +} + +TEST_F(TestVp9Impl, EncoderInfoWithoutResolutionBitrateLimits) { + EXPECT_TRUE(encoder_->GetEncoderInfo().resolution_bitrate_limits.empty()); +} + +TEST_F(TestVp9Impl, EncoderInfoWithBitrateLimitsFromFieldTrial) { + test::ScopedFieldTrials field_trials( + "WebRTC-VP9-GetEncoderInfoOverride/" + "frame_size_pixels:123|456|789," + "min_start_bitrate_bps:11000|22000|33000," + "min_bitrate_bps:44000|55000|66000," + "max_bitrate_bps:77000|88000|99000/"); + SetUp(); + + EXPECT_THAT( + encoder_->GetEncoderInfo().resolution_bitrate_limits, + ::testing::ElementsAre( + VideoEncoder::ResolutionBitrateLimits{123, 11000, 44000, 77000}, + VideoEncoder::ResolutionBitrateLimits{456, 22000, 55000, 88000}, + VideoEncoder::ResolutionBitrateLimits{789, 33000, 66000, 99000})); +} + +TEST_F(TestVp9Impl, EncoderInfoFpsAllocation) { + const uint8_t kNumSpatialLayers = 3; + const uint8_t kNumTemporalLayers = 3; + + codec_settings_.maxFramerate = 30; + codec_settings_.VP9()->numberOfSpatialLayers = kNumSpatialLayers; + codec_settings_.VP9()->numberOfTemporalLayers = kNumTemporalLayers; + + for (uint8_t sl_idx = 0; sl_idx < kNumSpatialLayers; ++sl_idx) { + codec_settings_.spatialLayers[sl_idx].width = codec_settings_.width; + codec_settings_.spatialLayers[sl_idx].height = 
codec_settings_.height; + codec_settings_.spatialLayers[sl_idx].minBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].maxBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].targetBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].active = true; + codec_settings_.spatialLayers[sl_idx].maxFramerate = + codec_settings_.maxFramerate; + } + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + FramerateFractions expected_fps_allocation[kMaxSpatialLayers]; + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction / 4); + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction / 2); + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction); + expected_fps_allocation[1] = expected_fps_allocation[0]; + expected_fps_allocation[2] = expected_fps_allocation[0]; + EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation, + ElementsAreArray(expected_fps_allocation)); +} + +TEST_F(TestVp9Impl, EncoderInfoFpsAllocationFlexibleMode) { + const uint8_t kNumSpatialLayers = 3; + + codec_settings_.maxFramerate = 30; + codec_settings_.VP9()->numberOfSpatialLayers = kNumSpatialLayers; + codec_settings_.VP9()->numberOfTemporalLayers = 1; + codec_settings_.VP9()->flexibleMode = true; + + VideoEncoder::RateControlParameters rate_params; + for (uint8_t sl_idx = 0; sl_idx < kNumSpatialLayers; ++sl_idx) { + codec_settings_.spatialLayers[sl_idx].width = codec_settings_.width; + codec_settings_.spatialLayers[sl_idx].height = codec_settings_.height; + codec_settings_.spatialLayers[sl_idx].minBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].maxBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].targetBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].active = true; + // Force different frame rates for different layers, to verify 
that total + // fraction is correct. + codec_settings_.spatialLayers[sl_idx].maxFramerate = + codec_settings_.maxFramerate / (kNumSpatialLayers - sl_idx); + rate_params.bitrate.SetBitrate(sl_idx, 0, + codec_settings_.startBitrate * 1000); + } + rate_params.bandwidth_allocation = + DataRate::BitsPerSec(rate_params.bitrate.get_sum_bps()); + rate_params.framerate_fps = codec_settings_.maxFramerate; + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + // No temporal layers allowed when spatial layers have different fps targets. + FramerateFractions expected_fps_allocation[kMaxSpatialLayers]; + expected_fps_allocation[0].push_back(EncoderInfo::kMaxFramerateFraction / 3); + expected_fps_allocation[1].push_back(EncoderInfo::kMaxFramerateFraction / 2); + expected_fps_allocation[2].push_back(EncoderInfo::kMaxFramerateFraction); + EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation, + ::testing::ElementsAreArray(expected_fps_allocation)); + + // SetRates with current fps does not alter outcome. + encoder_->SetRates(rate_params); + EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation, + ::testing::ElementsAreArray(expected_fps_allocation)); + + // Higher fps than the codec wants, should still not affect outcome. + rate_params.framerate_fps *= 2; + encoder_->SetRates(rate_params); + EXPECT_THAT(encoder_->GetEncoderInfo().fps_allocation, + ::testing::ElementsAreArray(expected_fps_allocation)); +} + +class Vp9ImplWithLayeringTest + : public ::testing::TestWithParam<std::tuple<int, int, bool>> { + protected: + Vp9ImplWithLayeringTest() + : num_spatial_layers_(std::get<0>(GetParam())), + num_temporal_layers_(std::get<1>(GetParam())), + override_field_trials_(std::get<2>(GetParam()) + ? 
"WebRTC-Vp9ExternalRefCtrl/Enabled/" + : "") {} + + const uint8_t num_spatial_layers_; + const uint8_t num_temporal_layers_; + const test::ScopedFieldTrials override_field_trials_; +}; + +TEST_P(Vp9ImplWithLayeringTest, FlexibleMode) { + // In flexible mode encoder wrapper obtains actual list of references from + // encoder and writes it into RTP payload descriptor. Check that reference + // list in payload descriptor matches the predefined one, which is used + // in non-flexible mode. + std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.VP9()->flexibleMode = true; + codec_settings.SetFrameDropEnabled(false); + codec_settings.VP9()->numberOfSpatialLayers = num_spatial_layers_; + codec_settings.VP9()->numberOfTemporalLayers = num_temporal_layers_; + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + GofInfoVP9 gof; + if (num_temporal_layers_ == 1) { + gof.SetGofInfoVP9(kTemporalStructureMode1); + } else if (num_temporal_layers_ == 2) { + gof.SetGofInfoVP9(kTemporalStructureMode2); + } else if (num_temporal_layers_ == 3) { + gof.SetGofInfoVP9(kTemporalStructureMode3); + } + + // Encode at least (num_frames_in_gof + 1) frames to verify references + // of non-key frame with gof_idx = 0. 
+ int num_input_frames = gof.num_frames_in_gof + 1; + std::vector<EncodedVideoFrameProducer::EncodedFrame> frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(num_input_frames) + .SetResolution({kWidth, kHeight}) + .Encode(); + ASSERT_THAT(frames, SizeIs(num_input_frames * num_spatial_layers_)); + + for (size_t i = 0; i < frames.size(); ++i) { + const EncodedVideoFrameProducer::EncodedFrame& frame = frames[i]; + const size_t picture_idx = i / num_spatial_layers_; + const size_t gof_idx = picture_idx % gof.num_frames_in_gof; + + const CodecSpecificInfoVP9& vp9 = + frame.codec_specific_info.codecSpecific.VP9; + EXPECT_EQ(frame.encoded_image.SpatialIndex(), + num_spatial_layers_ == 1 + ? absl::nullopt + : absl::optional<int>(i % num_spatial_layers_)) + << "Frame " << i; + EXPECT_EQ(vp9.temporal_idx, num_temporal_layers_ == 1 + ? kNoTemporalIdx + : gof.temporal_idx[gof_idx]) + << "Frame " << i; + EXPECT_EQ(vp9.temporal_up_switch, gof.temporal_up_switch[gof_idx]) + << "Frame " << i; + if (picture_idx == 0) { + EXPECT_EQ(vp9.num_ref_pics, 0) << "Frame " << i; + } else { + EXPECT_THAT(rtc::MakeArrayView(vp9.p_diff, vp9.num_ref_pics), + UnorderedElementsAreArray(gof.pid_diff[gof_idx], + gof.num_ref_pics[gof_idx])) + << "Frame " << i; + } + } +} + +INSTANTIATE_TEST_SUITE_P(All, + Vp9ImplWithLayeringTest, + ::testing::Combine(::testing::Values(1, 2, 3), + ::testing::Values(1, 2, 3), + ::testing::Bool())); + +class TestVp9ImplFrameDropping : public TestVp9Impl { + protected: + void ModifyCodecSettings(VideoCodec* codec_settings) override { + webrtc::test::CodecSettings(kVideoCodecVP9, codec_settings); + // We need to encode quite a lot of frames in this test. Use low resolution + // to reduce execution time. 
+ codec_settings->width = 64; + codec_settings->height = 64; + codec_settings->mode = VideoCodecMode::kScreensharing; + } +}; + +TEST_F(TestVp9ImplFrameDropping, PreEncodeFrameDropping) { + const size_t num_frames_to_encode = 100; + const float input_framerate_fps = 30.0; + const float video_duration_secs = num_frames_to_encode / input_framerate_fps; + const float expected_framerate_fps = 5.0f; + const float max_abs_framerate_error_fps = expected_framerate_fps * 0.1f; + + codec_settings_.maxFramerate = static_cast<uint32_t>(expected_framerate_fps); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + VideoFrame input_frame = NextInputFrame(); + for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr)); + const size_t timestamp = input_frame.timestamp() + + kVideoPayloadTypeFrequency / input_framerate_fps; + input_frame.set_timestamp(static_cast<uint32_t>(timestamp)); + } + + const size_t num_encoded_frames = GetNumEncodedFrames(); + const float encoded_framerate_fps = num_encoded_frames / video_duration_secs; + EXPECT_NEAR(encoded_framerate_fps, expected_framerate_fps, + max_abs_framerate_error_fps); +} + +TEST_F(TestVp9ImplFrameDropping, DifferentFrameratePerSpatialLayer) { + // Assign different frame rate to spatial layers and check that result frame + // rate is close to the assigned one. + const uint8_t num_spatial_layers = 3; + const float input_framerate_fps = 30.0; + const size_t video_duration_secs = 3; + const size_t num_input_frames = video_duration_secs * input_framerate_fps; + + codec_settings_.VP9()->numberOfSpatialLayers = num_spatial_layers; + codec_settings_.SetFrameDropEnabled(false); + codec_settings_.VP9()->flexibleMode = true; + + VideoBitrateAllocation bitrate_allocation; + for (uint8_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + // Frame rate increases from low to high layer. 
+ const uint32_t framerate_fps = 10 * (sl_idx + 1); + + codec_settings_.spatialLayers[sl_idx].width = codec_settings_.width; + codec_settings_.spatialLayers[sl_idx].height = codec_settings_.height; + codec_settings_.spatialLayers[sl_idx].maxFramerate = framerate_fps; + codec_settings_.spatialLayers[sl_idx].minBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].maxBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].targetBitrate = + codec_settings_.startBitrate; + codec_settings_.spatialLayers[sl_idx].active = true; + + bitrate_allocation.SetBitrate( + sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000); + } + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + VideoFrame input_frame = NextInputFrame(); + for (size_t frame_num = 0; frame_num < num_input_frames; ++frame_num) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr)); + const size_t timestamp = input_frame.timestamp() + + kVideoPayloadTypeFrequency / input_framerate_fps; + input_frame.set_timestamp(static_cast<uint32_t>(timestamp)); + } + + std::vector<EncodedImage> encoded_frames; + std::vector<CodecSpecificInfo> codec_infos; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_infos)); + + std::vector<size_t> num_encoded_frames(num_spatial_layers, 0); + for (EncodedImage& encoded_frame : encoded_frames) { + ++num_encoded_frames[encoded_frame.SpatialIndex().value_or(0)]; + } + + for (uint8_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + const float layer_target_framerate_fps = + codec_settings_.spatialLayers[sl_idx].maxFramerate; + const float layer_output_framerate_fps = + static_cast<float>(num_encoded_frames[sl_idx]) / video_duration_secs; + const float max_framerate_error_fps = layer_target_framerate_fps * 0.1f; + 
EXPECT_NEAR(layer_output_framerate_fps, layer_target_framerate_fps, + max_framerate_error_fps); + } +} + +class TestVp9ImplProfile2 : public TestVp9Impl { + protected: + void SetUp() override { + // Profile 2 might not be available on some platforms until + // https://bugs.chromium.org/p/webm/issues/detail?id=1544 is solved. + bool profile_2_is_supported = false; + for (const auto& codec : SupportedVP9Codecs()) { + if (ParseSdpForVP9Profile(codec.parameters) + .value_or(VP9Profile::kProfile0) == VP9Profile::kProfile2) { + profile_2_is_supported = true; + } + } + if (!profile_2_is_supported) + return; + + TestVp9Impl::SetUp(); + input_frame_generator_ = test::CreateSquareFrameGenerator( + codec_settings_.width, codec_settings_.height, + test::FrameGeneratorInterface::OutputType::kI010, + absl::optional<int>()); + } + + std::unique_ptr<VideoEncoder> CreateEncoder() override { + cricket::VideoCodec profile2_codec; + profile2_codec.SetParam(kVP9FmtpProfileId, + VP9ProfileToString(VP9Profile::kProfile2)); + return VP9Encoder::Create(profile2_codec); + } + + std::unique_ptr<VideoDecoder> CreateDecoder() override { + return VP9Decoder::Create(); + } +}; + +TEST_F(TestVp9ImplProfile2, EncodeDecode) { + if (!encoder_) + return; + + VideoFrame input_frame = NextInputFrame(); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + // First frame should be a key frame. + encoded_frame._frameType = VideoFrameType::kVideoFrameKey; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0)); + std::unique_ptr<VideoFrame> decoded_frame; + absl::optional<uint8_t> decoded_qp; + ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp)); + ASSERT_TRUE(decoded_frame); + + // TODO(emircan): Add PSNR for different color depths. 
+ EXPECT_GT(I420PSNR(*input_frame.video_frame_buffer()->ToI420(), + *decoded_frame->video_frame_buffer()->ToI420()), + 31); +} + +TEST_F(TestVp9Impl, EncodeWithDynamicRate) { + // Configured dynamic rate field trial and re-create the encoder. + test::ScopedFieldTrials field_trials( + "WebRTC-VideoRateControl/vp9_dynamic_rate:true/"); + SetUp(); + + // Set 300kbps target with 100% headroom. + VideoEncoder::RateControlParameters params; + params.bandwidth_allocation = DataRate::BitsPerSec(300000); + params.bitrate.SetBitrate(0, 0, params.bandwidth_allocation.bps()); + params.framerate_fps = 30.0; + + encoder_->SetRates(params); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); + + // Set no headroom and encode again. + params.bandwidth_allocation = DataRate::Zero(); + encoder_->SetRates(params); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); +} + +TEST_F(TestVp9Impl, ReenablingUpperLayerAfterKFWithInterlayerPredIsEnabled) { + const size_t num_spatial_layers = 2; + const int num_frames_to_encode = 10; + codec_settings_.VP9()->flexibleMode = true; + codec_settings_.SetFrameDropEnabled(false); + codec_settings_.VP9()->numberOfSpatialLayers = num_spatial_layers; + codec_settings_.VP9()->numberOfTemporalLayers = 1; + codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn; + // Force low frame-rate, so all layers are present for all frames. 
+ codec_settings_.maxFramerate = 5; + + ConfigureSvc(codec_settings_, num_spatial_layers); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + bitrate_allocation.SetBitrate( + sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000); + } + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + std::vector<EncodedImage> encoded_frames; + std::vector<CodecSpecificInfo> codec_specific; + + for (int i = 0; i < num_frames_to_encode; ++i) { + SetWaitForEncodedFramesThreshold(num_spatial_layers); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific)); + EXPECT_EQ(encoded_frames.size(), num_spatial_layers); + } + + // Disable the last layer. + bitrate_allocation.SetBitrate(num_spatial_layers - 1, 0, 0); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + for (int i = 0; i < num_frames_to_encode; ++i) { + SetWaitForEncodedFramesThreshold(num_spatial_layers - 1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific)); + EXPECT_EQ(encoded_frames.size(), num_spatial_layers - 1); + } + + std::vector<VideoFrameType> frame_types = {VideoFrameType::kVideoFrameKey}; + + // Force a key-frame with the last layer still disabled. 
+ SetWaitForEncodedFramesThreshold(num_spatial_layers - 1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(NextInputFrame(), &frame_types)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific)); + EXPECT_EQ(encoded_frames.size(), num_spatial_layers - 1); + ASSERT_EQ(encoded_frames[0]._frameType, VideoFrameType::kVideoFrameKey); + + // Re-enable the last layer. + bitrate_allocation.SetBitrate( + num_spatial_layers - 1, 0, + codec_settings_.spatialLayers[num_spatial_layers - 1].targetBitrate * + 1000); + encoder_->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings_.maxFramerate)); + + SetWaitForEncodedFramesThreshold(num_spatial_layers); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific)); + EXPECT_EQ(encoded_frames.size(), num_spatial_layers); + EXPECT_EQ(encoded_frames[0]._frameType, VideoFrameType::kVideoFrameDelta); +} + +TEST_F(TestVp9Impl, HandlesEmptyDecoderConfigure) { + std::unique_ptr<VideoDecoder> decoder = CreateDecoder(); + // Check that default settings are ok for decoder. + EXPECT_TRUE(decoder->Configure({})); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder->Release()); +} + +INSTANTIATE_TEST_SUITE_P( + TestVp9ImplForPixelFormat, + TestVp9ImplForPixelFormat, + ::testing::Values(test::FrameGeneratorInterface::OutputType::kI420, + test::FrameGeneratorInterface::OutputType::kNV12), + [](const auto& info) { + return test::FrameGeneratorInterface::OutputTypeToString(info.param); + }); + +// Helper function to populate an vpx_image_t instance with dimensions and +// potential image data. 
// The returned callable has the parameter list of the mocked `img_wrap` call
// and is installed as its default action by the tests below. It ignores the
// image pointer and stride alignment it is handed; instead it stores the
// format, `d_w`/`d_h` and data pointer on the `img` captured here and returns
// that same pointer. `img` is captured as a raw pointer, so it must stay alive
// for as long as the callable can be invoked.
std::function<vpx_image_t*(vpx_image_t*,
                           vpx_img_fmt_t,
                           unsigned int,
                           unsigned int,
                           unsigned int,
                           unsigned char* img_data)>
GetWrapImageFunction(vpx_image_t* img) {
  return [img](vpx_image_t* /*img*/, vpx_img_fmt_t fmt, unsigned int d_w,
               unsigned int d_h, unsigned int /*stride_align*/,
               unsigned char* img_data) {
    img->fmt = fmt;
    img->d_w = d_w;
    img->d_h = d_h;
    img->img_data = img_data;
    return img;
  };
}

// With the default codec settings (no ConfigureSvc() call), InitEncode() is
// expected to issue one global VP8E_SET_CPUUSED whose value matches the
// `base_layer_speed` entry for the configured resolution, and never to send
// VP9E_SET_SVC_PARAMETERS.
TEST(Vp9SpeedSettingsTrialsTest, NoSvcUsesGlobalSpeedFromTl0InLayerConfig) {
  // TL0 speed 8 at >= 480x270, 4 if below that.
  // (480 * 270 = 129600, the second `min_pixel_count` entry.)
  test::ExplicitKeyValueConfig trials(
      "WebRTC-VP9-PerformanceFlags/"
      "use_per_layer_speed,"
      "min_pixel_count:0|129600,"
      "base_layer_speed:4|8,"
      "high_layer_speed:5|9,"
      "deblock_mode:1|0/");

  // Keep a raw pointer for EXPECT calls and the like. Ownership is otherwise
  // passed on to LibvpxVp9Encoder.
  auto* const vpx = new NiceMock<MockLibvpxInterface>();
  LibvpxVp9Encoder encoder(cricket::VideoCodec(),
                           absl::WrapUnique<LibvpxInterface>(vpx), trials);

  VideoCodec settings = DefaultCodecSettings();
  settings.width = 480;
  settings.height = 270;
  vpx_image_t img;

  ON_CALL(*vpx, img_wrap).WillByDefault(GetWrapImageFunction(&img));
  // Hand the encoder an all-zero default encoder config.
  ON_CALL(*vpx, codec_enc_config_default)
      .WillByDefault(DoAll(WithArg<1>([](vpx_codec_enc_cfg_t* cfg) {
                             memset(cfg, 0, sizeof(vpx_codec_enc_cfg_t));
                           }),
                           Return(VPX_CODEC_OK)));
  // Catch-all for int-valued controls. gMock tries expectations newest-first,
  // so the specific VP8E_SET_CPUUSED expectations declared later win.
  EXPECT_CALL(*vpx, codec_control(_, _, An<int>())).Times(AnyNumber());

  // No SVC parameters may be sent at any point in this test.
  EXPECT_CALL(*vpx, codec_control(_, VP9E_SET_SVC_PARAMETERS,
                                  A<vpx_svc_extra_cfg_t*>()))
      .Times(0);

  // 480x270: expect the second `base_layer_speed` entry (8).
  EXPECT_CALL(*vpx, codec_control(_, VP8E_SET_CPUUSED, TypedEq<int>(8)));
  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder.InitEncode(&settings, kSettings));

  // Re-initialize at 352x216 (76032 pixels, below 129600): expect the first
  // `base_layer_speed` entry (4).
  encoder.Release();
  settings.width = 352;
  settings.height = 216;

  EXPECT_CALL(*vpx, codec_control(_, VP8E_SET_CPUUSED, TypedEq<int>(4)));
  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder.InitEncode(&settings, kSettings));
}

// With SVC configured (2 spatial, 3 temporal layers) but without the
// 'use_per_layer_speed' flag in the trial string, the per-layer speed and
// loopfilter arrays sent with VP9E_SET_SVC_PARAMETERS are expected to be all
// zeros, and a single global VP8E_SET_CPUUSED value is expected instead.
TEST(Vp9SpeedSettingsTrialsTest,
     NoPerLayerFlagUsesGlobalSpeedFromTopLayerInConfig) {
  // TL0 speed 8 at >= 480x270, 4 if below that.
  // (480 * 270 = 129600, the second `min_pixel_count` entry.)
  test::ExplicitKeyValueConfig trials(
      "WebRTC-VP9-PerformanceFlags/"
      "min_pixel_count:0|129600,"
      "base_layer_speed:4|8,"
      "high_layer_speed:5|9,"
      "deblock_mode:1|0/");

  // Keep a raw pointer for EXPECT calls and the like. Ownership is otherwise
  // passed on to LibvpxVp9Encoder.
  auto* const vpx = new NiceMock<MockLibvpxInterface>();
  LibvpxVp9Encoder encoder(cricket::VideoCodec(),
                           absl::WrapUnique<LibvpxInterface>(vpx), trials);

  VideoCodec settings = DefaultCodecSettings();
  settings.width = 480;
  settings.height = 270;
  ConfigureSvc(settings, 2, 3);
  vpx_image_t img;

  ON_CALL(*vpx, img_wrap).WillByDefault(GetWrapImageFunction(&img));
  // Hand the encoder an all-zero default encoder config.
  ON_CALL(*vpx, codec_enc_config_default)
      .WillByDefault(DoAll(WithArg<1>([](vpx_codec_enc_cfg_t* cfg) {
                             memset(cfg, 0, sizeof(vpx_codec_enc_cfg_t));
                           }),
                           Return(VPX_CODEC_OK)));
  // Catch-all for int-valued controls. gMock tries expectations newest-first,
  // so the specific VP8E_SET_CPUUSED expectations declared later win.
  EXPECT_CALL(*vpx, codec_control(_, _, An<int>())).Times(AnyNumber());

  // Speed settings not populated when 'use_per_layer_speed' flag is absent.
  // Expected exactly twice over the whole test, which calls InitEncode()
  // twice.
  EXPECT_CALL(*vpx,
              codec_control(
                  _, VP9E_SET_SVC_PARAMETERS,
                  SafeMatcherCast<vpx_svc_extra_cfg_t*>(AllOf(
                      Field(&vpx_svc_extra_cfg_t::speed_per_layer, Each(0)),
                      Field(&vpx_svc_extra_cfg_t::loopfilter_ctrl, Each(0))))))
      .Times(2);

  // Top layer is 480x270: expect the second `base_layer_speed` entry (8).
  EXPECT_CALL(*vpx, codec_control(_, VP8E_SET_CPUUSED, TypedEq<int>(8)));
  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder.InitEncode(&settings, kSettings));

  // Re-initialize with a 476x268 top layer (127568 pixels, below 129600) and
  // a half-size lower layer: expect the first `base_layer_speed` entry (4).
  encoder.Release();
  settings.width = 476;
  settings.height = 268;
  settings.spatialLayers[0].width = settings.width / 2;
  settings.spatialLayers[0].height = settings.height / 2;
  settings.spatialLayers[1].width = settings.width;
  settings.spatialLayers[1].height = settings.height;

  EXPECT_CALL(*vpx, codec_control(_, VP8E_SET_CPUUSED, TypedEq<int>(4)));
  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder.InitEncode(&settings, kSettings));
}

// With 'use_per_layer_speed' set and 3x3 SVC layers, checks the per-layer
// speed/loopfilter arrays sent at init, then feeds encoded packets through the
// captured libvpx output callback and checks that VP9E_SET_SVC_PARAMETERS is
// re-sent only when consecutive frames switch between the base temporal layer
// and a higher one.
TEST(Vp9SpeedSettingsTrialsTest, DefaultPerLayerFlagsWithSvc) {
  // Per-temporal and spatial layer speed settings:
  // SL0: TL0 = speed 5, TL1/TL2 = speed 8.
  // SL1/2: TL0 = speed 7, TL1/TL2 = speed 8.
  // Deblocking-mode per spatial layer:
  // SL0: mode 1, SL1/2: mode 0.
  test::ExplicitKeyValueConfig trials(
      "WebRTC-VP9-PerformanceFlags/"
      "use_per_layer_speed,"
      "min_pixel_count:0|129600,"
      "base_layer_speed:5|7,"
      "high_layer_speed:8|8,"
      "deblock_mode:1|0/");

  // Keep a raw pointer for EXPECT calls and the like. Ownership is otherwise
  // passed on to LibvpxVp9Encoder.
  auto* const vpx = new NiceMock<MockLibvpxInterface>();
  LibvpxVp9Encoder encoder(cricket::VideoCodec(),
                           absl::WrapUnique<LibvpxInterface>(vpx), trials);

  VideoCodec settings = DefaultCodecSettings();
  constexpr int kNumSpatialLayers = 3;
  constexpr int kNumTemporalLayers = 3;
  ConfigureSvc(settings, kNumSpatialLayers, kNumTemporalLayers);
  // Split each spatial layer's target bitrate (scaled by 1000 into bps)
  // evenly across its temporal layers.
  VideoBitrateAllocation bitrate_allocation;
  for (int si = 0; si < kNumSpatialLayers; ++si) {
    for (int ti = 0; ti < kNumTemporalLayers; ++ti) {
      uint32_t bitrate_bps =
          settings.spatialLayers[si].targetBitrate * 1'000 / kNumTemporalLayers;
      bitrate_allocation.SetBitrate(si, ti, bitrate_bps);
    }
  }
  vpx_image_t img;

  // Only the first three entries of each array are given; the remaining
  // entries up to VPX_MAX_LAYERS are zero-initialized.
  // Speed settings per spatial layer, for TL0.
  const int kBaseTlSpeed[VPX_MAX_LAYERS] = {5, 7, 7};
  // Speed settings per spatial layer, for TL1, TL2.
  const int kHighTlSpeed[VPX_MAX_LAYERS] = {8, 8, 8};
  // Loopfilter settings are handled within libvpx, so this array is valid for
  // both TL0 and higher.
  const int kLoopFilter[VPX_MAX_LAYERS] = {1, 0, 0};

  ON_CALL(*vpx, img_wrap).WillByDefault(GetWrapImageFunction(&img));
  // Zero the codec context on init and report success.
  ON_CALL(*vpx, codec_enc_init)
      .WillByDefault(WithArg<0>([](vpx_codec_ctx_t* ctx) {
        memset(ctx, 0, sizeof(*ctx));
        return VPX_CODEC_OK;
      }));
  // Hand the encoder an all-zero default encoder config.
  ON_CALL(*vpx, codec_enc_config_default)
      .WillByDefault(DoAll(WithArg<1>([](vpx_codec_enc_cfg_t* cfg) {
                             memset(cfg, 0, sizeof(vpx_codec_enc_cfg_t));
                           }),
                           Return(VPX_CODEC_OK)));
  // During setup (verified by VerifyAndClearExpectations() below) the SVC
  // parameters must carry the TL0 speeds and the loopfilter modes.
  EXPECT_CALL(
      *vpx, codec_control(_, VP9E_SET_SVC_PARAMETERS,
                          SafeMatcherCast<vpx_svc_extra_cfg_t*>(
                              AllOf(Field(&vpx_svc_extra_cfg_t::speed_per_layer,
                                          ElementsAreArray(kBaseTlSpeed)),
                                    Field(&vpx_svc_extra_cfg_t::loopfilter_ctrl,
                                          ElementsAreArray(kLoopFilter))))));

  // Capture the callback into the vp9 wrapper.
  vpx_codec_priv_output_cx_pkt_cb_pair_t callback_pointer = {};
  EXPECT_CALL(*vpx, codec_control(_, VP9E_REGISTER_CX_CALLBACK, A<void*>()))
      .WillOnce(WithArg<2>([&](void* cbp) {
        callback_pointer =
            *reinterpret_cast<vpx_codec_priv_output_cx_pkt_cb_pair_t*>(cbp);
        return VPX_CODEC_OK;
      }));

  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder.InitEncode(&settings, kSettings));

  encoder.SetRates(VideoEncoder::RateControlParameters(bitrate_allocation,
                                                       settings.maxFramerate));

  MockEncodedImageCallback callback;
  encoder.RegisterEncodeCompleteCallback(&callback);
  auto frame_generator = test::CreateSquareFrameGenerator(
      kWidth, kHeight, test::FrameGeneratorInterface::OutputType::kI420, 10);
  // Check the setup-phase expectations now and clear them, so the per-frame
  // expectations declared in the loop below start from a clean slate.
  Mock::VerifyAndClearExpectations(vpx);

  // One-byte dummy payload used for every packet handed to the callback.
  uint8_t data[1] = {0};
  vpx_codec_cx_pkt encoded_data = {};
  encoded_data.data.frame.buf = &data;
  encoded_data.data.frame.sz = 1;

  const auto kImageOk =
      EncodedImageCallback::Result(EncodedImageCallback::Result::OK);

  // Layer ids reported by the mocked VP9E_GET_SVC_LAYER_ID. The lambda
  // captures them by reference, so the updates in the loop below are seen.
  int spatial_id = 0;
  int temporal_id = 0;
  EXPECT_CALL(*vpx,
              codec_control(_, VP9E_SET_SVC_LAYER_ID, A<vpx_svc_layer_id_t*>()))
      .Times(AnyNumber());
  EXPECT_CALL(*vpx,
              codec_control(_, VP9E_GET_SVC_LAYER_ID, A<vpx_svc_layer_id_t*>()))
      .WillRepeatedly(WithArg<2>([&](vpx_svc_layer_id_t* layer_id) {
        layer_id->spatial_layer_id = spatial_id;
        layer_id->temporal_layer_id = temporal_id;
        return VPX_CODEC_OK;
      }));
  // Remember the reference frame config the encoder sets and hand the same
  // value back when it is queried.
  vpx_svc_ref_frame_config_t stored_refs = {};
  ON_CALL(*vpx, codec_control(_, VP9E_SET_SVC_REF_FRAME_CONFIG,
                              A<vpx_svc_ref_frame_config_t*>()))
      .WillByDefault(
          DoAll(SaveArgPointee<2>(&stored_refs), Return(VPX_CODEC_OK)));
  ON_CALL(*vpx, codec_control(_, VP9E_GET_SVC_REF_FRAME_CONFIG,
                              A<vpx_svc_ref_frame_config_t*>()))
      .WillByDefault(
          DoAll(SetArgPointee<2>(ByRef(stored_refs)), Return(VPX_CODEC_OK)));

  // First frame is keyframe.
  encoded_data.data.frame.flags = VPX_FRAME_IS_KEY;

  // Default 3-layer temporal pattern: 0-2-1-2, then repeat and do two more.
  for (int ti : {0, 2, 1, 2, 0, 2}) {
    EXPECT_CALL(*vpx, codec_encode).WillOnce(Return(VPX_CODEC_OK));
    // No update expected if flags haven't changed, and they change when we
    // move between base temporal layer and non-base temporal layer.
    // At this point `temporal_id` still holds the previous frame's layer.
    if ((ti > 0) != (temporal_id > 0)) {
      EXPECT_CALL(*vpx, codec_control(
                            _, VP9E_SET_SVC_PARAMETERS,
                            SafeMatcherCast<vpx_svc_extra_cfg_t*>(AllOf(
                                Field(&vpx_svc_extra_cfg_t::speed_per_layer,
                                      ElementsAreArray(ti == 0 ? kBaseTlSpeed
                                                               : kHighTlSpeed)),
                                Field(&vpx_svc_extra_cfg_t::loopfilter_ctrl,
                                      ElementsAreArray(kLoopFilter))))));
    } else {
      EXPECT_CALL(*vpx, codec_control(_, VP9E_SET_SVC_PARAMETERS,
                                      A<vpx_svc_extra_cfg_t*>()))
          .Times(0);
    }

    VideoFrame frame =
        VideoFrame::Builder()
            .set_video_frame_buffer(frame_generator->NextFrame().buffer)
            .build();
    // The return value of Encode() is not checked here.
    encoder.Encode(frame, nullptr);

    // The mocked codec_encode produces no output, so deliver one packet per
    // spatial layer by hand through the callback captured during init.
    temporal_id = ti;
    for (int si = 0; si < kNumSpatialLayers; ++si) {
      spatial_id = si;

      EXPECT_CALL(callback, OnEncodedImage).WillOnce(Return(kImageOk));
      callback_pointer.output_cx_pkt(&encoded_data, callback_pointer.user_priv);
    }

    encoded_data.data.frame.flags = 0;  // Following frames are delta frames.
  }
}

}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9.cc new file mode 100644 index 0000000000..222e57b6ba --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9.cc @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/video_coding/codecs/vp9/include/vp9.h"
+
+#include <memory>
+
+#include "absl/container/inlined_vector.h"
+#include "api/transport/field_trial_based_config.h"
+#include "api/video_codecs/scalability_mode.h"
+#include "api/video_codecs/sdp_video_format.h"
+#include "api/video_codecs/vp9_profile.h"
+#include "modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h"
+#include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h"
+#include "modules/video_coding/svc/create_scalability_structure.h"
+#include "rtc_base/checks.h"
+#include "vpx/vp8cx.h"
+#include "vpx/vp8dx.h"
+#include "vpx/vpx_codec.h"
+
+namespace webrtc {
+
+// Lists the VP9 SDP formats offered with the built-in libvpx codec: profile 0
+// always, plus profile 2 when libvpx reports high bit depth capability for
+// both its VP9 encoder and its VP9 decoder. When `add_scalability_modes` is
+// true, every format also carries each scalability mode for which a
+// scalability structure config exists. Returns an empty list when
+// RTC_ENABLE_VP9 is not defined.
+std::vector<SdpVideoFormat> SupportedVP9Codecs(bool add_scalability_modes) {
+#ifdef RTC_ENABLE_VP9
+  // Profile 2 might not be available on some platforms until
+  // https://bugs.chromium.org/p/webm/issues/detail?id=1544 is solved.
+  // Evaluated once; the decoder is only queried if the encoder qualifies.
+  static bool high_bit_depth_available = [] {
+    if ((vpx_codec_get_caps(vpx_codec_vp9_cx()) &
+         VPX_CODEC_CAP_HIGHBITDEPTH) == 0) {
+      return false;
+    }
+    return (vpx_codec_get_caps(vpx_codec_vp9_dx()) &
+            VPX_CODEC_CAP_HIGHBITDEPTH) != 0;
+  }();
+
+  absl::InlinedVector<ScalabilityMode, kScalabilityModeCount> modes;
+  if (add_scalability_modes) {
+    for (const auto mode : kAllScalabilityModes) {
+      if (!ScalabilityStructureConfig(mode).has_value()) {
+        continue;
+      }
+      modes.push_back(mode);
+    }
+  }
+
+  // Builds the format entry advertising `profile` with the modes gathered
+  // above.
+  const auto format_for = [&modes](VP9Profile profile) {
+    return SdpVideoFormat(
+        cricket::kVp9CodecName,
+        {{kVP9FmtpProfileId, VP9ProfileToString(profile)}}, modes);
+  };
+
+  std::vector<SdpVideoFormat> formats;
+  formats.push_back(format_for(VP9Profile::kProfile0));
+  if (high_bit_depth_available) {
+    formats.push_back(format_for(VP9Profile::kProfile2));
+  }
+  return formats;
+#else
+  return std::vector<SdpVideoFormat>();
+#endif
+}
+
+// Lists the VP9 SDP formats accepted by the built-in decoder: everything from
+// SupportedVP9Codecs() followed by profiles 1 and 3 (without scalability
+// modes). Returns an empty list when RTC_ENABLE_VP9 is not defined.
+std::vector<SdpVideoFormat> SupportedVP9DecoderCodecs() {
+#ifdef RTC_ENABLE_VP9
+  std::vector<SdpVideoFormat> formats = SupportedVP9Codecs();
+  // The WebRTC internal decoder supports VP9 profile 1 and 3. However, there's
+  // currently no way of sending VP9 profile 1 or 3 using the internal encoder.
+  // It would require extended support for I444, I422, and I440 buffers.
+  for (const VP9Profile decode_only_profile :
+       {VP9Profile::kProfile1, VP9Profile::kProfile3}) {
+    formats.push_back(SdpVideoFormat(
+        cricket::kVp9CodecName,
+        {{kVP9FmtpProfileId, VP9ProfileToString(decode_only_profile)}}));
+  }
+  return formats;
+#else
+  return std::vector<SdpVideoFormat>();
+#endif
+}
+
+// Creates a libvpx-backed VP9 encoder for a default-constructed codec
+// description. Without RTC_ENABLE_VP9 this must not be called; it DCHECKs and
+// returns nullptr.
+std::unique_ptr<VP9Encoder> VP9Encoder::Create() {
+#ifdef RTC_ENABLE_VP9
+  // Same construction as the codec-taking overload, with a default codec.
+  return Create(cricket::VideoCodec());
+#else
+  RTC_DCHECK_NOTREACHED();
+  return nullptr;
+#endif
+}
+
+// Creates a libvpx-backed VP9 encoder for `codec`, using a freshly created
+// libvpx interface and the field-trial based config. Without RTC_ENABLE_VP9
+// this must not be called; it DCHECKs and returns nullptr.
+std::unique_ptr<VP9Encoder> VP9Encoder::Create(
+    const cricket::VideoCodec& codec) {
+#ifdef RTC_ENABLE_VP9
+  return std::make_unique<LibvpxVp9Encoder>(
+      codec, LibvpxInterface::Create(), FieldTrialBasedConfig());
+#else
+  RTC_DCHECK_NOTREACHED();
+  return nullptr;
+#endif
+}
+
+// Returns true when a scalability structure config exists for
+// `scalability_mode`.
+bool VP9Encoder::SupportsScalabilityMode(ScalabilityMode scalability_mode) {
+  const auto structure_config = ScalabilityStructureConfig(scalability_mode);
+  return structure_config.has_value();
+}
+
+// Creates a libvpx-backed VP9 decoder. Without RTC_ENABLE_VP9 this must not
+// be called; it DCHECKs and returns nullptr.
+std::unique_ptr<VP9Decoder> VP9Decoder::Create() {
+#ifdef RTC_ENABLE_VP9
+  return std::make_unique<LibvpxVp9Decoder>();
+#else
+  RTC_DCHECK_NOTREACHED();
+  return nullptr;
+#endif
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.cc
new file mode 100644
index 0000000000..181550ce91
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.cc
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#ifdef RTC_ENABLE_VP9
+
+#include "modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h"
+
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "vpx/vpx_codec.h"
+#include "vpx/vpx_decoder.h"
+#include "vpx/vpx_frame_buffer.h"
+
+namespace webrtc {
+
+// Returns a pointer to the start of the buffer's storage.
+uint8_t* Vp9FrameBufferPool::Vp9FrameBuffer::GetData() {
+  return data_.data<uint8_t>();
+}
+
+// Returns the current size of the buffer's storage.
+size_t Vp9FrameBufferPool::Vp9FrameBuffer::GetDataSize() const {
+  return data_.size();
+}
+
+// Resizes the buffer's storage to `size`.
+void Vp9FrameBufferPool::Vp9FrameBuffer::SetSize(size_t size) {
+  data_.SetSize(size);
+}
+
+// Registers this pool's get/release callbacks with `vpx_codec_context`.
+// Returns false if libvpx rejects the registration, true otherwise.
+bool Vp9FrameBufferPool::InitializeVpxUsePool(
+    vpx_codec_ctx* vpx_codec_context) {
+  RTC_DCHECK(vpx_codec_context);
+  // Tell libvpx to use this pool.
+  if (vpx_codec_set_frame_buffer_functions(
+          // In which context to use these callback functions.
+          vpx_codec_context,
+          // Called by libvpx when it needs another frame buffer.
+          &Vp9FrameBufferPool::VpxGetFrameBuffer,
+          // Called by libvpx when it no longer uses a frame buffer.
+          &Vp9FrameBufferPool::VpxReleaseFrameBuffer,
+          // `this` will be passed as `user_priv` to VpxGetFrameBuffer.
+          this)) {
+    // Failed to configure libvpx to use Vp9FrameBufferPool.
+    return false;
+  }
+  return true;
+}
+
+// Returns a buffer resized to `min_size`: the first pooled buffer that only
+// the pool references, or a newly allocated one if every pooled buffer is in
+// use. Exceeding `max_num_buffers_` only logs a warning.
+rtc::scoped_refptr<Vp9FrameBufferPool::Vp9FrameBuffer>
+Vp9FrameBufferPool::GetFrameBuffer(size_t min_size) {
+  RTC_DCHECK_GT(min_size, 0);
+  rtc::scoped_refptr<Vp9FrameBuffer> available_buffer = nullptr;
+  {
+    MutexLock lock(&buffers_lock_);
+    // Do we have a buffer we can recycle?
+    for (const auto& buffer : allocated_buffers_) {
+      if (buffer->HasOneRef()) {
+        available_buffer = buffer;
+        break;
+      }
+    }
+    // Otherwise create one.
+    if (available_buffer == nullptr) {
+      available_buffer = new Vp9FrameBuffer();
+      allocated_buffers_.push_back(available_buffer);
+      if (allocated_buffers_.size() > max_num_buffers_) {
+        RTC_LOG(LS_WARNING)
+            << allocated_buffers_.size()
+            << " Vp9FrameBuffers have been "
+               "allocated by a Vp9FrameBufferPool (exceeding what is "
+               "considered reasonable, "
+            << max_num_buffers_ << ").";
+
+        // TODO(phoglund): this limit is being hit in tests since Oct 5 2016.
+        // See https://bugs.chromium.org/p/webrtc/issues/detail?id=6484.
+        // RTC_DCHECK_NOTREACHED();
+      }
+    }
+  }
+
+  // Resized outside the lock; the reference held in `available_buffer` keeps
+  // the buffer from being recycled by another caller in the meantime.
+  available_buffer->SetSize(min_size);
+  return available_buffer;
+}
+
+// Counts the pooled buffers that are referenced by anything besides the pool.
+int Vp9FrameBufferPool::GetNumBuffersInUse() const {
+  int num_buffers_in_use = 0;
+  MutexLock lock(&buffers_lock_);
+  for (const auto& buffer : allocated_buffers_) {
+    if (!buffer->HasOneRef())
+      ++num_buffers_in_use;
+  }
+  return num_buffers_in_use;
+}
+
+// Sets the pool limit to `max_number_of_buffers`. Returns false, changing
+// nothing, if more buffers than that are currently in use. Otherwise updates
+// the limit and drops idle buffers until the pool is within the new limit (or
+// no idle buffers remain), then returns true.
+bool Vp9FrameBufferPool::Resize(size_t max_number_of_buffers) {
+  MutexLock lock(&buffers_lock_);
+  size_t used_buffers_count = 0;
+  for (const auto& buffer : allocated_buffers_) {
+    // If the buffer is in use, the ref count will be >= 2, one from the list we
+    // are looping over and one from the application. If the ref count is 1,
+    // then the list we are looping over holds the only reference and it's safe
+    // to reuse.
+    if (!buffer->HasOneRef()) {
+      used_buffers_count++;
+    }
+  }
+  if (used_buffers_count > max_number_of_buffers) {
+    return false;
+  }
+  max_num_buffers_ = max_number_of_buffers;
+
+  // Only purge when the pool holds more buffers than the new limit. A plain
+  // `size() - max_num_buffers_` wraps around for unsigned operands when the
+  // pool is below the limit, which would discard every idle buffer.
+  const size_t allocated_count = allocated_buffers_.size();
+  size_t buffers_to_purge = allocated_count > max_num_buffers_
+                                ? allocated_count - max_num_buffers_
+                                : 0;
+  auto iter = allocated_buffers_.begin();
+  while (iter != allocated_buffers_.end() && buffers_to_purge > 0) {
+    if ((*iter)->HasOneRef()) {
+      iter = allocated_buffers_.erase(iter);
+      buffers_to_purge--;
+    } else {
+      ++iter;
+    }
+  }
+  return true;
+}
+
+// Drops the pool's references to all buffers.
+void Vp9FrameBufferPool::ClearPool() {
+  MutexLock lock(&buffers_lock_);
+  allocated_buffers_.clear();
+}
+
+// libvpx "get frame buffer" callback: fills `fb` with a pool buffer of
+// `min_size` bytes. Returns 0 on success, -1 if a fuzzing build rejects the
+// requested size.
+// static
+int32_t Vp9FrameBufferPool::VpxGetFrameBuffer(void* user_priv,
+                                              size_t min_size,
+                                              vpx_codec_frame_buffer* fb) {
+  RTC_DCHECK(user_priv);
+  RTC_DCHECK(fb);
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  // Limit size of 8k YUV highdef frame
+  size_t size_limit = 7680 * 4320 * 3 / 2 * 2;
+  if (min_size > size_limit)
+    return -1;
+#endif
+
+  Vp9FrameBufferPool* pool = static_cast<Vp9FrameBufferPool*>(user_priv);
+
+  rtc::scoped_refptr<Vp9FrameBuffer> buffer = pool->GetFrameBuffer(min_size);
+  fb->data = buffer->GetData();
+  fb->size = buffer->GetDataSize();
+  // Store Vp9FrameBuffer* in `priv` for use in VpxReleaseFrameBuffer.
+  // This also makes vpx_codec_get_frame return images with their `fb_priv` set
+  // to `buffer` which is important for external reference counting.
+  // Release from refptr so that the buffer's `ref_count_` remains 1 when
+  // `buffer` goes out of scope.
+  fb->priv = static_cast<void*>(buffer.release());
+  return 0;
+}
+
+// libvpx "release frame buffer" callback: drops the reference stored in
+// `fb->priv` by VpxGetFrameBuffer, if any. Always returns 0.
+// static
+int32_t Vp9FrameBufferPool::VpxReleaseFrameBuffer(void* user_priv,
+                                                  vpx_codec_frame_buffer* fb) {
+  RTC_DCHECK(user_priv);
+  RTC_DCHECK(fb);
+  Vp9FrameBuffer* buffer = static_cast<Vp9FrameBuffer*>(fb->priv);
+  if (buffer != nullptr) {
+    buffer->Release();
+    // When libvpx fails to decode and you continue to try to decode (and fail)
+    // libvpx can for some reason try to release the same buffer multiple times.
+    // Setting `priv` to null protects against trying to Release multiple times.
+    fb->priv = nullptr;
+  }
+  return 0;
+}
+
+}  // namespace webrtc
+
+#endif  // RTC_ENABLE_VP9
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h
new file mode 100644
index 0000000000..f46f1b7ea2
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#ifndef MODULES_VIDEO_CODING_CODECS_VP9_VP9_FRAME_BUFFER_POOL_H_
+#define MODULES_VIDEO_CODING_CODECS_VP9_VP9_FRAME_BUFFER_POOL_H_
+
+#ifdef RTC_ENABLE_VP9
+
+#include <vector>
+
+#include "api/ref_counted_base.h"
+#include "api/scoped_refptr.h"
+#include "rtc_base/buffer.h"
+#include "rtc_base/synchronization/mutex.h"
+
+struct vpx_codec_ctx;
+struct vpx_codec_frame_buffer;
+
+namespace webrtc {
+
+// If more buffers than this are allocated we print warnings and crash if in
+// debug mode.
VP9 is defined to have 8 reference buffers, of which 3 can be
+// referenced by any frame, see
+// https://tools.ietf.org/html/draft-grange-vp9-bitstream-00#section-2.2.2.
+// Assuming VP9 holds on to at most 8 buffers, any more buffers than that
+// would have to be by application code. Decoded frames should not be
+// referenced for longer than necessary. If we allow ~60 additional buffers
+// then the application has ~1 second to e.g. render each frame of a 60 fps
+// video.
+constexpr size_t kDefaultMaxNumBuffers = 68;
+
+// This memory pool is used to serve buffers to libvpx for decoding purposes in
+// VP9, which is set up in InitializeVpxUsePool. After the initialization any
+// time libvpx wants to decode a frame it will use buffers provided and released
+// through VpxGetFrameBuffer and VpxReleaseFrameBuffer.
+// The benefit of owning the pool that libvpx relies on for decoding is that the
+// decoded frames returned by libvpx (from vpx_codec_get_frame) use parts of our
+// buffers for the decoded image data. By retaining ownership of this buffer
+// using scoped_refptr, the image buffer can be reused by VideoFrames and no
+// frame copy has to occur during decoding and frame delivery.
+//
+// Pseudo example usage case:
+//    Vp9FrameBufferPool pool;
+//    pool.InitializeVpxUsePool(decoder_ctx);
+//    ...
+//
+//    // During decoding, libvpx will get and release buffers from the pool.
+//    vpx_codec_decode(decoder_ctx, ...);
+//
+//    vpx_image_t* img = vpx_codec_get_frame(decoder_ctx, &iter);
+//    // Important to use scoped_refptr to protect it against being recycled by
+//    // the pool.
+//    scoped_refptr<Vp9FrameBuffer> img_buffer = (Vp9FrameBuffer*)img->fb_priv;
+//    ...
+//
+//    // Destroying the codec will make libvpx release any buffers it was using.
+//    vpx_codec_destroy(decoder_ctx);
+class Vp9FrameBufferPool {
+ public:
+  // A single reference-counted, resizable byte buffer handed out by the pool.
+  class Vp9FrameBuffer final
+      : public rtc::RefCountedNonVirtual<Vp9FrameBuffer> {
+   public:
+    uint8_t* GetData();
+    size_t GetDataSize() const;
+    void SetSize(size_t size);
+
+    // Exposed so the pool can tell whether it holds the only reference.
+    using rtc::RefCountedNonVirtual<Vp9FrameBuffer>::HasOneRef;
+
+   private:
+    // Data as an easily resizable buffer.
+    rtc::Buffer data_;
+  };
+
+  // Configures libvpx to, in the specified context, use this memory pool for
+  // buffers used to decompress frames. This is only supported for VP9.
+  // Returns false if libvpx could not be configured to use the pool.
+  bool InitializeVpxUsePool(vpx_codec_ctx* vpx_codec_context);
+
+  // Gets a frame buffer of at least `min_size`, recycling an available one or
+  // creating a new one. When no longer referenced from the outside the buffer
+  // becomes recyclable.
+  rtc::scoped_refptr<Vp9FrameBuffer> GetFrameBuffer(size_t min_size);
+  // Gets the number of buffers currently in use (not ready to be recycled).
+  int GetNumBuffersInUse() const;
+  // Changes the max amount of buffers in the pool to the new value.
+  // Returns true if the change was successful and false if the number of
+  // buffers currently in use is bigger than the new value. On success, idle
+  // buffers may be removed from the pool.
+  bool Resize(size_t max_number_of_buffers);
+  // Releases allocated buffers, deleting available buffers. Buffers in use are
+  // not deleted until they are no longer referenced.
+  void ClearPool();
+
+  // InitializeVpxUsePool configures libvpx to call this function when it needs
+  // a new frame buffer. Parameters:
+  // `user_priv` Private data passed to libvpx, InitializeVpxUsePool sets it up
+  //             to be a pointer to the pool.
+  // `min_size`  Minimum size needed by libvpx (to decompress a frame).
+  // `fb`        Pointer to the libvpx frame buffer object, this is updated to
+  //             use the pool's buffer.
+  // Returns 0 on success. Returns < 0 on failure.
+  static int32_t VpxGetFrameBuffer(void* user_priv,
+                                   size_t min_size,
+                                   vpx_codec_frame_buffer* fb);
+
+  // InitializeVpxUsePool configures libvpx to call this function when it has
+  // finished using one of the pool's frame buffers. Parameters:
+  // `user_priv` Private data passed to libvpx, InitializeVpxUsePool sets it up
+  //             to be a pointer to the pool.
+  // `fb`        Pointer to the libvpx frame buffer object, its `priv` will be
+  //             a pointer to one of the pool's Vp9FrameBuffer.
+  static int32_t VpxReleaseFrameBuffer(void* user_priv,
+                                       vpx_codec_frame_buffer* fb);
+
+ private:
+  // Protects `allocated_buffers_` and `max_num_buffers_`.
+  mutable Mutex buffers_lock_;
+  // All buffers, in use or ready to be recycled.
+  std::vector<rtc::scoped_refptr<Vp9FrameBuffer>> allocated_buffers_
+      RTC_GUARDED_BY(buffers_lock_);
+  // Pool size limit. GetFrameBuffer() and Resize() are its only users and
+  // both access it while holding `buffers_lock_`, hence the annotation.
+  size_t max_num_buffers_ RTC_GUARDED_BY(buffers_lock_) = kDefaultMaxNumBuffers;
+};
+
+}  // namespace webrtc
+
+#endif  // RTC_ENABLE_VP9
+
+#endif  // MODULES_VIDEO_CODING_CODECS_VP9_VP9_FRAME_BUFFER_POOL_H_ |