diff options
Diffstat (limited to 'third_party/libwebrtc/modules/video_coding/svc')
30 files changed, 6511 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/video_coding/svc/BUILD.gn b/third_party/libwebrtc/modules/video_coding/svc/BUILD.gn new file mode 100644 index 0000000000..b8ce91d99a --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/BUILD.gn @@ -0,0 +1,135 @@ +# Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../webrtc.gni") + +rtc_source_set("scalability_mode_util") { + sources = [ + "scalability_mode_util.cc", + "scalability_mode_util.h", + ] + deps = [ + "../../../api/video_codecs:scalability_mode", + "../../../api/video_codecs:video_codecs_api", + "../../../rtc_base:checks", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_source_set("scalable_video_controller") { + sources = [ + "scalable_video_controller.h", + "scalable_video_controller_no_layering.cc", + "scalable_video_controller_no_layering.h", + ] + deps = [ + "../../../api/transport/rtp:dependency_descriptor", + "../../../api/video:video_bitrate_allocation", + "../../../common_video/generic_frame_descriptor", + "../../../rtc_base:checks", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/container:inlined_vector", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_source_set("scalability_structures") { + sources = [ + "create_scalability_structure.cc", + "create_scalability_structure.h", + "scalability_structure_full_svc.cc", + "scalability_structure_full_svc.h", + "scalability_structure_key_svc.cc", + "scalability_structure_key_svc.h", + "scalability_structure_l2t2_key_shift.cc", + "scalability_structure_l2t2_key_shift.h", + 
"scalability_structure_simulcast.cc", + "scalability_structure_simulcast.h", + ] + deps = [ + ":scalable_video_controller", + "../../../api/transport/rtp:dependency_descriptor", + "../../../api/video:video_bitrate_allocation", + "../../../api/video_codecs:scalability_mode", + "../../../common_video/generic_frame_descriptor", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/base:core_headers", + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_source_set("svc_rate_allocator") { + sources = [ + "svc_rate_allocator.cc", + "svc_rate_allocator.h", + ] + deps = [ + ":scalability_structures", + "../../../api/video:video_bitrate_allocation", + "../../../api/video:video_bitrate_allocator", + "../../../api/video:video_codec_constants", + "../../../api/video_codecs:video_codecs_api", + "../../../rtc_base:checks", + "../../../rtc_base/experiments:stable_target_rate_experiment", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/container:inlined_vector" ] +} + +if (rtc_include_tests) { + rtc_source_set("scalability_structure_tests") { + testonly = true + sources = [ + "scalability_mode_util_unittest.cc", + "scalability_structure_full_svc_unittest.cc", + "scalability_structure_key_svc_unittest.cc", + "scalability_structure_l2t2_key_shift_unittest.cc", + "scalability_structure_test_helpers.cc", + "scalability_structure_test_helpers.h", + "scalability_structure_unittest.cc", + ] + deps = [ + ":scalability_mode_util", + ":scalability_structures", + ":scalable_video_controller", + "..:chain_diff_calculator", + "..:frame_dependencies_calculator", + "../../../api:array_view", + "../../../api/transport/rtp:dependency_descriptor", + "../../../api/video:video_bitrate_allocation", + "../../../api/video:video_frame_type", + "../../../api/video_codecs:scalability_mode", + "../../../common_video/generic_frame_descriptor", + "../../../rtc_base:stringutils", + 
"../../../test:test_support", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + } + + rtc_source_set("svc_rate_allocator_tests") { + testonly = true + sources = [ "svc_rate_allocator_unittest.cc" ] + deps = [ + ":svc_rate_allocator", + "..:webrtc_vp9_helpers", + "../../../rtc_base:checks", + "../../../test:field_trial", + "../../../test:test_support", + ] + } +} diff --git a/third_party/libwebrtc/modules/video_coding/svc/create_scalability_structure.cc b/third_party/libwebrtc/modules/video_coding/svc/create_scalability_structure.cc new file mode 100644 index 0000000000..fbcd27b139 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/create_scalability_structure.cc @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/video_coding/svc/create_scalability_structure.h" + +#include <memory> + +#include "api/video_codecs/scalability_mode.h" +#include "modules/video_coding/svc/scalability_structure_full_svc.h" +#include "modules/video_coding/svc/scalability_structure_key_svc.h" +#include "modules/video_coding/svc/scalability_structure_l2t2_key_shift.h" +#include "modules/video_coding/svc/scalability_structure_simulcast.h" +#include "modules/video_coding/svc/scalable_video_controller.h" +#include "modules/video_coding/svc/scalable_video_controller_no_layering.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +struct NamedStructureFactory { + ScalabilityMode name; + // Use function pointer to make NamedStructureFactory trivially destructible. 
+ std::unique_ptr<ScalableVideoController> (*factory)(); + ScalableVideoController::StreamLayersConfig config; +}; + +// Wrap std::make_unique function to have correct return type. +template <typename T> +std::unique_ptr<ScalableVideoController> Create() { + return std::make_unique<T>(); +} + +template <typename T> +std::unique_ptr<ScalableVideoController> CreateH() { + // 1.5:1 scaling, see https://w3c.github.io/webrtc-svc/#scalabilitymodes* + typename T::ScalingFactor factor; + factor.num = 2; + factor.den = 3; + return std::make_unique<T>(factor); +} + +constexpr ScalableVideoController::StreamLayersConfig kConfigL1T1 = { + /*num_spatial_layers=*/1, /*num_temporal_layers=*/1, + /*uses_reference_scaling=*/false}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigL1T2 = { + /*num_spatial_layers=*/1, /*num_temporal_layers=*/2, + /*uses_reference_scaling=*/false}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigL1T3 = { + /*num_spatial_layers=*/1, /*num_temporal_layers=*/3, + /*uses_reference_scaling=*/false}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigL2T1 = { + /*num_spatial_layers=*/2, + /*num_temporal_layers=*/1, + /*uses_reference_scaling=*/true, + {1, 1}, + {2, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigL2T1h = { + /*num_spatial_layers=*/2, + /*num_temporal_layers=*/1, + /*uses_reference_scaling=*/true, + {2, 1}, + {3, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigL2T2 = { + /*num_spatial_layers=*/2, + /*num_temporal_layers=*/2, + /*uses_reference_scaling=*/true, + {1, 1}, + {2, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigL2T2h = { + /*num_spatial_layers=*/2, + /*num_temporal_layers=*/2, + /*uses_reference_scaling=*/true, + {2, 1}, + {3, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigL2T3 = { + /*num_spatial_layers=*/2, + /*num_temporal_layers=*/3, + /*uses_reference_scaling=*/true, + {1, 1}, + {2, 1}}; + +constexpr 
ScalableVideoController::StreamLayersConfig kConfigL2T3h = { + /*num_spatial_layers=*/2, + /*num_temporal_layers=*/3, + /*uses_reference_scaling=*/true, + {2, 1}, + {3, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigL3T1 = { + /*num_spatial_layers=*/3, + /*num_temporal_layers=*/1, + /*uses_reference_scaling=*/true, + {1, 1, 1}, + {4, 2, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigL3T1h = { + /*num_spatial_layers=*/3, + /*num_temporal_layers=*/1, + /*uses_reference_scaling=*/true, + {4, 2, 1}, + {9, 3, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigL3T2 = { + /*num_spatial_layers=*/3, + /*num_temporal_layers=*/2, + /*uses_reference_scaling=*/true, + {1, 1, 1}, + {4, 2, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigL3T2h = { + /*num_spatial_layers=*/3, + /*num_temporal_layers=*/2, + /*uses_reference_scaling=*/true, + {4, 2, 1}, + {9, 3, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigL3T3 = { + /*num_spatial_layers=*/3, + /*num_temporal_layers=*/3, + /*uses_reference_scaling=*/true, + {1, 1, 1}, + {4, 2, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigL3T3h = { + /*num_spatial_layers=*/3, + /*num_temporal_layers=*/3, + /*uses_reference_scaling=*/true, + {4, 2, 1}, + {9, 3, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigS2T1 = { + /*num_spatial_layers=*/2, + /*num_temporal_layers=*/1, + /*uses_reference_scaling=*/false, + {1, 1}, + {2, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigS2T1h = { + /*num_spatial_layers=*/2, + /*num_temporal_layers=*/1, + /*uses_reference_scaling=*/false, + {2, 1}, + {3, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigS2T2 = { + /*num_spatial_layers=*/2, + /*num_temporal_layers=*/2, + /*uses_reference_scaling=*/false, + {1, 1}, + {2, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigS2T2h = { + /*num_spatial_layers=*/2, + 
/*num_temporal_layers=*/2, + /*uses_reference_scaling=*/false, + {2, 1}, + {3, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigS2T3 = { + /*num_spatial_layers=*/2, + /*num_temporal_layers=*/3, + /*uses_reference_scaling=*/false, + {1, 1}, + {2, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigS2T3h = { + /*num_spatial_layers=*/2, + /*num_temporal_layers=*/3, + /*uses_reference_scaling=*/false, + {2, 1}, + {3, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigS3T1 = { + /*num_spatial_layers=*/3, + /*num_temporal_layers=*/1, + /*uses_reference_scaling=*/false, + {1, 1, 1}, + {4, 2, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigS3T1h = { + /*num_spatial_layers=*/3, + /*num_temporal_layers=*/1, + /*uses_reference_scaling=*/false, + {4, 2, 1}, + {9, 3, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigS3T2 = { + /*num_spatial_layers=*/3, + /*num_temporal_layers=*/2, + /*uses_reference_scaling=*/false, + {1, 1, 1}, + {4, 2, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigS3T2h = { + /*num_spatial_layers=*/3, + /*num_temporal_layers=*/2, + /*uses_reference_scaling=*/false, + {4, 2, 1}, + {9, 3, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigS3T3 = { + /*num_spatial_layers=*/3, + /*num_temporal_layers=*/3, + /*uses_reference_scaling=*/false, + {1, 1, 1}, + {4, 2, 1}}; + +constexpr ScalableVideoController::StreamLayersConfig kConfigS3T3h = { + /*num_spatial_layers=*/3, + /*num_temporal_layers=*/3, + /*uses_reference_scaling=*/false, + {4, 2, 1}, + {9, 3, 1}}; + +constexpr NamedStructureFactory kFactories[] = { + {ScalabilityMode::kL1T1, Create<ScalableVideoControllerNoLayering>, + kConfigL1T1}, + {ScalabilityMode::kL1T2, Create<ScalabilityStructureL1T2>, kConfigL1T2}, + {ScalabilityMode::kL1T3, Create<ScalabilityStructureL1T3>, kConfigL1T3}, + {ScalabilityMode::kL2T1, Create<ScalabilityStructureL2T1>, kConfigL2T1}, + 
{ScalabilityMode::kL2T1h, CreateH<ScalabilityStructureL2T1>, kConfigL2T1h}, + {ScalabilityMode::kL2T1_KEY, Create<ScalabilityStructureL2T1Key>, + kConfigL2T1}, + {ScalabilityMode::kL2T2, Create<ScalabilityStructureL2T2>, kConfigL2T2}, + {ScalabilityMode::kL2T2h, CreateH<ScalabilityStructureL2T2>, kConfigL2T2h}, + {ScalabilityMode::kL2T2_KEY, Create<ScalabilityStructureL2T2Key>, + kConfigL2T2}, + {ScalabilityMode::kL2T2_KEY_SHIFT, Create<ScalabilityStructureL2T2KeyShift>, + kConfigL2T2}, + {ScalabilityMode::kL2T3, Create<ScalabilityStructureL2T3>, kConfigL2T3}, + {ScalabilityMode::kL2T3h, CreateH<ScalabilityStructureL2T3>, kConfigL2T3h}, + {ScalabilityMode::kL2T3_KEY, Create<ScalabilityStructureL2T3Key>, + kConfigL2T3}, + {ScalabilityMode::kL3T1, Create<ScalabilityStructureL3T1>, kConfigL3T1}, + {ScalabilityMode::kL3T1h, CreateH<ScalabilityStructureL3T1>, kConfigL3T1h}, + {ScalabilityMode::kL3T1_KEY, Create<ScalabilityStructureL3T1Key>, + kConfigL3T1}, + {ScalabilityMode::kL3T2, Create<ScalabilityStructureL3T2>, kConfigL3T2}, + {ScalabilityMode::kL3T2h, CreateH<ScalabilityStructureL3T2>, kConfigL3T2h}, + {ScalabilityMode::kL3T2_KEY, Create<ScalabilityStructureL3T2Key>, + kConfigL3T2}, + {ScalabilityMode::kL3T3, Create<ScalabilityStructureL3T3>, kConfigL3T3}, + {ScalabilityMode::kL3T3h, CreateH<ScalabilityStructureL3T3>, kConfigL3T3h}, + {ScalabilityMode::kL3T3_KEY, Create<ScalabilityStructureL3T3Key>, + kConfigL3T3}, + {ScalabilityMode::kS2T1, Create<ScalabilityStructureS2T1>, kConfigS2T1}, + {ScalabilityMode::kS2T1h, CreateH<ScalabilityStructureS2T1>, kConfigS2T1h}, + {ScalabilityMode::kS2T2, Create<ScalabilityStructureS2T2>, kConfigS2T2}, + {ScalabilityMode::kS2T2h, CreateH<ScalabilityStructureS2T2>, kConfigS2T2h}, + {ScalabilityMode::kS2T3, Create<ScalabilityStructureS2T3>, kConfigS2T3}, + {ScalabilityMode::kS2T3h, CreateH<ScalabilityStructureS2T3>, kConfigS2T3h}, + {ScalabilityMode::kS3T1, Create<ScalabilityStructureS3T1>, kConfigS3T1}, + 
{ScalabilityMode::kS3T1h, CreateH<ScalabilityStructureS3T1>, kConfigS3T1h}, + {ScalabilityMode::kS3T2, Create<ScalabilityStructureS3T2>, kConfigS3T2}, + {ScalabilityMode::kS3T2h, CreateH<ScalabilityStructureS3T2>, kConfigS3T2h}, + {ScalabilityMode::kS3T3, Create<ScalabilityStructureS3T3>, kConfigS3T3}, + {ScalabilityMode::kS3T3h, CreateH<ScalabilityStructureS3T3>, kConfigS3T3h}, +}; + +} // namespace + +std::unique_ptr<ScalableVideoController> CreateScalabilityStructure( + ScalabilityMode name) { + for (const auto& entry : kFactories) { + if (entry.name == name) { + return entry.factory(); + } + } + return nullptr; +} + +absl::optional<ScalableVideoController::StreamLayersConfig> +ScalabilityStructureConfig(ScalabilityMode name) { + for (const auto& entry : kFactories) { + if (entry.name == name) { + return entry.config; + } + } + return absl::nullopt; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/svc/create_scalability_structure.h b/third_party/libwebrtc/modules/video_coding/svc/create_scalability_structure.h new file mode 100644 index 0000000000..3b67443693 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/create_scalability_structure.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#ifndef MODULES_VIDEO_CODING_SVC_CREATE_SCALABILITY_STRUCTURE_H_ +#define MODULES_VIDEO_CODING_SVC_CREATE_SCALABILITY_STRUCTURE_H_ + +#include <memory> +#include <vector> + +#include "absl/types/optional.h" +#include "api/video_codecs/scalability_mode.h" +#include "modules/video_coding/svc/scalable_video_controller.h" + +namespace webrtc { + +// Creates a structure by name according to +// https://w3c.github.io/webrtc-svc/#scalabilitymodes* +// Returns nullptr for unknown name. +std::unique_ptr<ScalableVideoController> CreateScalabilityStructure( + ScalabilityMode name); + +// Returns description of the scalability structure identified by 'name'. +// Returns nullopt for unknown name. +absl::optional<ScalableVideoController::StreamLayersConfig> +ScalabilityStructureConfig(ScalabilityMode name); + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_SVC_CREATE_SCALABILITY_STRUCTURE_H_ diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util.cc b/third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util.cc new file mode 100644 index 0000000000..35d66df203 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util.cc @@ -0,0 +1,390 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/svc/scalability_mode_util.h" + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "api/video_codecs/scalability_mode.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +absl::optional<ScalabilityMode> ScalabilityModeFromString( + absl::string_view mode_string) { + if (mode_string == "L1T1") + return ScalabilityMode::kL1T1; + if (mode_string == "L1T2") + return ScalabilityMode::kL1T2; + if (mode_string == "L1T3") + return ScalabilityMode::kL1T3; + + if (mode_string == "L2T1") + return ScalabilityMode::kL2T1; + if (mode_string == "L2T1h") + return ScalabilityMode::kL2T1h; + if (mode_string == "L2T1_KEY") + return ScalabilityMode::kL2T1_KEY; + + if (mode_string == "L2T2") + return ScalabilityMode::kL2T2; + if (mode_string == "L2T2h") + return ScalabilityMode::kL2T2h; + if (mode_string == "L2T2_KEY") + return ScalabilityMode::kL2T2_KEY; + if (mode_string == "L2T2_KEY_SHIFT") + return ScalabilityMode::kL2T2_KEY_SHIFT; + if (mode_string == "L2T3") + return ScalabilityMode::kL2T3; + if (mode_string == "L2T3h") + return ScalabilityMode::kL2T3h; + if (mode_string == "L2T3_KEY") + return ScalabilityMode::kL2T3_KEY; + + if (mode_string == "L3T1") + return ScalabilityMode::kL3T1; + if (mode_string == "L3T1h") + return ScalabilityMode::kL3T1h; + if (mode_string == "L3T1_KEY") + return ScalabilityMode::kL3T1_KEY; + + if (mode_string == "L3T2") + return ScalabilityMode::kL3T2; + if (mode_string == "L3T2h") + return ScalabilityMode::kL3T2h; + if (mode_string == "L3T2_KEY") + return ScalabilityMode::kL3T2_KEY; + + if (mode_string == "L3T3") + return ScalabilityMode::kL3T3; + if (mode_string == "L3T3h") + return ScalabilityMode::kL3T3h; + if (mode_string == "L3T3_KEY") + return ScalabilityMode::kL3T3_KEY; + + if (mode_string == "S2T1") + return ScalabilityMode::kS2T1; + if (mode_string == "S2T1h") + return ScalabilityMode::kS2T1h; + if (mode_string == "S2T2") + return ScalabilityMode::kS2T2; + if 
(mode_string == "S2T2h") + return ScalabilityMode::kS2T2h; + if (mode_string == "S2T3") + return ScalabilityMode::kS2T3; + if (mode_string == "S2T3h") + return ScalabilityMode::kS2T3h; + if (mode_string == "S3T1") + return ScalabilityMode::kS3T1; + if (mode_string == "S3T1h") + return ScalabilityMode::kS3T1h; + if (mode_string == "S3T2") + return ScalabilityMode::kS3T2; + if (mode_string == "S3T2h") + return ScalabilityMode::kS3T2h; + if (mode_string == "S3T3") + return ScalabilityMode::kS3T3; + if (mode_string == "S3T3h") + return ScalabilityMode::kS3T3h; + + return absl::nullopt; +} + +InterLayerPredMode ScalabilityModeToInterLayerPredMode( + ScalabilityMode scalability_mode) { + switch (scalability_mode) { + case ScalabilityMode::kL1T1: + case ScalabilityMode::kL1T2: + case ScalabilityMode::kL1T3: + case ScalabilityMode::kL2T1: + case ScalabilityMode::kL2T1h: + return InterLayerPredMode::kOn; + case ScalabilityMode::kL2T1_KEY: + return InterLayerPredMode::kOnKeyPic; + case ScalabilityMode::kL2T2: + case ScalabilityMode::kL2T2h: + return InterLayerPredMode::kOn; + case ScalabilityMode::kL2T2_KEY: + case ScalabilityMode::kL2T2_KEY_SHIFT: + return InterLayerPredMode::kOnKeyPic; + case ScalabilityMode::kL2T3: + case ScalabilityMode::kL2T3h: + return InterLayerPredMode::kOn; + case ScalabilityMode::kL2T3_KEY: + return InterLayerPredMode::kOnKeyPic; + case ScalabilityMode::kL3T1: + case ScalabilityMode::kL3T1h: + return InterLayerPredMode::kOn; + case ScalabilityMode::kL3T1_KEY: + return InterLayerPredMode::kOnKeyPic; + case ScalabilityMode::kL3T2: + case ScalabilityMode::kL3T2h: + return InterLayerPredMode::kOn; + case ScalabilityMode::kL3T2_KEY: + return InterLayerPredMode::kOnKeyPic; + case ScalabilityMode::kL3T3: + case ScalabilityMode::kL3T3h: + return InterLayerPredMode::kOn; + case ScalabilityMode::kL3T3_KEY: + return InterLayerPredMode::kOnKeyPic; + case ScalabilityMode::kS2T1: + case ScalabilityMode::kS2T1h: + case ScalabilityMode::kS2T2: + case 
ScalabilityMode::kS2T2h: + case ScalabilityMode::kS2T3: + case ScalabilityMode::kS2T3h: + case ScalabilityMode::kS3T1: + case ScalabilityMode::kS3T1h: + case ScalabilityMode::kS3T2: + case ScalabilityMode::kS3T2h: + case ScalabilityMode::kS3T3: + case ScalabilityMode::kS3T3h: + return InterLayerPredMode::kOff; + } + RTC_CHECK_NOTREACHED(); +} + +int ScalabilityModeToNumSpatialLayers(ScalabilityMode scalability_mode) { + switch (scalability_mode) { + case ScalabilityMode::kL1T1: + case ScalabilityMode::kL1T2: + case ScalabilityMode::kL1T3: + return 1; + case ScalabilityMode::kL2T1: + case ScalabilityMode::kL2T1h: + case ScalabilityMode::kL2T1_KEY: + case ScalabilityMode::kL2T2: + case ScalabilityMode::kL2T2h: + case ScalabilityMode::kL2T2_KEY: + case ScalabilityMode::kL2T2_KEY_SHIFT: + case ScalabilityMode::kL2T3: + case ScalabilityMode::kL2T3h: + case ScalabilityMode::kL2T3_KEY: + return 2; + case ScalabilityMode::kL3T1: + case ScalabilityMode::kL3T1h: + case ScalabilityMode::kL3T1_KEY: + case ScalabilityMode::kL3T2: + case ScalabilityMode::kL3T2h: + case ScalabilityMode::kL3T2_KEY: + case ScalabilityMode::kL3T3: + case ScalabilityMode::kL3T3h: + case ScalabilityMode::kL3T3_KEY: + return 3; + case ScalabilityMode::kS2T1: + case ScalabilityMode::kS2T1h: + case ScalabilityMode::kS2T2: + case ScalabilityMode::kS2T2h: + case ScalabilityMode::kS2T3: + case ScalabilityMode::kS2T3h: + return 2; + case ScalabilityMode::kS3T1: + case ScalabilityMode::kS3T1h: + case ScalabilityMode::kS3T2: + case ScalabilityMode::kS3T2h: + case ScalabilityMode::kS3T3: + case ScalabilityMode::kS3T3h: + return 3; + } + RTC_CHECK_NOTREACHED(); +} + +int ScalabilityModeToNumTemporalLayers(ScalabilityMode scalability_mode) { + switch (scalability_mode) { + case ScalabilityMode::kL1T1: + return 1; + case ScalabilityMode::kL1T2: + return 2; + case ScalabilityMode::kL1T3: + return 3; + case ScalabilityMode::kL2T1: + case ScalabilityMode::kL2T1h: + case ScalabilityMode::kL2T1_KEY: + return 1; + case 
ScalabilityMode::kL2T2: + case ScalabilityMode::kL2T2h: + case ScalabilityMode::kL2T2_KEY: + case ScalabilityMode::kL2T2_KEY_SHIFT: + return 2; + case ScalabilityMode::kL2T3: + case ScalabilityMode::kL2T3h: + case ScalabilityMode::kL2T3_KEY: + return 3; + case ScalabilityMode::kL3T1: + case ScalabilityMode::kL3T1h: + case ScalabilityMode::kL3T1_KEY: + return 1; + case ScalabilityMode::kL3T2: + case ScalabilityMode::kL3T2h: + case ScalabilityMode::kL3T2_KEY: + return 2; + case ScalabilityMode::kL3T3: + case ScalabilityMode::kL3T3h: + case ScalabilityMode::kL3T3_KEY: + return 3; + case ScalabilityMode::kS2T1: + case ScalabilityMode::kS2T1h: + case ScalabilityMode::kS3T1: + case ScalabilityMode::kS3T1h: + return 1; + case ScalabilityMode::kS2T2: + case ScalabilityMode::kS2T2h: + case ScalabilityMode::kS3T2: + case ScalabilityMode::kS3T2h: + return 2; + case ScalabilityMode::kS2T3: + case ScalabilityMode::kS2T3h: + case ScalabilityMode::kS3T3: + case ScalabilityMode::kS3T3h: + return 3; + } + RTC_CHECK_NOTREACHED(); +} + +absl::optional<ScalabilityModeResolutionRatio> ScalabilityModeToResolutionRatio( + ScalabilityMode scalability_mode) { + switch (scalability_mode) { + case ScalabilityMode::kL1T1: + case ScalabilityMode::kL1T2: + case ScalabilityMode::kL1T3: + return absl::nullopt; + case ScalabilityMode::kL2T1: + case ScalabilityMode::kL2T1_KEY: + case ScalabilityMode::kL2T2: + case ScalabilityMode::kL2T2_KEY: + case ScalabilityMode::kL2T2_KEY_SHIFT: + case ScalabilityMode::kL2T3: + case ScalabilityMode::kL2T3_KEY: + case ScalabilityMode::kL3T1: + case ScalabilityMode::kL3T1_KEY: + case ScalabilityMode::kL3T2: + case ScalabilityMode::kL3T2_KEY: + case ScalabilityMode::kL3T3: + case ScalabilityMode::kL3T3_KEY: + case ScalabilityMode::kS2T1: + case ScalabilityMode::kS2T2: + case ScalabilityMode::kS2T3: + case ScalabilityMode::kS3T1: + case ScalabilityMode::kS3T2: + case ScalabilityMode::kS3T3: + return ScalabilityModeResolutionRatio::kTwoToOne; + case 
ScalabilityMode::kL2T1h: + case ScalabilityMode::kL2T2h: + case ScalabilityMode::kL2T3h: + case ScalabilityMode::kL3T1h: + case ScalabilityMode::kL3T2h: + case ScalabilityMode::kL3T3h: + case ScalabilityMode::kS2T1h: + case ScalabilityMode::kS2T2h: + case ScalabilityMode::kS2T3h: + case ScalabilityMode::kS3T1h: + case ScalabilityMode::kS3T2h: + case ScalabilityMode::kS3T3h: + return ScalabilityModeResolutionRatio::kThreeToTwo; + } + RTC_CHECK_NOTREACHED(); +} + +ScalabilityMode LimitNumSpatialLayers(ScalabilityMode scalability_mode, + int max_spatial_layers) { + int num_spatial_layers = ScalabilityModeToNumSpatialLayers(scalability_mode); + if (max_spatial_layers >= num_spatial_layers) { + return scalability_mode; + } + + switch (scalability_mode) { + case ScalabilityMode::kL1T1: + return ScalabilityMode::kL1T1; + case ScalabilityMode::kL1T2: + return ScalabilityMode::kL1T2; + case ScalabilityMode::kL1T3: + return ScalabilityMode::kL1T3; + case ScalabilityMode::kL2T1: + return ScalabilityMode::kL1T1; + case ScalabilityMode::kL2T1h: + return ScalabilityMode::kL1T1; + case ScalabilityMode::kL2T1_KEY: + return ScalabilityMode::kL1T1; + case ScalabilityMode::kL2T2: + return ScalabilityMode::kL1T2; + case ScalabilityMode::kL2T2h: + return ScalabilityMode::kL1T2; + case ScalabilityMode::kL2T2_KEY: + return ScalabilityMode::kL1T2; + case ScalabilityMode::kL2T2_KEY_SHIFT: + return ScalabilityMode::kL1T2; + case ScalabilityMode::kL2T3: + return ScalabilityMode::kL1T3; + case ScalabilityMode::kL2T3h: + return ScalabilityMode::kL1T3; + case ScalabilityMode::kL2T3_KEY: + return ScalabilityMode::kL1T3; + case ScalabilityMode::kL3T1: + return max_spatial_layers == 2 ? ScalabilityMode::kL2T1 + : ScalabilityMode::kL1T1; + case ScalabilityMode::kL3T1h: + return max_spatial_layers == 2 ? ScalabilityMode::kL2T1h + : ScalabilityMode::kL1T1; + case ScalabilityMode::kL3T1_KEY: + return max_spatial_layers == 2 ? 
ScalabilityMode::kL2T1_KEY + : ScalabilityMode::kL1T1; + case ScalabilityMode::kL3T2: + return max_spatial_layers == 2 ? ScalabilityMode::kL2T2 + : ScalabilityMode::kL1T2; + case ScalabilityMode::kL3T2h: + return max_spatial_layers == 2 ? ScalabilityMode::kL2T2h + : ScalabilityMode::kL1T2; + case ScalabilityMode::kL3T2_KEY: + return max_spatial_layers == 2 ? ScalabilityMode::kL2T2_KEY + : ScalabilityMode::kL1T2; + case ScalabilityMode::kL3T3: + return max_spatial_layers == 2 ? ScalabilityMode::kL2T3 + : ScalabilityMode::kL1T3; + case ScalabilityMode::kL3T3h: + return max_spatial_layers == 2 ? ScalabilityMode::kL2T3h + : ScalabilityMode::kL1T3; + case ScalabilityMode::kL3T3_KEY: + return max_spatial_layers == 2 ? ScalabilityMode::kL2T3_KEY + : ScalabilityMode::kL1T3; + case ScalabilityMode::kS2T1: + return ScalabilityMode::kL1T1; + case ScalabilityMode::kS2T1h: + return ScalabilityMode::kL1T1; + case ScalabilityMode::kS2T2: + return ScalabilityMode::kL1T2; + case ScalabilityMode::kS2T2h: + return ScalabilityMode::kL1T2; + case ScalabilityMode::kS2T3: + return ScalabilityMode::kL1T3; + case ScalabilityMode::kS2T3h: + return ScalabilityMode::kL1T3; + case ScalabilityMode::kS3T1: + return max_spatial_layers == 2 ? ScalabilityMode::kS2T1 + : ScalabilityMode::kL1T1; + case ScalabilityMode::kS3T1h: + return max_spatial_layers == 2 ? ScalabilityMode::kS2T1h + : ScalabilityMode::kL1T1; + case ScalabilityMode::kS3T2: + return max_spatial_layers == 2 ? ScalabilityMode::kS2T2 + : ScalabilityMode::kL1T2; + case ScalabilityMode::kS3T2h: + return max_spatial_layers == 2 ? ScalabilityMode::kS2T2h + : ScalabilityMode::kL1T2; + case ScalabilityMode::kS3T3: + return max_spatial_layers == 2 ? ScalabilityMode::kS2T3 + : ScalabilityMode::kL1T3; + case ScalabilityMode::kS3T3h: + return max_spatial_layers == 2 ? 
ScalabilityMode::kS2T3h + : ScalabilityMode::kL1T3; + } + RTC_CHECK_NOTREACHED(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util.h b/third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util.h new file mode 100644 index 0000000000..9c8193e037 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_SVC_SCALABILITY_MODE_UTIL_H_ +#define MODULES_VIDEO_CODING_SVC_SCALABILITY_MODE_UTIL_H_ + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "api/video_codecs/scalability_mode.h" +#include "api/video_codecs/video_codec.h" + +namespace webrtc { + +enum class ScalabilityModeResolutionRatio { + kTwoToOne, // The resolution ratio between spatial layers is 2:1. + kThreeToTwo, // The resolution ratio between spatial layers is 1.5:1. 
+}; + +static constexpr char kDefaultScalabilityModeStr[] = "L1T2"; + +absl::optional<ScalabilityMode> ScalabilityModeFromString( + absl::string_view scalability_mode_string); + +InterLayerPredMode ScalabilityModeToInterLayerPredMode( + ScalabilityMode scalability_mode); + +int ScalabilityModeToNumSpatialLayers(ScalabilityMode scalability_mode); + +int ScalabilityModeToNumTemporalLayers(ScalabilityMode scalability_mode); + +absl::optional<ScalabilityModeResolutionRatio> ScalabilityModeToResolutionRatio( + ScalabilityMode scalability_mode); + +ScalabilityMode LimitNumSpatialLayers(ScalabilityMode scalability_mode, + int max_spatial_layers); + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_SVC_SCALABILITY_MODE_UTIL_H_ diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util_gn/moz.build b/third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util_gn/moz.build new file mode 100644 index 0000000000..f786b11616 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util_gn/moz.build @@ -0,0 +1,225 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = 
True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = 
True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("scalability_mode_util_gn") diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util_unittest.cc b/third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util_unittest.cc new file mode 100644 index 0000000000..448494ffcc --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_mode_util_unittest.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/svc/scalability_mode_util.h" + +#include <string> +#include <tuple> +#include <vector> + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "api/video_codecs/scalability_mode.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +TEST(ScalabilityModeUtil, ConvertsL1T2) { + EXPECT_EQ(ScalabilityModeFromString("L1T2"), ScalabilityMode::kL1T2); + EXPECT_EQ(ScalabilityModeToString(ScalabilityMode::kL1T2), "L1T2"); +} + +TEST(ScalabilityModeUtil, RejectsUnknownString) { + EXPECT_EQ(ScalabilityModeFromString(""), absl::nullopt); + EXPECT_EQ(ScalabilityModeFromString("not-a-mode"), absl::nullopt); +} + +// Check roundtrip conversion of all enum values. 
+TEST(ScalabilityModeUtil, ConvertsAllToAndFromString) { + const ScalabilityMode kLastEnum = ScalabilityMode::kS3T3h; + for (int numerical_enum = 0; numerical_enum <= static_cast<int>(kLastEnum); + numerical_enum++) { + ScalabilityMode scalability_mode = + static_cast<ScalabilityMode>(numerical_enum); + absl::string_view scalability_mode_string = + ScalabilityModeToString(scalability_mode); + EXPECT_FALSE(scalability_mode_string.empty()); + EXPECT_EQ(ScalabilityModeFromString(scalability_mode_string), + scalability_mode); + } +} + +struct TestParams { + std::string scalability_mode; + std::vector<std::tuple<std::vector<int>, std::string>> + limited_scalability_mode; +}; + +class NumSpatialLayersTest : public ::testing::TestWithParam<TestParams> {}; + +INSTANTIATE_TEST_SUITE_P( + MaxLayers, + NumSpatialLayersTest, + ::testing::ValuesIn<TestParams>( + {{"L1T1", {{{0, 1}, "L1T1"}, {{2}, "L1T1"}, {{3}, "L1T1"}}}, + {"L1T2", {{{0, 1}, "L1T2"}, {{2}, "L1T2"}, {{3}, "L1T2"}}}, + {"L1T3", {{{0, 1}, "L1T3"}, {{2}, "L1T3"}, {{3}, "L1T3"}}}, + {"L2T1", {{{0, 1}, "L1T1"}, {{2}, "L2T1"}, {{3}, "L2T1"}}}, + {"L2T1h", {{{0, 1}, "L1T1"}, {{2}, "L2T1h"}, {{3}, "L2T1h"}}}, + {"L2T1_KEY", {{{0, 1}, "L1T1"}, {{2}, "L2T1_KEY"}, {{3}, "L2T1_KEY"}}}, + {"L2T2", {{{0, 1}, "L1T2"}, {{2}, "L2T2"}, {{3}, "L2T2"}}}, + {"L2T2h", {{{0, 1}, "L1T2"}, {{2}, "L2T2h"}, {{3}, "L2T2h"}}}, + {"L2T2_KEY", {{{0, 1}, "L1T2"}, {{2}, "L2T2_KEY"}, {{3}, "L2T2_KEY"}}}, + {"L2T2_KEY_SHIFT", + {{{0, 1}, "L1T2"}, {{2}, "L2T2_KEY_SHIFT"}, {{3}, "L2T2_KEY_SHIFT"}}}, + {"L2T3", {{{0, 1}, "L1T3"}, {{2}, "L2T3"}, {{3}, "L2T3"}}}, + {"L2T3h", {{{0, 1}, "L1T3"}, {{2}, "L2T3h"}, {{3}, "L2T3h"}}}, + {"L2T3_KEY", {{{0, 1}, "L1T3"}, {{2}, "L2T3_KEY"}, {{3}, "L2T3_KEY"}}}, + {"L3T1", {{{0, 1}, "L1T1"}, {{2}, "L2T1"}, {{3}, "L3T1"}}}, + {"L3T1h", {{{0, 1}, "L1T1"}, {{2}, "L2T1h"}, {{3}, "L3T1h"}}}, + {"L3T1_KEY", {{{0, 1}, "L1T1"}, {{2}, "L2T1_KEY"}, {{3}, "L3T1_KEY"}}}, + {"L3T2", {{{0, 1}, "L1T2"}, {{2}, "L2T2"}, {{3}, 
"L3T2"}}}, + {"L3T2h", {{{0, 1}, "L1T2"}, {{2}, "L2T2h"}, {{3}, "L3T2h"}}}, + {"L3T2_KEY", {{{0, 1}, "L1T2"}, {{2}, "L2T2_KEY"}, {{3}, "L3T2_KEY"}}}, + {"L3T3", {{{0, 1}, "L1T3"}, {{2}, "L2T3"}, {{3}, "L3T3"}}}, + {"L3T3h", {{{0, 1}, "L1T3"}, {{2}, "L2T3h"}, {{3}, "L3T3h"}}}, + {"L3T3_KEY", {{{0, 1}, "L1T3"}, {{2}, "L2T3_KEY"}, {{3}, "L3T3_KEY"}}}, + {"S2T1", {{{0, 1}, "L1T1"}, {{2}, "S2T1"}, {{3}, "S2T1"}}}, + {"S2T1h", {{{0, 1}, "L1T1"}, {{2}, "S2T1h"}, {{3}, "S2T1h"}}}, + {"S2T2", {{{0, 1}, "L1T2"}, {{2}, "S2T2"}, {{3}, "S2T2"}}}, + {"S2T2h", {{{0, 1}, "L1T2"}, {{2}, "S2T2h"}, {{3}, "S2T2h"}}}, + {"S2T3", {{{0, 1}, "L1T3"}, {{2}, "S2T3"}, {{3}, "S2T3"}}}, + {"S2T3h", {{{0, 1}, "L1T3"}, {{2}, "S2T3h"}, {{3}, "S2T3h"}}}, + {"S3T1", {{{0, 1}, "L1T1"}, {{2}, "S2T1"}, {{3}, "S3T1"}}}, + {"S3T1h", {{{0, 1}, "L1T1"}, {{2}, "S2T1h"}, {{3}, "S3T1h"}}}, + {"S3T2", {{{0, 1}, "L1T2"}, {{2}, "S2T2"}, {{3}, "S3T2"}}}, + {"S3T2h", {{{0, 1}, "L1T2"}, {{2}, "S2T2h"}, {{3}, "S3T2h"}}}, + {"S3T3", {{{0, 1}, "L1T3"}, {{2}, "S2T3"}, {{3}, "S3T3"}}}, + {"S3T3h", {{{0, 1}, "L1T3"}, {{2}, "S2T3h"}, {{3}, "S3T3h"}}}}), + [](const ::testing::TestParamInfo<TestParams>& info) { + return info.param.scalability_mode; + }); + +TEST_P(NumSpatialLayersTest, LimitsSpatialLayers) { + const ScalabilityMode mode = + *ScalabilityModeFromString(GetParam().scalability_mode); + for (const auto& param : GetParam().limited_scalability_mode) { + const std::vector<int> max_num_spatial_layers = + std::get<std::vector<int>>(param); + const ScalabilityMode expected_mode = + *ScalabilityModeFromString(std::get<std::string>(param)); + for (const auto& max_layers : max_num_spatial_layers) { + EXPECT_EQ(expected_mode, LimitNumSpatialLayers(mode, max_layers)); + } + } +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_full_svc.cc b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_full_svc.cc new file mode 100644 index 
0000000000..a262317597 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_full_svc.cc @@ -0,0 +1,444 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/video_coding/svc/scalability_structure_full_svc.h" + +#include <utility> +#include <vector> + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "api/transport/rtp/dependency_descriptor.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +constexpr int ScalabilityStructureFullSvc::kMaxNumSpatialLayers; +constexpr int ScalabilityStructureFullSvc::kMaxNumTemporalLayers; +constexpr absl::string_view ScalabilityStructureFullSvc::kFramePatternNames[]; + +ScalabilityStructureFullSvc::ScalabilityStructureFullSvc( + int num_spatial_layers, + int num_temporal_layers, + ScalingFactor resolution_factor) + : num_spatial_layers_(num_spatial_layers), + num_temporal_layers_(num_temporal_layers), + resolution_factor_(resolution_factor), + active_decode_targets_( + (uint32_t{1} << (num_spatial_layers * num_temporal_layers)) - 1) { + RTC_DCHECK_LE(num_spatial_layers, kMaxNumSpatialLayers); + RTC_DCHECK_LE(num_temporal_layers, kMaxNumTemporalLayers); +} + +ScalabilityStructureFullSvc::~ScalabilityStructureFullSvc() = default; + +ScalabilityStructureFullSvc::StreamLayersConfig +ScalabilityStructureFullSvc::StreamConfig() const { + StreamLayersConfig result; + result.num_spatial_layers = num_spatial_layers_; + result.num_temporal_layers = num_temporal_layers_; + result.scaling_factor_num[num_spatial_layers_ - 1] = 1; + 
result.scaling_factor_den[num_spatial_layers_ - 1] = 1; + for (int sid = num_spatial_layers_ - 1; sid > 0; --sid) { + result.scaling_factor_num[sid - 1] = + resolution_factor_.num * result.scaling_factor_num[sid]; + result.scaling_factor_den[sid - 1] = + resolution_factor_.den * result.scaling_factor_den[sid]; + } + result.uses_reference_scaling = num_spatial_layers_ > 1; + return result; +} + +bool ScalabilityStructureFullSvc::TemporalLayerIsActive(int tid) const { + if (tid >= num_temporal_layers_) { + return false; + } + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (DecodeTargetIsActive(sid, tid)) { + return true; + } + } + return false; +} + +DecodeTargetIndication ScalabilityStructureFullSvc::Dti( + int sid, + int tid, + const LayerFrameConfig& config) { + if (sid < config.SpatialId() || tid < config.TemporalId()) { + return DecodeTargetIndication::kNotPresent; + } + if (sid == config.SpatialId()) { + if (tid == 0) { + RTC_DCHECK_EQ(config.TemporalId(), 0); + return DecodeTargetIndication::kSwitch; + } + if (tid == config.TemporalId()) { + return DecodeTargetIndication::kDiscardable; + } + if (tid > config.TemporalId()) { + RTC_DCHECK_GT(tid, config.TemporalId()); + return DecodeTargetIndication::kSwitch; + } + } + RTC_DCHECK_GT(sid, config.SpatialId()); + RTC_DCHECK_GE(tid, config.TemporalId()); + if (config.IsKeyframe() || config.Id() == kKey) { + return DecodeTargetIndication::kSwitch; + } + return DecodeTargetIndication::kRequired; +} + +ScalabilityStructureFullSvc::FramePattern +ScalabilityStructureFullSvc::NextPattern() const { + switch (last_pattern_) { + case kNone: + return kKey; + case kDeltaT2B: + return kDeltaT0; + case kDeltaT2A: + if (TemporalLayerIsActive(1)) { + return kDeltaT1; + } + return kDeltaT0; + case kDeltaT1: + if (TemporalLayerIsActive(2)) { + return kDeltaT2B; + } + return kDeltaT0; + case kKey: + case kDeltaT0: + if (TemporalLayerIsActive(2)) { + return kDeltaT2A; + } + if (TemporalLayerIsActive(1)) { + return 
kDeltaT1; + } + return kDeltaT0; + } + RTC_DCHECK_NOTREACHED(); + return kNone; +} + +std::vector<ScalableVideoController::LayerFrameConfig> +ScalabilityStructureFullSvc::NextFrameConfig(bool restart) { + std::vector<LayerFrameConfig> configs; + if (active_decode_targets_.none()) { + last_pattern_ = kNone; + return configs; + } + configs.reserve(num_spatial_layers_); + + if (last_pattern_ == kNone || restart) { + can_reference_t0_frame_for_spatial_id_.reset(); + last_pattern_ = kNone; + } + FramePattern current_pattern = NextPattern(); + + absl::optional<int> spatial_dependency_buffer_id; + switch (current_pattern) { + case kDeltaT0: + case kKey: + // Disallow temporal references cross T0 on higher temporal layers. + can_reference_t1_frame_for_spatial_id_.reset(); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (!DecodeTargetIsActive(sid, /*tid=*/0)) { + // Next frame from the spatial layer `sid` shouldn't depend on + // potentially old previous frame from the spatial layer `sid`. 
// Builds the layer frame configurations for the next temporal unit: at most
// one entry per spatial layer, in increasing spatial id order.
//
// `restart` forces the pattern sequence to start over from a key pattern.
// Returns an empty vector when no decode target is active, or when even a
// restarted sequence yields no frame.
//
// On the normal path `last_pattern_` is not advanced here; that happens in
// `OnEncodeDone()`, so a temporal unit that is dropped entirely is retried
// with the same pattern.
std::vector<ScalableVideoController::LayerFrameConfig>
ScalabilityStructureFullSvc::NextFrameConfig(bool restart) {
  std::vector<LayerFrameConfig> configs;
  if (active_decode_targets_.none()) {
    // Nothing to encode; make the next produced frame start from a key
    // pattern.
    last_pattern_ = kNone;
    return configs;
  }
  configs.reserve(num_spatial_layers_);

  if (last_pattern_ == kNone || restart) {
    // Starting over: no previously stored T0 frame may be referenced.
    can_reference_t0_frame_for_spatial_id_.reset();
    last_pattern_ = kNone;
  }
  FramePattern current_pattern = NextPattern();

  // Buffer written by the previous (lower) spatial layer in this temporal
  // unit; unset until the lowest active spatial layer has been configured.
  absl::optional<int> spatial_dependency_buffer_id;
  switch (current_pattern) {
    case kDeltaT0:
    case kKey:
      // Disallow temporal references cross T0 on higher temporal layers.
      can_reference_t1_frame_for_spatial_id_.reset();
      for (int sid = 0; sid < num_spatial_layers_; ++sid) {
        if (!DecodeTargetIsActive(sid, /*tid=*/0)) {
          // Next frame from the spatial layer `sid` shouldn't depend on
          // potentially old previous frame from the spatial layer `sid`.
          can_reference_t0_frame_for_spatial_id_.reset(sid);
          continue;
        }
        configs.emplace_back();
        ScalableVideoController::LayerFrameConfig& config = configs.back();
        config.Id(current_pattern).S(sid).T(0);

        // Higher spatial layers reference the layer below; only the lowest
        // active layer of a key pattern is marked as a keyframe.
        if (spatial_dependency_buffer_id) {
          config.Reference(*spatial_dependency_buffer_id);
        } else if (current_pattern == kKey) {
          config.Keyframe();
        }

        if (can_reference_t0_frame_for_spatial_id_[sid]) {
          config.ReferenceAndUpdate(BufferIndex(sid, /*tid=*/0));
        } else {
          // TODO(bugs.webrtc.org/11999): Propagate chain restart on delta frame
          // to ChainDiffCalculator
          config.Update(BufferIndex(sid, /*tid=*/0));
        }

        spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/0);
      }
      break;
    case kDeltaT1:
      for (int sid = 0; sid < num_spatial_layers_; ++sid) {
        // Skip layers that are inactive or have no usable T0 frame to
        // reference.
        if (!DecodeTargetIsActive(sid, /*tid=*/1) ||
            !can_reference_t0_frame_for_spatial_id_[sid]) {
          continue;
        }
        configs.emplace_back();
        ScalableVideoController::LayerFrameConfig& config = configs.back();
        config.Id(current_pattern).S(sid).T(1);
        // Temporal reference.
        config.Reference(BufferIndex(sid, /*tid=*/0));
        // Spatial reference unless this is the lowest active spatial layer.
        if (spatial_dependency_buffer_id) {
          config.Reference(*spatial_dependency_buffer_id);
        }
        // No frame reference top layer frame, so no need save it into a buffer.
        if (num_temporal_layers_ > 2 || sid < num_spatial_layers_ - 1) {
          config.Update(BufferIndex(sid, /*tid=*/1));
        }
        spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/1);
      }
      break;
    case kDeltaT2A:
    case kDeltaT2B:
      for (int sid = 0; sid < num_spatial_layers_; ++sid) {
        // Skip layers that are inactive or have no usable T0 frame to
        // reference.
        if (!DecodeTargetIsActive(sid, /*tid=*/2) ||
            !can_reference_t0_frame_for_spatial_id_[sid]) {
          continue;
        }
        configs.emplace_back();
        ScalableVideoController::LayerFrameConfig& config = configs.back();
        config.Id(current_pattern).S(sid).T(2);
        // Temporal reference.
        // The second T2 frame of a cycle references the T1 frame when one is
        // available; otherwise fall back to the T0 frame.
        if (current_pattern == kDeltaT2B &&
            can_reference_t1_frame_for_spatial_id_[sid]) {
          config.Reference(BufferIndex(sid, /*tid=*/1));
        } else {
          config.Reference(BufferIndex(sid, /*tid=*/0));
        }
        // Spatial reference unless this is the lowest active spatial layer.
        if (spatial_dependency_buffer_id) {
          config.Reference(*spatial_dependency_buffer_id);
        }
        // No frame reference top layer frame, so no need save it into a buffer.
        if (sid < num_spatial_layers_ - 1) {
          config.Update(BufferIndex(sid, /*tid=*/2));
        }
        spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/2);
      }
      break;
    case kNone:
      RTC_DCHECK_NOTREACHED();
      break;
  }

  if (configs.empty() && !restart) {
    // The chosen pattern produced no frame for the current set of active
    // decode targets. Log the state and retry once from a key pattern; the
    // retry passes restart=true, so this branch cannot recurse further.
    // `to_string('-')` renders cleared bits as '-', and `substr` keeps only
    // the low num_spatial_layers_ * num_temporal_layers_ bits.
    RTC_LOG(LS_WARNING) << "Failed to generate configuration for L"
                        << num_spatial_layers_ << "T" << num_temporal_layers_
                        << " with active decode targets "
                        << active_decode_targets_.to_string('-').substr(
                               active_decode_targets_.size() -
                               num_spatial_layers_ * num_temporal_layers_)
                        << " and transition from "
                        << kFramePatternNames[last_pattern_] << " to "
                        << kFramePatternNames[current_pattern]
                        << ". Resetting.";
    return NextFrameConfig(/*restart=*/true);
  }

  return configs;
}
+ last_pattern_ = static_cast<FramePattern>(config.Id()); + if (config.TemporalId() == 0) { + can_reference_t0_frame_for_spatial_id_.set(config.SpatialId()); + } + if (config.TemporalId() == 1) { + can_reference_t1_frame_for_spatial_id_.set(config.SpatialId()); + } + + GenericFrameInfo frame_info; + frame_info.spatial_id = config.SpatialId(); + frame_info.temporal_id = config.TemporalId(); + frame_info.encoder_buffers = config.Buffers(); + frame_info.decode_target_indications.reserve(num_spatial_layers_ * + num_temporal_layers_); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + frame_info.decode_target_indications.push_back(Dti(sid, tid, config)); + } + } + if (config.TemporalId() == 0) { + frame_info.part_of_chain.resize(num_spatial_layers_); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + frame_info.part_of_chain[sid] = config.SpatialId() <= sid; + } + } else { + frame_info.part_of_chain.assign(num_spatial_layers_, false); + } + frame_info.active_decode_targets = active_decode_targets_; + return frame_info; +} + +void ScalabilityStructureFullSvc::OnRatesUpdated( + const VideoBitrateAllocation& bitrates) { + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + // Enable/disable spatial layers independetely. + bool active = true; + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + // To enable temporal layer, require bitrates for lower temporal layers. 
+ active = active && bitrates.GetBitrate(sid, tid) > 0; + SetDecodeTargetIsActive(sid, tid, active); + } + } +} + +FrameDependencyStructure ScalabilityStructureL1T2::DependencyStructure() const { + FrameDependencyStructure structure; + structure.num_decode_targets = 2; + structure.num_chains = 1; + structure.decode_target_protected_by_chain = {0, 0}; + structure.templates.resize(3); + structure.templates[0].T(0).Dtis("SS").ChainDiffs({0}); + structure.templates[1].T(0).Dtis("SS").ChainDiffs({2}).FrameDiffs({2}); + structure.templates[2].T(1).Dtis("-D").ChainDiffs({1}).FrameDiffs({1}); + return structure; +} + +FrameDependencyStructure ScalabilityStructureL1T3::DependencyStructure() const { + FrameDependencyStructure structure; + structure.num_decode_targets = 3; + structure.num_chains = 1; + structure.decode_target_protected_by_chain = {0, 0, 0}; + structure.templates.resize(5); + structure.templates[0].T(0).Dtis("SSS").ChainDiffs({0}); + structure.templates[1].T(0).Dtis("SSS").ChainDiffs({4}).FrameDiffs({4}); + structure.templates[2].T(1).Dtis("-DS").ChainDiffs({2}).FrameDiffs({2}); + structure.templates[3].T(2).Dtis("--D").ChainDiffs({1}).FrameDiffs({1}); + structure.templates[4].T(2).Dtis("--D").ChainDiffs({3}).FrameDiffs({1}); + return structure; +} + +FrameDependencyStructure ScalabilityStructureL2T1::DependencyStructure() const { + FrameDependencyStructure structure; + structure.num_decode_targets = 2; + structure.num_chains = 2; + structure.decode_target_protected_by_chain = {0, 1}; + structure.templates.resize(4); + structure.templates[0].S(0).Dtis("SR").ChainDiffs({2, 1}).FrameDiffs({2}); + structure.templates[1].S(0).Dtis("SS").ChainDiffs({0, 0}); + structure.templates[2].S(1).Dtis("-S").ChainDiffs({1, 1}).FrameDiffs({2, 1}); + structure.templates[3].S(1).Dtis("-S").ChainDiffs({1, 1}).FrameDiffs({1}); + return structure; +} + +FrameDependencyStructure ScalabilityStructureL2T2::DependencyStructure() const { + FrameDependencyStructure structure; + 
// Dependency structure for two spatial and two temporal layers: 4 decode
// targets and one chain per spatial layer.
// Each character of a Dtis() string describes one decode target.
// NOTE(review): the letters presumably map to DecodeTargetIndication values
// ('-' not present, 'D' discardable, 'S' switch, 'R' required) - confirm
// against the Dtis() helper.
FrameDependencyStructure ScalabilityStructureL2T2::DependencyStructure() const {
  FrameDependencyStructure structure;
  structure.num_decode_targets = 4;
  structure.num_chains = 2;
  // Decode targets 0-1 are protected by chain 0, decode targets 2-3 by
  // chain 1.
  structure.decode_target_protected_by_chain = {0, 0, 1, 1};
  structure.templates.resize(6);
  auto& templates = structure.templates;
  templates[0].S(0).T(0).Dtis("SSSS").ChainDiffs({0, 0});
  templates[1].S(0).T(0).Dtis("SSRR").ChainDiffs({4, 3}).FrameDiffs({4});
  templates[2].S(0).T(1).Dtis("-D-R").ChainDiffs({2, 1}).FrameDiffs({2});
  templates[3].S(1).T(0).Dtis("--SS").ChainDiffs({1, 1}).FrameDiffs({1});
  templates[4].S(1).T(0).Dtis("--SS").ChainDiffs({1, 1}).FrameDiffs({4, 1});
  templates[5].S(1).T(1).Dtis("---D").ChainDiffs({3, 2}).FrameDiffs({2, 1});
  return structure;
}

// Dependency structure for two spatial and three temporal layers: 6 decode
// targets and one chain per spatial layer.
FrameDependencyStructure ScalabilityStructureL2T3::DependencyStructure() const {
  FrameDependencyStructure structure;
  structure.num_decode_targets = 6;
  structure.num_chains = 2;
  // Decode targets 0-2 are protected by chain 0, decode targets 3-5 by
  // chain 1.
  structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1};
  auto& t = structure.templates;
  t.resize(10);
  // The statements below are not in index order; each template's slot in the
  // array is the index in brackets, and slots are grouped by (S, T).
  t[1].S(0).T(0).Dtis("SSSSSS").ChainDiffs({0, 0});
  t[6].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 1}).FrameDiffs({1});
  t[3].S(0).T(2).Dtis("--D--R").ChainDiffs({2, 1}).FrameDiffs({2});
  t[8].S(1).T(2).Dtis("-----D").ChainDiffs({3, 2}).FrameDiffs({2, 1});
  t[2].S(0).T(1).Dtis("-DS-RR").ChainDiffs({4, 3}).FrameDiffs({4});
  t[7].S(1).T(1).Dtis("----DS").ChainDiffs({5, 4}).FrameDiffs({4, 1});
  t[4].S(0).T(2).Dtis("--D--R").ChainDiffs({6, 5}).FrameDiffs({2});
  t[9].S(1).T(2).Dtis("-----D").ChainDiffs({7, 6}).FrameDiffs({2, 1});
  t[0].S(0).T(0).Dtis("SSSRRR").ChainDiffs({8, 7}).FrameDiffs({8});
  t[5].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 1}).FrameDiffs({8, 1});
  return structure;
}
// Dependency structure for three spatial layers with one temporal layer:
// 3 decode targets, each protected by its own chain.
FrameDependencyStructure ScalabilityStructureL3T1::DependencyStructure() const {
  FrameDependencyStructure structure;
  structure.num_decode_targets = 3;
  structure.num_chains = 3;
  structure.decode_target_protected_by_chain = {0, 1, 2};
  auto& templates = structure.templates;
  templates.resize(6);
  // Two templates per spatial layer.
  templates[0].S(0).Dtis("SRR").ChainDiffs({3, 2, 1}).FrameDiffs({3});
  templates[1].S(0).Dtis("SSS").ChainDiffs({0, 0, 0});
  templates[2].S(1).Dtis("-SR").ChainDiffs({1, 1, 1}).FrameDiffs({3, 1});
  templates[3].S(1).Dtis("-SS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
  templates[4].S(2).Dtis("--S").ChainDiffs({2, 1, 1}).FrameDiffs({3, 1});
  templates[5].S(2).Dtis("--S").ChainDiffs({2, 1, 1}).FrameDiffs({1});
  return structure;
}

// Dependency structure for three spatial and two temporal layers: 6 decode
// targets and one chain per spatial layer.
FrameDependencyStructure ScalabilityStructureL3T2::DependencyStructure() const {
  FrameDependencyStructure structure;
  structure.num_decode_targets = 6;
  structure.num_chains = 3;
  // Decode targets 0-1, 2-3 and 4-5 are protected by chains 0, 1 and 2.
  structure.decode_target_protected_by_chain = {0, 0, 1, 1, 2, 2};
  auto& t = structure.templates;
  t.resize(9);
  // Templates are shown in the order frames following them appear in the
  // stream, but in `structure.templates` array templates are sorted by
  // (`spatial_id`, `temporal_id`) since that is a dependency descriptor
  // requirement.
  t[1].S(0).T(0).Dtis("SSSSSS").ChainDiffs({0, 0, 0});
  t[4].S(1).T(0).Dtis("--SSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
  t[7].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 1}).FrameDiffs({1});
  t[2].S(0).T(1).Dtis("-D-R-R").ChainDiffs({3, 2, 1}).FrameDiffs({3});
  t[5].S(1).T(1).Dtis("---D-R").ChainDiffs({4, 3, 2}).FrameDiffs({3, 1});
  t[8].S(2).T(1).Dtis("-----D").ChainDiffs({5, 4, 3}).FrameDiffs({3, 1});
  t[0].S(0).T(0).Dtis("SSRRRR").ChainDiffs({6, 5, 4}).FrameDiffs({6});
  t[3].S(1).T(0).Dtis("--SSRR").ChainDiffs({1, 1, 1}).FrameDiffs({6, 1});
  t[6].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 1}).FrameDiffs({6, 1});
  return structure;
}
// Dependency structure for three spatial and three temporal layers: 9 decode
// targets and one chain per spatial layer.
FrameDependencyStructure ScalabilityStructureL3T3::DependencyStructure() const {
  FrameDependencyStructure structure;
  structure.num_decode_targets = 9;
  structure.num_chains = 3;
  // Decode targets 0-2, 3-5 and 6-8 are protected by chains 0, 1 and 2.
  structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1, 2, 2, 2};
  auto& t = structure.templates;
  t.resize(15);
  // Templates are shown in the order frames following them appear in the
  // stream, but in `structure.templates` array templates are sorted by
  // (`spatial_id`, `temporal_id`) since that is a dependency descriptor
  // requirement. Indexes are written in hex for nicer alignment.
  t[0x1].S(0).T(0).Dtis("SSSSSSSSS").ChainDiffs({0, 0, 0});
  t[0x6].S(1).T(0).Dtis("---SSSSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
  t[0xB].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 1}).FrameDiffs({1});
  t[0x3].S(0).T(2).Dtis("--D--R--R").ChainDiffs({3, 2, 1}).FrameDiffs({3});
  t[0x8].S(1).T(2).Dtis("-----D--R").ChainDiffs({4, 3, 2}).FrameDiffs({3, 1});
  t[0xD].S(2).T(2).Dtis("--------D").ChainDiffs({5, 4, 3}).FrameDiffs({3, 1});
  t[0x2].S(0).T(1).Dtis("-DS-RR-RR").ChainDiffs({6, 5, 4}).FrameDiffs({6});
  t[0x7].S(1).T(1).Dtis("----DS-RR").ChainDiffs({7, 6, 5}).FrameDiffs({6, 1});
  t[0xC].S(2).T(1).Dtis("-------DS").ChainDiffs({8, 7, 6}).FrameDiffs({6, 1});
  t[0x4].S(0).T(2).Dtis("--D--R--R").ChainDiffs({9, 8, 7}).FrameDiffs({3});
  t[0x9].S(1).T(2).Dtis("-----D--R").ChainDiffs({10, 9, 8}).FrameDiffs({3, 1});
  t[0xE].S(2).T(2).Dtis("--------D").ChainDiffs({11, 10, 9}).FrameDiffs({3, 1});
  t[0x0].S(0).T(0).Dtis("SSSRRRRRR").ChainDiffs({12, 11, 10}).FrameDiffs({12});
  t[0x5].S(1).T(0).Dtis("---SSSRRR").ChainDiffs({1, 1, 1}).FrameDiffs({12, 1});
  t[0xA].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 1}).FrameDiffs({12, 1});
  return structure;
}
// Common implementation of the "full SVC" scalability structures with up to
// `kMaxNumSpatialLayers` spatial and `kMaxNumTemporalLayers` temporal layers.
// Concrete LxTy subclasses fix the layer counts and provide the matching
// `DependencyStructure()`.
class ScalabilityStructureFullSvc : public ScalableVideoController {
 public:
  // Resolution ratio (num/den) between a spatial layer and the next higher
  // one. Defaults to 1/2.
  struct ScalingFactor {
    int num = 1;
    int den = 2;
  };
  ScalabilityStructureFullSvc(int num_spatial_layers,
                              int num_temporal_layers,
                              ScalingFactor resolution_factor);
  ~ScalabilityStructureFullSvc() override;

  StreamLayersConfig StreamConfig() const override;

  std::vector<LayerFrameConfig> NextFrameConfig(bool restart) override;
  GenericFrameInfo OnEncodeDone(const LayerFrameConfig& config) override;
  void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override;

 private:
  // Position within the temporal pattern. The numeric values are stored in
  // `LayerFrameConfig::Id()` and are used as indices into
  // `kFramePatternNames`, so both lists must stay in the same order.
  enum FramePattern {
    kNone,
    kKey,
    kDeltaT2A,
    kDeltaT1,
    kDeltaT2B,
    kDeltaT0,
  };
  // Human readable names for logging, indexed by `FramePattern`.
  static constexpr absl::string_view kFramePatternNames[] = {
      "None", "Key", "DeltaT2A", "DeltaT1", "DeltaT2B", "DeltaT0"};
  static constexpr int kMaxNumSpatialLayers = 3;
  static constexpr int kMaxNumTemporalLayers = 3;

  // Index of the buffer to store last frame for layer (`sid`, `tid`)
  int BufferIndex(int sid, int tid) const {
    return tid * num_spatial_layers_ + sid;
  }
  // Decode target (`sid`, `tid`) is bit `sid * num_temporal_layers_ + tid` of
  // `active_decode_targets_`.
  bool DecodeTargetIsActive(int sid, int tid) const {
    return active_decode_targets_[sid * num_temporal_layers_ + tid];
  }
  void SetDecodeTargetIsActive(int sid, int tid, bool value) {
    active_decode_targets_.set(sid * num_temporal_layers_ + tid, value);
  }
  FramePattern NextPattern() const;
  bool TemporalLayerIsActive(int tid) const;
  static DecodeTargetIndication Dti(int sid,
                                    int tid,
                                    const LayerFrameConfig& frame);

  const int num_spatial_layers_;
  const int num_temporal_layers_;
  const ScalingFactor resolution_factor_;

  // Pattern of the most recently encoded frame; kNone before the first frame
  // and after a reset.
  FramePattern last_pattern_ = kNone;
  // Bit `sid` is set once a T0 (resp. T1) frame of spatial layer `sid` has
  // been encoded and may be used as a temporal reference.
  std::bitset<kMaxNumSpatialLayers> can_reference_t0_frame_for_spatial_id_ = 0;
  std::bitset<kMaxNumSpatialLayers> can_reference_t1_frame_for_spatial_id_ = 0;
  // One bit per decode target; see `DecodeTargetIsActive()` for the layout.
  std::bitset<32> active_decode_targets_;
};

// T1       0   0
//         /   /   / ...
// T0     0---0---0--
// Time-> 0 1 2 3 4
class ScalabilityStructureL1T2 : public ScalabilityStructureFullSvc {
 public:
  explicit ScalabilityStructureL1T2(ScalingFactor resolution_factor = {})
      : ScalabilityStructureFullSvc(1, 2, resolution_factor) {}
  ~ScalabilityStructureL1T2() override = default;

  FrameDependencyStructure DependencyStructure() const override;
};

// T2       0   0   0   0
//          |  /    |  /
// T1       / 0     / 0  ...
//          |_/     |_/
// T0       0-------0------
// Time->   0 1 2 3 4 5 6 7
class ScalabilityStructureL1T3 : public ScalabilityStructureFullSvc {
 public:
  explicit ScalabilityStructureL1T3(ScalingFactor resolution_factor = {})
      : ScalabilityStructureFullSvc(1, 3, resolution_factor) {}
  ~ScalabilityStructureL1T3() override = default;

  FrameDependencyStructure DependencyStructure() const override;
};

// S1  0--0--0-
//     |  |  | ...
// S0  0--0--0-
class ScalabilityStructureL2T1 : public ScalabilityStructureFullSvc {
 public:
  explicit ScalabilityStructureL2T1(ScalingFactor resolution_factor = {})
      : ScalabilityStructureFullSvc(2, 1, resolution_factor) {}
  ~ScalabilityStructureL2T1() override = default;

  FrameDependencyStructure DependencyStructure() const override;
};
+// S0T1 | 0 | 0 | +// |/ |/ |/ +// S0T0 0---0---0-- +// Time-> 0 1 2 3 4 +class ScalabilityStructureL2T2 : public ScalabilityStructureFullSvc { + public: + explicit ScalabilityStructureL2T2(ScalingFactor resolution_factor = {}) + : ScalabilityStructureFullSvc(2, 2, resolution_factor) {} + ~ScalabilityStructureL2T2() override = default; + + FrameDependencyStructure DependencyStructure() const override; +}; + +// S1T2 4 ,8 +// S1T1 / | 6' | +// S1T0 2--+-'+--+-... +// | | | | +// S0T2 | 3 | ,7 +// S0T1 | / 5' +// S0T0 1----'-----... +// Time-> 0 1 2 3 +class ScalabilityStructureL2T3 : public ScalabilityStructureFullSvc { + public: + explicit ScalabilityStructureL2T3(ScalingFactor resolution_factor = {}) + : ScalabilityStructureFullSvc(2, 3, resolution_factor) {} + ~ScalabilityStructureL2T3() override = default; + + FrameDependencyStructure DependencyStructure() const override; +}; + +// S2 0-0-0- +// | | | +// S1 0-0-0-... +// | | | +// S0 0-0-0- +// Time-> 0 1 2 +class ScalabilityStructureL3T1 : public ScalabilityStructureFullSvc { + public: + explicit ScalabilityStructureL3T1(ScalingFactor resolution_factor = {}) + : ScalabilityStructureFullSvc(3, 1, resolution_factor) {} + ~ScalabilityStructureL3T1() override = default; + + FrameDependencyStructure DependencyStructure() const override; +}; + +// https://www.w3.org/TR/webrtc-svc/#L3T2* +class ScalabilityStructureL3T2 : public ScalabilityStructureFullSvc { + public: + explicit ScalabilityStructureL3T2(ScalingFactor resolution_factor = {}) + : ScalabilityStructureFullSvc(3, 2, resolution_factor) {} + ~ScalabilityStructureL3T2() override = default; + + FrameDependencyStructure DependencyStructure() const override; +}; + +// https://www.w3.org/TR/webrtc-svc/#L3T3* +class ScalabilityStructureL3T3 : public ScalabilityStructureFullSvc { + public: + explicit ScalabilityStructureL3T3(ScalingFactor resolution_factor = {}) + : ScalabilityStructureFullSvc(3, 3, resolution_factor) {} + ~ScalabilityStructureL3T3() override = 
default; + + FrameDependencyStructure DependencyStructure() const override; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_FULL_SVC_H_ diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_full_svc_unittest.cc b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_full_svc_unittest.cc new file mode 100644 index 0000000000..1c0a8be8f1 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_full_svc_unittest.cc @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/video_coding/svc/scalability_structure_full_svc.h" + +#include <vector> + +#include "modules/video_coding/svc/scalability_structure_test_helpers.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::IsEmpty; +using ::testing::SizeIs; + +TEST(ScalabilityStructureL3T3Test, SkipT0FrameByEncoderKeepsReferencesValid) { + std::vector<GenericFrameInfo> frames; + ScalabilityStructureL3T3 structure; + ScalabilityStructureWrapper wrapper(structure); + + // Only S0T0 decode target is enabled. + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/1, /*s1=*/0)); + // Encoder generates S0T0 key frame. + wrapper.GenerateFrames(/*num_temporal_units=*/1, frames); + EXPECT_THAT(frames, SizeIs(1)); + // Spatial layer 1 is enabled. + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/1, /*s1=*/1)); + // Encoder tries to generate S0T0 and S1T0 delta frames but they are dropped. 
+ structure.NextFrameConfig(/*restart=*/false); + // Encoder successfully generates S0T0 and S1T0 delta frames. + wrapper.GenerateFrames(/*num_temporal_units=*/1, frames); + EXPECT_THAT(frames, SizeIs(3)); + + EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames)); +} + +TEST(ScalabilityStructureL3T3Test, SkipS1T1FrameKeepsStructureValid) { + ScalabilityStructureL3T3 structure; + ScalabilityStructureWrapper wrapper(structure); + + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/3, /*s1=*/3)); + auto frames = wrapper.GenerateFrames(/*num_temporal_units=*/1); + EXPECT_THAT(frames, SizeIs(2)); + EXPECT_EQ(frames[0].temporal_id, 0); + + frames = wrapper.GenerateFrames(/*num_temporal_units=*/1); + EXPECT_THAT(frames, SizeIs(2)); + EXPECT_EQ(frames[0].temporal_id, 2); + + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/3, /*s1=*/0)); + frames = wrapper.GenerateFrames(/*num_temporal_units=*/1); + EXPECT_THAT(frames, SizeIs(1)); + EXPECT_EQ(frames[0].temporal_id, 1); + + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/3, /*s1=*/3)); + // Rely on checks inside GenerateFrames that frame references are valid. + frames = wrapper.GenerateFrames(/*num_temporal_units=*/1); + EXPECT_THAT(frames, SizeIs(2)); + EXPECT_EQ(frames[0].temporal_id, 2); +} + +TEST(ScalabilityStructureL3T3Test, SkipT1FrameByEncoderKeepsReferencesValid) { + std::vector<GenericFrameInfo> frames; + ScalabilityStructureL3T3 structure; + ScalabilityStructureWrapper wrapper(structure); + + // 1st 2 temporal units (T0 and T2) + wrapper.GenerateFrames(/*num_temporal_units=*/2, frames); + // Simulate T1 frame dropped by the encoder, + // i.e. retrieve config, but skip calling OnEncodeDone. 
+ structure.NextFrameConfig(/*restart=*/false); + // one more temporal unit (T2) + wrapper.GenerateFrames(/*num_temporal_units=*/1, frames); + + EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames)); +} + +TEST(ScalabilityStructureL3T3Test, + SkippingFrameReusePreviousFrameConfiguration) { + std::vector<GenericFrameInfo> frames; + ScalabilityStructureL3T3 structure; + ScalabilityStructureWrapper wrapper(structure); + + // 1st 2 temporal units (T0 and T2) + wrapper.GenerateFrames(/*num_temporal_units=*/2, frames); + ASSERT_THAT(frames, SizeIs(6)); + ASSERT_EQ(frames[0].temporal_id, 0); + ASSERT_EQ(frames[3].temporal_id, 2); + + // Simulate a frame dropped by the encoder, + // i.e. retrieve config, but skip calling OnEncodeDone. + structure.NextFrameConfig(/*restart=*/false); + // two more temporal units, expect temporal pattern continues + wrapper.GenerateFrames(/*num_temporal_units=*/2, frames); + ASSERT_THAT(frames, SizeIs(12)); + // Expect temporal pattern continues as if there were no dropped frames. 
+ EXPECT_EQ(frames[6].temporal_id, 1); + EXPECT_EQ(frames[9].temporal_id, 2); +} + +TEST(ScalabilityStructureL3T3Test, SwitchSpatialLayerBeforeT1Frame) { + ScalabilityStructureL3T3 structure; + ScalabilityStructureWrapper wrapper(structure); + + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/0)); + EXPECT_THAT(wrapper.GenerateFrames(1), SizeIs(1)); + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/0, /*s1=*/2)); + auto frames = wrapper.GenerateFrames(1); + ASSERT_THAT(frames, SizeIs(1)); + EXPECT_THAT(frames[0].frame_diffs, IsEmpty()); + EXPECT_EQ(frames[0].temporal_id, 0); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_key_svc.cc b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_key_svc.cc new file mode 100644 index 0000000000..0e6fecfae9 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_key_svc.cc @@ -0,0 +1,427 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/video_coding/svc/scalability_structure_key_svc.h" + +#include <bitset> +#include <utility> +#include <vector> + +#include "absl/types/optional.h" +#include "api/transport/rtp/dependency_descriptor.h" +#include "api/video/video_bitrate_allocation.h" +#include "common_video/generic_frame_descriptor/generic_frame_info.h" +#include "modules/video_coding/svc/scalable_video_controller.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +constexpr int ScalabilityStructureKeySvc::kMaxNumSpatialLayers; +constexpr int ScalabilityStructureKeySvc::kMaxNumTemporalLayers; + +ScalabilityStructureKeySvc::ScalabilityStructureKeySvc(int num_spatial_layers, + int num_temporal_layers) + : num_spatial_layers_(num_spatial_layers), + num_temporal_layers_(num_temporal_layers), + active_decode_targets_( + (uint32_t{1} << (num_spatial_layers * num_temporal_layers)) - 1) { + // There is no point to use this structure without spatial scalability. + RTC_DCHECK_GT(num_spatial_layers, 1); + RTC_DCHECK_LE(num_spatial_layers, kMaxNumSpatialLayers); + RTC_DCHECK_LE(num_temporal_layers, kMaxNumTemporalLayers); +} + +ScalabilityStructureKeySvc::~ScalabilityStructureKeySvc() = default; + +ScalableVideoController::StreamLayersConfig +ScalabilityStructureKeySvc::StreamConfig() const { + StreamLayersConfig result; + result.num_spatial_layers = num_spatial_layers_; + result.num_temporal_layers = num_temporal_layers_; + result.scaling_factor_num[num_spatial_layers_ - 1] = 1; + result.scaling_factor_den[num_spatial_layers_ - 1] = 1; + for (int sid = num_spatial_layers_ - 1; sid > 0; --sid) { + result.scaling_factor_num[sid - 1] = 1; + result.scaling_factor_den[sid - 1] = 2 * result.scaling_factor_den[sid]; + } + result.uses_reference_scaling = true; + return result; +} + +bool ScalabilityStructureKeySvc::TemporalLayerIsActive(int tid) const { + if (tid >= num_temporal_layers_) { + return false; + } + for (int sid = 0; sid < num_spatial_layers_; 
++sid) { + if (DecodeTargetIsActive(sid, tid)) { + return true; + } + } + return false; +} + +DecodeTargetIndication ScalabilityStructureKeySvc::Dti( + int sid, + int tid, + const LayerFrameConfig& config) { + if (config.IsKeyframe() || config.Id() == kKey) { + RTC_DCHECK_EQ(config.TemporalId(), 0); + return sid < config.SpatialId() ? DecodeTargetIndication::kNotPresent + : DecodeTargetIndication::kSwitch; + } + + if (sid != config.SpatialId() || tid < config.TemporalId()) { + return DecodeTargetIndication::kNotPresent; + } + if (tid == config.TemporalId() && tid > 0) { + return DecodeTargetIndication::kDiscardable; + } + return DecodeTargetIndication::kSwitch; +} + +std::vector<ScalableVideoController::LayerFrameConfig> +ScalabilityStructureKeySvc::KeyframeConfig() { + std::vector<LayerFrameConfig> configs; + configs.reserve(num_spatial_layers_); + absl::optional<int> spatial_dependency_buffer_id; + spatial_id_is_enabled_.reset(); + // Disallow temporal references cross T0 on higher temporal layers. + can_reference_t1_frame_for_spatial_id_.reset(); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (!DecodeTargetIsActive(sid, /*tid=*/0)) { + continue; + } + configs.emplace_back(); + ScalableVideoController::LayerFrameConfig& config = configs.back(); + config.Id(kKey).S(sid).T(0); + + if (spatial_dependency_buffer_id) { + config.Reference(*spatial_dependency_buffer_id); + } else { + config.Keyframe(); + } + config.Update(BufferIndex(sid, /*tid=*/0)); + + spatial_id_is_enabled_.set(sid); + spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/0); + } + return configs; +} + +std::vector<ScalableVideoController::LayerFrameConfig> +ScalabilityStructureKeySvc::T0Config() { + std::vector<LayerFrameConfig> configs; + configs.reserve(num_spatial_layers_); + // Disallow temporal references cross T0 on higher temporal layers. 
+ can_reference_t1_frame_for_spatial_id_.reset(); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (!DecodeTargetIsActive(sid, /*tid=*/0)) { + spatial_id_is_enabled_.reset(sid); + continue; + } + configs.emplace_back(); + configs.back().Id(kDeltaT0).S(sid).T(0).ReferenceAndUpdate( + BufferIndex(sid, /*tid=*/0)); + } + return configs; +} + +std::vector<ScalableVideoController::LayerFrameConfig> +ScalabilityStructureKeySvc::T1Config() { + std::vector<LayerFrameConfig> configs; + configs.reserve(num_spatial_layers_); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (!DecodeTargetIsActive(sid, /*tid=*/1)) { + continue; + } + configs.emplace_back(); + ScalableVideoController::LayerFrameConfig& config = configs.back(); + config.Id(kDeltaT1).S(sid).T(1).Reference(BufferIndex(sid, /*tid=*/0)); + if (num_temporal_layers_ > 2) { + config.Update(BufferIndex(sid, /*tid=*/1)); + } + } + return configs; +} + +std::vector<ScalableVideoController::LayerFrameConfig> +ScalabilityStructureKeySvc::T2Config(FramePattern pattern) { + std::vector<LayerFrameConfig> configs; + configs.reserve(num_spatial_layers_); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (!DecodeTargetIsActive(sid, /*tid=*/2)) { + continue; + } + configs.emplace_back(); + ScalableVideoController::LayerFrameConfig& config = configs.back(); + config.Id(pattern).S(sid).T(2); + if (can_reference_t1_frame_for_spatial_id_[sid]) { + config.Reference(BufferIndex(sid, /*tid=*/1)); + } else { + config.Reference(BufferIndex(sid, /*tid=*/0)); + } + } + return configs; +} + +ScalabilityStructureKeySvc::FramePattern +ScalabilityStructureKeySvc::NextPattern(FramePattern last_pattern) const { + switch (last_pattern) { + case kNone: + return kKey; + case kDeltaT2B: + return kDeltaT0; + case kDeltaT2A: + if (TemporalLayerIsActive(1)) { + return kDeltaT1; + } + return kDeltaT0; + case kDeltaT1: + if (TemporalLayerIsActive(2)) { + return kDeltaT2B; + } + return kDeltaT0; + case kDeltaT0: + case 
kKey: + if (TemporalLayerIsActive(2)) { + return kDeltaT2A; + } + if (TemporalLayerIsActive(1)) { + return kDeltaT1; + } + return kDeltaT0; + } + RTC_DCHECK_NOTREACHED(); + return kNone; +} + +std::vector<ScalableVideoController::LayerFrameConfig> +ScalabilityStructureKeySvc::NextFrameConfig(bool restart) { + if (active_decode_targets_.none()) { + last_pattern_ = kNone; + return {}; + } + + if (restart) { + last_pattern_ = kNone; + } + + FramePattern current_pattern = NextPattern(last_pattern_); + switch (current_pattern) { + case kKey: + return KeyframeConfig(); + case kDeltaT0: + return T0Config(); + case kDeltaT1: + return T1Config(); + case kDeltaT2A: + case kDeltaT2B: + return T2Config(current_pattern); + case kNone: + break; + } + RTC_DCHECK_NOTREACHED(); + return {}; +} + +GenericFrameInfo ScalabilityStructureKeySvc::OnEncodeDone( + const LayerFrameConfig& config) { + // When encoder drops all frames for a temporal unit, it is better to reuse + // old temporal pattern rather than switch to next one, thus switch to next + // pattern is deferred here from the `NextFrameConfig`. + // In particular creating VP9 references relies on this behavior. 
+ last_pattern_ = static_cast<FramePattern>(config.Id()); + if (config.TemporalId() == 1) { + can_reference_t1_frame_for_spatial_id_.set(config.SpatialId()); + } + + GenericFrameInfo frame_info; + frame_info.spatial_id = config.SpatialId(); + frame_info.temporal_id = config.TemporalId(); + frame_info.encoder_buffers = config.Buffers(); + frame_info.decode_target_indications.reserve(num_spatial_layers_ * + num_temporal_layers_); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + frame_info.decode_target_indications.push_back(Dti(sid, tid, config)); + } + } + frame_info.part_of_chain.assign(num_spatial_layers_, false); + if (config.IsKeyframe() || config.Id() == kKey) { + RTC_DCHECK_EQ(config.TemporalId(), 0); + for (int sid = config.SpatialId(); sid < num_spatial_layers_; ++sid) { + frame_info.part_of_chain[sid] = true; + } + } else if (config.TemporalId() == 0) { + frame_info.part_of_chain[config.SpatialId()] = true; + } + frame_info.active_decode_targets = active_decode_targets_; + return frame_info; +} + +void ScalabilityStructureKeySvc::OnRatesUpdated( + const VideoBitrateAllocation& bitrates) { + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + // Enable/disable spatial layers independently. + bool active = bitrates.GetBitrate(sid, /*tid=*/0) > 0; + SetDecodeTargetIsActive(sid, /*tid=*/0, active); + if (!spatial_id_is_enabled_[sid] && active) { + // Key frame is required to reenable any spatial layer. + last_pattern_ = kNone; + } + + for (int tid = 1; tid < num_temporal_layers_; ++tid) { + // To enable temporal layer, require bitrates for lower temporal layers. 
+ active = active && bitrates.GetBitrate(sid, tid) > 0; + SetDecodeTargetIsActive(sid, tid, active); + } + } +} + +ScalabilityStructureL2T1Key::~ScalabilityStructureL2T1Key() = default; + +FrameDependencyStructure ScalabilityStructureL2T1Key::DependencyStructure() + const { + FrameDependencyStructure structure; + structure.num_decode_targets = 2; + structure.num_chains = 2; + structure.decode_target_protected_by_chain = {0, 1}; + structure.templates.resize(4); + structure.templates[0].S(0).Dtis("S-").ChainDiffs({2, 1}).FrameDiffs({2}); + structure.templates[1].S(0).Dtis("SS").ChainDiffs({0, 0}); + structure.templates[2].S(1).Dtis("-S").ChainDiffs({1, 2}).FrameDiffs({2}); + structure.templates[3].S(1).Dtis("-S").ChainDiffs({1, 1}).FrameDiffs({1}); + return structure; +} + +ScalabilityStructureL2T2Key::~ScalabilityStructureL2T2Key() = default; + +FrameDependencyStructure ScalabilityStructureL2T2Key::DependencyStructure() + const { + FrameDependencyStructure structure; + structure.num_decode_targets = 4; + structure.num_chains = 2; + structure.decode_target_protected_by_chain = {0, 0, 1, 1}; + structure.templates.resize(6); + auto& templates = structure.templates; + templates[0].S(0).T(0).Dtis("SSSS").ChainDiffs({0, 0}); + templates[1].S(0).T(0).Dtis("SS--").ChainDiffs({4, 3}).FrameDiffs({4}); + templates[2].S(0).T(1).Dtis("-D--").ChainDiffs({2, 1}).FrameDiffs({2}); + templates[3].S(1).T(0).Dtis("--SS").ChainDiffs({1, 1}).FrameDiffs({1}); + templates[4].S(1).T(0).Dtis("--SS").ChainDiffs({1, 4}).FrameDiffs({4}); + templates[5].S(1).T(1).Dtis("---D").ChainDiffs({3, 2}).FrameDiffs({2}); + return structure; +} + +ScalabilityStructureL2T3Key::~ScalabilityStructureL2T3Key() = default; + +FrameDependencyStructure ScalabilityStructureL2T3Key::DependencyStructure() + const { + FrameDependencyStructure structure; + structure.num_decode_targets = 6; + structure.num_chains = 2; + structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1}; + auto& templates = 
structure.templates; + templates.resize(10); + templates[0].S(0).T(0).Dtis("SSSSSS").ChainDiffs({0, 0}); + templates[1].S(0).T(0).Dtis("SSS---").ChainDiffs({8, 7}).FrameDiffs({8}); + templates[2].S(0).T(1).Dtis("-DS---").ChainDiffs({4, 3}).FrameDiffs({4}); + templates[3].S(0).T(2).Dtis("--D---").ChainDiffs({2, 1}).FrameDiffs({2}); + templates[4].S(0).T(2).Dtis("--D---").ChainDiffs({6, 5}).FrameDiffs({2}); + templates[5].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 1}).FrameDiffs({1}); + templates[6].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 8}).FrameDiffs({8}); + templates[7].S(1).T(1).Dtis("----DS").ChainDiffs({5, 4}).FrameDiffs({4}); + templates[8].S(1).T(2).Dtis("-----D").ChainDiffs({3, 2}).FrameDiffs({2}); + templates[9].S(1).T(2).Dtis("-----D").ChainDiffs({7, 6}).FrameDiffs({2}); + return structure; +} + +ScalabilityStructureL3T1Key::~ScalabilityStructureL3T1Key() = default; + +FrameDependencyStructure ScalabilityStructureL3T1Key::DependencyStructure() + const { + FrameDependencyStructure structure; + structure.num_decode_targets = 3; + structure.num_chains = 3; + structure.decode_target_protected_by_chain = {0, 1, 2}; + auto& t = structure.templates; + t.resize(6); + // Templates are shown in the order frames following them appear in the + // stream, but in `structure.templates` array templates are sorted by + // (`spatial_id`, `temporal_id`) since that is a dependency descriptor + // requirement. 
+ t[1].S(0).Dtis("SSS").ChainDiffs({0, 0, 0}); + t[3].S(1).Dtis("-SS").ChainDiffs({1, 1, 1}).FrameDiffs({1}); + t[5].S(2).Dtis("--S").ChainDiffs({2, 1, 1}).FrameDiffs({1}); + t[0].S(0).Dtis("S--").ChainDiffs({3, 2, 1}).FrameDiffs({3}); + t[2].S(1).Dtis("-S-").ChainDiffs({1, 3, 2}).FrameDiffs({3}); + t[4].S(2).Dtis("--S").ChainDiffs({2, 1, 3}).FrameDiffs({3}); + return structure; +} + +ScalabilityStructureL3T2Key::~ScalabilityStructureL3T2Key() = default; + +FrameDependencyStructure ScalabilityStructureL3T2Key::DependencyStructure() + const { + FrameDependencyStructure structure; + structure.num_decode_targets = 6; + structure.num_chains = 3; + structure.decode_target_protected_by_chain = {0, 0, 1, 1, 2, 2}; + auto& t = structure.templates; + t.resize(9); + // Templates are shown in the order frames following them appear in the + // stream, but in `structure.templates` array templates are sorted by + // (`spatial_id`, `temporal_id`) since that is a dependency descriptor + // requirement. 
+ t[1].S(0).T(0).Dtis("SSSSSS").ChainDiffs({0, 0, 0}); + t[4].S(1).T(0).Dtis("--SSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1}); + t[7].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 1}).FrameDiffs({1}); + t[2].S(0).T(1).Dtis("-D----").ChainDiffs({3, 2, 1}).FrameDiffs({3}); + t[5].S(1).T(1).Dtis("---D--").ChainDiffs({4, 3, 2}).FrameDiffs({3}); + t[8].S(2).T(1).Dtis("-----D").ChainDiffs({5, 4, 3}).FrameDiffs({3}); + t[0].S(0).T(0).Dtis("SS----").ChainDiffs({6, 5, 4}).FrameDiffs({6}); + t[3].S(1).T(0).Dtis("--SS--").ChainDiffs({1, 6, 5}).FrameDiffs({6}); + t[6].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 6}).FrameDiffs({6}); + return structure; +} + +ScalabilityStructureL3T3Key::~ScalabilityStructureL3T3Key() = default; + +FrameDependencyStructure ScalabilityStructureL3T3Key::DependencyStructure() + const { + FrameDependencyStructure structure; + structure.num_decode_targets = 9; + structure.num_chains = 3; + structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1, 2, 2, 2}; + auto& t = structure.templates; + t.resize(15); + // Templates are shown in the order frames following them appear in the + // stream, but in `structure.templates` array templates are sorted by + // (`spatial_id`, `temporal_id`) since that is a dependency descriptor + // requirement. Indexes are written in hex for nicer alignment. 
+ t[0x0].S(0).T(0).Dtis("SSSSSSSSS").ChainDiffs({0, 0, 0}); + t[0x5].S(1).T(0).Dtis("---SSSSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1}); + t[0xA].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 1}).FrameDiffs({1}); + t[0x3].S(0).T(2).Dtis("--D------").ChainDiffs({3, 2, 1}).FrameDiffs({3}); + t[0x8].S(1).T(2).Dtis("-----D---").ChainDiffs({4, 3, 2}).FrameDiffs({3}); + t[0xD].S(2).T(2).Dtis("--------D").ChainDiffs({5, 4, 3}).FrameDiffs({3}); + t[0x2].S(0).T(1).Dtis("-DS------").ChainDiffs({6, 5, 4}).FrameDiffs({6}); + t[0x7].S(1).T(1).Dtis("----DS---").ChainDiffs({7, 6, 5}).FrameDiffs({6}); + t[0xC].S(2).T(1).Dtis("-------DS").ChainDiffs({8, 7, 6}).FrameDiffs({6}); + t[0x4].S(0).T(2).Dtis("--D------").ChainDiffs({9, 8, 7}).FrameDiffs({3}); + t[0x9].S(1).T(2).Dtis("-----D---").ChainDiffs({10, 9, 8}).FrameDiffs({3}); + t[0xE].S(2).T(2).Dtis("--------D").ChainDiffs({11, 10, 9}).FrameDiffs({3}); + t[0x1].S(0).T(0).Dtis("SSS------").ChainDiffs({12, 11, 10}).FrameDiffs({12}); + t[0x6].S(1).T(0).Dtis("---SSS---").ChainDiffs({1, 12, 11}).FrameDiffs({12}); + t[0xB].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 12}).FrameDiffs({12}); + return structure; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_key_svc.h b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_key_svc.h new file mode 100644 index 0000000000..54760da431 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_key_svc.h @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+#ifndef MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_KEY_SVC_H_
+#define MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_KEY_SVC_H_
+
+#include <bitset>
+#include <vector>
+
+#include "api/transport/rtp/dependency_descriptor.h"
+#include "api/video/video_bitrate_allocation.h"
+#include "common_video/generic_frame_descriptor/generic_frame_info.h"
+#include "modules/video_coding/svc/scalable_video_controller.h"
+
+namespace webrtc {
+
+// Shared ScalableVideoController implementation for the *Key structures
+// declared below. Each subclass only fixes the (spatial, temporal) layer
+// counts passed to the constructor and supplies its own
+// DependencyStructure().
+class ScalabilityStructureKeySvc : public ScalableVideoController {
+ public:
+  ScalabilityStructureKeySvc(int num_spatial_layers, int num_temporal_layers);
+  ~ScalabilityStructureKeySvc() override;
+
+  StreamLayersConfig StreamConfig() const override;
+
+  std::vector<LayerFrameConfig> NextFrameConfig(bool restart) override;
+  GenericFrameInfo OnEncodeDone(const LayerFrameConfig& config) override;
+  void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override;
+
+ private:
+  // Kind of temporal unit in the frame pattern. Judging by the names,
+  // kDeltaT2A/kDeltaT2B are the two T2 slots of one period; the transitions
+  // are implemented in the .cc file - TODO confirm there.
+  enum FramePattern : int {
+    kNone,
+    kKey,
+    kDeltaT0,
+    kDeltaT2A,
+    kDeltaT1,
+    kDeltaT2B,
+  };
+  static constexpr int kMaxNumSpatialLayers = 3;
+  static constexpr int kMaxNumTemporalLayers = 3;
+
+  // Index of the buffer to store last frame for layer (`sid`, `tid`)
+  int BufferIndex(int sid, int tid) const {
+    return tid * num_spatial_layers_ + sid;
+  }
+  // Decode targets are addressed as `sid * num_temporal_layers_ + tid` inside
+  // `active_decode_targets_`.
+  bool DecodeTargetIsActive(int sid, int tid) const {
+    return active_decode_targets_[sid * num_temporal_layers_ + tid];
+  }
+  void SetDecodeTargetIsActive(int sid, int tid, bool value) {
+    active_decode_targets_.set(sid * num_temporal_layers_ + tid, value);
+  }
+  bool TemporalLayerIsActive(int tid) const;
+  static DecodeTargetIndication Dti(int sid,
+                                    int tid,
+                                    const LayerFrameConfig& config);
+
+  // Builders for the per-pattern frame configurations (defined in the .cc).
+  std::vector<LayerFrameConfig> KeyframeConfig();
+  std::vector<LayerFrameConfig> T0Config();
+  std::vector<LayerFrameConfig> T1Config();
+  std::vector<LayerFrameConfig> T2Config(FramePattern pattern);
+
+  FramePattern NextPattern(FramePattern last_pattern) const;
+
+  const int num_spatial_layers_;
+  const int num_temporal_layers_;
+
+  FramePattern last_pattern_ = kNone;
+  // One bit per spatial layer.
+  std::bitset<kMaxNumSpatialLayers> spatial_id_is_enabled_;
+  std::bitset<kMaxNumSpatialLayers> can_reference_t1_frame_for_spatial_id_;
+  // One bit per decode target, see DecodeTargetIsActive() for the indexing.
+  std::bitset<32> active_decode_targets_;
+};
+
+// S1  0--0--0-
+//     |       ...
+// S0  0--0--0-
+class ScalabilityStructureL2T1Key : public ScalabilityStructureKeySvc {
+ public:
+  ScalabilityStructureL2T1Key() : ScalabilityStructureKeySvc(2, 1) {}
+  ~ScalabilityStructureL2T1Key() override;
+
+  FrameDependencyStructure DependencyStructure() const override;
+};
+
+// S1T1     0   0
+//         /   /   /
+// S1T0   0---0---0
+//        |         ...
+// S0T1   | 0   0
+//        |/   /   /
+// S0T0   0---0---0
+// Time-> 0 1 2 3 4
+class ScalabilityStructureL2T2Key : public ScalabilityStructureKeySvc {
+ public:
+  ScalabilityStructureL2T2Key() : ScalabilityStructureKeySvc(2, 2) {}
+  ~ScalabilityStructureL2T2Key() override;
+
+  FrameDependencyStructure DependencyStructure() const override;
+};
+
+// Key structure with 2 spatial and 3 temporal layers.
+class ScalabilityStructureL2T3Key : public ScalabilityStructureKeySvc {
+ public:
+  ScalabilityStructureL2T3Key() : ScalabilityStructureKeySvc(2, 3) {}
+  ~ScalabilityStructureL2T3Key() override;
+
+  FrameDependencyStructure DependencyStructure() const override;
+};
+
+// Key structure with 3 spatial layers and 1 temporal layer.
+class ScalabilityStructureL3T1Key : public ScalabilityStructureKeySvc {
+ public:
+  ScalabilityStructureL3T1Key() : ScalabilityStructureKeySvc(3, 1) {}
+  ~ScalabilityStructureL3T1Key() override;
+
+  FrameDependencyStructure DependencyStructure() const override;
+};
+
+// Key structure with 3 spatial and 2 temporal layers.
+class ScalabilityStructureL3T2Key : public ScalabilityStructureKeySvc {
+ public:
+  ScalabilityStructureL3T2Key() : ScalabilityStructureKeySvc(3, 2) {}
+  ~ScalabilityStructureL3T2Key() override;
+
+  FrameDependencyStructure DependencyStructure() const override;
+};
+
+// Key structure with 3 spatial and 3 temporal layers.
+class ScalabilityStructureL3T3Key : public ScalabilityStructureKeySvc {
+ public:
+  ScalabilityStructureL3T3Key() : ScalabilityStructureKeySvc(3, 3) {}
+  ~ScalabilityStructureL3T3Key() override;
+
+  FrameDependencyStructure DependencyStructure() const override;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_KEY_SVC_H_
diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc
new file mode 100644
index 0000000000..5f923bb487
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/video_coding/svc/scalability_structure_key_svc.h"
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "api/transport/rtp/dependency_descriptor.h"
+#include "common_video/generic_frame_descriptor/generic_frame_info.h"
+#include "modules/video_coding/svc/scalability_structure_test_helpers.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+// S1 is limited to a single temporal layer for one temporal unit, so only one
+// frame is produced in that unit; the structure must stay valid once S1 gets
+// all temporal layers back.
+TEST(ScalabilityStructureL3T3KeyTest,
+     SkipingT1FrameOnOneSpatialLayerKeepsStructureValid) {
+  ScalabilityStructureL3T3Key structure;
+  ScalabilityStructureWrapper wrapper(structure);
+  std::vector<GenericFrameInfo> frames;
+
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/3, /*s1=*/3));
+  wrapper.GenerateFrames(/*num_temporal_units=*/2, frames);
+  EXPECT_THAT(frames, SizeIs(4));
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/3, /*s1=*/1));
+  wrapper.GenerateFrames(/*num_temporal_units=*/1, frames);
+  EXPECT_THAT(frames, SizeIs(5));
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/3, /*s1=*/3));
+  wrapper.GenerateFrames(/*num_temporal_units=*/1, frames);
+  ASSERT_THAT(frames, SizeIs(7));
+
+  EXPECT_EQ(frames[0].temporal_id, 0);
+  EXPECT_EQ(frames[1].temporal_id, 0);
+  EXPECT_EQ(frames[2].temporal_id, 2);
+  EXPECT_EQ(frames[3].temporal_id, 2);
+  EXPECT_EQ(frames[4].temporal_id, 1);
+  EXPECT_EQ(frames[5].temporal_id, 2);
+  EXPECT_EQ(frames[6].temporal_id, 2);
+  EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames));
+}
+
+// A config that was requested but never reported via OnEncodeDone must not
+// leave later frames with dangling references.
+TEST(ScalabilityStructureL3T3KeyTest,
+     SkipT1FrameByEncoderKeepsReferencesValid) {
+  std::vector<GenericFrameInfo> frames;
+  ScalabilityStructureL3T3Key structure;
+  ScalabilityStructureWrapper wrapper(structure);
+
+  // 1st 2 temporal units (T0 and T2)
+  wrapper.GenerateFrames(/*num_temporal_units=*/2, frames);
+  // Simulate T1 frame dropped by the encoder,
+  // i.e. retrieve config, but skip calling OnEncodeDone.
+  structure.NextFrameConfig(/*restart=*/false);
+  // one more temporal unit.
+  wrapper.GenerateFrames(/*num_temporal_units=*/1, frames);
+
+  EXPECT_THAT(frames, SizeIs(9));
+  EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames));
+}
+
+TEST(ScalabilityStructureL3T3KeyTest,
+     SkippingFrameReusePreviousFrameConfiguration) {
+  std::vector<GenericFrameInfo> frames;
+  ScalabilityStructureL3T3Key structure;
+  ScalabilityStructureWrapper wrapper(structure);
+
+  // 1st 2 temporal units (T0 and T2)
+  wrapper.GenerateFrames(/*num_temporal_units=*/2, frames);
+  ASSERT_THAT(frames, SizeIs(6));
+  ASSERT_EQ(frames[0].temporal_id, 0);
+  ASSERT_EQ(frames[3].temporal_id, 2);
+
+  // Simulate a frame dropped by the encoder,
+  // i.e. retrieve config, but skip calling OnEncodeDone.
+  structure.NextFrameConfig(/*restart=*/false);
+  // Two more temporal units; expect the temporal pattern to continue.
+  wrapper.GenerateFrames(/*num_temporal_units=*/2, frames);
+  ASSERT_THAT(frames, SizeIs(12));
+  // Expect temporal pattern continues as if there were no dropped frames.
+  EXPECT_EQ(frames[6].temporal_id, 1);
+  EXPECT_EQ(frames[9].temporal_id, 2);
+}
+
+TEST(ScalabilityStructureL3T3KeyTest, SkippingKeyFrameTriggersNewKeyFrame) {
+  std::vector<GenericFrameInfo> frames;
+  ScalabilityStructureL3T3Key structure;
+  ScalabilityStructureWrapper wrapper(structure);
+
+  // Ask for a key frame config, but do not return any frames
+  structure.NextFrameConfig(/*restart=*/false);
+
+  // Ask for more frames, expect they start with a key frame.
+  wrapper.GenerateFrames(/*num_temporal_units=*/2, frames);
+  ASSERT_THAT(frames, SizeIs(6));
+  ASSERT_EQ(frames[0].temporal_id, 0);
+  ASSERT_EQ(frames[3].temporal_id, 2);
+  EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames));
+}
+
+TEST(ScalabilityStructureL3T3KeyTest,
+     SkippingT2FrameAndDisablingT2LayerProduceT1AsNextFrame) {
+  std::vector<GenericFrameInfo> frames;
+  ScalabilityStructureL3T3Key structure;
+  ScalabilityStructureWrapper wrapper(structure);
+
+  wrapper.GenerateFrames(/*num_temporal_units=*/1, frames);
+  // Ask for next (T2) frame config, but do not return any frames
+  auto config = structure.NextFrameConfig(/*restart=*/false);
+  ASSERT_THAT(config, Not(IsEmpty()));
+  ASSERT_EQ(config.front().TemporalId(), 2);
+
+  // Disable T2 layer,
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/2, /*s2=*/2));
+  // Expect instead of reusing unused config, T1 config is generated.
+  config = structure.NextFrameConfig(/*restart=*/false);
+  ASSERT_THAT(config, Not(IsEmpty()));
+  EXPECT_EQ(config.front().TemporalId(), 1);
+}
+
+TEST(ScalabilityStructureL3T3KeyTest, EnableT2LayerWhileProducingT1Frame) {
+  std::vector<GenericFrameInfo> frames;
+  ScalabilityStructureL3T3Key structure;
+  ScalabilityStructureWrapper wrapper(structure);
+
+  // Disable T2 layer,
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/2, /*s2=*/2));
+
+  // Generate the key frame.
+  wrapper.GenerateFrames(/*num_temporal_units=*/1, frames);
+  ASSERT_THAT(frames, SizeIs(3));
+  EXPECT_EQ(frames[0].temporal_id, 0);
+
+  // Ask for next (T1) frame config, but do not return any frames yet.
+  auto config = structure.NextFrameConfig(/*restart=*/false);
+  ASSERT_THAT(config, Not(IsEmpty()));
+  ASSERT_EQ(config.front().TemporalId(), 1);
+
+  // Reenable T2 layer.
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/3, /*s1=*/3, /*s2=*/3));
+
+  // Finish encoding previously requested config.
+  for (auto layer_config : config) {
+    GenericFrameInfo info = structure.OnEncodeDone(layer_config);
+    EXPECT_EQ(info.temporal_id, 1);
+    frames.push_back(info);
+  }
+  ASSERT_THAT(frames, SizeIs(6));
+
+  // Generate more frames, expect T2 pattern resumes.
+  wrapper.GenerateFrames(/*num_temporal_units=*/4, frames);
+  ASSERT_THAT(frames, SizeIs(18));
+  EXPECT_EQ(frames[6].temporal_id, 2);
+  EXPECT_EQ(frames[9].temporal_id, 0);
+  EXPECT_EQ(frames[12].temporal_id, 2);
+  EXPECT_EQ(frames[15].temporal_id, 1);
+
+  EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames));
+}
+
+TEST(ScalabilityStructureL3T3KeyTest,
+     ReenablingSpatialLayerBeforeMissedT0FrameDoesntTriggerAKeyFrame) {
+  ScalabilityStructureL3T3Key structure;
+  ScalabilityStructureWrapper wrapper(structure);
+  std::vector<GenericFrameInfo> frames;
+
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/2));
+  wrapper.GenerateFrames(1, frames);
+  EXPECT_THAT(frames, SizeIs(2));
+  // Drop a spatial layer.
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/0));
+  wrapper.GenerateFrames(1, frames);
+  EXPECT_THAT(frames, SizeIs(3));
+  // Reenable a spatial layer before T0 frame is encoded.
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/2));
+  wrapper.GenerateFrames(1, frames);
+  EXPECT_THAT(frames, SizeIs(5));
+
+  EXPECT_EQ(frames[0].temporal_id, 0);
+  EXPECT_EQ(frames[1].temporal_id, 0);
+  EXPECT_EQ(frames[2].temporal_id, 1);
+  EXPECT_EQ(frames[3].temporal_id, 0);
+  EXPECT_EQ(frames[4].temporal_id, 0);
+  // Both T0 frames after the re-enable are delta frames with one reference.
+  EXPECT_THAT(frames[3].frame_diffs, SizeIs(1));
+  EXPECT_THAT(frames[4].frame_diffs, SizeIs(1));
+  EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames));
+}
+
+TEST(ScalabilityStructureL3T3KeyTest, ReenablingSpatialLayerTriggersKeyFrame) {
+  ScalabilityStructureL3T3Key structure;
+  ScalabilityStructureWrapper wrapper(structure);
+  std::vector<GenericFrameInfo> frames;
+
+  // Start with all spatial layers enabled.
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/2, /*s2=*/2));
+  wrapper.GenerateFrames(3, frames);
+  EXPECT_THAT(frames, SizeIs(9));
+  // Drop a spatial layer. Two remaining spatial layers should just continue.
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/0, /*s2=*/2));
+  wrapper.GenerateFrames(2, frames);
+  EXPECT_THAT(frames, SizeIs(13));
+  // Reenable spatial layer, expect a full restart.
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/2, /*s2=*/2));
+  wrapper.GenerateFrames(1, frames);
+  ASSERT_THAT(frames, SizeIs(16));
+
+  // First 3 temporal units with all spatial layers enabled.
+  EXPECT_EQ(frames[0].temporal_id, 0);
+  EXPECT_EQ(frames[3].temporal_id, 1);
+  EXPECT_EQ(frames[6].temporal_id, 0);
+  // 2 temporal units with spatial layer 1 disabled.
+  EXPECT_EQ(frames[9].spatial_id, 0);
+  EXPECT_EQ(frames[9].temporal_id, 1);
+  EXPECT_EQ(frames[10].spatial_id, 2);
+  EXPECT_EQ(frames[10].temporal_id, 1);
+  // T0 frames were encoded while spatial layer 1 is disabled.
+  EXPECT_EQ(frames[11].spatial_id, 0);
+  EXPECT_EQ(frames[11].temporal_id, 0);
+  EXPECT_EQ(frames[12].spatial_id, 2);
+  EXPECT_EQ(frames[12].temporal_id, 0);
+  // Key frame to reenable spatial layer 1.
+  EXPECT_THAT(frames[13].frame_diffs, IsEmpty());
+  EXPECT_THAT(frames[14].frame_diffs, ElementsAre(1));
+  EXPECT_THAT(frames[15].frame_diffs, ElementsAre(1));
+  EXPECT_EQ(frames[13].temporal_id, 0);
+  EXPECT_EQ(frames[14].temporal_id, 0);
+  EXPECT_EQ(frames[15].temporal_id, 0);
+  auto all_frames = rtc::MakeArrayView(frames.data(), frames.size());
+  EXPECT_TRUE(wrapper.FrameReferencesAreValid(all_frames.subview(0, 13)));
+  // Frames starting from the frame#13 should not reference any earlier frames.
+  EXPECT_TRUE(wrapper.FrameReferencesAreValid(all_frames.subview(13)));
+}
+
+}  // namespace
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_l2t2_key_shift.cc b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_l2t2_key_shift.cc
new file mode 100644
index 0000000000..4d15942d3e
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_l2t2_key_shift.cc
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/video_coding/svc/scalability_structure_l2t2_key_shift.h"
+
+#include <utility>
+#include <vector>
+
+#include "absl/base/macros.h"
+#include "api/transport/rtp/dependency_descriptor.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace {
+
+// Returns the indication of the frame described by `config` for decode target
+// (`sid`, `tid`).
+// Key frame: kNotPresent for spatial layers below the frame's own layer,
+// kSwitch for all others.
+// Delta frame: kNotPresent for another spatial layer or a lower temporal
+// layer, kDiscardable for the frame's own temporal layer when it is above T0,
+// kSwitch otherwise.
+DecodeTargetIndication
+Dti(int sid, int tid, const ScalableVideoController::LayerFrameConfig& config) {
+  if (config.IsKeyframe()) {
+    RTC_DCHECK_EQ(config.TemporalId(), 0);
+    return sid < config.SpatialId() ? DecodeTargetIndication::kNotPresent
+                                    : DecodeTargetIndication::kSwitch;
+  }
+
+  if (sid != config.SpatialId() || tid < config.TemporalId()) {
+    return DecodeTargetIndication::kNotPresent;
+  }
+  if (tid == config.TemporalId() && tid > 0) {
+    return DecodeTargetIndication::kDiscardable;
+  }
+  return DecodeTargetIndication::kSwitch;
+}
+
+}  // namespace
+
+constexpr int ScalabilityStructureL2T2KeyShift::kNumSpatialLayers;
+constexpr int ScalabilityStructureL2T2KeyShift::kNumTemporalLayers;
+
+ScalabilityStructureL2T2KeyShift::~ScalabilityStructureL2T2KeyShift() = default;
+
+// Reports 2 spatial x 2 temporal layers; spatial layer 0 is scaled by 1/2.
+ScalableVideoController::StreamLayersConfig
+ScalabilityStructureL2T2KeyShift::StreamConfig() const {
+  StreamLayersConfig result;
+  result.num_spatial_layers = 2;
+  result.num_temporal_layers = 2;
+  result.scaling_factor_num[0] = 1;
+  result.scaling_factor_den[0] = 2;
+  result.uses_reference_scaling = true;
+  return result;
+}
+
+// Describes 4 decode targets protected by 2 chains (targets 0-1 by chain 0,
+// targets 2-3 by chain 1) and the 7 frame templates of this structure.
+FrameDependencyStructure ScalabilityStructureL2T2KeyShift::DependencyStructure()
+    const {
+  FrameDependencyStructure structure;
+  structure.num_decode_targets = 4;
+  structure.num_chains = 2;
+  structure.decode_target_protected_by_chain = {0, 0, 1, 1};
+  structure.templates.resize(7);
+  auto& templates = structure.templates;
+  templates[0].S(0).T(0).Dtis("SSSS").ChainDiffs({0, 0});
+  templates[1].S(0).T(0).Dtis("SS--").ChainDiffs({2, 1}).FrameDiffs({2});
+  templates[2].S(0).T(0).Dtis("SS--").ChainDiffs({4, 1}).FrameDiffs({4});
+  templates[3].S(0).T(1).Dtis("-D--").ChainDiffs({2, 3}).FrameDiffs({2});
+  templates[4].S(1).T(0).Dtis("--SS").ChainDiffs({1, 1}).FrameDiffs({1});
+  templates[5].S(1).T(0).Dtis("--SS").ChainDiffs({3, 4}).FrameDiffs({4});
+  templates[6].S(1).T(1).Dtis("---D").ChainDiffs({1, 2}).FrameDiffs({2});
+  return structure;
+}
+
+// Produces the layer frame configs for the next temporal unit and advances
+// `next_pattern_` (kKey -> kDelta0 -> kDelta1 -> kDelta0 -> ...).
+// `restart` forces the key-frame pattern. Only active decode targets get a
+// config, so the result may be empty when nothing is active.
+std::vector<ScalableVideoController::LayerFrameConfig>
+ScalabilityStructureL2T2KeyShift::NextFrameConfig(bool restart) {
+  std::vector<LayerFrameConfig> configs;
+  configs.reserve(2);
+  if (restart) {
+    next_pattern_ = kKey;
+  }
+
+  // Buffer0 keeps latest S0T0 frame,
+  // Buffer1 keeps latest S1T0 frame.
+  switch (next_pattern_) {
+    case kKey:
+      if (DecodeTargetIsActive(/*sid=*/0, /*tid=*/0)) {
+        configs.emplace_back();
+        configs.back().S(0).T(0).Update(0).Keyframe();
+      }
+      if (DecodeTargetIsActive(/*sid=*/1, /*tid=*/0)) {
+        configs.emplace_back();
+        configs.back().S(1).T(0).Update(1);
+        // S1 predicts from the S0 key frame when S0 is active; otherwise S1
+        // itself has to be the key frame.
+        if (DecodeTargetIsActive(/*sid=*/0, /*tid=*/0)) {
+          configs.back().Reference(0);
+        } else {
+          configs.back().Keyframe();
+        }
+      }
+      next_pattern_ = kDelta0;
+      break;
+    case kDelta0:
+      if (DecodeTargetIsActive(/*sid=*/0, /*tid=*/0)) {
+        configs.emplace_back();
+        configs.back().S(0).T(0).ReferenceAndUpdate(0);
+      }
+      if (DecodeTargetIsActive(/*sid=*/1, /*tid=*/1)) {
+        configs.emplace_back();
+        configs.back().S(1).T(1).Reference(1);
+      }
+      // Nothing scheduled so far: fall back to an S1T0 frame if S1 is active.
+      if (configs.empty() && DecodeTargetIsActive(/*sid=*/1, /*tid=*/0)) {
+        configs.emplace_back();
+        configs.back().S(1).T(0).ReferenceAndUpdate(1);
+      }
+      next_pattern_ = kDelta1;
+      break;
+    case kDelta1:
+      if (DecodeTargetIsActive(/*sid=*/0, /*tid=*/1)) {
+        configs.emplace_back();
+        configs.back().S(0).T(1).Reference(0);
+      }
+      if (DecodeTargetIsActive(/*sid=*/1, /*tid=*/0)) {
+        configs.emplace_back();
+        configs.back().S(1).T(0).ReferenceAndUpdate(1);
+      }
+      // Nothing scheduled so far: fall back to an S0T0 frame if S0 is active.
+      if (configs.empty() && DecodeTargetIsActive(/*sid=*/0, /*tid=*/0)) {
+        configs.emplace_back();
+        configs.back().S(0).T(0).ReferenceAndUpdate(0);
+      }
+      next_pattern_ = kDelta0;
+      break;
+  }
+
+  RTC_DCHECK(!configs.empty() || active_decode_targets_.none());
+  return configs;
+}
+
+// Converts an encoded layer frame `config` into its GenericFrameInfo: ids and
+// buffers are copied, one decode target indication is emitted per
+// (sid, tid) pair in sid-major order, and chain membership is derived from
+// the frame type.
+GenericFrameInfo ScalabilityStructureL2T2KeyShift::OnEncodeDone(
+    const LayerFrameConfig& config) {
+  GenericFrameInfo frame_info;
+  frame_info.spatial_id = config.SpatialId();
+  frame_info.temporal_id = config.TemporalId();
+  frame_info.encoder_buffers = config.Buffers();
+  for (int sid = 0; sid < kNumSpatialLayers; ++sid) {
+    for (int tid = 0; tid < kNumTemporalLayers; ++tid) {
+      frame_info.decode_target_indications.push_back(Dti(sid, tid, config));
+    }
+  }
+  if (config.IsKeyframe()) {
+    // A key frame is part of both chains.
+    frame_info.part_of_chain = {true, true};
+  } else if (config.TemporalId() == 0) {
+    // A T0 delta frame is part of the chain of its own spatial layer only.
+    frame_info.part_of_chain = {config.SpatialId() == 0,
+                                config.SpatialId() == 1};
+  } else {
+    frame_info.part_of_chain = {false, false};
+  }
+  return frame_info;
+}
+
+// Updates the set of active decode targets from `bitrates`: a target is
+// active when its bitrate is positive, and T1 additionally requires T0 of the
+// same spatial layer to be active.
+void ScalabilityStructureL2T2KeyShift::OnRatesUpdated(
+    const VideoBitrateAllocation& bitrates) {
+  for (int sid = 0; sid < kNumSpatialLayers; ++sid) {
+    // Enable/disable spatial layers independently.
+    bool active = bitrates.GetBitrate(sid, /*tid=*/0) > 0;
+    if (!DecodeTargetIsActive(sid, /*tid=*/0) && active) {
+      // Key frame is required to reenable any spatial layer.
+      next_pattern_ = kKey;
+    }
+
+    SetDecodeTargetIsActive(sid, /*tid=*/0, active);
+    SetDecodeTargetIsActive(sid, /*tid=*/1,
+                            active && bitrates.GetBitrate(sid, /*tid=*/1) > 0);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_l2t2_key_shift.h b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_l2t2_key_shift.h
new file mode 100644
index 0000000000..26d1afcb29
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_l2t2_key_shift.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ +#ifndef MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_L2T2_KEY_SHIFT_H_ +#define MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_L2T2_KEY_SHIFT_H_ + +#include <vector> + +#include "api/transport/rtp/dependency_descriptor.h" +#include "api/video/video_bitrate_allocation.h" +#include "common_video/generic_frame_descriptor/generic_frame_info.h" +#include "modules/video_coding/svc/scalable_video_controller.h" + +namespace webrtc { + +// S1T1 0 0 +// / / / +// S1T0 0---0---0 +// | ... +// S0T1 | 0 0 +// | / / +// S0T0 0-0---0-- +// Time-> 0 1 2 3 4 +class ScalabilityStructureL2T2KeyShift : public ScalableVideoController { + public: + ~ScalabilityStructureL2T2KeyShift() override; + + StreamLayersConfig StreamConfig() const override; + FrameDependencyStructure DependencyStructure() const override; + + std::vector<LayerFrameConfig> NextFrameConfig(bool restart) override; + GenericFrameInfo OnEncodeDone(const LayerFrameConfig& config) override; + void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override; + + private: + enum FramePattern { + kKey, + kDelta0, + kDelta1, + }; + + static constexpr int kNumSpatialLayers = 2; + static constexpr int kNumTemporalLayers = 2; + + bool DecodeTargetIsActive(int sid, int tid) const { + return active_decode_targets_[sid * kNumTemporalLayers + tid]; + } + void SetDecodeTargetIsActive(int sid, int tid, bool value) { + active_decode_targets_.set(sid * kNumTemporalLayers + tid, value); + } + + FramePattern next_pattern_ = kKey; + std::bitset<32> active_decode_targets_ = 0b1111; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_L2T2_KEY_SHIFT_H_ diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_l2t2_key_shift_unittest.cc b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_l2t2_key_shift_unittest.cc new file mode 100644 index 0000000000..40fecf1812 --- /dev/null +++ 
b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_l2t2_key_shift_unittest.cc @@ -0,0 +1,358 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/video_coding/svc/scalability_structure_l2t2_key_shift.h" + +#include <vector> + +#include "api/array_view.h" +#include "api/transport/rtp/dependency_descriptor.h" +#include "common_video/generic_frame_descriptor/generic_frame_info.h" +#include "modules/video_coding/svc/scalability_structure_test_helpers.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::ElementsAre; +using ::testing::IsEmpty; +using ::testing::SizeIs; + +// S1T1 3 7 +// / / +// S1T0 1---5---9 +// | +// S0T1 | 4 8 +// | / / +// S0T0 0-2---6 +// Time-> 0 1 2 3 4 +TEST(ScalabilityStructureL2T2KeyShiftTest, DecodeTargetsAreEnabledByDefault) { + ScalabilityStructureL2T2KeyShift structure; + ScalabilityStructureWrapper wrapper(structure); + std::vector<GenericFrameInfo> frames; + wrapper.GenerateFrames(/*num_temporal_units=*/5, frames); + ASSERT_THAT(frames, SizeIs(10)); + + EXPECT_EQ(frames[0].spatial_id, 0); + EXPECT_EQ(frames[1].spatial_id, 1); + EXPECT_EQ(frames[2].spatial_id, 0); + EXPECT_EQ(frames[3].spatial_id, 1); + EXPECT_EQ(frames[4].spatial_id, 0); + EXPECT_EQ(frames[5].spatial_id, 1); + EXPECT_EQ(frames[6].spatial_id, 0); + EXPECT_EQ(frames[7].spatial_id, 1); + EXPECT_EQ(frames[8].spatial_id, 0); + EXPECT_EQ(frames[9].spatial_id, 1); + + // spatial_id = 0 has the temporal shift. 
+ EXPECT_EQ(frames[0].temporal_id, 0); + EXPECT_EQ(frames[2].temporal_id, 0); + EXPECT_EQ(frames[4].temporal_id, 1); + EXPECT_EQ(frames[6].temporal_id, 0); + EXPECT_EQ(frames[8].temporal_id, 1); + + // spatial_id = 1 hasn't temporal shift. + EXPECT_EQ(frames[1].temporal_id, 0); + EXPECT_EQ(frames[3].temporal_id, 1); + EXPECT_EQ(frames[5].temporal_id, 0); + EXPECT_EQ(frames[7].temporal_id, 1); + EXPECT_EQ(frames[9].temporal_id, 0); + + // Key frame diff. + EXPECT_THAT(frames[0].frame_diffs, IsEmpty()); + EXPECT_THAT(frames[1].frame_diffs, ElementsAre(1)); + // S0T0 frame diffs + EXPECT_THAT(frames[2].frame_diffs, ElementsAre(2)); + EXPECT_THAT(frames[6].frame_diffs, ElementsAre(4)); + // S1T0 frame diffs + EXPECT_THAT(frames[5].frame_diffs, ElementsAre(4)); + EXPECT_THAT(frames[9].frame_diffs, ElementsAre(4)); + // T1 frames refer T0 frame of same spatial layer which is 2 frame ids away. + EXPECT_THAT(frames[3].frame_diffs, ElementsAre(2)); + EXPECT_THAT(frames[4].frame_diffs, ElementsAre(2)); + EXPECT_THAT(frames[7].frame_diffs, ElementsAre(2)); + EXPECT_THAT(frames[8].frame_diffs, ElementsAre(2)); +} + +// S1T0 1---4---7 +// | +// S0T1 | 3 6 +// | / / +// S0T0 0-2---5-- +// Time-> 0 1 2 3 4 +TEST(ScalabilityStructureL2T2KeyShiftTest, DisableS1T1Layer) { + ScalabilityStructureL2T2KeyShift structure; + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/1)); + ScalabilityStructureWrapper wrapper(structure); + std::vector<GenericFrameInfo> frames; + wrapper.GenerateFrames(/*num_temporal_units=*/5, frames); + ASSERT_THAT(frames, SizeIs(8)); + + EXPECT_EQ(frames[0].spatial_id, 0); + EXPECT_EQ(frames[1].spatial_id, 1); + EXPECT_EQ(frames[2].spatial_id, 0); + EXPECT_EQ(frames[3].spatial_id, 0); + EXPECT_EQ(frames[4].spatial_id, 1); + EXPECT_EQ(frames[5].spatial_id, 0); + EXPECT_EQ(frames[6].spatial_id, 0); + EXPECT_EQ(frames[7].spatial_id, 1); + + // spatial_id = 0 has the temporal shift. 
+ EXPECT_EQ(frames[0].temporal_id, 0); + EXPECT_EQ(frames[2].temporal_id, 0); + EXPECT_EQ(frames[3].temporal_id, 1); + EXPECT_EQ(frames[5].temporal_id, 0); + EXPECT_EQ(frames[6].temporal_id, 1); + + // spatial_id = 1 has single temporal layer. + EXPECT_EQ(frames[1].temporal_id, 0); + EXPECT_EQ(frames[4].temporal_id, 0); + EXPECT_EQ(frames[5].temporal_id, 0); +} + +// S1T1 3 | +// / | +// S1T0 1---5+--7 +// | | +// S0T1 | 4| +// | / | +// S0T0 0-2--+6---8 +// Time-> 0 1 2 3 4 5 +TEST(ScalabilityStructureL2T2KeyShiftTest, DisableT1LayersAfterFewFrames) { + ScalabilityStructureL2T2KeyShift structure; + ScalabilityStructureWrapper wrapper(structure); + std::vector<GenericFrameInfo> frames; + + wrapper.GenerateFrames(/*num_temporal_units=*/3, frames); + EXPECT_THAT(frames, SizeIs(6)); + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/1, /*s1=*/1)); + wrapper.GenerateFrames(/*num_temporal_units=*/3, frames); + ASSERT_THAT(frames, SizeIs(9)); + + // Skip validation before T1 was disabled as that is covered by the test + // where no layers are disabled. 
+ EXPECT_EQ(frames[6].spatial_id, 0); + EXPECT_EQ(frames[7].spatial_id, 1); + EXPECT_EQ(frames[8].spatial_id, 0); + + EXPECT_EQ(frames[6].temporal_id, 0); + EXPECT_EQ(frames[7].temporal_id, 0); + EXPECT_EQ(frames[8].temporal_id, 0); + + EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames)); +} + +// S1T1 1 3 +// / / +// S1T0 0---2 +// Time-> 0 1 2 3 4 5 +TEST(ScalabilityStructureL2T2KeyShiftTest, DisableS0FromTheStart) { + ScalabilityStructureL2T2KeyShift structure; + ScalabilityStructureWrapper wrapper(structure); + std::vector<GenericFrameInfo> frames; + + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/0, /*s1=*/2)); + wrapper.GenerateFrames(/*num_temporal_units=*/4, frames); + EXPECT_THAT(frames, SizeIs(4)); + + EXPECT_EQ(frames[0].spatial_id, 1); + EXPECT_EQ(frames[1].spatial_id, 1); + EXPECT_EQ(frames[2].spatial_id, 1); + EXPECT_EQ(frames[3].spatial_id, 1); + + EXPECT_EQ(frames[0].temporal_id, 0); + EXPECT_EQ(frames[1].temporal_id, 1); + EXPECT_EQ(frames[2].temporal_id, 0); + EXPECT_EQ(frames[3].temporal_id, 1); + + EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames)); +} + +// S1T1 3 |6 8 +// / / / +// S1T0 1---5+--7 +// | | +// S0T1 | 4| +// | / | +// S0T0 0-2 | +// Time-> 0 1 2 3 4 5 +TEST(ScalabilityStructureL2T2KeyShiftTest, DisableS0AfterFewFrames) { + ScalabilityStructureL2T2KeyShift structure; + ScalabilityStructureWrapper wrapper(structure); + std::vector<GenericFrameInfo> frames; + + wrapper.GenerateFrames(/*num_temporal_units=*/3, frames); + EXPECT_THAT(frames, SizeIs(6)); + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/0, /*s1=*/2)); + wrapper.GenerateFrames(/*num_temporal_units=*/3, frames); + ASSERT_THAT(frames, SizeIs(9)); + + // Expect frame[6] is delta frame. + EXPECT_THAT(frames[6].frame_diffs, ElementsAre(1)); + // Skip validation before S0 was disabled as that should be covered by + // test where no layers are disabled. 
+ EXPECT_EQ(frames[6].spatial_id, 1); + EXPECT_EQ(frames[7].spatial_id, 1); + EXPECT_EQ(frames[8].spatial_id, 1); + + EXPECT_EQ(frames[6].temporal_id, 1); + EXPECT_EQ(frames[7].temporal_id, 0); + EXPECT_EQ(frames[8].temporal_id, 1); + + EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames)); +} + +// S1T1 3| | 8 +// / | | / +// S1T0 1 | |6 +// | | || +// S0T1 | |4|| +// | / || +// S0T0 0-2| |5-7 +// Time-> 0 1 2 3 4 5 +TEST(ScalabilityStructureL2T2KeyShiftTest, ReenableS1TriggersKeyFrame) { + ScalabilityStructureL2T2KeyShift structure; + ScalabilityStructureWrapper wrapper(structure); + std::vector<GenericFrameInfo> frames; + + wrapper.GenerateFrames(/*num_temporal_units=*/2, frames); + EXPECT_THAT(frames, SizeIs(4)); + + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/0)); + wrapper.GenerateFrames(/*num_temporal_units=*/1, frames); + EXPECT_THAT(frames, SizeIs(5)); + + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/2)); + wrapper.GenerateFrames(/*num_temporal_units=*/2, frames); + ASSERT_THAT(frames, SizeIs(9)); + + EXPECT_THAT(frames[4].spatial_id, 0); + EXPECT_THAT(frames[4].temporal_id, 1); + + // Expect frame[5] to be a key frame. + EXPECT_TRUE(wrapper.FrameReferencesAreValid( + rtc::MakeArrayView(frames.data() + 5, 4))); + + EXPECT_THAT(frames[5].spatial_id, 0); + EXPECT_THAT(frames[6].spatial_id, 1); + EXPECT_THAT(frames[7].spatial_id, 0); + EXPECT_THAT(frames[8].spatial_id, 1); + + // S0 should do temporal shift after the key frame. + EXPECT_THAT(frames[5].temporal_id, 0); + EXPECT_THAT(frames[7].temporal_id, 0); + + // No temporal shift for the top spatial layer. 
+ EXPECT_THAT(frames[6].temporal_id, 0); + EXPECT_THAT(frames[8].temporal_id, 1); +} + +TEST(ScalabilityStructureL2T2KeyShiftTest, EnableOnlyS0T0FromTheStart) { + ScalabilityStructureL2T2KeyShift structure; + ScalabilityStructureWrapper wrapper(structure); + std::vector<GenericFrameInfo> frames; + + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/1, /*s1=*/0)); + wrapper.GenerateFrames(/*num_temporal_units=*/3, frames); + ASSERT_THAT(frames, SizeIs(3)); + + EXPECT_EQ(frames[0].spatial_id, 0); + EXPECT_EQ(frames[1].spatial_id, 0); + EXPECT_EQ(frames[2].spatial_id, 0); + + EXPECT_EQ(frames[0].temporal_id, 0); + EXPECT_EQ(frames[1].temporal_id, 0); + EXPECT_EQ(frames[2].temporal_id, 0); + + EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames)); +} + +// S1T1 3| +// / | +// S1T0 1 | +// | | +// S0T1 | | +// | | +// S0T0 0-2+4-5-6 +// Time-> 0 1 2 3 4 +TEST(ScalabilityStructureL2T2KeyShiftTest, EnableOnlyS0T0AfterFewFrames) { + ScalabilityStructureL2T2KeyShift structure; + ScalabilityStructureWrapper wrapper(structure); + std::vector<GenericFrameInfo> frames; + + wrapper.GenerateFrames(/*num_temporal_units=*/2, frames); + EXPECT_THAT(frames, SizeIs(4)); + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/1, /*s1=*/0)); + wrapper.GenerateFrames(/*num_temporal_units=*/3, frames); + ASSERT_THAT(frames, SizeIs(7)); + + EXPECT_EQ(frames[4].spatial_id, 0); + EXPECT_EQ(frames[5].spatial_id, 0); + EXPECT_EQ(frames[6].spatial_id, 0); + + EXPECT_EQ(frames[4].temporal_id, 0); + EXPECT_EQ(frames[5].temporal_id, 0); + EXPECT_EQ(frames[6].temporal_id, 0); + + EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames)); +} + +TEST(ScalabilityStructureL2T2KeyShiftTest, EnableOnlyS1T0FromTheStart) { + ScalabilityStructureL2T2KeyShift structure; + ScalabilityStructureWrapper wrapper(structure); + std::vector<GenericFrameInfo> frames; + + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/0, /*s1=*/1)); + wrapper.GenerateFrames(/*num_temporal_units=*/3, frames); + ASSERT_THAT(frames, 
SizeIs(3)); + + EXPECT_EQ(frames[0].spatial_id, 1); + EXPECT_EQ(frames[1].spatial_id, 1); + EXPECT_EQ(frames[2].spatial_id, 1); + + EXPECT_EQ(frames[0].temporal_id, 0); + EXPECT_EQ(frames[1].temporal_id, 0); + EXPECT_EQ(frames[2].temporal_id, 0); + + EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames)); +} + +// S1T1 3| +// / | +// S1T0 1--+4-5-6 +// | | +// S0T1 | | +// | | +// S0T0 0-2| +// Time-> 0 1 2 3 4 +TEST(ScalabilityStructureL2T2KeyShiftTest, EnableOnlyS1T0AfterFewFrames) { + ScalabilityStructureL2T2KeyShift structure; + ScalabilityStructureWrapper wrapper(structure); + std::vector<GenericFrameInfo> frames; + + wrapper.GenerateFrames(/*num_temporal_units=*/2, frames); + EXPECT_THAT(frames, SizeIs(4)); + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/0, /*s1=*/1)); + wrapper.GenerateFrames(/*num_temporal_units=*/3, frames); + ASSERT_THAT(frames, SizeIs(7)); + + EXPECT_EQ(frames[4].spatial_id, 1); + EXPECT_EQ(frames[5].spatial_id, 1); + EXPECT_EQ(frames[6].spatial_id, 1); + + EXPECT_EQ(frames[4].temporal_id, 0); + EXPECT_EQ(frames[5].temporal_id, 0); + EXPECT_EQ(frames[6].temporal_id, 0); + + EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames)); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_simulcast.cc b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_simulcast.cc new file mode 100644 index 0000000000..54e27fda5c --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_simulcast.cc @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/video_coding/svc/scalability_structure_simulcast.h" + +#include <utility> +#include <vector> + +#include "absl/base/macros.h" +#include "api/transport/rtp/dependency_descriptor.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace { + +DecodeTargetIndication +Dti(int sid, int tid, const ScalableVideoController::LayerFrameConfig& config) { + if (sid != config.SpatialId() || tid < config.TemporalId()) { + return DecodeTargetIndication::kNotPresent; + } + if (tid == 0) { + RTC_DCHECK_EQ(config.TemporalId(), 0); + return DecodeTargetIndication::kSwitch; + } + if (tid == config.TemporalId()) { + return DecodeTargetIndication::kDiscardable; + } + RTC_DCHECK_GT(tid, config.TemporalId()); + return DecodeTargetIndication::kSwitch; +} + +} // namespace + +constexpr int ScalabilityStructureSimulcast::kMaxNumSpatialLayers; +constexpr int ScalabilityStructureSimulcast::kMaxNumTemporalLayers; + +ScalabilityStructureSimulcast::ScalabilityStructureSimulcast( + int num_spatial_layers, + int num_temporal_layers, + ScalingFactor resolution_factor) + : num_spatial_layers_(num_spatial_layers), + num_temporal_layers_(num_temporal_layers), + resolution_factor_(resolution_factor), + active_decode_targets_( + (uint32_t{1} << (num_spatial_layers * num_temporal_layers)) - 1) { + // At least one spatial layer is required: StreamConfig() indexes the scaling + // factor arrays with `num_spatial_layers_ - 1`. Without temporal layers the + // initial set of active decode targets would be empty. + RTC_DCHECK_GT(num_spatial_layers, 0); + RTC_DCHECK_GT(num_temporal_layers, 0); + RTC_DCHECK_LE(num_spatial_layers, kMaxNumSpatialLayers); + RTC_DCHECK_LE(num_temporal_layers, kMaxNumTemporalLayers); +} + +ScalabilityStructureSimulcast::~ScalabilityStructureSimulcast() = default; + +ScalableVideoController::StreamLayersConfig +ScalabilityStructureSimulcast::StreamConfig() const { + StreamLayersConfig result; + result.num_spatial_layers = num_spatial_layers_; + result.num_temporal_layers = num_temporal_layers_; + result.scaling_factor_num[num_spatial_layers_ - 1] = 1; + result.scaling_factor_den[num_spatial_layers_ - 1] = 1; + for (int sid = num_spatial_layers_ - 1; sid > 0; --sid) { + result.scaling_factor_num[sid - 1] = + 
resolution_factor_.num * result.scaling_factor_num[sid]; + result.scaling_factor_den[sid - 1] = + resolution_factor_.den * result.scaling_factor_den[sid]; + } + result.uses_reference_scaling = false; + return result; +} + +bool ScalabilityStructureSimulcast::TemporalLayerIsActive(int tid) const { + if (tid >= num_temporal_layers_) { + return false; + } + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (DecodeTargetIsActive(sid, tid)) { + return true; + } + } + return false; +} + +ScalabilityStructureSimulcast::FramePattern +ScalabilityStructureSimulcast::NextPattern() const { + switch (last_pattern_) { + case kNone: + case kDeltaT2B: + return kDeltaT0; + case kDeltaT2A: + if (TemporalLayerIsActive(1)) { + return kDeltaT1; + } + return kDeltaT0; + case kDeltaT1: + if (TemporalLayerIsActive(2)) { + return kDeltaT2B; + } + return kDeltaT0; + case kDeltaT0: + if (TemporalLayerIsActive(2)) { + return kDeltaT2A; + } + if (TemporalLayerIsActive(1)) { + return kDeltaT1; + } + return kDeltaT0; + } + RTC_DCHECK_NOTREACHED(); + return kDeltaT0; +} + +std::vector<ScalableVideoController::LayerFrameConfig> +ScalabilityStructureSimulcast::NextFrameConfig(bool restart) { + std::vector<LayerFrameConfig> configs; + if (active_decode_targets_.none()) { + last_pattern_ = kNone; + return configs; + } + configs.reserve(num_spatial_layers_); + + if (last_pattern_ == kNone || restart) { + can_reference_t0_frame_for_spatial_id_.reset(); + last_pattern_ = kNone; + } + FramePattern current_pattern = NextPattern(); + + switch (current_pattern) { + case kDeltaT0: + // Disallow temporal references cross T0 on higher temporal layers. + can_reference_t1_frame_for_spatial_id_.reset(); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (!DecodeTargetIsActive(sid, /*tid=*/0)) { + // Next frame from the spatial layer `sid` shouldn't depend on + // potentially old previous frame from the spatial layer `sid`. 
+ can_reference_t0_frame_for_spatial_id_.reset(sid); + continue; + } + configs.emplace_back(); + ScalableVideoController::LayerFrameConfig& config = configs.back(); + config.Id(current_pattern).S(sid).T(0); + + if (can_reference_t0_frame_for_spatial_id_[sid]) { + config.ReferenceAndUpdate(BufferIndex(sid, /*tid=*/0)); + } else { + config.Keyframe().Update(BufferIndex(sid, /*tid=*/0)); + } + can_reference_t0_frame_for_spatial_id_.set(sid); + } + break; + case kDeltaT1: + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (!DecodeTargetIsActive(sid, /*tid=*/1) || + !can_reference_t0_frame_for_spatial_id_[sid]) { + continue; + } + configs.emplace_back(); + ScalableVideoController::LayerFrameConfig& config = configs.back(); + config.Id(current_pattern) + .S(sid) + .T(1) + .Reference(BufferIndex(sid, /*tid=*/0)); + // Save frame only if there is a higher temporal layer that may need it. + if (num_temporal_layers_ > 2) { + config.Update(BufferIndex(sid, /*tid=*/1)); + } + } + break; + case kDeltaT2A: + case kDeltaT2B: + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (!DecodeTargetIsActive(sid, /*tid=*/2) || + !can_reference_t0_frame_for_spatial_id_[sid]) { + continue; + } + configs.emplace_back(); + ScalableVideoController::LayerFrameConfig& config = configs.back(); + config.Id(current_pattern).S(sid).T(2); + if (can_reference_t1_frame_for_spatial_id_[sid]) { + config.Reference(BufferIndex(sid, /*tid=*/1)); + } else { + config.Reference(BufferIndex(sid, /*tid=*/0)); + } + } + break; + case kNone: + RTC_DCHECK_NOTREACHED(); + break; + } + + return configs; +} + +GenericFrameInfo ScalabilityStructureSimulcast::OnEncodeDone( + const LayerFrameConfig& config) { + last_pattern_ = static_cast<FramePattern>(config.Id()); + if (config.TemporalId() == 1) { + can_reference_t1_frame_for_spatial_id_.set(config.SpatialId()); + } + GenericFrameInfo frame_info; + frame_info.spatial_id = config.SpatialId(); + frame_info.temporal_id = config.TemporalId(); + 
+ frame_info.encoder_buffers = config.Buffers(); + frame_info.decode_target_indications.reserve(num_spatial_layers_ * + num_temporal_layers_); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + frame_info.decode_target_indications.push_back(Dti(sid, tid, config)); + } + } + frame_info.part_of_chain.assign(num_spatial_layers_, false); + if (config.TemporalId() == 0) { + frame_info.part_of_chain[config.SpatialId()] = true; + } + frame_info.active_decode_targets = active_decode_targets_; + return frame_info; +} + +void ScalabilityStructureSimulcast::OnRatesUpdated( + const VideoBitrateAllocation& bitrates) { + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + // Enable/disable spatial layers independently. + bool active = true; + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + // To enable temporal layer, require bitrates for lower temporal layers. + active = active && bitrates.GetBitrate(sid, tid) > 0; + SetDecodeTargetIsActive(sid, tid, active); + } + } +} + +FrameDependencyStructure ScalabilityStructureS2T1::DependencyStructure() const { + FrameDependencyStructure structure; + structure.num_decode_targets = 2; + structure.num_chains = 2; + structure.decode_target_protected_by_chain = {0, 1}; + structure.templates.resize(4); + structure.templates[0].S(0).Dtis("S-").ChainDiffs({2, 1}).FrameDiffs({2}); + structure.templates[1].S(0).Dtis("S-").ChainDiffs({0, 0}); + structure.templates[2].S(1).Dtis("-S").ChainDiffs({1, 2}).FrameDiffs({2}); + structure.templates[3].S(1).Dtis("-S").ChainDiffs({1, 0}); + return structure; +} + +FrameDependencyStructure ScalabilityStructureS2T2::DependencyStructure() const { + FrameDependencyStructure structure; + structure.num_decode_targets = 4; + structure.num_chains = 2; + structure.decode_target_protected_by_chain = {0, 0, 1, 1}; + auto& t = structure.templates; + t.resize(6); + t[1].S(0).T(0).Dtis("SS--").ChainDiffs({0, 0}); + 
t[4].S(1).T(0).Dtis("--SS").ChainDiffs({1, 0}); + t[2].S(0).T(1).Dtis("-D--").ChainDiffs({2, 1}).FrameDiffs({2}); + t[5].S(1).T(1).Dtis("---D").ChainDiffs({3, 2}).FrameDiffs({2}); + t[0].S(0).T(0).Dtis("SS--").ChainDiffs({4, 3}).FrameDiffs({4}); + t[3].S(1).T(0).Dtis("--SS").ChainDiffs({1, 4}).FrameDiffs({4}); + return structure; +} + +FrameDependencyStructure ScalabilityStructureS2T3::DependencyStructure() const { + FrameDependencyStructure structure; + structure.num_decode_targets = 6; + structure.num_chains = 2; + structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1}; + auto& t = structure.templates; + t.resize(10); + t[1].S(0).T(0).Dtis("SSS---").ChainDiffs({0, 0}); + t[6].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 0}); + t[3].S(0).T(2).Dtis("--D---").ChainDiffs({2, 1}).FrameDiffs({2}); + t[8].S(1).T(2).Dtis("-----D").ChainDiffs({3, 2}).FrameDiffs({2}); + t[2].S(0).T(1).Dtis("-DS---").ChainDiffs({4, 3}).FrameDiffs({4}); + t[7].S(1).T(1).Dtis("----DS").ChainDiffs({5, 4}).FrameDiffs({4}); + t[4].S(0).T(2).Dtis("--D---").ChainDiffs({6, 5}).FrameDiffs({2}); + t[9].S(1).T(2).Dtis("-----D").ChainDiffs({7, 6}).FrameDiffs({2}); + t[0].S(0).T(0).Dtis("SSS---").ChainDiffs({8, 7}).FrameDiffs({8}); + t[5].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 8}).FrameDiffs({8}); + return structure; +} + +FrameDependencyStructure ScalabilityStructureS3T1::DependencyStructure() const { + FrameDependencyStructure structure; + structure.num_decode_targets = 3; + structure.num_chains = 3; + structure.decode_target_protected_by_chain = {0, 1, 2}; + auto& t = structure.templates; + t.resize(6); + t[1].S(0).T(0).Dtis("S--").ChainDiffs({0, 0, 0}); + t[3].S(1).T(0).Dtis("-S-").ChainDiffs({1, 0, 0}); + t[5].S(2).T(0).Dtis("--S").ChainDiffs({2, 1, 0}); + t[0].S(0).T(0).Dtis("S--").ChainDiffs({3, 2, 1}).FrameDiffs({3}); + t[2].S(1).T(0).Dtis("-S-").ChainDiffs({1, 3, 2}).FrameDiffs({3}); + t[4].S(2).T(0).Dtis("--S").ChainDiffs({2, 1, 3}).FrameDiffs({3}); + return structure; +} + 
+FrameDependencyStructure ScalabilityStructureS3T2::DependencyStructure() const { + FrameDependencyStructure structure; + structure.num_decode_targets = 6; + structure.num_chains = 3; + structure.decode_target_protected_by_chain = {0, 0, 1, 1, 2, 2}; + auto& t = structure.templates; + t.resize(9); + // Templates are shown in the order frames following them appear in the + // stream, but in `structure.templates` array templates are sorted by + // (`spatial_id`, `temporal_id`) since that is a dependency descriptor + // requirement. + t[1].S(0).T(0).Dtis("SS----").ChainDiffs({0, 0, 0}); + t[4].S(1).T(0).Dtis("--SS--").ChainDiffs({1, 0, 0}); + t[7].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 0}); + t[2].S(0).T(1).Dtis("-D----").ChainDiffs({3, 2, 1}).FrameDiffs({3}); + t[5].S(1).T(1).Dtis("---D--").ChainDiffs({4, 3, 2}).FrameDiffs({3}); + t[8].S(2).T(1).Dtis("-----D").ChainDiffs({5, 4, 3}).FrameDiffs({3}); + t[0].S(0).T(0).Dtis("SS----").ChainDiffs({6, 5, 4}).FrameDiffs({6}); + t[3].S(1).T(0).Dtis("--SS--").ChainDiffs({1, 6, 5}).FrameDiffs({6}); + t[6].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 6}).FrameDiffs({6}); + return structure; +} + +FrameDependencyStructure ScalabilityStructureS3T3::DependencyStructure() const { + FrameDependencyStructure structure; + structure.num_decode_targets = 9; + structure.num_chains = 3; + structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1, 2, 2, 2}; + auto& t = structure.templates; + t.resize(15); + // Templates are shown in the order frames following them appear in the + // stream, but in `structure.templates` array templates are sorted by + // (`spatial_id`, `temporal_id`) since that is a dependency descriptor + // requirement. Indexes are written in hex for nicer alignment. 
+ t[0x1].S(0).T(0).Dtis("SSS------").ChainDiffs({0, 0, 0}); + t[0x6].S(1).T(0).Dtis("---SSS---").ChainDiffs({1, 0, 0}); + t[0xB].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 0}); + t[0x3].S(0).T(2).Dtis("--D------").ChainDiffs({3, 2, 1}).FrameDiffs({3}); + t[0x8].S(1).T(2).Dtis("-----D---").ChainDiffs({4, 3, 2}).FrameDiffs({3}); + t[0xD].S(2).T(2).Dtis("--------D").ChainDiffs({5, 4, 3}).FrameDiffs({3}); + t[0x2].S(0).T(1).Dtis("-DS------").ChainDiffs({6, 5, 4}).FrameDiffs({6}); + t[0x7].S(1).T(1).Dtis("----DS---").ChainDiffs({7, 6, 5}).FrameDiffs({6}); + t[0xC].S(2).T(1).Dtis("-------DS").ChainDiffs({8, 7, 6}).FrameDiffs({6}); + t[0x4].S(0).T(2).Dtis("--D------").ChainDiffs({9, 8, 7}).FrameDiffs({3}); + t[0x9].S(1).T(2).Dtis("-----D---").ChainDiffs({10, 9, 8}).FrameDiffs({3}); + t[0xE].S(2).T(2).Dtis("--------D").ChainDiffs({11, 10, 9}).FrameDiffs({3}); + t[0x0].S(0).T(0).Dtis("SSS------").ChainDiffs({12, 11, 10}).FrameDiffs({12}); + t[0x5].S(1).T(0).Dtis("---SSS---").ChainDiffs({1, 12, 11}).FrameDiffs({12}); + t[0xA].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 12}).FrameDiffs({12}); + return structure; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_simulcast.h b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_simulcast.h new file mode 100644 index 0000000000..99be9f0d58 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_simulcast.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#ifndef MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_SIMULCAST_H_ +#define MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_SIMULCAST_H_ + +#include <bitset> +#include <vector> + +#include "api/transport/rtp/dependency_descriptor.h" +#include "api/video/video_bitrate_allocation.h" +#include "common_video/generic_frame_descriptor/generic_frame_info.h" +#include "modules/video_coding/svc/scalable_video_controller.h" + +namespace webrtc { + +// Scalability structure with multiple independent spatial layers each with the +// same temporal layering. +class ScalabilityStructureSimulcast : public ScalableVideoController { + public: + struct ScalingFactor { + int num = 1; + int den = 2; + }; + ScalabilityStructureSimulcast(int num_spatial_layers, + int num_temporal_layers, + ScalingFactor resolution_factor); + ~ScalabilityStructureSimulcast() override; + + StreamLayersConfig StreamConfig() const override; + std::vector<LayerFrameConfig> NextFrameConfig(bool restart) override; + GenericFrameInfo OnEncodeDone(const LayerFrameConfig& config) override; + void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override; + + private: + enum FramePattern { + kNone, + kDeltaT2A, + kDeltaT1, + kDeltaT2B, + kDeltaT0, + }; + static constexpr int kMaxNumSpatialLayers = 3; + static constexpr int kMaxNumTemporalLayers = 3; + + // Index of the buffer to store last frame for layer (`sid`, `tid`) + int BufferIndex(int sid, int tid) const { + return tid * num_spatial_layers_ + sid; + } + bool DecodeTargetIsActive(int sid, int tid) const { + return active_decode_targets_[sid * num_temporal_layers_ + tid]; + } + void SetDecodeTargetIsActive(int sid, int tid, bool value) { + active_decode_targets_.set(sid * num_temporal_layers_ + tid, value); + } + FramePattern NextPattern() const; + bool TemporalLayerIsActive(int tid) const; + + const int num_spatial_layers_; + const int num_temporal_layers_; + const ScalingFactor resolution_factor_; + + FramePattern last_pattern_ = kNone; + 
std::bitset<kMaxNumSpatialLayers> can_reference_t0_frame_for_spatial_id_ = 0; + std::bitset<kMaxNumSpatialLayers> can_reference_t1_frame_for_spatial_id_ = 0; + std::bitset<32> active_decode_targets_; +}; + +// S1 0--0--0- +// ... +// S0 0--0--0- +class ScalabilityStructureS2T1 : public ScalabilityStructureSimulcast { + public: + explicit ScalabilityStructureS2T1(ScalingFactor resolution_factor = {}) + : ScalabilityStructureSimulcast(2, 1, resolution_factor) {} + ~ScalabilityStructureS2T1() override = default; + + FrameDependencyStructure DependencyStructure() const override; +}; + +class ScalabilityStructureS2T2 : public ScalabilityStructureSimulcast { + public: + explicit ScalabilityStructureS2T2(ScalingFactor resolution_factor = {}) + : ScalabilityStructureSimulcast(2, 2, resolution_factor) {} + ~ScalabilityStructureS2T2() override = default; + + FrameDependencyStructure DependencyStructure() const override; +}; + +// S1T2 3 7 +// | / +// S1T1 / 5 +// |_/ +// S1T0 1-------9... +// +// S0T2 2 6 +// | / +// S0T1 / 4 +// |_/ +// S0T0 0-------8... 
+// Time-> 0 1 2 3 4 +class ScalabilityStructureS2T3 : public ScalabilityStructureSimulcast { + public: + explicit ScalabilityStructureS2T3(ScalingFactor resolution_factor = {}) + : ScalabilityStructureSimulcast(2, 3, resolution_factor) {} + ~ScalabilityStructureS2T3() override = default; + + FrameDependencyStructure DependencyStructure() const override; +}; + +class ScalabilityStructureS3T1 : public ScalabilityStructureSimulcast { + public: + explicit ScalabilityStructureS3T1(ScalingFactor resolution_factor = {}) + : ScalabilityStructureSimulcast(3, 1, resolution_factor) {} + ~ScalabilityStructureS3T1() override = default; + + FrameDependencyStructure DependencyStructure() const override; +}; + +class ScalabilityStructureS3T2 : public ScalabilityStructureSimulcast { + public: + explicit ScalabilityStructureS3T2(ScalingFactor resolution_factor = {}) + : ScalabilityStructureSimulcast(3, 2, resolution_factor) {} + ~ScalabilityStructureS3T2() override = default; + + FrameDependencyStructure DependencyStructure() const override; +}; + +class ScalabilityStructureS3T3 : public ScalabilityStructureSimulcast { + public: + explicit ScalabilityStructureS3T3(ScalingFactor resolution_factor = {}) + : ScalabilityStructureSimulcast(3, 3, resolution_factor) {} + ~ScalabilityStructureS3T3() override = default; + + FrameDependencyStructure DependencyStructure() const override; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_SIMULCAST_H_ diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_test_helpers.cc b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_test_helpers.cc new file mode 100644 index 0000000000..aeb4d88f1a --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_test_helpers.cc @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/video_coding/svc/scalability_structure_test_helpers.h" + +#include <stdint.h> + +#include <bitset> +#include <utility> +#include <vector> + +#include "api/array_view.h" +#include "api/transport/rtp/dependency_descriptor.h" +#include "api/video/video_bitrate_allocation.h" +#include "modules/video_coding/chain_diff_calculator.h" +#include "modules/video_coding/frame_dependencies_calculator.h" +#include "modules/video_coding/svc/scalable_video_controller.h" +#include "test/gtest.h" + +namespace webrtc { + +VideoBitrateAllocation EnableTemporalLayers(int s0, int s1, int s2) { + VideoBitrateAllocation bitrate; + for (int tid = 0; tid < s0; ++tid) { + bitrate.SetBitrate(0, tid, 1'000'000); + } + for (int tid = 0; tid < s1; ++tid) { + bitrate.SetBitrate(1, tid, 1'000'000); + } + for (int tid = 0; tid < s2; ++tid) { + bitrate.SetBitrate(2, tid, 1'000'000); + } + return bitrate; +} + +void ScalabilityStructureWrapper::GenerateFrames( + int num_temporal_units, + std::vector<GenericFrameInfo>& frames) { + for (int i = 0; i < num_temporal_units; ++i) { + for (auto& layer_frame : + structure_controller_.NextFrameConfig(/*restart=*/false)) { + int64_t frame_id = ++frame_id_; + bool is_keyframe = layer_frame.IsKeyframe(); + + GenericFrameInfo frame_info = + structure_controller_.OnEncodeDone(layer_frame); + if (is_keyframe) { + chain_diff_calculator_.Reset(frame_info.part_of_chain); + } + frame_info.chain_diffs = + chain_diff_calculator_.From(frame_id, frame_info.part_of_chain); + for (int64_t base_frame_id : frame_deps_calculator_.FromBuffersUsage( + frame_id, frame_info.encoder_buffers)) { + frame_info.frame_diffs.push_back(frame_id - 
base_frame_id); + } + + frames.push_back(std::move(frame_info)); + } + } +} + +bool ScalabilityStructureWrapper::FrameReferencesAreValid( + rtc::ArrayView<const GenericFrameInfo> frames) const { + bool valid = true; + // VP9 and AV1 support up to 8 buffers. Expect that no more buffers are used. + std::bitset<8> buffer_contains_frame; + for (size_t i = 0; i < frames.size(); ++i) { + const GenericFrameInfo& frame = frames[i]; + for (const CodecBufferUsage& buffer_usage : frame.encoder_buffers) { + if (buffer_usage.id < 0 || buffer_usage.id >= 8) { + ADD_FAILURE() << "Invalid buffer id " << buffer_usage.id + << " for frame#" << i + << ". Up to 8 buffers are supported."; + valid = false; + continue; + } + if (buffer_usage.referenced && !buffer_contains_frame[buffer_usage.id]) { + ADD_FAILURE() << "buffer " << buffer_usage.id << " for frame#" << i + << " was reference before updated."; + valid = false; + } + if (buffer_usage.updated) { + buffer_contains_frame.set(buffer_usage.id); + } + } + for (int fdiff : frame.frame_diffs) { + if (fdiff <= 0 || static_cast<size_t>(fdiff) > i) { + ADD_FAILURE() << "Invalid frame diff " << fdiff << " for frame#" << i; + valid = false; + } + } + } + return valid; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_test_helpers.h b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_test_helpers.h new file mode 100644 index 0000000000..d183be4766 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_test_helpers.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_TEST_HELPERS_H_ +#define MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_TEST_HELPERS_H_ + +#include <stdint.h> + +#include <vector> + +#include "api/array_view.h" +#include "api/transport/rtp/dependency_descriptor.h" +#include "api/video/video_bitrate_allocation.h" +#include "common_video/generic_frame_descriptor/generic_frame_info.h" +#include "modules/video_coding/chain_diff_calculator.h" +#include "modules/video_coding/frame_dependencies_calculator.h" +#include "modules/video_coding/svc/scalable_video_controller.h" + +namespace webrtc { + +// Creates bitrate allocation with non-zero bitrate for given number of temporal +// layers for each spatial layer. +VideoBitrateAllocation EnableTemporalLayers(int s0, int s1 = 0, int s2 = 0); + +class ScalabilityStructureWrapper { + public: + explicit ScalabilityStructureWrapper(ScalableVideoController& structure) + : structure_controller_(structure) {} + + std::vector<GenericFrameInfo> GenerateFrames(int num_temporal_units) { + std::vector<GenericFrameInfo> frames; + GenerateFrames(num_temporal_units, frames); + return frames; + } + void GenerateFrames(int num_temporal_units, + std::vector<GenericFrameInfo>& frames); + + // Returns false and ADD_FAILUREs for frames with invalid references. + // In particular validates that no frame references a frame before frames[0]. + // In error messages frames are indexed starting with 0. 
+ bool FrameReferencesAreValid( + rtc::ArrayView<const GenericFrameInfo> frames) const; + + private: + ScalableVideoController& structure_controller_; + FrameDependenciesCalculator frame_deps_calculator_; + ChainDiffCalculator chain_diff_calculator_; + int64_t frame_id_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_TEST_HELPERS_H_ diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_unittest.cc b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_unittest.cc new file mode 100644 index 0000000000..2d517c5825 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_unittest.cc @@ -0,0 +1,395 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <stddef.h> +#include <stdint.h> + +#include <memory> +#include <ostream> +#include <set> +#include <string> +#include <utility> +#include <vector> + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/transport/rtp/dependency_descriptor.h" +#include "modules/video_coding/svc/create_scalability_structure.h" +#include "modules/video_coding/svc/scalability_mode_util.h" +#include "modules/video_coding/svc/scalability_structure_test_helpers.h" +#include "modules/video_coding/svc/scalable_video_controller.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::AllOf; +using ::testing::Contains; +using ::testing::Each; +using ::testing::ElementsAreArray; +using ::testing::Field; +using ::testing::Ge; +using ::testing::IsEmpty; +using ::testing::Le; +using ::testing::Lt; +using ::testing::Not; +using ::testing::NotNull; +using ::testing::SizeIs; +using ::testing::TestWithParam; +using ::testing::Values; + +std::string FrameDependencyTemplateToString(const FrameDependencyTemplate& t) { + rtc::StringBuilder sb; + sb << "S" << t.spatial_id << "T" << t.temporal_id; + sb << ": dtis = "; + for (const auto dtis : t.decode_target_indications) { + switch (dtis) { + case DecodeTargetIndication::kNotPresent: + sb << "-"; + break; + case DecodeTargetIndication::kDiscardable: + sb << "D"; + break; + case DecodeTargetIndication::kSwitch: + sb << "S"; + break; + case DecodeTargetIndication::kRequired: + sb << "R"; + break; + default: + sb << "?"; + break; + } + } + sb << ", frame diffs = { "; + for (int d : t.frame_diffs) { + sb << d << ", "; + } + sb << "}, chain diffs = { "; + for (int d : t.chain_diffs) { + sb << d << ", "; + } + sb << "}"; + return sb.Release(); +} + +struct SvcTestParam { + friend std::ostream& operator<<(std::ostream& os, const SvcTestParam& param) { + return os << param.name; + } + + ScalabilityMode GetScalabilityMode() const { + absl::optional<ScalabilityMode> scalability_mode = 
+ ScalabilityModeFromString(name); + RTC_CHECK(scalability_mode.has_value()); + return *scalability_mode; + } + + std::string name; + int num_temporal_units; +}; + +class ScalabilityStructureTest : public TestWithParam<SvcTestParam> {}; + +TEST_P(ScalabilityStructureTest, + StaticConfigMatchesConfigReturnedByController) { + std::unique_ptr<ScalableVideoController> controller = + CreateScalabilityStructure(GetParam().GetScalabilityMode()); + absl::optional<ScalableVideoController::StreamLayersConfig> static_config = + ScalabilityStructureConfig(GetParam().GetScalabilityMode()); + ASSERT_THAT(controller, NotNull()); + ASSERT_NE(static_config, absl::nullopt); + ScalableVideoController::StreamLayersConfig config = + controller->StreamConfig(); + EXPECT_EQ(config.num_spatial_layers, static_config->num_spatial_layers); + EXPECT_EQ(config.num_temporal_layers, static_config->num_temporal_layers); + EXPECT_THAT( + rtc::MakeArrayView(config.scaling_factor_num, config.num_spatial_layers), + ElementsAreArray(static_config->scaling_factor_num, + static_config->num_spatial_layers)); + EXPECT_THAT( + rtc::MakeArrayView(config.scaling_factor_den, config.num_spatial_layers), + ElementsAreArray(static_config->scaling_factor_den, + static_config->num_spatial_layers)); +} + +TEST_P(ScalabilityStructureTest, + NumberOfDecodeTargetsAndChainsAreInRangeAndConsistent) { + FrameDependencyStructure structure = + CreateScalabilityStructure(GetParam().GetScalabilityMode()) + ->DependencyStructure(); + EXPECT_GT(structure.num_decode_targets, 0); + EXPECT_LE(structure.num_decode_targets, + DependencyDescriptor::kMaxDecodeTargets); + EXPECT_GE(structure.num_chains, 0); + EXPECT_LE(structure.num_chains, structure.num_decode_targets); + if (structure.num_chains == 0) { + EXPECT_THAT(structure.decode_target_protected_by_chain, IsEmpty()); + } else { + EXPECT_THAT(structure.decode_target_protected_by_chain, + AllOf(SizeIs(structure.num_decode_targets), Each(Ge(0)), + Each(Lt(structure.num_chains)))); 
+ } + EXPECT_THAT(structure.templates, + SizeIs(Lt(size_t{DependencyDescriptor::kMaxTemplates}))); +} + +TEST_P(ScalabilityStructureTest, TemplatesAreSortedByLayerId) { + FrameDependencyStructure structure = + CreateScalabilityStructure(GetParam().GetScalabilityMode()) + ->DependencyStructure(); + ASSERT_THAT(structure.templates, Not(IsEmpty())); + const auto& first_templates = structure.templates.front(); + EXPECT_EQ(first_templates.spatial_id, 0); + EXPECT_EQ(first_templates.temporal_id, 0); + for (size_t i = 1; i < structure.templates.size(); ++i) { + const auto& prev_template = structure.templates[i - 1]; + const auto& next_template = structure.templates[i]; + if (next_template.spatial_id == prev_template.spatial_id && + next_template.temporal_id == prev_template.temporal_id) { + // Same layer, next_layer_idc == 0 + } else if (next_template.spatial_id == prev_template.spatial_id && + next_template.temporal_id == prev_template.temporal_id + 1) { + // Next temporal layer, next_layer_idc == 1 + } else if (next_template.spatial_id == prev_template.spatial_id + 1 && + next_template.temporal_id == 0) { + // Next spatial layer, next_layer_idc == 2 + } else { + // everything else is invalid. + ADD_FAILURE() << "Invalid templates order. 
Template #" << i + << " with layer (" << next_template.spatial_id << "," + << next_template.temporal_id + << ") follows template with layer (" + << prev_template.spatial_id << "," + << prev_template.temporal_id << ")."; + } + } +} + +TEST_P(ScalabilityStructureTest, TemplatesMatchNumberOfDecodeTargetsAndChains) { + FrameDependencyStructure structure = + CreateScalabilityStructure(GetParam().GetScalabilityMode()) + ->DependencyStructure(); + EXPECT_THAT( + structure.templates, + Each(AllOf(Field(&FrameDependencyTemplate::decode_target_indications, + SizeIs(structure.num_decode_targets)), + Field(&FrameDependencyTemplate::chain_diffs, + SizeIs(structure.num_chains))))); +} + +TEST_P(ScalabilityStructureTest, FrameInfoMatchesFrameDependencyStructure) { + std::unique_ptr<ScalableVideoController> svc_controller = + CreateScalabilityStructure(GetParam().GetScalabilityMode()); + FrameDependencyStructure structure = svc_controller->DependencyStructure(); + std::vector<GenericFrameInfo> frame_infos = + ScalabilityStructureWrapper(*svc_controller) + .GenerateFrames(GetParam().num_temporal_units); + for (size_t frame_id = 0; frame_id < frame_infos.size(); ++frame_id) { + const auto& frame = frame_infos[frame_id]; + EXPECT_GE(frame.spatial_id, 0) << " for frame " << frame_id; + EXPECT_GE(frame.temporal_id, 0) << " for frame " << frame_id; + EXPECT_THAT(frame.decode_target_indications, + SizeIs(structure.num_decode_targets)) + << " for frame " << frame_id; + EXPECT_THAT(frame.part_of_chain, SizeIs(structure.num_chains)) + << " for frame " << frame_id; + } +} + +TEST_P(ScalabilityStructureTest, ThereIsAPerfectTemplateForEachFrame) { + std::unique_ptr<ScalableVideoController> svc_controller = + CreateScalabilityStructure(GetParam().GetScalabilityMode()); + FrameDependencyStructure structure = svc_controller->DependencyStructure(); + std::vector<GenericFrameInfo> frame_infos = + ScalabilityStructureWrapper(*svc_controller) + .GenerateFrames(GetParam().num_temporal_units); + for 
(size_t frame_id = 0; frame_id < frame_infos.size(); ++frame_id) { + EXPECT_THAT(structure.templates, Contains(frame_infos[frame_id])) + << " for frame " << frame_id << ", Expected " + << FrameDependencyTemplateToString(frame_infos[frame_id]); + } +} + +TEST_P(ScalabilityStructureTest, FrameDependsOnSameOrLowerLayer) { + std::unique_ptr<ScalableVideoController> svc_controller = + CreateScalabilityStructure(GetParam().GetScalabilityMode()); + std::vector<GenericFrameInfo> frame_infos = + ScalabilityStructureWrapper(*svc_controller) + .GenerateFrames(GetParam().num_temporal_units); + int64_t num_frames = frame_infos.size(); + + for (int64_t frame_id = 0; frame_id < num_frames; ++frame_id) { + const auto& frame = frame_infos[frame_id]; + for (int frame_diff : frame.frame_diffs) { + int64_t base_frame_id = frame_id - frame_diff; + const auto& base_frame = frame_infos[base_frame_id]; + EXPECT_GE(frame.spatial_id, base_frame.spatial_id) + << "Frame " << frame_id << " depends on frame " << base_frame_id; + EXPECT_GE(frame.temporal_id, base_frame.temporal_id) + << "Frame " << frame_id << " depends on frame " << base_frame_id; + } + } +} + +TEST_P(ScalabilityStructureTest, NoFrameDependsOnDiscardableOrNotPresent) { + std::unique_ptr<ScalableVideoController> svc_controller = + CreateScalabilityStructure(GetParam().GetScalabilityMode()); + std::vector<GenericFrameInfo> frame_infos = + ScalabilityStructureWrapper(*svc_controller) + .GenerateFrames(GetParam().num_temporal_units); + int64_t num_frames = frame_infos.size(); + FrameDependencyStructure structure = svc_controller->DependencyStructure(); + + for (int dt = 0; dt < structure.num_decode_targets; ++dt) { + for (int64_t frame_id = 0; frame_id < num_frames; ++frame_id) { + const auto& frame = frame_infos[frame_id]; + if (frame.decode_target_indications[dt] == + DecodeTargetIndication::kNotPresent) { + continue; + } + for (int frame_diff : frame.frame_diffs) { + int64_t base_frame_id = frame_id - frame_diff; + const auto& 
base_frame = frame_infos[base_frame_id]; + EXPECT_NE(base_frame.decode_target_indications[dt], + DecodeTargetIndication::kNotPresent) + << "Frame " << frame_id << " depends on frame " << base_frame_id + << " that is not part of decode target#" << dt; + EXPECT_NE(base_frame.decode_target_indications[dt], + DecodeTargetIndication::kDiscardable) + << "Frame " << frame_id << " depends on frame " << base_frame_id + << " that is discardable for decode target#" << dt; + } + } + } +} + +TEST_P(ScalabilityStructureTest, NoFrameDependsThroughSwitchIndication) { + std::unique_ptr<ScalableVideoController> svc_controller = + CreateScalabilityStructure(GetParam().GetScalabilityMode()); + FrameDependencyStructure structure = svc_controller->DependencyStructure(); + std::vector<GenericFrameInfo> frame_infos = + ScalabilityStructureWrapper(*svc_controller) + .GenerateFrames(GetParam().num_temporal_units); + int64_t num_frames = frame_infos.size(); + std::vector<std::set<int64_t>> full_deps(num_frames); + + // For each frame calculate set of all frames it depends on, both directly and + // indirectly. + for (int64_t frame_id = 0; frame_id < num_frames; ++frame_id) { + std::set<int64_t> all_base_frames; + for (int frame_diff : frame_infos[frame_id].frame_diffs) { + int64_t base_frame_id = frame_id - frame_diff; + all_base_frames.insert(base_frame_id); + const auto& indirect = full_deps[base_frame_id]; + all_base_frames.insert(indirect.begin(), indirect.end()); + } + full_deps[frame_id] = std::move(all_base_frames); + } + + // Now check the switch indication: frames after the switch indication mustn't + // depend on any addition frames before the switch indications. 
+ for (int dt = 0; dt < structure.num_decode_targets; ++dt) { + for (int64_t switch_frame_id = 0; switch_frame_id < num_frames; + ++switch_frame_id) { + if (frame_infos[switch_frame_id].decode_target_indications[dt] != + DecodeTargetIndication::kSwitch) { + continue; + } + for (int64_t later_frame_id = switch_frame_id + 1; + later_frame_id < num_frames; ++later_frame_id) { + if (frame_infos[later_frame_id].decode_target_indications[dt] == + DecodeTargetIndication::kNotPresent) { + continue; + } + for (int frame_diff : frame_infos[later_frame_id].frame_diffs) { + int64_t early_frame_id = later_frame_id - frame_diff; + if (early_frame_id < switch_frame_id) { + EXPECT_THAT(full_deps[switch_frame_id], Contains(early_frame_id)) + << "For decode target #" << dt << " frame " << later_frame_id + << " depends on the frame " << early_frame_id + << " that switch indication frame " << switch_frame_id + << " doesn't directly on indirectly depend on."; + } + } + } + } + } +} + +TEST_P(ScalabilityStructureTest, ProduceNoFrameForDisabledLayers) { + std::unique_ptr<ScalableVideoController> svc_controller = + CreateScalabilityStructure(GetParam().GetScalabilityMode()); + ScalableVideoController::StreamLayersConfig structure = + svc_controller->StreamConfig(); + + VideoBitrateAllocation all_bitrates; + for (int sid = 0; sid < structure.num_spatial_layers; ++sid) { + for (int tid = 0; tid < structure.num_temporal_layers; ++tid) { + all_bitrates.SetBitrate(sid, tid, 100'000); + } + } + + svc_controller->OnRatesUpdated(all_bitrates); + ScalabilityStructureWrapper wrapper(*svc_controller); + std::vector<GenericFrameInfo> frames = + wrapper.GenerateFrames(GetParam().num_temporal_units); + + for (int sid = 0; sid < structure.num_spatial_layers; ++sid) { + for (int tid = 0; tid < structure.num_temporal_layers; ++tid) { + // When all layers were enabled, expect there was a frame for each layer. 
+ EXPECT_THAT(frames, + Contains(AllOf(Field(&GenericFrameInfo::spatial_id, sid), + Field(&GenericFrameInfo::temporal_id, tid)))) + << "For layer (" << sid << "," << tid << ")"; + // Restore bitrates for all layers before disabling single layer. + VideoBitrateAllocation bitrates = all_bitrates; + bitrates.SetBitrate(sid, tid, 0); + svc_controller->OnRatesUpdated(bitrates); + // With layer (sid, tid) disabled, expect no frames are produced for it. + EXPECT_THAT( + wrapper.GenerateFrames(GetParam().num_temporal_units), + Not(Contains(AllOf(Field(&GenericFrameInfo::spatial_id, sid), + Field(&GenericFrameInfo::temporal_id, tid))))) + << "For layer (" << sid << "," << tid << ")"; + } + } +} + +INSTANTIATE_TEST_SUITE_P( + Svc, + ScalabilityStructureTest, + Values(SvcTestParam{"L1T1", /*num_temporal_units=*/3}, + SvcTestParam{"L1T2", /*num_temporal_units=*/4}, + SvcTestParam{"L1T3", /*num_temporal_units=*/8}, + SvcTestParam{"L2T1", /*num_temporal_units=*/3}, + SvcTestParam{"L2T1_KEY", /*num_temporal_units=*/3}, + SvcTestParam{"L3T1", /*num_temporal_units=*/3}, + SvcTestParam{"L3T1_KEY", /*num_temporal_units=*/3}, + SvcTestParam{"L3T3", /*num_temporal_units=*/8}, + SvcTestParam{"S2T1", /*num_temporal_units=*/3}, + SvcTestParam{"S2T2", /*num_temporal_units=*/4}, + SvcTestParam{"S2T3", /*num_temporal_units=*/8}, + SvcTestParam{"S3T1", /*num_temporal_units=*/3}, + SvcTestParam{"S3T2", /*num_temporal_units=*/4}, + SvcTestParam{"S3T3", /*num_temporal_units=*/8}, + SvcTestParam{"L2T2", /*num_temporal_units=*/4}, + SvcTestParam{"L2T2_KEY", /*num_temporal_units=*/4}, + SvcTestParam{"L2T2_KEY_SHIFT", /*num_temporal_units=*/4}, + SvcTestParam{"L2T3", /*num_temporal_units=*/8}, + SvcTestParam{"L2T3_KEY", /*num_temporal_units=*/8}, + SvcTestParam{"L3T2", /*num_temporal_units=*/4}, + SvcTestParam{"L3T2_KEY", /*num_temporal_units=*/4}, + SvcTestParam{"L3T3_KEY", /*num_temporal_units=*/8}), + [](const testing::TestParamInfo<SvcTestParam>& info) { + return info.param.name; + }); + +} // 
namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalability_structures_gn/moz.build b/third_party/libwebrtc/modules/video_coding/svc/scalability_structures_gn/moz.build new file mode 100644 index 0000000000..a3e45b1cd7 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalability_structures_gn/moz.build @@ -0,0 +1,232 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +SOURCES += [ + "/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_key_svc.cc", + "/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_simulcast.cc" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/video_coding/svc/create_scalability_structure.cc", + "/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_full_svc.cc", + "/third_party/libwebrtc/modules/video_coding/svc/scalability_structure_l2t2_key_shift.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if 
CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + 
DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if 
CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("scalability_structures_gn") diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller.h b/third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller.h new file mode 100644 index 0000000000..c7362657ec --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef MODULES_VIDEO_CODING_SVC_SCALABLE_VIDEO_CONTROLLER_H_ +#define MODULES_VIDEO_CODING_SVC_SCALABLE_VIDEO_CONTROLLER_H_ + +#include <vector> + +#include "absl/container/inlined_vector.h" +#include "api/transport/rtp/dependency_descriptor.h" +#include "api/video/video_bitrate_allocation.h" +#include "common_video/generic_frame_descriptor/generic_frame_info.h" + +namespace webrtc { + +// Controls how video should be encoded to be scalable. 
Outputs results as +// buffer usage configuration for encoder and enough details to communicate the +// scalability structure via dependency descriptor rtp header extension. +class ScalableVideoController { + public: + struct StreamLayersConfig { + int num_spatial_layers = 1; + int num_temporal_layers = 1; + // Indicates if frames can reference frames of a different resolution. + bool uses_reference_scaling = true; + // Spatial layers scaling. Frames with spatial_id = i expected to be encoded + // with original_resolution * scaling_factor_num[i] / scaling_factor_den[i]. + int scaling_factor_num[DependencyDescriptor::kMaxSpatialIds] = {1, 1, 1, 1}; + int scaling_factor_den[DependencyDescriptor::kMaxSpatialIds] = {1, 1, 1, 1}; + }; + class LayerFrameConfig { + public: + // Builders/setters. + LayerFrameConfig& Id(int value); + LayerFrameConfig& Keyframe(); + LayerFrameConfig& S(int value); + LayerFrameConfig& T(int value); + LayerFrameConfig& Reference(int buffer_id); + LayerFrameConfig& Update(int buffer_id); + LayerFrameConfig& ReferenceAndUpdate(int buffer_id); + + // Getters. + int Id() const { return id_; } + bool IsKeyframe() const { return is_keyframe_; } + int SpatialId() const { return spatial_id_; } + int TemporalId() const { return temporal_id_; } + const absl::InlinedVector<CodecBufferUsage, kMaxEncoderBuffers>& Buffers() + const { + return buffers_; + } + + private: + // Id to match configuration returned by NextFrameConfig with + // (possibly modified) configuration passed back via OnEncoderDone. + // The meaning of the id is an implementation detail of + // the ScalableVideoController. + int id_ = 0; + + // Indication frame should be encoded as a key frame. In particular when + // `is_keyframe=true` property `CodecBufferUsage::referenced` should be + // ignored and treated as false. 
+ bool is_keyframe_ = false; + + int spatial_id_ = 0; + int temporal_id_ = 0; + // Describes how encoder which buffers encoder allowed to reference and + // which buffers encoder should update. + absl::InlinedVector<CodecBufferUsage, kMaxEncoderBuffers> buffers_; + }; + + virtual ~ScalableVideoController() = default; + + // Returns video structure description for encoder to configure itself. + virtual StreamLayersConfig StreamConfig() const = 0; + + // Returns video structure description in format compatible with + // dependency descriptor rtp header extension. + virtual FrameDependencyStructure DependencyStructure() const = 0; + + // Notifies Controller with updated bitrates per layer. In particular notifies + // when certain layers should be disabled. + // Controller shouldn't produce LayerFrameConfig for disabled layers. + virtual void OnRatesUpdated(const VideoBitrateAllocation& bitrates) = 0; + + // When `restart` is true, first `LayerFrameConfig` should have `is_keyframe` + // set to true. + // Returned vector shouldn't be empty. + virtual std::vector<LayerFrameConfig> NextFrameConfig(bool restart) = 0; + + // Returns configuration to pass to EncoderCallback. + virtual GenericFrameInfo OnEncodeDone(const LayerFrameConfig& config) = 0; +}; + +// Below are implementation details. 
+inline ScalableVideoController::LayerFrameConfig& +ScalableVideoController::LayerFrameConfig::Id(int value) { + id_ = value; + return *this; +} +inline ScalableVideoController::LayerFrameConfig& +ScalableVideoController::LayerFrameConfig::Keyframe() { + is_keyframe_ = true; + return *this; +} +inline ScalableVideoController::LayerFrameConfig& +ScalableVideoController::LayerFrameConfig::S(int value) { + spatial_id_ = value; + return *this; +} +inline ScalableVideoController::LayerFrameConfig& +ScalableVideoController::LayerFrameConfig::T(int value) { + temporal_id_ = value; + return *this; +} +inline ScalableVideoController::LayerFrameConfig& +ScalableVideoController::LayerFrameConfig::Reference(int buffer_id) { + buffers_.emplace_back(buffer_id, /*referenced=*/true, /*updated=*/false); + return *this; +} +inline ScalableVideoController::LayerFrameConfig& +ScalableVideoController::LayerFrameConfig::Update(int buffer_id) { + buffers_.emplace_back(buffer_id, /*referenced=*/false, /*updated=*/true); + return *this; +} +inline ScalableVideoController::LayerFrameConfig& +ScalableVideoController::LayerFrameConfig::ReferenceAndUpdate(int buffer_id) { + buffers_.emplace_back(buffer_id, /*referenced=*/true, /*updated=*/true); + return *this; +} + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_SVC_SCALABLE_VIDEO_CONTROLLER_H_ diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller_gn/moz.build b/third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller_gn/moz.build new file mode 100644 index 0000000000..42b8307362 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller_gn/moz.build @@ -0,0 +1,221 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller_no_layering.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = 
"1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + 
DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("scalable_video_controller_gn") diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller_no_layering.cc b/third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller_no_layering.cc new file mode 100644 index 0000000000..a9d530dd9d --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller_no_layering.cc @@ -0,0 +1,88 @@ 
+/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/video_coding/svc/scalable_video_controller_no_layering.h" + +#include <utility> +#include <vector> + +#include "api/transport/rtp/dependency_descriptor.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +ScalableVideoControllerNoLayering::~ScalableVideoControllerNoLayering() = + default; + +ScalableVideoController::StreamLayersConfig +ScalableVideoControllerNoLayering::StreamConfig() const { + StreamLayersConfig result; + result.num_spatial_layers = 1; + result.num_temporal_layers = 1; + result.uses_reference_scaling = false; + return result; +} + +FrameDependencyStructure +ScalableVideoControllerNoLayering::DependencyStructure() const { + FrameDependencyStructure structure; + structure.num_decode_targets = 1; + structure.num_chains = 1; + structure.decode_target_protected_by_chain = {0}; + + FrameDependencyTemplate key_frame; + key_frame.decode_target_indications = {DecodeTargetIndication::kSwitch}; + key_frame.chain_diffs = {0}; + structure.templates.push_back(key_frame); + + FrameDependencyTemplate delta_frame; + delta_frame.decode_target_indications = {DecodeTargetIndication::kSwitch}; + delta_frame.chain_diffs = {1}; + delta_frame.frame_diffs = {1}; + structure.templates.push_back(delta_frame); + + return structure; +} + +std::vector<ScalableVideoController::LayerFrameConfig> +ScalableVideoControllerNoLayering::NextFrameConfig(bool restart) { + if (!enabled_) { + return {}; + } + std::vector<LayerFrameConfig> result(1); + if (restart || start_) { + result[0].Id(0).Keyframe().Update(0); + } else { + 
result[0].Id(0).ReferenceAndUpdate(0); + } + start_ = false; + return result; +} + +GenericFrameInfo ScalableVideoControllerNoLayering::OnEncodeDone( + const LayerFrameConfig& config) { + RTC_DCHECK_EQ(config.Id(), 0); + GenericFrameInfo frame_info; + frame_info.encoder_buffers = config.Buffers(); + if (config.IsKeyframe()) { + for (auto& buffer : frame_info.encoder_buffers) { + buffer.referenced = false; + } + } + frame_info.decode_target_indications = {DecodeTargetIndication::kSwitch}; + frame_info.part_of_chain = {true}; + return frame_info; +} + +void ScalableVideoControllerNoLayering::OnRatesUpdated( + const VideoBitrateAllocation& bitrates) { + enabled_ = bitrates.GetBitrate(0, 0) > 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller_no_layering.h b/third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller_no_layering.h new file mode 100644 index 0000000000..6d66b61c8b --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/scalable_video_controller_no_layering.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#ifndef MODULES_VIDEO_CODING_SVC_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_ +#define MODULES_VIDEO_CODING_SVC_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_ + +#include <vector> + +#include "api/transport/rtp/dependency_descriptor.h" +#include "api/video/video_bitrate_allocation.h" +#include "common_video/generic_frame_descriptor/generic_frame_info.h" +#include "modules/video_coding/svc/scalable_video_controller.h" + +namespace webrtc { + +class ScalableVideoControllerNoLayering : public ScalableVideoController { + public: + ~ScalableVideoControllerNoLayering() override; + + StreamLayersConfig StreamConfig() const override; + FrameDependencyStructure DependencyStructure() const override; + + std::vector<LayerFrameConfig> NextFrameConfig(bool restart) override; + GenericFrameInfo OnEncodeDone(const LayerFrameConfig& config) override; + void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override; + + private: + bool start_ = true; + bool enabled_ = true; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_SVC_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_ diff --git a/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator.cc b/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator.cc new file mode 100644 index 0000000000..b6ae0d7430 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator.cc @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/video_coding/svc/svc_rate_allocator.h" + +#include <algorithm> +#include <cmath> +#include <cstddef> +#include <numeric> +#include <vector> + +#include "absl/container/inlined_vector.h" +#include "modules/video_coding/svc/create_scalability_structure.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +constexpr float kSpatialLayeringRateScalingFactor = 0.55f; +constexpr float kTemporalLayeringRateScalingFactor = 0.55f; + +struct ActiveSpatialLayers { + size_t first = 0; + size_t num = 0; +}; + +ActiveSpatialLayers GetActiveSpatialLayers(const VideoCodec& codec, + size_t num_spatial_layers) { + ActiveSpatialLayers active; + for (active.first = 0; active.first < num_spatial_layers; ++active.first) { + if (codec.spatialLayers[active.first].active) { + break; + } + } + + size_t last_active_layer = active.first; + for (; last_active_layer < num_spatial_layers; ++last_active_layer) { + if (!codec.spatialLayers[last_active_layer].active) { + break; + } + } + active.num = last_active_layer - active.first; + + return active; +} + +std::vector<DataRate> AdjustAndVerify( + const VideoCodec& codec, + size_t first_active_layer, + const std::vector<DataRate>& spatial_layer_rates) { + std::vector<DataRate> adjusted_spatial_layer_rates; + // Keep track of rate that couldn't be applied to the previous layer due to + // max bitrate constraint, try to pass it forward to the next one. + DataRate excess_rate = DataRate::Zero(); + for (size_t sl_idx = 0; sl_idx < spatial_layer_rates.size(); ++sl_idx) { + DataRate min_rate = DataRate::KilobitsPerSec( + codec.spatialLayers[first_active_layer + sl_idx].minBitrate); + DataRate max_rate = DataRate::KilobitsPerSec( + codec.spatialLayers[first_active_layer + sl_idx].maxBitrate); + + DataRate layer_rate = spatial_layer_rates[sl_idx] + excess_rate; + if (layer_rate < min_rate) { + // Not enough rate to reach min bitrate for desired number of layers, + // abort allocation. 
+ if (spatial_layer_rates.size() == 1) { + return spatial_layer_rates; + } + return adjusted_spatial_layer_rates; + } + + if (layer_rate <= max_rate) { + excess_rate = DataRate::Zero(); + adjusted_spatial_layer_rates.push_back(layer_rate); + } else { + excess_rate = layer_rate - max_rate; + adjusted_spatial_layer_rates.push_back(max_rate); + } + } + + return adjusted_spatial_layer_rates; +} + +static std::vector<DataRate> SplitBitrate(size_t num_layers, + DataRate total_bitrate, + float rate_scaling_factor) { + std::vector<DataRate> bitrates; + + double denominator = 0.0; + for (size_t layer_idx = 0; layer_idx < num_layers; ++layer_idx) { + denominator += std::pow(rate_scaling_factor, layer_idx); + } + + double numerator = std::pow(rate_scaling_factor, num_layers - 1); + for (size_t layer_idx = 0; layer_idx < num_layers; ++layer_idx) { + bitrates.push_back(numerator * total_bitrate / denominator); + numerator /= rate_scaling_factor; + } + + const DataRate sum = + std::accumulate(bitrates.begin(), bitrates.end(), DataRate::Zero()); + + // Keep the sum of split bitrates equal to the total bitrate by adding or + // subtracting bits, which were lost due to rounding, to the latest layer. + if (total_bitrate > sum) { + bitrates.back() += total_bitrate - sum; + } else if (total_bitrate < sum) { + bitrates.back() -= sum - total_bitrate; + } + + return bitrates; +} + +// Returns the minimum bitrate needed for `num_active_layers` spatial layers to +// become active using the configuration specified by `codec`. 
+DataRate FindLayerTogglingThreshold(const VideoCodec& codec, + size_t first_active_layer, + size_t num_active_layers) { + if (num_active_layers == 1) { + return DataRate::KilobitsPerSec(codec.spatialLayers[0].minBitrate); + } + + if (codec.mode == VideoCodecMode::kRealtimeVideo) { + DataRate lower_bound = DataRate::Zero(); + DataRate upper_bound = DataRate::Zero(); + if (num_active_layers > 1) { + for (size_t i = 0; i < num_active_layers - 1; ++i) { + lower_bound += DataRate::KilobitsPerSec( + codec.spatialLayers[first_active_layer + i].minBitrate); + upper_bound += DataRate::KilobitsPerSec( + codec.spatialLayers[first_active_layer + i].maxBitrate); + } + } + upper_bound += DataRate::KilobitsPerSec( + codec.spatialLayers[first_active_layer + num_active_layers - 1] + .minBitrate); + + // Do a binary search until upper and lower bound is the highest bitrate for + // `num_active_layers` - 1 layers and lowest bitrate for `num_active_layers` + // layers respectively. + while (upper_bound - lower_bound > DataRate::BitsPerSec(1)) { + DataRate try_rate = (lower_bound + upper_bound) / 2; + if (AdjustAndVerify(codec, first_active_layer, + SplitBitrate(num_active_layers, try_rate, + kSpatialLayeringRateScalingFactor)) + .size() == num_active_layers) { + upper_bound = try_rate; + } else { + lower_bound = try_rate; + } + } + return upper_bound; + } else { + DataRate toggling_rate = DataRate::Zero(); + for (size_t i = 0; i < num_active_layers - 1; ++i) { + toggling_rate += DataRate::KilobitsPerSec( + codec.spatialLayers[first_active_layer + i].targetBitrate); + } + toggling_rate += DataRate::KilobitsPerSec( + codec.spatialLayers[first_active_layer + num_active_layers - 1] + .minBitrate); + return toggling_rate; + } +} + +} // namespace + +SvcRateAllocator::NumLayers SvcRateAllocator::GetNumLayers( + const VideoCodec& codec) { + NumLayers layers; + if (absl::optional<ScalabilityMode> scalability_mode = + codec.GetScalabilityMode(); + scalability_mode.has_value()) { + if (auto 
structure = CreateScalabilityStructure(*scalability_mode)) { + ScalableVideoController::StreamLayersConfig config = + structure->StreamConfig(); + layers.spatial = config.num_spatial_layers; + layers.temporal = config.num_temporal_layers; + return layers; + } + } + if (codec.codecType == kVideoCodecVP9) { + layers.spatial = codec.VP9().numberOfSpatialLayers; + layers.temporal = codec.VP9().numberOfTemporalLayers; + return layers; + } + layers.spatial = 1; + layers.temporal = 1; + return layers; +} + +SvcRateAllocator::SvcRateAllocator(const VideoCodec& codec) + : codec_(codec), + num_layers_(GetNumLayers(codec)), + experiment_settings_(StableTargetRateExperiment::ParseFromFieldTrials()), + cumulative_layer_start_bitrates_(GetLayerStartBitrates(codec)), + last_active_layer_count_(0) { + RTC_DCHECK_GT(num_layers_.spatial, 0); + RTC_DCHECK_LE(num_layers_.spatial, kMaxSpatialLayers); + RTC_DCHECK_GT(num_layers_.temporal, 0); + RTC_DCHECK_LE(num_layers_.temporal, 3); + for (size_t layer_idx = 0; layer_idx < num_layers_.spatial; ++layer_idx) { + // Verify min <= target <= max. + if (codec.spatialLayers[layer_idx].active) { + RTC_DCHECK_GT(codec.spatialLayers[layer_idx].maxBitrate, 0); + RTC_DCHECK_GE(codec.spatialLayers[layer_idx].maxBitrate, + codec.spatialLayers[layer_idx].minBitrate); + RTC_DCHECK_GE(codec.spatialLayers[layer_idx].targetBitrate, + codec.spatialLayers[layer_idx].minBitrate); + RTC_DCHECK_GE(codec.spatialLayers[layer_idx].maxBitrate, + codec.spatialLayers[layer_idx].targetBitrate); + } + } +} + +VideoBitrateAllocation SvcRateAllocator::Allocate( + VideoBitrateAllocationParameters parameters) { + DataRate total_bitrate = parameters.total_bitrate; + if (codec_.maxBitrate != 0) { + total_bitrate = + std::min(total_bitrate, DataRate::KilobitsPerSec(codec_.maxBitrate)); + } + + if (codec_.spatialLayers[0].targetBitrate == 0) { + // Delegate rate distribution to encoder wrapper if bitrate thresholds + // are not set. 
+ VideoBitrateAllocation bitrate_allocation; + bitrate_allocation.SetBitrate(0, 0, total_bitrate.bps()); + return bitrate_allocation; + } + + const ActiveSpatialLayers active_layers = + GetActiveSpatialLayers(codec_, num_layers_.spatial); + size_t num_spatial_layers = active_layers.num; + + if (num_spatial_layers == 0) { + return VideoBitrateAllocation(); // All layers are deactivated. + } + + // Figure out how many spatial layers should be active. + if (experiment_settings_.IsEnabled() && + parameters.stable_bitrate > DataRate::Zero()) { + double hysteresis_factor; + if (codec_.mode == VideoCodecMode::kScreensharing) { + hysteresis_factor = experiment_settings_.GetScreenshareHysteresisFactor(); + } else { + hysteresis_factor = experiment_settings_.GetVideoHysteresisFactor(); + } + + DataRate stable_rate = + std::min(parameters.total_bitrate, parameters.stable_bitrate); + // First check if bitrate has grown large enough to enable new layers. + size_t num_enabled_with_hysteresis = + FindNumEnabledLayers(stable_rate / hysteresis_factor); + if (num_enabled_with_hysteresis >= last_active_layer_count_) { + num_spatial_layers = num_enabled_with_hysteresis; + } else { + // We could not enable new layers, check if any should be disabled. 
+ num_spatial_layers = + std::min(last_active_layer_count_, FindNumEnabledLayers(stable_rate)); + } + } else { + num_spatial_layers = FindNumEnabledLayers(parameters.total_bitrate); + } + last_active_layer_count_ = num_spatial_layers; + + VideoBitrateAllocation allocation; + if (codec_.mode == VideoCodecMode::kRealtimeVideo) { + allocation = GetAllocationNormalVideo(total_bitrate, active_layers.first, + num_spatial_layers); + } else { + allocation = GetAllocationScreenSharing(total_bitrate, active_layers.first, + num_spatial_layers); + } + allocation.set_bw_limited(num_spatial_layers < active_layers.num); + return allocation; +} + +VideoBitrateAllocation SvcRateAllocator::GetAllocationNormalVideo( + DataRate total_bitrate, + size_t first_active_layer, + size_t num_spatial_layers) const { + std::vector<DataRate> spatial_layer_rates; + if (num_spatial_layers == 0) { + // Not enough rate for even the base layer. Force allocation at the total + // bitrate anyway. + num_spatial_layers = 1; + spatial_layer_rates.push_back(total_bitrate); + } else { + spatial_layer_rates = + AdjustAndVerify(codec_, first_active_layer, + SplitBitrate(num_spatial_layers, total_bitrate, + kSpatialLayeringRateScalingFactor)); + RTC_DCHECK_EQ(spatial_layer_rates.size(), num_spatial_layers); + } + + VideoBitrateAllocation bitrate_allocation; + + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + std::vector<DataRate> temporal_layer_rates = + SplitBitrate(num_layers_.temporal, spatial_layer_rates[sl_idx], + kTemporalLayeringRateScalingFactor); + + // Distribute rate across temporal layers. Allocate more bits to lower + // layers since they are used for prediction of higher layers and their + // references are far apart. 
+ if (num_layers_.temporal == 1) { + bitrate_allocation.SetBitrate(sl_idx + first_active_layer, 0, + temporal_layer_rates[0].bps()); + } else if (num_layers_.temporal == 2) { + bitrate_allocation.SetBitrate(sl_idx + first_active_layer, 0, + temporal_layer_rates[1].bps()); + bitrate_allocation.SetBitrate(sl_idx + first_active_layer, 1, + temporal_layer_rates[0].bps()); + } else { + RTC_CHECK_EQ(num_layers_.temporal, 3); + // In case of three temporal layers the high layer has two frames and the + // middle layer has one frame within GOP (in between two consecutive low + // layer frames). Thus high layer requires more bits (comparing pure + // bitrate of layer, excluding bitrate of base layers) to keep quality on + // par with lower layers. + bitrate_allocation.SetBitrate(sl_idx + first_active_layer, 0, + temporal_layer_rates[2].bps()); + bitrate_allocation.SetBitrate(sl_idx + first_active_layer, 1, + temporal_layer_rates[0].bps()); + bitrate_allocation.SetBitrate(sl_idx + first_active_layer, 2, + temporal_layer_rates[1].bps()); + } + } + + return bitrate_allocation; +} + +// Bit-rate is allocated in such a way, that the highest enabled layer will have +// between min and max bitrate, and all others will have exactly target +// bit-rate allocated. +VideoBitrateAllocation SvcRateAllocator::GetAllocationScreenSharing( + DataRate total_bitrate, + size_t first_active_layer, + size_t num_spatial_layers) const { + VideoBitrateAllocation bitrate_allocation; + + if (num_spatial_layers == 0 || + total_bitrate < + DataRate::KilobitsPerSec( + codec_.spatialLayers[first_active_layer].minBitrate)) { + // Always enable at least one layer. 
+ bitrate_allocation.SetBitrate(first_active_layer, 0, total_bitrate.bps()); + return bitrate_allocation; + } + + DataRate allocated_rate = DataRate::Zero(); + DataRate top_layer_rate = DataRate::Zero(); + size_t sl_idx; + for (sl_idx = first_active_layer; + sl_idx < first_active_layer + num_spatial_layers; ++sl_idx) { + const DataRate min_rate = + DataRate::KilobitsPerSec(codec_.spatialLayers[sl_idx].minBitrate); + const DataRate target_rate = + DataRate::KilobitsPerSec(codec_.spatialLayers[sl_idx].targetBitrate); + + if (allocated_rate + min_rate > total_bitrate) { + // Use stable rate to determine if layer should be enabled. + break; + } + + top_layer_rate = std::min(target_rate, total_bitrate - allocated_rate); + bitrate_allocation.SetBitrate(sl_idx, 0, top_layer_rate.bps()); + allocated_rate += top_layer_rate; + } + + if (sl_idx > 0 && total_bitrate - allocated_rate > DataRate::Zero()) { + // Add leftover to the last allocated layer. + top_layer_rate = std::min( + top_layer_rate + (total_bitrate - allocated_rate), + DataRate::KilobitsPerSec(codec_.spatialLayers[sl_idx - 1].maxBitrate)); + bitrate_allocation.SetBitrate(sl_idx - 1, 0, top_layer_rate.bps()); + } + + return bitrate_allocation; +} + +size_t SvcRateAllocator::FindNumEnabledLayers(DataRate target_rate) const { + if (cumulative_layer_start_bitrates_.empty()) { + return 0; + } + + size_t num_enabled_layers = 0; + for (DataRate start_rate : cumulative_layer_start_bitrates_) { + // First layer is always enabled. 
+ if (num_enabled_layers == 0 || start_rate <= target_rate) { + ++num_enabled_layers; + } else { + break; + } + } + + return num_enabled_layers; +} + +DataRate SvcRateAllocator::GetMaxBitrate(const VideoCodec& codec) { + const NumLayers num_layers = GetNumLayers(codec); + const ActiveSpatialLayers active_layers = + GetActiveSpatialLayers(codec, num_layers.spatial); + + DataRate max_bitrate = DataRate::Zero(); + for (size_t sl_idx = 0; sl_idx < active_layers.num; ++sl_idx) { + max_bitrate += DataRate::KilobitsPerSec( + codec.spatialLayers[active_layers.first + sl_idx].maxBitrate); + } + + if (codec.maxBitrate != 0) { + max_bitrate = + std::min(max_bitrate, DataRate::KilobitsPerSec(codec.maxBitrate)); + } + + return max_bitrate; +} + +DataRate SvcRateAllocator::GetPaddingBitrate(const VideoCodec& codec) { + auto start_bitrate = GetLayerStartBitrates(codec); + if (start_bitrate.empty()) { + return DataRate::Zero(); // All layers are deactivated. + } + + return start_bitrate.back(); +} + +absl::InlinedVector<DataRate, kMaxSpatialLayers> +SvcRateAllocator::GetLayerStartBitrates(const VideoCodec& codec) { + absl::InlinedVector<DataRate, kMaxSpatialLayers> start_bitrates; + const NumLayers num_layers = GetNumLayers(codec); + const ActiveSpatialLayers active_layers = + GetActiveSpatialLayers(codec, num_layers.spatial); + DataRate last_rate = DataRate::Zero(); + for (size_t i = 1; i <= active_layers.num; ++i) { + DataRate layer_toggling_rate = + FindLayerTogglingThreshold(codec, active_layers.first, i); + start_bitrates.push_back(layer_toggling_rate); + RTC_DCHECK_LE(last_rate, layer_toggling_rate); + last_rate = layer_toggling_rate; + } + return start_bitrates; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator.h b/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator.h new file mode 100644 index 0000000000..bd75fca284 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator.h 
@@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_SVC_SVC_RATE_ALLOCATOR_H_ +#define MODULES_VIDEO_CODING_SVC_SVC_RATE_ALLOCATOR_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "absl/container/inlined_vector.h" +#include "api/video/video_bitrate_allocation.h" +#include "api/video/video_bitrate_allocator.h" +#include "api/video/video_codec_constants.h" +#include "api/video_codecs/video_codec.h" +#include "rtc_base/experiments/stable_target_rate_experiment.h" + +namespace webrtc { + +class SvcRateAllocator : public VideoBitrateAllocator { + public: + explicit SvcRateAllocator(const VideoCodec& codec); + + VideoBitrateAllocation Allocate( + VideoBitrateAllocationParameters parameters) override; + + static DataRate GetMaxBitrate(const VideoCodec& codec); + static DataRate GetPaddingBitrate(const VideoCodec& codec); + static absl::InlinedVector<DataRate, kMaxSpatialLayers> GetLayerStartBitrates( + const VideoCodec& codec); + + private: + struct NumLayers { + size_t spatial = 1; + size_t temporal = 1; + }; + + static NumLayers GetNumLayers(const VideoCodec& codec); + VideoBitrateAllocation GetAllocationNormalVideo( + DataRate total_bitrate, + size_t first_active_layer, + size_t num_spatial_layers) const; + + VideoBitrateAllocation GetAllocationScreenSharing( + DataRate total_bitrate, + size_t first_active_layer, + size_t num_spatial_layers) const; + + // Returns the number of layers that are active and have enough bitrate to + // actually be enabled. 
+ size_t FindNumEnabledLayers(DataRate target_rate) const; + + const VideoCodec codec_; + const NumLayers num_layers_; + const StableTargetRateExperiment experiment_settings_; + const absl::InlinedVector<DataRate, kMaxSpatialLayers> + cumulative_layer_start_bitrates_; + size_t last_active_layer_count_; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_SVC_SVC_RATE_ALLOCATOR_H_ diff --git a/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator_gn/moz.build b/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator_gn/moz.build new file mode 100644 index 0000000000..5d7d952a69 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator_gn/moz.build @@ -0,0 +1,225 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = 
True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = 
True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("svc_rate_allocator_gn") diff --git a/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator_unittest.cc b/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator_unittest.cc new file mode 100644 index 0000000000..b3a365d722 --- /dev/null +++ b/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator_unittest.cc @@ -0,0 +1,584 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/svc/svc_rate_allocator.h" + +#include <algorithm> +#include <vector> + +#include "modules/video_coding/codecs/vp9/svc_config.h" +#include "rtc_base/checks.h" +#include "test/field_trial.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { +namespace { +static VideoCodec Configure(size_t width, + size_t height, + size_t num_spatial_layers, + size_t num_temporal_layers, + bool is_screen_sharing) { + VideoCodec codec; + codec.width = width; + codec.height = height; + codec.codecType = kVideoCodecVP9; + codec.mode = is_screen_sharing ? VideoCodecMode::kScreensharing + : VideoCodecMode::kRealtimeVideo; + + std::vector<SpatialLayer> spatial_layers = + GetSvcConfig(width, height, 30, /*first_active_layer=*/0, + num_spatial_layers, num_temporal_layers, is_screen_sharing); + RTC_CHECK_LE(spatial_layers.size(), kMaxSpatialLayers); + + codec.VP9()->numberOfSpatialLayers = + std::min<unsigned char>(num_spatial_layers, spatial_layers.size()); + codec.VP9()->numberOfTemporalLayers = std::min<unsigned char>( + num_temporal_layers, spatial_layers.back().numberOfTemporalLayers); + + for (size_t sl_idx = 0; sl_idx < spatial_layers.size(); ++sl_idx) { + codec.spatialLayers[sl_idx] = spatial_layers[sl_idx]; + } + + return codec; +} + +} // namespace + +TEST(SvcRateAllocatorTest, SingleLayerFor320x180Input) { + VideoCodec codec = Configure(320, 180, 3, 3, false); + SvcRateAllocator allocator = SvcRateAllocator(codec); + + VideoBitrateAllocation allocation = + allocator.Allocate(VideoBitrateAllocationParameters(1000 * 1000, 30)); + + EXPECT_GT(allocation.GetSpatialLayerSum(0), 0u); + 
EXPECT_EQ(allocation.GetSpatialLayerSum(1), 0u); +} + +TEST(SvcRateAllocatorTest, TwoLayersFor640x360Input) { + VideoCodec codec = Configure(640, 360, 3, 3, false); + SvcRateAllocator allocator = SvcRateAllocator(codec); + + VideoBitrateAllocation allocation = + allocator.Allocate(VideoBitrateAllocationParameters(1000 * 1000, 30)); + + EXPECT_GT(allocation.GetSpatialLayerSum(0), 0u); + EXPECT_GT(allocation.GetSpatialLayerSum(1), 0u); + EXPECT_EQ(allocation.GetSpatialLayerSum(2), 0u); +} + +TEST(SvcRateAllocatorTest, ThreeLayersFor1280x720Input) { + VideoCodec codec = Configure(1280, 720, 3, 3, false); + SvcRateAllocator allocator = SvcRateAllocator(codec); + + VideoBitrateAllocation allocation = + allocator.Allocate(VideoBitrateAllocationParameters(1000 * 1000, 30)); + + EXPECT_GT(allocation.GetSpatialLayerSum(0), 0u); + EXPECT_GT(allocation.GetSpatialLayerSum(1), 0u); + EXPECT_GT(allocation.GetSpatialLayerSum(2), 0u); +} + +TEST(SvcRateAllocatorTest, + BaseLayerNonZeroBitrateEvenIfTotalIfLessThanMinimum) { + VideoCodec codec = Configure(1280, 720, 3, 3, false); + SvcRateAllocator allocator = SvcRateAllocator(codec); + + const SpatialLayer* layers = codec.spatialLayers; + + VideoBitrateAllocation allocation = allocator.Allocate( + VideoBitrateAllocationParameters(layers[0].minBitrate * 1000 / 2, 30)); + + EXPECT_GT(allocation.GetSpatialLayerSum(0), 0u); + EXPECT_LT(allocation.GetSpatialLayerSum(0), layers[0].minBitrate * 1000); + EXPECT_EQ(allocation.GetSpatialLayerSum(1), 0u); +} + +TEST(SvcRateAllocatorTest, Disable640x360Layer) { + VideoCodec codec = Configure(1280, 720, 3, 3, false); + SvcRateAllocator allocator = SvcRateAllocator(codec); + + const SpatialLayer* layers = codec.spatialLayers; + + size_t min_bitrate_for_640x360_layer_kbps = + layers[0].minBitrate + layers[1].minBitrate; + + VideoBitrateAllocation allocation = + allocator.Allocate(VideoBitrateAllocationParameters( + min_bitrate_for_640x360_layer_kbps * 1000 - 1, 30)); + + 
EXPECT_GT(allocation.GetSpatialLayerSum(0), 0u); + EXPECT_EQ(allocation.GetSpatialLayerSum(1), 0u); +} + +TEST(SvcRateAllocatorTest, Disable1280x720Layer) { + VideoCodec codec = Configure(1280, 720, 3, 3, false); + SvcRateAllocator allocator = SvcRateAllocator(codec); + + const SpatialLayer* layers = codec.spatialLayers; + + size_t min_bitrate_for_1280x720_layer_kbps = + layers[0].minBitrate + layers[1].minBitrate + layers[2].minBitrate; + + VideoBitrateAllocation allocation = + allocator.Allocate(VideoBitrateAllocationParameters( + min_bitrate_for_1280x720_layer_kbps * 1000 - 1, 30)); + + EXPECT_GT(allocation.GetSpatialLayerSum(0), 0u); + EXPECT_GT(allocation.GetSpatialLayerSum(1), 0u); + EXPECT_EQ(allocation.GetSpatialLayerSum(2), 0u); +} + +TEST(SvcRateAllocatorTest, BitrateIsCapped) { + VideoCodec codec = Configure(1280, 720, 3, 3, false); + SvcRateAllocator allocator = SvcRateAllocator(codec); + + const SpatialLayer* layers = codec.spatialLayers; + + const uint32_t link_mbps = 100; + VideoBitrateAllocation allocation = allocator.Allocate( + VideoBitrateAllocationParameters(link_mbps * 1000000, 30)); + + EXPECT_EQ(allocation.get_sum_kbps(), + layers[0].maxBitrate + layers[1].maxBitrate + layers[2].maxBitrate); + EXPECT_EQ(allocation.GetSpatialLayerSum(0) / 1000, layers[0].maxBitrate); + EXPECT_EQ(allocation.GetSpatialLayerSum(1) / 1000, layers[1].maxBitrate); + EXPECT_EQ(allocation.GetSpatialLayerSum(2) / 1000, layers[2].maxBitrate); +} + +TEST(SvcRateAllocatorTest, MinBitrateToGetQualityLayer) { + VideoCodec codec = Configure(1280, 720, 3, 1, true); + SvcRateAllocator allocator = SvcRateAllocator(codec); + + const SpatialLayer* layers = codec.spatialLayers; + + EXPECT_LE(codec.VP9()->numberOfSpatialLayers, 3U); + + VideoBitrateAllocation allocation = allocator.Allocate( + VideoBitrateAllocationParameters(layers[0].minBitrate * 1000, 30)); + EXPECT_EQ(allocation.GetSpatialLayerSum(0) / 1000, layers[0].minBitrate); + EXPECT_EQ(allocation.GetSpatialLayerSum(1), 
0UL); + + allocation = allocator.Allocate(VideoBitrateAllocationParameters( + (layers[0].targetBitrate + layers[1].minBitrate) * 1000, 30)); + EXPECT_EQ(allocation.GetSpatialLayerSum(0) / 1000, layers[0].targetBitrate); + EXPECT_EQ(allocation.GetSpatialLayerSum(1) / 1000, layers[1].minBitrate); +} + +TEST(SvcRateAllocatorTest, DeactivateHigherLayers) { + for (int deactivated_idx = 2; deactivated_idx >= 0; --deactivated_idx) { + VideoCodec codec = Configure(1280, 720, 3, 1, false); + EXPECT_LE(codec.VP9()->numberOfSpatialLayers, 3U); + + for (int i = deactivated_idx; i < 3; ++i) + codec.spatialLayers[i].active = false; + + SvcRateAllocator allocator = SvcRateAllocator(codec); + + VideoBitrateAllocation allocation = allocator.Allocate( + VideoBitrateAllocationParameters(10 * 1000 * 1000, 30)); + + // Ensure layers spatial_idx < deactivated_idx are activated. + for (int spatial_idx = 0; spatial_idx < deactivated_idx; ++spatial_idx) { + EXPECT_GT(allocation.GetSpatialLayerSum(spatial_idx), 0UL); + } + + // Ensure layers spatial_idx >= deactivated_idx are deactivated. + for (int spatial_idx = deactivated_idx; spatial_idx < 3; ++spatial_idx) { + EXPECT_EQ(allocation.GetSpatialLayerSum(spatial_idx), 0UL); + } + } +} + +TEST(SvcRateAllocatorTest, DeactivateLowerLayers) { + for (int deactivated_idx = 0; deactivated_idx < 3; ++deactivated_idx) { + VideoCodec codec = Configure(1280, 720, 3, 1, false); + EXPECT_LE(codec.VP9()->numberOfSpatialLayers, 3U); + + for (int i = deactivated_idx; i >= 0; --i) + codec.spatialLayers[i].active = false; + + SvcRateAllocator allocator = SvcRateAllocator(codec); + + VideoBitrateAllocation allocation = allocator.Allocate( + VideoBitrateAllocationParameters(10 * 1000 * 1000, 30)); + + // Ensure layers spatial_idx <= deactivated_idx are deactivated. 
+ for (int spatial_idx = 0; spatial_idx <= deactivated_idx; ++spatial_idx) { + EXPECT_EQ(allocation.GetSpatialLayerSum(spatial_idx), 0UL); + } + + // Ensure layers spatial_idx > deactivated_idx are activated. + for (int spatial_idx = deactivated_idx + 1; spatial_idx < 3; + ++spatial_idx) { + EXPECT_GT(allocation.GetSpatialLayerSum(spatial_idx), 0UL); + } + } +} + +TEST(SvcRateAllocatorTest, SignalsBwLimited) { + VideoCodec codec = Configure(1280, 720, 3, 1, false); + SvcRateAllocator allocator = SvcRateAllocator(codec); + + // Rough estimate calculated by hand. + uint32_t min_to_enable_all = 900000; + + EXPECT_TRUE( + allocator + .Allocate(VideoBitrateAllocationParameters(min_to_enable_all / 2, 30)) + .is_bw_limited()); + + EXPECT_FALSE( + allocator + .Allocate(VideoBitrateAllocationParameters(min_to_enable_all, 30)) + .is_bw_limited()); +} + +TEST(SvcRateAllocatorTest, NoPaddingIfAllLayersAreDeactivated) { + VideoCodec codec = Configure(1280, 720, 3, 1, false); + EXPECT_EQ(codec.VP9()->numberOfSpatialLayers, 3U); + // Deactivation of base layer deactivates all layers. + codec.spatialLayers[0].active = false; + codec.spatialLayers[1].active = false; + codec.spatialLayers[2].active = false; + DataRate padding_rate = SvcRateAllocator::GetPaddingBitrate(codec); + EXPECT_EQ(padding_rate, DataRate::Zero()); +} + +TEST(SvcRateAllocatorTest, FindLayerTogglingThreshold) { + // Let's unit test a utility method of the unit test... + + // Predetermined constants indicating the min bitrate needed for two and three + // layers to be enabled respectively, using the config from Configure() with + // 1280x720 resolution and three spatial layers. 
+ const DataRate kTwoLayerMinRate = DataRate::BitsPerSec(299150); + const DataRate kThreeLayerMinRate = DataRate::BitsPerSec(891052); + + VideoCodec codec = Configure(1280, 720, 3, 1, false); + absl::InlinedVector<DataRate, kMaxSpatialLayers> layer_start_bitrates = + SvcRateAllocator::GetLayerStartBitrates(codec); + ASSERT_EQ(layer_start_bitrates.size(), 3u); + EXPECT_EQ(layer_start_bitrates[1], kTwoLayerMinRate); + EXPECT_EQ(layer_start_bitrates[2], kThreeLayerMinRate); +} + +TEST(SvcRateAllocatorTest, SupportsAv1) { + VideoCodec codec; + codec.width = 640; + codec.height = 360; + codec.codecType = kVideoCodecAV1; + codec.SetScalabilityMode(ScalabilityMode::kL3T3); + codec.spatialLayers[0].active = true; + codec.spatialLayers[0].minBitrate = 30; + codec.spatialLayers[0].targetBitrate = 51; + codec.spatialLayers[0].maxBitrate = 73; + codec.spatialLayers[1].active = true; + codec.spatialLayers[1].minBitrate = 49; + codec.spatialLayers[1].targetBitrate = 64; + codec.spatialLayers[1].maxBitrate = 97; + codec.spatialLayers[2].active = true; + codec.spatialLayers[2].minBitrate = 193; + codec.spatialLayers[2].targetBitrate = 305; + codec.spatialLayers[2].maxBitrate = 418; + + SvcRateAllocator allocator(codec); + + VideoBitrateAllocation allocation = + allocator.Allocate(VideoBitrateAllocationParameters(1'000'000, 30)); + + EXPECT_GT(allocation.GetSpatialLayerSum(0), 0u); + EXPECT_GT(allocation.GetSpatialLayerSum(1), 0u); + EXPECT_GT(allocation.GetSpatialLayerSum(2), 0u); +} + +TEST(SvcRateAllocatorTest, SupportsAv1WithSkippedLayer) { + VideoCodec codec; + codec.width = 640; + codec.height = 360; + codec.codecType = kVideoCodecAV1; + codec.SetScalabilityMode(ScalabilityMode::kL3T3); + codec.spatialLayers[0].active = false; + codec.spatialLayers[0].minBitrate = 30; + codec.spatialLayers[0].targetBitrate = 51; + codec.spatialLayers[0].maxBitrate = 73; + codec.spatialLayers[1].active = true; + codec.spatialLayers[1].minBitrate = 49; + codec.spatialLayers[1].targetBitrate = 
64; + codec.spatialLayers[1].maxBitrate = 97; + codec.spatialLayers[2].active = true; + codec.spatialLayers[2].minBitrate = 193; + codec.spatialLayers[2].targetBitrate = 305; + codec.spatialLayers[2].maxBitrate = 418; + + SvcRateAllocator allocator(codec); + + VideoBitrateAllocation allocation = + allocator.Allocate(VideoBitrateAllocationParameters(1'000'000, 30)); + + EXPECT_EQ(allocation.GetSpatialLayerSum(0), 0u); + EXPECT_GT(allocation.GetSpatialLayerSum(1), 0u); + EXPECT_GT(allocation.GetSpatialLayerSum(2), 0u); +} + +TEST(SvcRateAllocatorTest, UsesScalabilityModeToGetNumberOfLayers) { + VideoCodec codec; + codec.width = 640; + codec.height = 360; + codec.codecType = kVideoCodecAV1; + codec.SetScalabilityMode(ScalabilityMode::kL2T2); + codec.spatialLayers[0].active = true; + codec.spatialLayers[0].minBitrate = 30; + codec.spatialLayers[0].targetBitrate = 51; + codec.spatialLayers[0].maxBitrate = 73; + codec.spatialLayers[1].active = true; + codec.spatialLayers[1].minBitrate = 49; + codec.spatialLayers[1].targetBitrate = 64; + codec.spatialLayers[1].maxBitrate = 97; + codec.spatialLayers[2].active = true; + codec.spatialLayers[2].minBitrate = 193; + codec.spatialLayers[2].targetBitrate = 305; + codec.spatialLayers[2].maxBitrate = 418; + + SvcRateAllocator allocator(codec); + VideoBitrateAllocation allocation = + allocator.Allocate(VideoBitrateAllocationParameters(1'000'000, 30)); + + // Expect bitrates for 2 temporal layers. + EXPECT_TRUE(allocation.HasBitrate(1, /*temporal_index=*/0)); + EXPECT_TRUE(allocation.HasBitrate(1, /*temporal_index=*/1)); + EXPECT_FALSE(allocation.HasBitrate(1, /*temporal_index=*/2)); + + // Expect codec.spatialLayers[2].active is ignored because scalability mode uses + // just 2 spatial layers. 
+ EXPECT_EQ(allocation.GetSpatialLayerSum(2), 0u); +} + +class SvcRateAllocatorTestParametrizedContentType + : public ::testing::Test, + public ::testing::WithParamInterface<bool> { + public: + SvcRateAllocatorTestParametrizedContentType() + : is_screen_sharing_(GetParam()) {} + + const bool is_screen_sharing_; +}; + +TEST_P(SvcRateAllocatorTestParametrizedContentType, MaxBitrate) { + VideoCodec codec = Configure(1280, 720, 3, 1, is_screen_sharing_); + EXPECT_EQ(SvcRateAllocator::GetMaxBitrate(codec), + DataRate::KilobitsPerSec(codec.spatialLayers[0].maxBitrate + + codec.spatialLayers[1].maxBitrate + + codec.spatialLayers[2].maxBitrate)); + + // Deactivate middle layer. This causes deactivation of top layer as well. + codec.spatialLayers[1].active = false; + EXPECT_EQ(SvcRateAllocator::GetMaxBitrate(codec), + DataRate::KilobitsPerSec(codec.spatialLayers[0].maxBitrate)); +} + +TEST_P(SvcRateAllocatorTestParametrizedContentType, PaddingBitrate) { + VideoCodec codec = Configure(1280, 720, 3, 1, is_screen_sharing_); + SvcRateAllocator allocator = SvcRateAllocator(codec); + + DataRate padding_bitrate = SvcRateAllocator::GetPaddingBitrate(codec); + + VideoBitrateAllocation allocation = + allocator.Allocate(VideoBitrateAllocationParameters(padding_bitrate, 30)); + EXPECT_GT(allocation.GetSpatialLayerSum(0), 0UL); + EXPECT_GT(allocation.GetSpatialLayerSum(1), 0UL); + EXPECT_GT(allocation.GetSpatialLayerSum(2), 0UL); + + // Allocate 90% of padding bitrate. Top layer should be disabled. + allocation = allocator.Allocate( + VideoBitrateAllocationParameters(9 * padding_bitrate / 10, 30)); + EXPECT_GT(allocation.GetSpatialLayerSum(0), 0UL); + EXPECT_GT(allocation.GetSpatialLayerSum(1), 0UL); + EXPECT_EQ(allocation.GetSpatialLayerSum(2), 0UL); + + // Deactivate top layer. 
+ codec.spatialLayers[2].active = false; + + padding_bitrate = SvcRateAllocator::GetPaddingBitrate(codec); + allocation = + allocator.Allocate(VideoBitrateAllocationParameters(padding_bitrate, 30)); + EXPECT_GT(allocation.GetSpatialLayerSum(0), 0UL); + EXPECT_GT(allocation.GetSpatialLayerSum(1), 0UL); + EXPECT_EQ(allocation.GetSpatialLayerSum(2), 0UL); + + allocation = allocator.Allocate( + VideoBitrateAllocationParameters(9 * padding_bitrate / 10, 30)); + EXPECT_GT(allocation.GetSpatialLayerSum(0), 0UL); + EXPECT_EQ(allocation.GetSpatialLayerSum(1), 0UL); + EXPECT_EQ(allocation.GetSpatialLayerSum(2), 0UL); + + // Deactivate all layers. + codec.spatialLayers[0].active = false; + codec.spatialLayers[1].active = false; + codec.spatialLayers[2].active = false; + + padding_bitrate = SvcRateAllocator::GetPaddingBitrate(codec); + // No padding expected. + EXPECT_EQ(DataRate::Zero(), padding_bitrate); +} + +TEST_P(SvcRateAllocatorTestParametrizedContentType, StableBitrate) { + ScopedFieldTrials field_trial( + "WebRTC-StableTargetRate/enabled:true,video_hysteresis_factor:1.0," + "screenshare_hysteresis_factor:1.0/"); + + const VideoCodec codec = Configure(1280, 720, 3, 1, is_screen_sharing_); + const auto start_rates = SvcRateAllocator::GetLayerStartBitrates(codec); + const DataRate min_rate_two_layers = start_rates[1]; + const DataRate min_rate_three_layers = start_rates[2]; + + const DataRate max_rate_one_layer = + DataRate::KilobitsPerSec(codec.spatialLayers[0].maxBitrate); + const DataRate max_rate_two_layers = + is_screen_sharing_ + ? DataRate::KilobitsPerSec(codec.spatialLayers[0].targetBitrate + + codec.spatialLayers[1].maxBitrate) + : DataRate::KilobitsPerSec(codec.spatialLayers[0].maxBitrate + + codec.spatialLayers[1].maxBitrate); + + SvcRateAllocator allocator = SvcRateAllocator(codec); + + // Two layers, stable and target equal. 
+ auto allocation = allocator.Allocate(VideoBitrateAllocationParameters( + /*total_bitrate=*/min_rate_two_layers, + /*stable_bitrate=*/min_rate_two_layers, /*fps=*/30.0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(1)); + EXPECT_EQ(allocation.get_sum_bps(), min_rate_two_layers.bps()); + + // Two layers, stable bitrate too low for two layers. + allocation = allocator.Allocate(VideoBitrateAllocationParameters( + /*total_bitrate=*/min_rate_two_layers, + /*stable_bitrate=*/min_rate_two_layers - DataRate::BitsPerSec(1), + /*fps=*/30.0)); + EXPECT_FALSE(allocation.IsSpatialLayerUsed(1)); + EXPECT_EQ(DataRate::BitsPerSec(allocation.get_sum_bps()), + std::min(min_rate_two_layers - DataRate::BitsPerSec(1), + max_rate_one_layer)); + + // Three layers, stable and target equal. + allocation = allocator.Allocate(VideoBitrateAllocationParameters( + /*total_bitrate=*/min_rate_three_layers, + /*stable_bitrate=*/min_rate_three_layers, /*fps=*/30.0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(2)); + EXPECT_EQ(allocation.get_sum_bps(), min_rate_three_layers.bps()); + + // Three layers, stable bitrate too low for three layers. 
+ allocation = allocator.Allocate(VideoBitrateAllocationParameters( + /*total_bitrate=*/min_rate_three_layers, + /*stable_bitrate=*/min_rate_three_layers - DataRate::BitsPerSec(1), + /*fps=*/30.0)); + EXPECT_FALSE(allocation.IsSpatialLayerUsed(2)); + EXPECT_EQ(DataRate::BitsPerSec(allocation.get_sum_bps()), + std::min(min_rate_three_layers - DataRate::BitsPerSec(1), + max_rate_two_layers)); +} + +TEST_P(SvcRateAllocatorTestParametrizedContentType, + StableBitrateWithHysteresis) { + const VideoCodec codec = Configure(1280, 720, 3, 1, is_screen_sharing_); + const auto start_rates = SvcRateAllocator::GetLayerStartBitrates(codec); + const DataRate min_rate_single_layer = start_rates[0]; + const DataRate min_rate_two_layers = start_rates[1]; + const DataRate min_rate_three_layers = start_rates[2]; + + ScopedFieldTrials field_trial( + "WebRTC-StableTargetRate/enabled:true,video_hysteresis_factor:1.1," + "screenshare_hysteresis_factor:1.1/"); + SvcRateAllocator allocator = SvcRateAllocator(codec); + // Always use max bitrate as target, verify only stable is used for layer + // count selection. + const DataRate max_bitrate = allocator.GetMaxBitrate(codec); + + // Start with a single layer. + auto allocation = allocator.Allocate(VideoBitrateAllocationParameters( + /*total_bitrate=*/max_bitrate, + /*stable_bitrate=*/min_rate_single_layer, /*fps=*/30.0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(0)); + EXPECT_FALSE(allocation.IsSpatialLayerUsed(1)); + EXPECT_FALSE(allocation.IsSpatialLayerUsed(2)); + + // Min bitrate not enough to enable second layer due to 10% hysteresis. + allocation = allocator.Allocate(VideoBitrateAllocationParameters( + /*total_bitrate=*/max_bitrate, + /*stable_bitrate=*/min_rate_two_layers, /*fps=*/30.0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(0)); + EXPECT_FALSE(allocation.IsSpatialLayerUsed(1)); + EXPECT_FALSE(allocation.IsSpatialLayerUsed(2)); + + // Add hysteresis, second layer should turn on. 
+ allocation = allocator.Allocate(VideoBitrateAllocationParameters( + /*total_bitrate=*/max_bitrate, + /*stable_bitrate=*/min_rate_two_layers * 1.1, /*fps=*/30.0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(1)); + EXPECT_FALSE(allocation.IsSpatialLayerUsed(2)); + + // Remove hysteresis, second layer should stay on. + allocation = allocator.Allocate(VideoBitrateAllocationParameters( + /*total_bitrate=*/max_bitrate, + /*stable_bitrate=*/min_rate_two_layers, /*fps=*/30.0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(1)); + EXPECT_FALSE(allocation.IsSpatialLayerUsed(2)); + + // Going below min for two layers, second layer should turn off again. + allocation = allocator.Allocate(VideoBitrateAllocationParameters( + /*total_bitrate=*/max_bitrate, + /*stable_bitrate=*/min_rate_two_layers - DataRate::BitsPerSec(1), + /*fps=*/30.0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(0)); + EXPECT_FALSE(allocation.IsSpatialLayerUsed(1)); + EXPECT_FALSE(allocation.IsSpatialLayerUsed(2)); + + // Min bitrate not enough to enable third layer due to 10% hysteresis. + allocation = allocator.Allocate(VideoBitrateAllocationParameters( + /*total_bitrate=*/max_bitrate, + /*stable_bitrate=*/min_rate_three_layers, /*fps=*/30.0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(1)); + EXPECT_FALSE(allocation.IsSpatialLayerUsed(2)); + + // Add hysteresis, third layer should turn on. + allocation = allocator.Allocate(VideoBitrateAllocationParameters( + /*total_bitrate=*/max_bitrate, + /*stable_bitrate=*/min_rate_three_layers * 1.1, /*fps=*/30.0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(1)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(2)); + + // Remove hysteresis, third layer should stay on. 
+ allocation = allocator.Allocate(VideoBitrateAllocationParameters( + /*total_bitrate=*/max_bitrate, + /*stable_bitrate=*/min_rate_three_layers, /*fps=*/30.0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(1)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(2)); + + // Going below min for three layers, third layer should turn off again. + allocation = allocator.Allocate(VideoBitrateAllocationParameters( + /*total_bitrate=*/max_bitrate, + /*stable_bitrate=*/min_rate_three_layers - DataRate::BitsPerSec(1), + /*fps=*/30.0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(0)); + EXPECT_TRUE(allocation.IsSpatialLayerUsed(1)); + EXPECT_FALSE(allocation.IsSpatialLayerUsed(2)); +} + +INSTANTIATE_TEST_SUITE_P(_, + SvcRateAllocatorTestParametrizedContentType, + ::testing::Bool()); + +} // namespace test +} // namespace webrtc |