diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/libwebrtc/common_video | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1. (ref: upstream/124.0.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/common_video')
64 files changed, 10632 insertions, 0 deletions
diff --git a/third_party/libwebrtc/common_video/BUILD.gn b/third_party/libwebrtc/common_video/BUILD.gn new file mode 100644 index 0000000000..4736aa5fb6 --- /dev/null +++ b/third_party/libwebrtc/common_video/BUILD.gn @@ -0,0 +1,182 @@ +# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../webrtc.gni") + +rtc_library("common_video") { + visibility = [ "*" ] + + sources = [ + "bitrate_adjuster.cc", + "frame_rate_estimator.cc", + "frame_rate_estimator.h", + "framerate_controller.cc", + "framerate_controller.h", + "h264/h264_bitstream_parser.cc", + "h264/h264_bitstream_parser.h", + "h264/h264_common.cc", + "h264/h264_common.h", + "h264/pps_parser.cc", + "h264/pps_parser.h", + "h264/sps_parser.cc", + "h264/sps_parser.h", + "h264/sps_vui_rewriter.cc", + "h264/sps_vui_rewriter.h", + "include/bitrate_adjuster.h", + "include/quality_limitation_reason.h", + "include/video_frame_buffer.h", + "include/video_frame_buffer_pool.h", + "libyuv/include/webrtc_libyuv.h", + "libyuv/webrtc_libyuv.cc", + "video_frame_buffer.cc", + "video_frame_buffer_pool.cc", + ] + + if (rtc_use_h265) { + sources += [ + "h265/h265_bitstream_parser.cc", + "h265/h265_bitstream_parser.h", + "h265/h265_common.cc", + "h265/h265_common.h", + "h265/h265_inline.cc", + "h265/h265_inline.h", + "h265/h265_pps_parser.cc", + "h265/h265_pps_parser.h", + "h265/h265_sps_parser.cc", + "h265/h265_sps_parser.h", + "h265/h265_vps_parser.cc", + "h265/h265_vps_parser.h", + ] + } + + deps = [ + "../api:array_view", + "../api:make_ref_counted", + "../api:scoped_refptr", + "../api:sequence_checker", + "../api/task_queue", + "../api/units:time_delta", + 
"../api/units:timestamp", + "../api/video:encoded_image", + "../api/video:video_bitrate_allocation", + "../api/video:video_bitrate_allocator", + "../api/video:video_frame", + "../api/video:video_frame_i010", + "../api/video:video_rtp_headers", + "../api/video_codecs:bitstream_parser_api", + "../api/video_codecs:video_codecs_api", + "../rtc_base:bit_buffer", + "../rtc_base:bitstream_reader", + "../rtc_base:buffer", + "../rtc_base:checks", + "../rtc_base:event_tracer", + "../rtc_base:logging", + "../rtc_base:macromagic", + "../rtc_base:race_checker", + "../rtc_base:rate_statistics", + "../rtc_base:refcount", + "../rtc_base:rtc_task_queue", + "../rtc_base:safe_minmax", + "../rtc_base:timeutils", + "../rtc_base/synchronization:mutex", + "../rtc_base/system:rtc_export", + "../system_wrappers:metrics", + "//third_party/libyuv", + ] + if (rtc_use_h265) { + deps += [ + "../rtc_base:compile_assert_c", + "../rtc_base/containers:flat_map", + ] + } + absl_deps = [ + "//third_party/abseil-cpp/absl/numeric:bits", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_source_set("frame_counts") { + visibility = [ "*" ] + + sources = [ "frame_counts.h" ] +} + +if (rtc_include_tests && !build_with_chromium) { + common_video_resources = [ "../resources/foreman_cif.yuv" ] + + if (is_ios) { + bundle_data("common_video_unittests_bundle_data") { + testonly = true + sources = common_video_resources + outputs = [ "{{bundle_resources_dir}}/{{source_file_part}}" ] + } + } + + rtc_test("common_video_unittests") { + testonly = true + + sources = [ + "bitrate_adjuster_unittest.cc", + "frame_rate_estimator_unittest.cc", + "framerate_controller_unittest.cc", + "h264/h264_bitstream_parser_unittest.cc", + "h264/pps_parser_unittest.cc", + "h264/sps_parser_unittest.cc", + "h264/sps_vui_rewriter_unittest.cc", + "libyuv/libyuv_unittest.cc", + "video_frame_buffer_pool_unittest.cc", + "video_frame_unittest.cc", + ] + + if (rtc_use_h265) { + sources += [ + 
"h265/h265_bitstream_parser_unittest.cc", + "h265/h265_pps_parser_unittest.cc", + "h265/h265_sps_parser_unittest.cc", + "h265/h265_vps_parser_unittest.cc", + ] + } + + deps = [ + ":common_video", + "../api:scoped_refptr", + "../api/units:time_delta", + "../api/video:video_frame", + "../api/video:video_frame_i010", + "../api/video:video_rtp_headers", + "../api/video_codecs:video_codecs_api", + "../rtc_base:bit_buffer", + "../rtc_base:buffer", + "../rtc_base:checks", + "../rtc_base:logging", + "../rtc_base:macromagic", + "../rtc_base:rtc_base_tests_utils", + "../rtc_base:timeutils", + "../system_wrappers:system_wrappers", + "../test:fileutils", + "../test:frame_utils", + "../test:test_main", + "../test:test_support", + "../test:video_test_common", + "//testing/gtest", + "//third_party/libyuv", + ] + + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] + + data = common_video_resources + if (is_android) { + deps += [ "//testing/android/native_test:native_test_support" ] + shard_timeout = 900 + } + + if (is_ios) { + deps += [ ":common_video_unittests_bundle_data" ] + } + } +} diff --git a/third_party/libwebrtc/common_video/DEPS b/third_party/libwebrtc/common_video/DEPS new file mode 100644 index 0000000000..6a97a5ea52 --- /dev/null +++ b/third_party/libwebrtc/common_video/DEPS @@ -0,0 +1,7 @@ +include_rules = [ + "+media/base", + "+system_wrappers", + # TODO(magjed): This is temporary, remove once external clients are updated. 
+ "+sdk/objc", + "+third_party/libyuv", +] diff --git a/third_party/libwebrtc/common_video/OWNERS b/third_party/libwebrtc/common_video/OWNERS new file mode 100644 index 0000000000..455e247d90 --- /dev/null +++ b/third_party/libwebrtc/common_video/OWNERS @@ -0,0 +1,4 @@ +magjed@webrtc.org +marpan@webrtc.org +sprang@webrtc.org +stefan@webrtc.org diff --git a/third_party/libwebrtc/common_video/bitrate_adjuster.cc b/third_party/libwebrtc/common_video/bitrate_adjuster.cc new file mode 100644 index 0000000000..c53c3a02f6 --- /dev/null +++ b/third_party/libwebrtc/common_video/bitrate_adjuster.cc @@ -0,0 +1,159 @@ +/* + * Copyright 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/include/bitrate_adjuster.h" + +#include <algorithm> +#include <cmath> + +#include "rtc_base/logging.h" +#include "rtc_base/time_utils.h" + +namespace webrtc { + +// Update bitrate at most once every second. +const uint32_t BitrateAdjuster::kBitrateUpdateIntervalMs = 1000; + +// Update bitrate at most once every 30 frames. +const uint32_t BitrateAdjuster::kBitrateUpdateFrameInterval = 30; + +// 10 percent of original. 
+const float BitrateAdjuster::kBitrateTolerancePct = .1f; + +const float BitrateAdjuster::kBytesPerMsToBitsPerSecond = 8 * 1000; + +BitrateAdjuster::BitrateAdjuster(float min_adjusted_bitrate_pct, + float max_adjusted_bitrate_pct) + : min_adjusted_bitrate_pct_(min_adjusted_bitrate_pct), + max_adjusted_bitrate_pct_(max_adjusted_bitrate_pct), + bitrate_tracker_(1.5 * kBitrateUpdateIntervalMs, + kBytesPerMsToBitsPerSecond) { + Reset(); +} + +void BitrateAdjuster::SetTargetBitrateBps(uint32_t bitrate_bps) { + MutexLock lock(&mutex_); + // If the change in target bitrate is large, update the adjusted bitrate + // immediately since it's likely we have gained or lost a sizeable amount of + // bandwidth and we'll want to respond quickly. + // If the change in target bitrate fits within the existing tolerance of + // encoder output, wait for the next adjustment time to preserve + // existing penalties and not forcibly reset the adjusted bitrate to target. + // However, if we received many small deltas within an update time + // window and one of them exceeds the tolerance when compared to the last + // target we updated against, treat it as a large change in target bitrate. 
+ if (!IsWithinTolerance(bitrate_bps, target_bitrate_bps_) || + !IsWithinTolerance(bitrate_bps, last_adjusted_target_bitrate_bps_)) { + adjusted_bitrate_bps_ = bitrate_bps; + last_adjusted_target_bitrate_bps_ = bitrate_bps; + } + target_bitrate_bps_ = bitrate_bps; +} + +uint32_t BitrateAdjuster::GetTargetBitrateBps() const { + MutexLock lock(&mutex_); + return target_bitrate_bps_; +} + +uint32_t BitrateAdjuster::GetAdjustedBitrateBps() const { + MutexLock lock(&mutex_); + return adjusted_bitrate_bps_; +} + +absl::optional<uint32_t> BitrateAdjuster::GetEstimatedBitrateBps() { + MutexLock lock(&mutex_); + return bitrate_tracker_.Rate(rtc::TimeMillis()); +} + +void BitrateAdjuster::Update(size_t frame_size) { + MutexLock lock(&mutex_); + uint32_t current_time_ms = rtc::TimeMillis(); + bitrate_tracker_.Update(frame_size, current_time_ms); + UpdateBitrate(current_time_ms); +} + +bool BitrateAdjuster::IsWithinTolerance(uint32_t bitrate_bps, + uint32_t target_bitrate_bps) { + if (target_bitrate_bps == 0) { + return false; + } + float delta = std::abs(static_cast<float>(bitrate_bps) - + static_cast<float>(target_bitrate_bps)); + float delta_pct = delta / target_bitrate_bps; + return delta_pct < kBitrateTolerancePct; +} + +uint32_t BitrateAdjuster::GetMinAdjustedBitrateBps() const { + return min_adjusted_bitrate_pct_ * target_bitrate_bps_; +} + +uint32_t BitrateAdjuster::GetMaxAdjustedBitrateBps() const { + return max_adjusted_bitrate_pct_ * target_bitrate_bps_; +} + +// Only safe to call this after Update calls have stopped +void BitrateAdjuster::Reset() { + MutexLock lock(&mutex_); + target_bitrate_bps_ = 0; + adjusted_bitrate_bps_ = 0; + last_adjusted_target_bitrate_bps_ = 0; + last_bitrate_update_time_ms_ = 0; + frames_since_last_update_ = 0; + bitrate_tracker_.Reset(); +} + +void BitrateAdjuster::UpdateBitrate(uint32_t current_time_ms) { + uint32_t time_since_last_update_ms = + current_time_ms - last_bitrate_update_time_ms_; + // Don't attempt to update bitrate unless 
enough time and frames have passed. + ++frames_since_last_update_; + if (time_since_last_update_ms < kBitrateUpdateIntervalMs || + frames_since_last_update_ < kBitrateUpdateFrameInterval) { + return; + } + float target_bitrate_bps = target_bitrate_bps_; + float estimated_bitrate_bps = + bitrate_tracker_.Rate(current_time_ms).value_or(target_bitrate_bps); + float error = target_bitrate_bps - estimated_bitrate_bps; + + // Adjust if we've overshot by any amount or if we've undershot too much. + if (estimated_bitrate_bps > target_bitrate_bps || + error > kBitrateTolerancePct * target_bitrate_bps) { + // Adjust the bitrate by a fraction of the error. + float adjustment = .5 * error; + float adjusted_bitrate_bps = target_bitrate_bps + adjustment; + + // Clamp the adjustment. + float min_bitrate_bps = GetMinAdjustedBitrateBps(); + float max_bitrate_bps = GetMaxAdjustedBitrateBps(); + adjusted_bitrate_bps = std::max(adjusted_bitrate_bps, min_bitrate_bps); + adjusted_bitrate_bps = std::min(adjusted_bitrate_bps, max_bitrate_bps); + + // Set the adjustment if it's not already set. 
+ float last_adjusted_bitrate_bps = adjusted_bitrate_bps_; + if (adjusted_bitrate_bps != last_adjusted_bitrate_bps) { + RTC_LOG(LS_VERBOSE) << "Adjusting encoder bitrate:" + "\n target_bitrate:" + << static_cast<uint32_t>(target_bitrate_bps) + << "\n estimated_bitrate:" + << static_cast<uint32_t>(estimated_bitrate_bps) + << "\n last_adjusted_bitrate:" + << static_cast<uint32_t>(last_adjusted_bitrate_bps) + << "\n adjusted_bitrate:" + << static_cast<uint32_t>(adjusted_bitrate_bps); + adjusted_bitrate_bps_ = adjusted_bitrate_bps; + } + } + last_bitrate_update_time_ms_ = current_time_ms; + frames_since_last_update_ = 0; + last_adjusted_target_bitrate_bps_ = target_bitrate_bps_; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/bitrate_adjuster_unittest.cc b/third_party/libwebrtc/common_video/bitrate_adjuster_unittest.cc new file mode 100644 index 0000000000..995aac1c27 --- /dev/null +++ b/third_party/libwebrtc/common_video/bitrate_adjuster_unittest.cc @@ -0,0 +1,169 @@ +/* + * Copyright 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/include/bitrate_adjuster.h" + +#include "api/units/time_delta.h" +#include "rtc_base/fake_clock.h" +#include "test/gtest.h" + +namespace webrtc { + +class BitrateAdjusterTest : public ::testing::Test { + public: + BitrateAdjusterTest() + : adjuster_(kMinAdjustedBitratePct, kMaxAdjustedBitratePct) {} + + // Simulate an output bitrate for one update cycle of BitrateAdjuster. 
+ void SimulateBitrateBps(uint32_t bitrate_bps) { + const uint32_t update_interval_ms = + BitrateAdjuster::kBitrateUpdateIntervalMs; + const uint32_t update_frame_interval = + BitrateAdjuster::kBitrateUpdateFrameInterval; + // Round up frame interval so we get one cycle passes. + const uint32_t frame_interval_ms = + (update_interval_ms + update_frame_interval - 1) / + update_frame_interval; + const size_t frame_size_bytes = + (bitrate_bps * frame_interval_ms) / (8 * 1000); + for (size_t i = 0; i < update_frame_interval; ++i) { + clock_.AdvanceTime(webrtc::TimeDelta::Millis(frame_interval_ms)); + adjuster_.Update(frame_size_bytes); + } + } + + uint32_t GetTargetBitrateBpsPct(float pct) { + return pct * adjuster_.GetTargetBitrateBps(); + } + + void VerifyAdjustment() { + // The adjusted bitrate should be between the estimated bitrate and the + // target bitrate within clamp. + uint32_t target_bitrate_bps = adjuster_.GetTargetBitrateBps(); + uint32_t adjusted_bitrate_bps = adjuster_.GetAdjustedBitrateBps(); + uint32_t estimated_bitrate_bps = + adjuster_.GetEstimatedBitrateBps().value_or(target_bitrate_bps); + uint32_t adjusted_lower_bound_bps = + GetTargetBitrateBpsPct(kMinAdjustedBitratePct); + uint32_t adjusted_upper_bound_bps = + GetTargetBitrateBpsPct(kMaxAdjustedBitratePct); + EXPECT_LE(adjusted_bitrate_bps, adjusted_upper_bound_bps); + EXPECT_GE(adjusted_bitrate_bps, adjusted_lower_bound_bps); + if (estimated_bitrate_bps > target_bitrate_bps) { + EXPECT_LT(adjusted_bitrate_bps, target_bitrate_bps); + } + } + + protected: + static const float kMinAdjustedBitratePct; + static const float kMaxAdjustedBitratePct; + rtc::ScopedFakeClock clock_; + BitrateAdjuster adjuster_; +}; + +const float BitrateAdjusterTest::kMinAdjustedBitratePct = .5f; +const float BitrateAdjusterTest::kMaxAdjustedBitratePct = .95f; + +TEST_F(BitrateAdjusterTest, VaryingBitrates) { + const uint32_t target_bitrate_bps = 640000; + adjuster_.SetTargetBitrateBps(target_bitrate_bps); + + // Grossly 
overshoot for a little while. Adjusted bitrate should decrease. + uint32_t actual_bitrate_bps = 2 * target_bitrate_bps; + uint32_t last_adjusted_bitrate_bps = 0; + uint32_t adjusted_bitrate_bps = 0; + + SimulateBitrateBps(actual_bitrate_bps); + VerifyAdjustment(); + last_adjusted_bitrate_bps = adjuster_.GetAdjustedBitrateBps(); + + SimulateBitrateBps(actual_bitrate_bps); + VerifyAdjustment(); + adjusted_bitrate_bps = adjuster_.GetAdjustedBitrateBps(); + EXPECT_LE(adjusted_bitrate_bps, last_adjusted_bitrate_bps); + last_adjusted_bitrate_bps = adjusted_bitrate_bps; + // After two cycles we should've stabilized and hit the lower bound. + EXPECT_EQ(GetTargetBitrateBpsPct(kMinAdjustedBitratePct), + adjusted_bitrate_bps); + + // Simulate encoder settling down. Adjusted bitrate should increase. + SimulateBitrateBps(target_bitrate_bps); + adjusted_bitrate_bps = adjuster_.GetAdjustedBitrateBps(); + VerifyAdjustment(); + EXPECT_GT(adjusted_bitrate_bps, last_adjusted_bitrate_bps); + last_adjusted_bitrate_bps = adjusted_bitrate_bps; + + SimulateBitrateBps(target_bitrate_bps); + adjusted_bitrate_bps = adjuster_.GetAdjustedBitrateBps(); + VerifyAdjustment(); + EXPECT_GT(adjusted_bitrate_bps, last_adjusted_bitrate_bps); + last_adjusted_bitrate_bps = adjusted_bitrate_bps; + // After two cycles we should've stabilized and hit the upper bound. + EXPECT_EQ(GetTargetBitrateBpsPct(kMaxAdjustedBitratePct), + adjusted_bitrate_bps); +} + +// Tests that large changes in target bitrate will result in immediate change +// in adjusted bitrate. 
+TEST_F(BitrateAdjusterTest, LargeTargetDelta) { + uint32_t target_bitrate_bps = 640000; + adjuster_.SetTargetBitrateBps(target_bitrate_bps); + EXPECT_EQ(target_bitrate_bps, adjuster_.GetAdjustedBitrateBps()); + + float delta_pct = BitrateAdjuster::kBitrateTolerancePct * 2; + + target_bitrate_bps = (1 + delta_pct) * target_bitrate_bps; + adjuster_.SetTargetBitrateBps(target_bitrate_bps); + EXPECT_EQ(target_bitrate_bps, adjuster_.GetAdjustedBitrateBps()); + + target_bitrate_bps = (1 - delta_pct) * target_bitrate_bps; + adjuster_.SetTargetBitrateBps(target_bitrate_bps); + EXPECT_EQ(target_bitrate_bps, adjuster_.GetAdjustedBitrateBps()); +} + +// Tests that small changes in target bitrate within tolerance will not affect +// adjusted bitrate immediately. +TEST_F(BitrateAdjusterTest, SmallTargetDelta) { + const uint32_t initial_target_bitrate_bps = 640000; + uint32_t target_bitrate_bps = initial_target_bitrate_bps; + adjuster_.SetTargetBitrateBps(target_bitrate_bps); + EXPECT_EQ(initial_target_bitrate_bps, adjuster_.GetAdjustedBitrateBps()); + + float delta_pct = BitrateAdjuster::kBitrateTolerancePct / 2; + + target_bitrate_bps = (1 + delta_pct) * target_bitrate_bps; + adjuster_.SetTargetBitrateBps(target_bitrate_bps); + EXPECT_EQ(initial_target_bitrate_bps, adjuster_.GetAdjustedBitrateBps()); + + target_bitrate_bps = (1 - delta_pct) * target_bitrate_bps; + adjuster_.SetTargetBitrateBps(target_bitrate_bps); + EXPECT_EQ(initial_target_bitrate_bps, adjuster_.GetAdjustedBitrateBps()); +} + +TEST_F(BitrateAdjusterTest, SmallTargetDeltaOverflow) { + const uint32_t initial_target_bitrate_bps = 640000; + uint32_t target_bitrate_bps = initial_target_bitrate_bps; + adjuster_.SetTargetBitrateBps(target_bitrate_bps); + EXPECT_EQ(initial_target_bitrate_bps, adjuster_.GetAdjustedBitrateBps()); + + float delta_pct = BitrateAdjuster::kBitrateTolerancePct / 2; + + target_bitrate_bps = (1 + delta_pct) * target_bitrate_bps; + adjuster_.SetTargetBitrateBps(target_bitrate_bps); + 
EXPECT_EQ(initial_target_bitrate_bps, adjuster_.GetAdjustedBitrateBps()); + + // 1.05 * 1.05 is 1.1 which is greater than tolerance for the initial target + // bitrate. Since we didn't advance the clock the adjuster never updated. + target_bitrate_bps = (1 + delta_pct) * target_bitrate_bps; + adjuster_.SetTargetBitrateBps(target_bitrate_bps); + EXPECT_EQ(target_bitrate_bps, adjuster_.GetAdjustedBitrateBps()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/common_video_gn/moz.build b/third_party/libwebrtc/common_video/common_video_gn/moz.build new file mode 100644 index 0000000000..a767c9c765 --- /dev/null +++ b/third_party/libwebrtc/common_video/common_video_gn/moz.build @@ -0,0 +1,244 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/media/libyuv/", + "/media/libyuv/libyuv/include/", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/common_video/bitrate_adjuster.cc", + "/third_party/libwebrtc/common_video/frame_rate_estimator.cc", + "/third_party/libwebrtc/common_video/framerate_controller.cc", + "/third_party/libwebrtc/common_video/h264/h264_bitstream_parser.cc", + "/third_party/libwebrtc/common_video/h264/h264_common.cc", + "/third_party/libwebrtc/common_video/h264/pps_parser.cc", + "/third_party/libwebrtc/common_video/h264/sps_parser.cc", + "/third_party/libwebrtc/common_video/h264/sps_vui_rewriter.cc", + "/third_party/libwebrtc/common_video/libyuv/webrtc_libyuv.cc", + "/third_party/libwebrtc/common_video/video_frame_buffer.cc", + "/third_party/libwebrtc/common_video/video_frame_buffer_pool.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_LINUX"] = True + 
DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["RTC_ENABLE_WIN_WGC"] = True + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + 
DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["TARGET_CPU"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["TARGET_CPU"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["TARGET_CPU"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["TARGET_CPU"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["TARGET_CPU"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["TARGET_CPU"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["OS_TARGET"] == "Android" and CONFIG["TARGET_CPU"] == "arm": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["OS_TARGET"] == "Android" and CONFIG["TARGET_CPU"] == "x86": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if 
CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "aarch64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "arm": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86_64": + + DEFINES["_GNU_SOURCE"] = True + +Library("common_video_gn") diff --git a/third_party/libwebrtc/common_video/frame_counts.h b/third_party/libwebrtc/common_video/frame_counts.h new file mode 100644 index 0000000000..505d3129ef --- /dev/null +++ b/third_party/libwebrtc/common_video/frame_counts.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_FRAME_COUNTS_H_ +#define COMMON_VIDEO_FRAME_COUNTS_H_ + +#include <cstdint> + +namespace webrtc { + +struct FrameCounts { + FrameCounts() : key_frames(0), delta_frames(0) {} + int key_frames; + int delta_frames; +}; + +// Callback, used to notify an observer whenever frame counts have been updated. 
+class FrameCountObserver { + public: + virtual ~FrameCountObserver() {} + virtual void FrameCountUpdated(const FrameCounts& frame_counts, + uint32_t ssrc) = 0; +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_FRAME_COUNTS_H_ diff --git a/third_party/libwebrtc/common_video/frame_counts_gn/moz.build b/third_party/libwebrtc/common_video/frame_counts_gn/moz.build new file mode 100644 index 0000000000..0ccbf9ac76 --- /dev/null +++ b/third_party/libwebrtc/common_video/frame_counts_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + 
DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["RTC_ENABLE_WIN_WGC"] = True + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + 
DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["TARGET_CPU"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["TARGET_CPU"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["TARGET_CPU"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["TARGET_CPU"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["TARGET_CPU"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["TARGET_CPU"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["OS_TARGET"] == "Android" and CONFIG["TARGET_CPU"] == "arm": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["OS_TARGET"] == "Android" and CONFIG["TARGET_CPU"] == "x86": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "aarch64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" 
and CONFIG["TARGET_CPU"] == "arm": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86_64": + + DEFINES["_GNU_SOURCE"] = True + +Library("frame_counts_gn") diff --git a/third_party/libwebrtc/common_video/frame_rate_estimator.cc b/third_party/libwebrtc/common_video/frame_rate_estimator.cc new file mode 100644 index 0000000000..4c5a341ac0 --- /dev/null +++ b/third_party/libwebrtc/common_video/frame_rate_estimator.cc @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "common_video/frame_rate_estimator.h" + +#include "rtc_base/time_utils.h" + +namespace webrtc { + +FrameRateEstimator::FrameRateEstimator(TimeDelta averaging_window) + : averaging_window_(averaging_window) {} + +void FrameRateEstimator::OnFrame(Timestamp time) { + CullOld(time); + frame_times_.push_back(time); +} + +absl::optional<double> FrameRateEstimator::GetAverageFps() const { + if (frame_times_.size() < 2) { + return absl::nullopt; + } + TimeDelta time_span = frame_times_.back() - frame_times_.front(); + if (time_span < TimeDelta::Micros(1)) { + return absl::nullopt; + } + TimeDelta avg_frame_interval = time_span / (frame_times_.size() - 1); + + return static_cast<double>(rtc::kNumMicrosecsPerSec) / + avg_frame_interval.us(); +} + +absl::optional<double> FrameRateEstimator::GetAverageFps(Timestamp now) { + CullOld(now); + return GetAverageFps(); +} + +void FrameRateEstimator::Reset() { + frame_times_.clear(); +} + +void FrameRateEstimator::CullOld(Timestamp now) { + while (!frame_times_.empty() && + frame_times_.front() + averaging_window_ < now) { + frame_times_.pop_front(); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/frame_rate_estimator.h b/third_party/libwebrtc/common_video/frame_rate_estimator.h new file mode 100644 index 0000000000..95219a534d --- /dev/null +++ b/third_party/libwebrtc/common_video/frame_rate_estimator.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef COMMON_VIDEO_FRAME_RATE_ESTIMATOR_H_ +#define COMMON_VIDEO_FRAME_RATE_ESTIMATOR_H_ + +#include <deque> + +#include "absl/types/optional.h" +#include "api/units/time_delta.h" +#include "api/units/timestamp.h" + +namespace webrtc { + +// Class used to estimate a frame-rate using inter-frame intervals. +// Some notes on usage: +// This class is intended to accurately estimate the frame rate during a +// continuous stream. Unlike a traditional rate estimator that looks at number +// of data points within a time window, if the input stops this implementation +// will not smoothly fall down towards 0. This is done so that the estimated +// fps is not affected by edge conditions like if we sample just before or just +// after the next frame. +// To avoid problems if a stream is stopped and restarted (where estimated fps +// could look too low), users of this class should explicitly call Reset() on +// restart. +// Also note that this class is not thread safe, it's up to the user to guard +// against concurrent access. +class FrameRateEstimator { + public: + explicit FrameRateEstimator(TimeDelta averaging_window); + + // Insert a frame, potentially culling old frames that fall outside the + // averaging window. + void OnFrame(Timestamp time); + + // Get the current average FPS, based on the frames currently in the window. + absl::optional<double> GetAverageFps() const; + + // Move the window so it ends at `now`, and return the new fps estimate. + absl::optional<double> GetAverageFps(Timestamp now); + + // Completely clear the averaging window. 
+ void Reset(); + + private: + void CullOld(Timestamp now); + const TimeDelta averaging_window_; + std::deque<Timestamp> frame_times_; +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_FRAME_RATE_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/common_video/frame_rate_estimator_unittest.cc b/third_party/libwebrtc/common_video/frame_rate_estimator_unittest.cc new file mode 100644 index 0000000000..d11a7d4140 --- /dev/null +++ b/third_party/libwebrtc/common_video/frame_rate_estimator_unittest.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/frame_rate_estimator.h" + +#include "system_wrappers/include/clock.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { +constexpr TimeDelta kDefaultWindow = TimeDelta::Millis(1000); +} + +class FrameRateEstimatorTest : public ::testing::Test { + public: + FrameRateEstimatorTest() : clock_(123), estimator_(kDefaultWindow) {} + + protected: + SimulatedClock clock_; + FrameRateEstimator estimator_; +}; + +TEST_F(FrameRateEstimatorTest, NoEstimateWithLessThanTwoFrames) { + EXPECT_FALSE(estimator_.GetAverageFps()); + estimator_.OnFrame(clock_.CurrentTime()); + EXPECT_FALSE(estimator_.GetAverageFps()); + clock_.AdvanceTime(TimeDelta::Millis(33)); + EXPECT_FALSE(estimator_.GetAverageFps()); +} + +TEST_F(FrameRateEstimatorTest, NoEstimateWithZeroSpan) { + // Two frames, but they are spanning 0ms so can't estimate frame rate. 
+ estimator_.OnFrame(clock_.CurrentTime()); + estimator_.OnFrame(clock_.CurrentTime()); + EXPECT_FALSE(estimator_.GetAverageFps()); +} + +TEST_F(FrameRateEstimatorTest, SingleSpanFps) { + const double kExpectedFps = 30.0; + estimator_.OnFrame(clock_.CurrentTime()); + clock_.AdvanceTime(TimeDelta::Seconds(1) / kExpectedFps); + estimator_.OnFrame(clock_.CurrentTime()); + EXPECT_NEAR(*estimator_.GetAverageFps(), kExpectedFps, 0.001); +} + +TEST_F(FrameRateEstimatorTest, AverageFps) { + // Insert frames at intervals corresponding to 10fps for half the window, then + // 30fps for half the window. The average should be 20fps. + const double kLowFps = 10.0; + const double kHighFps = 30.0; + const double kExpectedFps = 20.0; + + const Timestamp start_time = clock_.CurrentTime(); + while (clock_.CurrentTime() - start_time < kDefaultWindow / 2) { + estimator_.OnFrame(clock_.CurrentTime()); + clock_.AdvanceTime(TimeDelta::Seconds(1) / kLowFps); + } + while (clock_.CurrentTime() - start_time < kDefaultWindow) { + estimator_.OnFrame(clock_.CurrentTime()); + clock_.AdvanceTime(TimeDelta::Seconds(1) / kHighFps); + } + + EXPECT_NEAR(*estimator_.GetAverageFps(), kExpectedFps, 0.001); +} + +TEST_F(FrameRateEstimatorTest, CullsOldFramesFromAveragingWindow) { + // Two frames, just on the border of the 1s window => 1 fps. + estimator_.OnFrame(clock_.CurrentTime()); + clock_.AdvanceTime(kDefaultWindow); + estimator_.OnFrame(clock_.CurrentTime()); + EXPECT_TRUE(estimator_.GetAverageFps()); + EXPECT_NEAR(*estimator_.GetAverageFps(), 1.0, 0.001); + + // Oldest frame should just be pushed out the window, leaving a single frame + // => no estimate possible. 
+ clock_.AdvanceTime(TimeDelta::Micros(1)); + EXPECT_FALSE(estimator_.GetAverageFps(clock_.CurrentTime())); +} + +TEST_F(FrameRateEstimatorTest, Reset) { + estimator_.OnFrame(clock_.CurrentTime()); + clock_.AdvanceTime(TimeDelta::Seconds(1) / 30); + estimator_.OnFrame(clock_.CurrentTime()); + EXPECT_TRUE(estimator_.GetAverageFps()); + + // Clear estimator, no estimate should be possible even after inserting one + // new frame. + estimator_.Reset(); + EXPECT_FALSE(estimator_.GetAverageFps()); + clock_.AdvanceTime(TimeDelta::Seconds(1) / 30); + estimator_.OnFrame(clock_.CurrentTime()); + EXPECT_FALSE(estimator_.GetAverageFps()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/framerate_controller.cc b/third_party/libwebrtc/common_video/framerate_controller.cc new file mode 100644 index 0000000000..23e9c70cbd --- /dev/null +++ b/third_party/libwebrtc/common_video/framerate_controller.cc @@ -0,0 +1,88 @@ +/* + * Copyright 2021 The WebRTC Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "common_video/framerate_controller.h" + +#include <limits> + +#include "rtc_base/time_utils.h" + +namespace webrtc { +namespace { +constexpr double kMinFramerate = 0.5; +} // namespace + +FramerateController::FramerateController() + : FramerateController(std::numeric_limits<double>::max()) {} + +FramerateController::FramerateController(double max_framerate) + : max_framerate_(max_framerate) {} + +FramerateController::~FramerateController() {} + +void FramerateController::SetMaxFramerate(double max_framerate) { + max_framerate_ = max_framerate; +} + +double FramerateController::GetMaxFramerate() const { + return max_framerate_; +} + +bool FramerateController::ShouldDropFrame(int64_t in_timestamp_ns) { + if (max_framerate_ < kMinFramerate) + return true; + + // If `max_framerate_` is not set (i.e. maxdouble), `frame_interval_ns` is + // rounded to 0. + int64_t frame_interval_ns = rtc::kNumNanosecsPerSec / max_framerate_; + if (frame_interval_ns <= 0) { + // Frame rate throttling not enabled. + return false; + } + + if (next_frame_timestamp_ns_) { + // Time until next frame should be outputted. + const int64_t time_until_next_frame_ns = + (*next_frame_timestamp_ns_ - in_timestamp_ns); + // Continue if timestamp is within expected range. + if (std::abs(time_until_next_frame_ns) < 2 * frame_interval_ns) { + // Drop if a frame shouldn't be outputted yet. + if (time_until_next_frame_ns > 0) + return true; + // Time to output new frame. + *next_frame_timestamp_ns_ += frame_interval_ns; + return false; + } + } + + // First timestamp received or timestamp is way outside expected range, so + // reset. Set first timestamp target to just half the interval to prefer + // keeping frames in case of jitter. 
+ next_frame_timestamp_ns_ = in_timestamp_ns + frame_interval_ns / 2; + return false; +} + +void FramerateController::Reset() { + max_framerate_ = std::numeric_limits<double>::max(); + next_frame_timestamp_ns_ = absl::nullopt; +} + +void FramerateController::KeepFrame(int64_t in_timestamp_ns) { + if (ShouldDropFrame(in_timestamp_ns)) { + if (max_framerate_ < kMinFramerate) + return; + + int64_t frame_interval_ns = rtc::kNumNanosecsPerSec / max_framerate_; + if (next_frame_timestamp_ns_) + *next_frame_timestamp_ns_ += frame_interval_ns; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/framerate_controller.h b/third_party/libwebrtc/common_video/framerate_controller.h new file mode 100644 index 0000000000..371ffd419f --- /dev/null +++ b/third_party/libwebrtc/common_video/framerate_controller.h @@ -0,0 +1,46 @@ +/* + * Copyright 2021 The WebRTC Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_FRAMERATE_CONTROLLER_H_ +#define COMMON_VIDEO_FRAMERATE_CONTROLLER_H_ + +#include <stdint.h> + +#include "absl/types/optional.h" + +namespace webrtc { + +// Determines which frames should be dropped based on input framerate and +// requested framerate. +class FramerateController { + public: + FramerateController(); + explicit FramerateController(double max_framerate); + ~FramerateController(); + + // Sets max framerate (default is maxdouble). + void SetMaxFramerate(double max_framerate); + double GetMaxFramerate() const; + + // Returns true if the frame should be dropped, false otherwise. 
+ bool ShouldDropFrame(int64_t in_timestamp_ns); + + void Reset(); + + void KeepFrame(int64_t in_timestamp_ns); + + private: + double max_framerate_; + absl::optional<int64_t> next_frame_timestamp_ns_; +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_FRAMERATE_CONTROLLER_H_ diff --git a/third_party/libwebrtc/common_video/framerate_controller_unittest.cc b/third_party/libwebrtc/common_video/framerate_controller_unittest.cc new file mode 100644 index 0000000000..690076ca61 --- /dev/null +++ b/third_party/libwebrtc/common_video/framerate_controller_unittest.cc @@ -0,0 +1,162 @@ +/* + * Copyright 2021 The WebRTC Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/framerate_controller.h" + +#include <limits> + +#include "rtc_base/time_utils.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { +constexpr int kInputFps = 30; +constexpr int kNumFrames = 60; +} // namespace + +class FramerateControllerTest : public ::testing::Test { + protected: + int64_t GetNextTimestampNs() { + int64_t interval_us = rtc::kNumMicrosecsPerSec / kInputFps; + next_timestamp_us_ += interval_us; + return next_timestamp_us_ * rtc::kNumNanosecsPerMicrosec; + } + + int64_t next_timestamp_us_ = rtc::TimeMicros(); + FramerateController controller_; +}; + +TEST_F(FramerateControllerTest, NoFramesDroppedIfNothingRequested) { + // Default max framerate is maxdouble. 
+ for (int i = 1; i < kNumFrames; ++i) + EXPECT_FALSE(controller_.ShouldDropFrame(GetNextTimestampNs())); +} + +TEST_F(FramerateControllerTest, AllFramesDroppedIfZeroRequested) { + controller_.SetMaxFramerate(0); + + for (int i = 1; i < kNumFrames; ++i) + EXPECT_TRUE(controller_.ShouldDropFrame(GetNextTimestampNs())); +} + +TEST_F(FramerateControllerTest, AllFramesDroppedIfNegativeRequested) { + controller_.SetMaxFramerate(-1); + + for (int i = 1; i < kNumFrames; ++i) + EXPECT_TRUE(controller_.ShouldDropFrame(GetNextTimestampNs())); +} + +TEST_F(FramerateControllerTest, EverySecondFrameDroppedIfHalfRequested) { + controller_.SetMaxFramerate(kInputFps / 2); + + // The first frame should not be dropped. + for (int i = 1; i < kNumFrames; ++i) + EXPECT_EQ(i % 2 == 0, controller_.ShouldDropFrame(GetNextTimestampNs())); +} + +TEST_F(FramerateControllerTest, EveryThirdFrameDroppedIfTwoThirdsRequested) { + controller_.SetMaxFramerate(kInputFps * 2 / 3); + + // The first frame should not be dropped. 
+ for (int i = 1; i < kNumFrames; ++i) + EXPECT_EQ(i % 3 == 0, controller_.ShouldDropFrame(GetNextTimestampNs())); +} + +TEST_F(FramerateControllerTest, NoFrameDroppedIfTwiceRequested) { + controller_.SetMaxFramerate(kInputFps * 2); + + for (int i = 1; i < kNumFrames; ++i) + EXPECT_FALSE(controller_.ShouldDropFrame(GetNextTimestampNs())); +} + +TEST_F(FramerateControllerTest, TestAverageFramerate) { + const double kMaxFps = 18.2; + controller_.SetMaxFramerate(kMaxFps); + + const int kNumSec = 10; + int frames_kept = 0; + for (int i = 0; i < kInputFps * kNumSec; ++i) { + if (!controller_.ShouldDropFrame(GetNextTimestampNs())) + ++frames_kept; + } + double average_fps = static_cast<double>(frames_kept) / kNumSec; + EXPECT_NEAR(kMaxFps, average_fps, 0.01); +} + +TEST_F(FramerateControllerTest, NoFrameDroppedForLargeTimestampOffset) { + controller_.SetMaxFramerate(kInputFps); + EXPECT_FALSE(controller_.ShouldDropFrame(0)); + + const int64_t kLargeOffsetNs = -987654321LL * 1000; + EXPECT_FALSE(controller_.ShouldDropFrame(kLargeOffsetNs)); + + int64_t input_interval_ns = rtc::kNumNanosecsPerSec / kInputFps; + EXPECT_FALSE(controller_.ShouldDropFrame(kLargeOffsetNs + input_interval_ns)); +} + +TEST_F(FramerateControllerTest, NoFrameDroppedIfInputWithJitterRequested) { + controller_.SetMaxFramerate(kInputFps); + + // Input fps with jitter. 
+ int64_t input_interval_ns = rtc::kNumNanosecsPerSec / kInputFps; + EXPECT_FALSE(controller_.ShouldDropFrame(input_interval_ns * 0 / 10)); + EXPECT_FALSE(controller_.ShouldDropFrame(input_interval_ns * 10 / 10 - 1)); + EXPECT_FALSE(controller_.ShouldDropFrame(input_interval_ns * 25 / 10)); + EXPECT_FALSE(controller_.ShouldDropFrame(input_interval_ns * 30 / 10)); + EXPECT_FALSE(controller_.ShouldDropFrame(input_interval_ns * 35 / 10)); + EXPECT_FALSE(controller_.ShouldDropFrame(input_interval_ns * 50 / 10)); +} + +TEST_F(FramerateControllerTest, FrameDroppedWhenReductionRequested) { + controller_.SetMaxFramerate(kInputFps); + + // Expect no frame drop. + for (int i = 1; i < kNumFrames; ++i) + EXPECT_FALSE(controller_.ShouldDropFrame(GetNextTimestampNs())); + + // Reduce max frame rate. + controller_.SetMaxFramerate(kInputFps / 2); + + // Verify that every other frame is dropped. + for (int i = 1; i < kNumFrames; ++i) + EXPECT_EQ(i % 2 == 0, controller_.ShouldDropFrame(GetNextTimestampNs())); +} + +TEST_F(FramerateControllerTest, NoFramesDroppedAfterReset) { + controller_.SetMaxFramerate(0); + + // All frames dropped. + for (int i = 1; i < kNumFrames; ++i) + EXPECT_TRUE(controller_.ShouldDropFrame(GetNextTimestampNs())); + + controller_.Reset(); + + // Expect no frame drop after reset. + for (int i = 1; i < kNumFrames; ++i) + EXPECT_FALSE(controller_.ShouldDropFrame(GetNextTimestampNs())); +} + +TEST_F(FramerateControllerTest, TestKeepFrame) { + FramerateController controller(kInputFps / 2); + + EXPECT_FALSE(controller.ShouldDropFrame(GetNextTimestampNs())); + EXPECT_TRUE(controller.ShouldDropFrame(GetNextTimestampNs())); + EXPECT_FALSE(controller.ShouldDropFrame(GetNextTimestampNs())); + EXPECT_TRUE(controller.ShouldDropFrame(GetNextTimestampNs())); + EXPECT_FALSE(controller.ShouldDropFrame(GetNextTimestampNs())); + + // Next frame should be dropped. + // Keep this frame (e.g. in case of a key frame). 
+ controller.KeepFrame(GetNextTimestampNs()); + // Expect next frame to be dropped instead. + EXPECT_TRUE(controller.ShouldDropFrame(GetNextTimestampNs())); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/generic_frame_descriptor/BUILD.gn b/third_party/libwebrtc/common_video/generic_frame_descriptor/BUILD.gn new file mode 100644 index 0000000000..ab97e887f2 --- /dev/null +++ b/third_party/libwebrtc/common_video/generic_frame_descriptor/BUILD.gn @@ -0,0 +1,28 @@ +# Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../webrtc.gni") + +rtc_library("generic_frame_descriptor") { + sources = [ + "generic_frame_info.cc", + "generic_frame_info.h", + ] + + deps = [ + "../../api:array_view", + "../../api/transport/rtp:dependency_descriptor", + "../../api/video:video_codec_constants", + "../../rtc_base:checks", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/container:inlined_vector", + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} diff --git a/third_party/libwebrtc/common_video/generic_frame_descriptor/OWNERS b/third_party/libwebrtc/common_video/generic_frame_descriptor/OWNERS new file mode 100644 index 0000000000..b2351ef6d7 --- /dev/null +++ b/third_party/libwebrtc/common_video/generic_frame_descriptor/OWNERS @@ -0,0 +1,2 @@ +philipel@webrtc.org +danilchap@webrtc.org diff --git a/third_party/libwebrtc/common_video/generic_frame_descriptor/generic_frame_descriptor_gn/moz.build b/third_party/libwebrtc/common_video/generic_frame_descriptor/generic_frame_descriptor_gn/moz.build new file mode 100644 index 0000000000..7aa4e9bfff --- /dev/null 
+++ b/third_party/libwebrtc/common_video/generic_frame_descriptor/generic_frame_descriptor_gn/moz.build @@ -0,0 +1,225 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/common_video/generic_frame_descriptor/generic_frame_info.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + 
DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["RTC_ENABLE_WIN_WGC"] = True + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + 
DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["TARGET_CPU"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["TARGET_CPU"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["TARGET_CPU"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["TARGET_CPU"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["TARGET_CPU"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["TARGET_CPU"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["OS_TARGET"] == "Android" and CONFIG["TARGET_CPU"] == "arm": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["OS_TARGET"] == "Android" and CONFIG["TARGET_CPU"] == "x86": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "aarch64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "arm": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = 
True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86_64": + + DEFINES["_GNU_SOURCE"] = True + +Library("generic_frame_descriptor_gn") diff --git a/third_party/libwebrtc/common_video/generic_frame_descriptor/generic_frame_info.cc b/third_party/libwebrtc/common_video/generic_frame_descriptor/generic_frame_info.cc new file mode 100644 index 0000000000..af66bbaf67 --- /dev/null +++ b/third_party/libwebrtc/common_video/generic_frame_descriptor/generic_frame_info.cc @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "common_video/generic_frame_descriptor/generic_frame_info.h" + +#include <utility> + +#include "rtc_base/checks.h" + +namespace webrtc { + +GenericFrameInfo::GenericFrameInfo() = default; +GenericFrameInfo::GenericFrameInfo(const GenericFrameInfo&) = default; +GenericFrameInfo::~GenericFrameInfo() = default; + +GenericFrameInfo::Builder::Builder() = default; +GenericFrameInfo::Builder::~Builder() = default; + +GenericFrameInfo GenericFrameInfo::Builder::Build() const { + return info_; +} + +GenericFrameInfo::Builder& GenericFrameInfo::Builder::T(int temporal_id) { + info_.temporal_id = temporal_id; + return *this; +} + +GenericFrameInfo::Builder& GenericFrameInfo::Builder::S(int spatial_id) { + info_.spatial_id = spatial_id; + return *this; +} + +GenericFrameInfo::Builder& GenericFrameInfo::Builder::Dtis( + absl::string_view indication_symbols) { + info_.decode_target_indications = + webrtc_impl::StringToDecodeTargetIndications(indication_symbols); + return *this; +} + +} // namespace webrtc diff --git 
a/third_party/libwebrtc/common_video/generic_frame_descriptor/generic_frame_info.h b/third_party/libwebrtc/common_video/generic_frame_descriptor/generic_frame_info.h new file mode 100644 index 0000000000..19f413b5d4 --- /dev/null +++ b/third_party/libwebrtc/common_video/generic_frame_descriptor/generic_frame_info.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_GENERIC_FRAME_DESCRIPTOR_GENERIC_FRAME_INFO_H_ +#define COMMON_VIDEO_GENERIC_FRAME_DESCRIPTOR_GENERIC_FRAME_INFO_H_ + +#include <bitset> +#include <initializer_list> +#include <vector> + +#include "absl/container/inlined_vector.h" +#include "absl/strings/string_view.h" +#include "api/transport/rtp/dependency_descriptor.h" +#include "api/video/video_codec_constants.h" + +namespace webrtc { + +// Describes how a certain encoder buffer was used when encoding a frame. 
+struct CodecBufferUsage { + constexpr CodecBufferUsage(int id, bool referenced, bool updated) + : id(id), referenced(referenced), updated(updated) {} + + int id = 0; + bool referenced = false; + bool updated = false; +}; + +struct GenericFrameInfo : public FrameDependencyTemplate { + class Builder; + + GenericFrameInfo(); + GenericFrameInfo(const GenericFrameInfo&); + ~GenericFrameInfo(); + + absl::InlinedVector<CodecBufferUsage, kMaxEncoderBuffers> encoder_buffers; + std::vector<bool> part_of_chain; + std::bitset<32> active_decode_targets = ~uint32_t{0}; +}; + +class GenericFrameInfo::Builder { + public: + Builder(); + ~Builder(); + + GenericFrameInfo Build() const; + Builder& T(int temporal_id); + Builder& S(int spatial_id); + Builder& Dtis(absl::string_view indication_symbols); + + private: + GenericFrameInfo info_; +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_GENERIC_FRAME_DESCRIPTOR_GENERIC_FRAME_INFO_H_ diff --git a/third_party/libwebrtc/common_video/h264/OWNERS b/third_party/libwebrtc/common_video/h264/OWNERS new file mode 100644 index 0000000000..361ed7e84a --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/OWNERS @@ -0,0 +1 @@ +ssilkin@webrtc.org diff --git a/third_party/libwebrtc/common_video/h264/h264_bitstream_parser.cc b/third_party/libwebrtc/common_video/h264/h264_bitstream_parser.cc new file mode 100644 index 0000000000..2311d0d2ee --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/h264_bitstream_parser.cc @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "common_video/h264/h264_bitstream_parser.h" + +#include <stdlib.h> + +#include <cstdint> +#include <vector> + +#include "common_video/h264/h264_common.h" +#include "rtc_base/bitstream_reader.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace { + +constexpr int kMaxAbsQpDeltaValue = 51; +constexpr int kMinQpValue = 0; +constexpr int kMaxQpValue = 51; + +} // namespace + +H264BitstreamParser::H264BitstreamParser() = default; +H264BitstreamParser::~H264BitstreamParser() = default; + +H264BitstreamParser::Result H264BitstreamParser::ParseNonParameterSetNalu( + const uint8_t* source, + size_t source_length, + uint8_t nalu_type) { + if (!sps_ || !pps_) + return kInvalidStream; + + last_slice_qp_delta_ = absl::nullopt; + const std::vector<uint8_t> slice_rbsp = + H264::ParseRbsp(source, source_length); + if (slice_rbsp.size() < H264::kNaluTypeSize) + return kInvalidStream; + + BitstreamReader slice_reader(slice_rbsp); + slice_reader.ConsumeBits(H264::kNaluTypeSize * 8); + + // Check to see if this is an IDR slice, which has an extra field to parse + // out. + bool is_idr = (source[0] & 0x0F) == H264::NaluType::kIdr; + uint8_t nal_ref_idc = (source[0] & 0x60) >> 5; + + // first_mb_in_slice: ue(v) + slice_reader.ReadExponentialGolomb(); + // slice_type: ue(v) + uint32_t slice_type = slice_reader.ReadExponentialGolomb(); + // slice_type's 5..9 range is used to indicate that all slices of a picture + // have the same value of slice_type % 5, we don't care about that, so we map + // to the corresponding 0..4 range. + slice_type %= 5; + // pic_parameter_set_id: ue(v) + slice_reader.ReadExponentialGolomb(); + if (sps_->separate_colour_plane_flag == 1) { + // colour_plane_id + slice_reader.ConsumeBits(2); + } + // frame_num: u(v) + // Represented by log2_max_frame_num bits. 
+ slice_reader.ConsumeBits(sps_->log2_max_frame_num); + bool field_pic_flag = false; + if (sps_->frame_mbs_only_flag == 0) { + // field_pic_flag: u(1) + field_pic_flag = slice_reader.Read<bool>(); + if (field_pic_flag) { + // bottom_field_flag: u(1) + slice_reader.ConsumeBits(1); + } + } + if (is_idr) { + // idr_pic_id: ue(v) + slice_reader.ReadExponentialGolomb(); + } + // pic_order_cnt_lsb: u(v) + // Represented by sps_.log2_max_pic_order_cnt_lsb bits. + if (sps_->pic_order_cnt_type == 0) { + slice_reader.ConsumeBits(sps_->log2_max_pic_order_cnt_lsb); + if (pps_->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) { + // delta_pic_order_cnt_bottom: se(v) + slice_reader.ReadExponentialGolomb(); + } + } + if (sps_->pic_order_cnt_type == 1 && + !sps_->delta_pic_order_always_zero_flag) { + // delta_pic_order_cnt[0]: se(v) + slice_reader.ReadExponentialGolomb(); + if (pps_->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) { + // delta_pic_order_cnt[1]: se(v) + slice_reader.ReadExponentialGolomb(); + } + } + if (pps_->redundant_pic_cnt_present_flag) { + // redundant_pic_cnt: ue(v) + slice_reader.ReadExponentialGolomb(); + } + if (slice_type == H264::SliceType::kB) { + // direct_spatial_mv_pred_flag: u(1) + slice_reader.ConsumeBits(1); + } + switch (slice_type) { + case H264::SliceType::kP: + case H264::SliceType::kB: + case H264::SliceType::kSp: + // num_ref_idx_active_override_flag: u(1) + if (slice_reader.Read<bool>()) { + // num_ref_idx_l0_active_minus1: ue(v) + slice_reader.ReadExponentialGolomb(); + if (slice_type == H264::SliceType::kB) { + // num_ref_idx_l1_active_minus1: ue(v) + slice_reader.ReadExponentialGolomb(); + } + } + break; + default: + break; + } + if (!slice_reader.Ok()) { + return kInvalidStream; + } + // assume nal_unit_type != 20 && nal_unit_type != 21: + if (nalu_type == 20 || nalu_type == 21) { + RTC_LOG(LS_ERROR) << "Unsupported nal unit type."; + return kUnsupportedStream; + } + // if (nal_unit_type == 20 || 
nal_unit_type == 21) + // ref_pic_list_mvc_modification() + // else + { + // ref_pic_list_modification(): + // `slice_type` checks here don't use named constants as they aren't named + // in the spec for this segment. Keeping them consistent makes it easier to + // verify that they are both the same. + if (slice_type % 5 != 2 && slice_type % 5 != 4) { + // ref_pic_list_modification_flag_l0: u(1) + if (slice_reader.Read<bool>()) { + uint32_t modification_of_pic_nums_idc; + do { + // modification_of_pic_nums_idc: ue(v) + modification_of_pic_nums_idc = slice_reader.ReadExponentialGolomb(); + if (modification_of_pic_nums_idc == 0 || + modification_of_pic_nums_idc == 1) { + // abs_diff_pic_num_minus1: ue(v) + slice_reader.ReadExponentialGolomb(); + } else if (modification_of_pic_nums_idc == 2) { + // long_term_pic_num: ue(v) + slice_reader.ReadExponentialGolomb(); + } + } while (modification_of_pic_nums_idc != 3 && slice_reader.Ok()); + } + } + if (slice_type % 5 == 1) { + // ref_pic_list_modification_flag_l1: u(1) + if (slice_reader.Read<bool>()) { + uint32_t modification_of_pic_nums_idc; + do { + // modification_of_pic_nums_idc: ue(v) + modification_of_pic_nums_idc = slice_reader.ReadExponentialGolomb(); + if (modification_of_pic_nums_idc == 0 || + modification_of_pic_nums_idc == 1) { + // abs_diff_pic_num_minus1: ue(v) + slice_reader.ReadExponentialGolomb(); + } else if (modification_of_pic_nums_idc == 2) { + // long_term_pic_num: ue(v) + slice_reader.ReadExponentialGolomb(); + } + } while (modification_of_pic_nums_idc != 3 && slice_reader.Ok()); + } + } + } + if (!slice_reader.Ok()) { + return kInvalidStream; + } + // TODO(pbos): Do we need support for pred_weight_table()? 
+ if ((pps_->weighted_pred_flag && (slice_type == H264::SliceType::kP || + slice_type == H264::SliceType::kSp)) || + (pps_->weighted_bipred_idc == 1 && slice_type == H264::SliceType::kB)) { + RTC_LOG(LS_ERROR) << "Streams with pred_weight_table unsupported."; + return kUnsupportedStream; + } + // if ((weighted_pred_flag && (slice_type == P || slice_type == SP)) || + // (weighted_bipred_idc == 1 && slice_type == B)) { + // pred_weight_table() + // } + if (nal_ref_idc != 0) { + // dec_ref_pic_marking(): + if (is_idr) { + // no_output_of_prior_pics_flag: u(1) + // long_term_reference_flag: u(1) + slice_reader.ConsumeBits(2); + } else { + // adaptive_ref_pic_marking_mode_flag: u(1) + if (slice_reader.Read<bool>()) { + uint32_t memory_management_control_operation; + do { + // memory_management_control_operation: ue(v) + memory_management_control_operation = + slice_reader.ReadExponentialGolomb(); + if (memory_management_control_operation == 1 || + memory_management_control_operation == 3) { + // difference_of_pic_nums_minus1: ue(v) + slice_reader.ReadExponentialGolomb(); + } + if (memory_management_control_operation == 2) { + // long_term_pic_num: ue(v) + slice_reader.ReadExponentialGolomb(); + } + if (memory_management_control_operation == 3 || + memory_management_control_operation == 6) { + // long_term_frame_idx: ue(v) + slice_reader.ReadExponentialGolomb(); + } + if (memory_management_control_operation == 4) { + // max_long_term_frame_idx_plus1: ue(v) + slice_reader.ReadExponentialGolomb(); + } + } while (memory_management_control_operation != 0 && slice_reader.Ok()); + } + } + } + if (pps_->entropy_coding_mode_flag && slice_type != H264::SliceType::kI && + slice_type != H264::SliceType::kSi) { + // cabac_init_idc: ue(v) + slice_reader.ReadExponentialGolomb(); + } + + int last_slice_qp_delta = slice_reader.ReadSignedExponentialGolomb(); + if (!slice_reader.Ok()) { + return kInvalidStream; + } + if (abs(last_slice_qp_delta) > kMaxAbsQpDeltaValue) { + // Something has 
gone wrong, and the parsed value is invalid. + RTC_LOG(LS_WARNING) << "Parsed QP value out of range."; + return kInvalidStream; + } + + last_slice_qp_delta_ = last_slice_qp_delta; + return kOk; +} + +void H264BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) { + H264::NaluType nalu_type = H264::ParseNaluType(slice[0]); + switch (nalu_type) { + case H264::NaluType::kSps: { + sps_ = SpsParser::ParseSps(slice + H264::kNaluTypeSize, + length - H264::kNaluTypeSize); + if (!sps_) + RTC_DLOG(LS_WARNING) << "Unable to parse SPS from H264 bitstream."; + break; + } + case H264::NaluType::kPps: { + pps_ = PpsParser::ParsePps(slice + H264::kNaluTypeSize, + length - H264::kNaluTypeSize); + if (!pps_) + RTC_DLOG(LS_WARNING) << "Unable to parse PPS from H264 bitstream."; + break; + } + case H264::NaluType::kAud: + case H264::NaluType::kSei: + case H264::NaluType::kPrefix: + break; // Ignore these nalus, as we don't care about their contents. + default: + Result res = ParseNonParameterSetNalu(slice, length, nalu_type); + if (res != kOk) + RTC_DLOG(LS_INFO) << "Failed to parse bitstream. 
Error: " << res; + break; + } +} + +void H264BitstreamParser::ParseBitstream( + rtc::ArrayView<const uint8_t> bitstream) { + std::vector<H264::NaluIndex> nalu_indices = + H264::FindNaluIndices(bitstream.data(), bitstream.size()); + for (const H264::NaluIndex& index : nalu_indices) + ParseSlice(bitstream.data() + index.payload_start_offset, + index.payload_size); +} + +absl::optional<int> H264BitstreamParser::GetLastSliceQp() const { + if (!last_slice_qp_delta_ || !pps_) + return absl::nullopt; + const int qp = 26 + pps_->pic_init_qp_minus26 + *last_slice_qp_delta_; + if (qp < kMinQpValue || qp > kMaxQpValue) { + RTC_LOG(LS_ERROR) << "Parsed invalid QP from bitstream."; + return absl::nullopt; + } + return qp; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h264/h264_bitstream_parser.h b/third_party/libwebrtc/common_video/h264/h264_bitstream_parser.h new file mode 100644 index 0000000000..05427825ac --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/h264_bitstream_parser.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H264_H264_BITSTREAM_PARSER_H_ +#define COMMON_VIDEO_H264_H264_BITSTREAM_PARSER_H_ +#include <stddef.h> +#include <stdint.h> + +#include "absl/types/optional.h" +#include "api/video_codecs/bitstream_parser.h" +#include "common_video/h264/pps_parser.h" +#include "common_video/h264/sps_parser.h" + +namespace webrtc { + +// Stateful H264 bitstream parser (due to SPS/PPS). Used to parse out QP values +// from the bitstream. +// TODO(pbos): Unify with RTP SPS parsing and only use one H264 parser. 
// TODO(pbos): If/when this gets used on the receiver side CHECKs must be
// removed and gracefully abort as we have no control over receive-side
// bitstreams.
class H264BitstreamParser : public BitstreamParser {
 public:
  H264BitstreamParser();
  ~H264BitstreamParser() override;

  // Parses every NAL unit found in `bitstream`, updating the cached SPS/PPS
  // and the last slice QP delta.
  void ParseBitstream(rtc::ArrayView<const uint8_t> bitstream) override;
  // Returns the QP of the last parsed slice, or nullopt if none is available.
  absl::optional<int> GetLastSliceQp() const override;

 protected:
  // Outcome of parsing a single non-parameter-set NAL unit.
  enum Result {
    kOk,
    kInvalidStream,
    kUnsupportedStream,
  };
  // Parses one NAL unit; `slice` points at the NAL header byte.
  void ParseSlice(const uint8_t* slice, size_t length);
  // Parses a slice NAL unit; requires SPS and PPS to have been parsed first.
  Result ParseNonParameterSetNalu(const uint8_t* source,
                                  size_t source_length,
                                  uint8_t nalu_type);

  // SPS/PPS state, updated when parsing new SPS/PPS, used to parse slices.
  absl::optional<SpsParser::SpsState> sps_;
  absl::optional<PpsParser::PpsState> pps_;

  // slice_qp_delta of the last parsed slice (not the final QP); combined with
  // the PPS's pic_init_qp_minus26 by GetLastSliceQp().
  absl::optional<int32_t> last_slice_qp_delta_;
};

}  // namespace webrtc

#endif  // COMMON_VIDEO_H264_H264_BITSTREAM_PARSER_H_

// diff --git a/third_party/libwebrtc/common_video/h264/h264_bitstream_parser_unittest.cc b/third_party/libwebrtc/common_video/h264/h264_bitstream_parser_unittest.cc
// new file mode 100644
// index 0000000000..3f4f202af2
// --- /dev/null
// +++ b/third_party/libwebrtc/common_video/h264/h264_bitstream_parser_unittest.cc
// @@ -0,0 +1,84 @@
/*
 *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "common_video/h264/h264_bitstream_parser.h"

#include "test/gtest.h"

namespace webrtc {

// SPS/PPS part of below chunk.
uint8_t kH264SpsPps[] = {0x00, 0x00, 0x00, 0x01, 0x67, 0x42, 0x80, 0x20, 0xda,
                         0x01, 0x40, 0x16, 0xe8, 0x06, 0xd0, 0xa1, 0x35, 0x00,
                         0x00, 0x00, 0x01, 0x68, 0xce, 0x06, 0xe2};

// Contains enough of the image slice to contain slice QP.
uint8_t kH264BitstreamChunk[] = {
    0x00, 0x00, 0x00, 0x01, 0x67, 0x42, 0x80, 0x20, 0xda, 0x01, 0x40, 0x16,
    0xe8, 0x06, 0xd0, 0xa1, 0x35, 0x00, 0x00, 0x00, 0x01, 0x68, 0xce, 0x06,
    0xe2, 0x00, 0x00, 0x00, 0x01, 0x65, 0xb8, 0x40, 0xf0, 0x8c, 0x03, 0xf2,
    0x75, 0x67, 0xad, 0x41, 0x64, 0x24, 0x0e, 0xa0, 0xb2, 0x12, 0x1e, 0xf8,
};

// Two NAL units whose header bytes (0x27, 0x28) carry types 7 (SPS) and
// 8 (PPS); there is no slice in this chunk.
uint8_t kH264BitstreamChunkCabac[] = {
    0x00, 0x00, 0x00, 0x01, 0x27, 0x64, 0x00, 0x0d, 0xac, 0x52, 0x30,
    0x50, 0x7e, 0xc0, 0x5a, 0x81, 0x01, 0x01, 0x18, 0x56, 0xbd, 0xef,
    0x80, 0x80, 0x00, 0x00, 0x00, 0x01, 0x28, 0xfe, 0x09, 0x8b,
};

// Contains enough of the image slice to contain slice QP.
uint8_t kH264BitstreamNextImageSliceChunk[] = {
    0x00, 0x00, 0x00, 0x01, 0x41, 0xe2, 0x01, 0x16, 0x0e, 0x3e, 0x2b, 0x86,
};

// Contains enough of the image slice to contain slice QP.
uint8_t kH264BitstreamNextImageSliceChunkCabac[] = {
    0x00, 0x00, 0x00, 0x01, 0x21, 0xe1, 0x05, 0x11, 0x3f, 0x9a, 0xae, 0x46,
    0x70, 0xbf, 0xc1, 0x4a, 0x16, 0x8f, 0x51, 0xf4, 0xca, 0xfb, 0xa3, 0x65,
};

// A freshly constructed parser has no QP to report.
TEST(H264BitstreamParserTest, ReportsNoQpWithoutParsedSlices) {
  H264BitstreamParser h264_parser;
  EXPECT_FALSE(h264_parser.GetLastSliceQp().has_value());
}

// Parameter sets alone do not produce a QP.
TEST(H264BitstreamParserTest, ReportsNoQpWithOnlyParsedPpsAndSpsSlices) {
  H264BitstreamParser h264_parser;
  h264_parser.ParseBitstream(kH264SpsPps);
  EXPECT_FALSE(h264_parser.GetLastSliceQp().has_value());
}

// The reported QP follows the most recently parsed slice.
TEST(H264BitstreamParserTest, ReportsLastSliceQpForImageSlices) {
  H264BitstreamParser h264_parser;
  h264_parser.ParseBitstream(kH264BitstreamChunk);
  absl::optional<int> qp = h264_parser.GetLastSliceQp();
  ASSERT_TRUE(qp.has_value());
  EXPECT_EQ(35, *qp);

  // Parse an additional image slice.
  h264_parser.ParseBitstream(kH264BitstreamNextImageSliceChunk);
  qp = h264_parser.GetLastSliceQp();
  ASSERT_TRUE(qp.has_value());
  EXPECT_EQ(37, *qp);
}

// Same flow as above for the CABAC sample: the first chunk holds only
// SPS/PPS, so a QP is available only after the following slice is parsed.
TEST(H264BitstreamParserTest, ReportsLastSliceQpForCABACImageSlices) {
  H264BitstreamParser h264_parser;
  h264_parser.ParseBitstream(kH264BitstreamChunkCabac);
  EXPECT_FALSE(h264_parser.GetLastSliceQp().has_value());

  // Parse an additional image slice.
  h264_parser.ParseBitstream(kH264BitstreamNextImageSliceChunkCabac);
  absl::optional<int> qp = h264_parser.GetLastSliceQp();
  ASSERT_TRUE(qp.has_value());
  EXPECT_EQ(24, *qp);
}

}  // namespace webrtc

// diff --git a/third_party/libwebrtc/common_video/h264/h264_common.cc b/third_party/libwebrtc/common_video/h264/h264_common.cc
// new file mode 100644
// index 0000000000..06d94e0305
// --- /dev/null
// +++ b/third_party/libwebrtc/common_video/h264/h264_common.cc
// @@ -0,0 +1,116 @@
/*
 *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "common_video/h264/h264_common.h"

#include <cstdint>

namespace webrtc {
namespace H264 {

// nal_unit_type is held in the low 5 bits of the NAL header byte.
const uint8_t kNaluTypeMask = 0x1F;

// Scans `buffer` for {0 0 1} / {0 0 0 1} start sequences and returns one
// NaluIndex per sequence found. Each entry's payload runs up to the next
// start sequence, or to the end of the buffer for the last entry. A start
// sequence that ends exactly at the end of the buffer is not reported,
// because the scan stops before the final 3-byte window.
std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer,
                                       size_t buffer_size) {
  // This is sorta like Boyer-Moore, but with only the first optimization step:
  // given a 3-byte sequence we're looking at, if the 3rd byte isn't 1 or 0,
  // skip ahead to the next 3-byte sequence. 0s and 1s are relatively rare, so
  // this will skip the majority of reads/checks.
  std::vector<NaluIndex> sequences;
  if (buffer_size < kNaluShortStartSequenceSize)
    return sequences;

  static_assert(kNaluShortStartSequenceSize >= 2,
                "kNaluShortStartSequenceSize must be larger or equals to 2");
  const size_t end = buffer_size - kNaluShortStartSequenceSize;
  for (size_t i = 0; i < end;) {
    if (buffer[i + 2] > 1) {
      i += 3;
    } else if (buffer[i + 2] == 1) {
      if (buffer[i + 1] == 0 && buffer[i] == 0) {
        // We found a start sequence, now check if it was a 3 or 4 byte one.
        NaluIndex index = {i, i + 3, 0};
        if (index.start_offset > 0 && buffer[index.start_offset - 1] == 0)
          --index.start_offset;

        // Update length of previous entry.
        auto it = sequences.rbegin();
        if (it != sequences.rend())
          it->payload_size = index.start_offset - it->payload_start_offset;

        sequences.push_back(index);
      }

      i += 3;
    } else {
      // Third byte is 0: a start sequence may begin one byte further on.
      ++i;
    }
  }

  // Update length of last entry, if any.
  auto it = sequences.rbegin();
  if (it != sequences.rend())
    it->payload_size = buffer_size - it->payload_start_offset;

  return sequences;
}

// Extracts the NAL unit type from a NAL header byte.
NaluType ParseNaluType(uint8_t data) {
  return static_cast<NaluType>(data & kNaluTypeMask);
}

// Returns a copy of `data` with the third byte of every 00 00 03 sequence
// (the emulation prevention byte) removed.
std::vector<uint8_t> ParseRbsp(const uint8_t* data, size_t length) {
  std::vector<uint8_t> out;
  out.reserve(length);

  for (size_t i = 0; i < length;) {
    // Be careful about over/underflow here. length - 3 can underflow, and
    // i + 3 can overflow, but length - i can't, because i < length
    // above, and that expression will produce the number of bytes left in
    // the stream including the byte at i.
    if (length - i >= 3 && !data[i] && !data[i + 1] && data[i + 2] == 3) {
      // Two rbsp bytes.
      out.push_back(data[i++]);
      out.push_back(data[i++]);
      // Skip the emulation byte.
      i++;
    } else {
      // Single rbsp byte.
      out.push_back(data[i++]);
    }
  }
  return out;
}

// Appends `bytes` to `destination`, inserting an emulation byte (0x03) before
// any byte <= 0x03 that follows two or more consecutive zero bytes.
void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination) {
  static const uint8_t kZerosInStartSequence = 2;
  static const uint8_t kEmulationByte = 0x03u;
  size_t num_consecutive_zeros = 0;
  // Reserves room for the unescaped payload only; inserted emulation bytes
  // are not included in this estimate.
  destination->EnsureCapacity(destination->size() + length);

  for (size_t i = 0; i < length; ++i) {
    uint8_t byte = bytes[i];
    if (byte <= kEmulationByte &&
        num_consecutive_zeros >= kZerosInStartSequence) {
      // Need to escape.
      destination->AppendData(kEmulationByte);
      num_consecutive_zeros = 0;
    }
    destination->AppendData(byte);
    if (byte == 0) {
      ++num_consecutive_zeros;
    } else {
      num_consecutive_zeros = 0;
    }
  }
}

}  // namespace H264
}  // namespace webrtc

// diff --git a/third_party/libwebrtc/common_video/h264/h264_common.h b/third_party/libwebrtc/common_video/h264/h264_common.h
// new file mode 100644
// index 0000000000..0b1843ee38
// --- /dev/null
// +++ b/third_party/libwebrtc/common_video/h264/h264_common.h
// @@ -0,0 +1,91 @@
/*
 *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef COMMON_VIDEO_H264_H264_COMMON_H_
#define COMMON_VIDEO_H264_H264_COMMON_H_

#include <stddef.h>
#include <stdint.h>

#include <vector>

#include "rtc_base/buffer.h"

namespace webrtc {

namespace H264 {
// The size of a full NALU start sequence {0 0 0 1}, used for the first NALU
// of an access unit, and for SPS and PPS blocks.
const size_t kNaluLongStartSequenceSize = 4;

// The size of a shortened NALU start sequence {0 0 1}, that may be used if
// not the first NALU of an access unit or an SPS or PPS block.
const size_t kNaluShortStartSequenceSize = 3;

// The size of the NALU type byte (1).
const size_t kNaluTypeSize = 1;

// NAL unit types referenced by this code; values are nal_unit_type numbers.
enum NaluType : uint8_t {
  kSlice = 1,
  kIdr = 5,
  kSei = 6,
  kSps = 7,
  kPps = 8,
  kAud = 9,
  kEndOfSequence = 10,
  kEndOfStream = 11,
  kFiller = 12,
  kPrefix = 14,
  kStapA = 24,
  kFuA = 28
};

// Slice types in their 0..4 form (i.e. slice_type % 5).
enum SliceType : uint8_t { kP = 0, kB = 1, kI = 2, kSp = 3, kSi = 4 };

struct NaluIndex {
  // Start index of NALU, including start sequence.
  size_t start_offset;
  // Start index of NALU payload, typically type header.
  size_t payload_start_offset;
  // Length of NALU payload, in bytes, counting from payload_start_offset.
  size_t payload_size;
};

// Returns a vector of the NALU indices in the given buffer.
std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer,
                                       size_t buffer_size);

// Get the NAL type from the header byte immediately following start sequence.
NaluType ParseNaluType(uint8_t data);

// Methods for parsing and writing RBSP. See section 7.4.1 of the H264 spec.
//
// The following sequences are illegal, and need to be escaped when encoding:
// 00 00 00 -> 00 00 03 00
// 00 00 01 -> 00 00 03 01
// 00 00 02 -> 00 00 03 02
// And things in the source that look like the emulation byte pattern (00 00 03)
// need to have an extra emulation byte added, so it's removed when decoding:
// 00 00 03 -> 00 00 03 03
//
// Decoding is simply a matter of finding any 00 00 03 sequence and removing
// the 03 emulation byte.

// Parse the given data and remove any emulation byte escaping.
std::vector<uint8_t> ParseRbsp(const uint8_t* data, size_t length);

// Write the given data to the destination buffer, inserting any emulation
// bytes needed to escape data that could be interpreted as a start
// sequence.
void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination);
}  // namespace H264
}  // namespace webrtc

#endif  // COMMON_VIDEO_H264_H264_COMMON_H_

// diff --git a/third_party/libwebrtc/common_video/h264/pps_parser.cc b/third_party/libwebrtc/common_video/h264/pps_parser.cc
// new file mode 100644
// index 0000000000..2fc9749e8c
// --- /dev/null
// +++ b/third_party/libwebrtc/common_video/h264/pps_parser.cc
// @@ -0,0 +1,160 @@
/*
 *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "common_video/h264/pps_parser.h"

#include <cstdint>
#include <limits>
#include <vector>

#include "absl/numeric/bits.h"
#include "common_video/h264/h264_common.h"
#include "rtc_base/bitstream_reader.h"
#include "rtc_base/checks.h"

namespace webrtc {
namespace {
// Accepted range for pic_init_qp_minus26; values outside it fail the parse.
constexpr int kMaxPicInitQpDeltaValue = 25;
constexpr int kMinPicInitQpDeltaValue = -26;
}  // namespace

// General note: this is based off the 02/2014 version of the H.264 standard.
// You can find it on this page:
// http://www.itu.int/rec/T-REC-H.264

// Strips emulation bytes from `data` and parses the result as a PPS.
// Returns nullopt if the PPS cannot be parsed.
absl::optional<PpsParser::PpsState> PpsParser::ParsePps(const uint8_t* data,
                                                        size_t length) {
  // First, parse out rbsp, which is basically the source buffer minus emulation
  // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in
  // section 7.3.1 of the H.264 standard.
  return ParseInternal(H264::ParseRbsp(data, length));
}

// Reads only the first two fields of a PPS (pic_parameter_set_id and
// seq_parameter_set_id) into `pps_id` and `sps_id`. Returns false if the
// reader ran out of data; the outputs are written either way.
bool PpsParser::ParsePpsIds(const uint8_t* data,
                            size_t length,
                            uint32_t* pps_id,
                            uint32_t* sps_id) {
  RTC_DCHECK(pps_id);
  RTC_DCHECK(sps_id);
  // First, parse out rbsp, which is basically the source buffer minus emulation
  // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in
  // section 7.3.1 of the H.264 standard.
  std::vector<uint8_t> unpacked_buffer = H264::ParseRbsp(data, length);
  BitstreamReader reader(unpacked_buffer);
  *pps_id = reader.ReadExponentialGolomb();
  *sps_id = reader.ReadExponentialGolomb();
  return reader.Ok();
}

// Reads pic_parameter_set_id from the start of a slice header. Parsing starts
// at the first bit of `data`, so no NAL header byte is skipped here.
// Returns nullopt if the reader ran out of data.
absl::optional<uint32_t> PpsParser::ParsePpsIdFromSlice(const uint8_t* data,
                                                        size_t length) {
  std::vector<uint8_t> unpacked_buffer = H264::ParseRbsp(data, length);
  BitstreamReader slice_reader(unpacked_buffer);

  // first_mb_in_slice: ue(v)
  slice_reader.ReadExponentialGolomb();
  // slice_type: ue(v)
  slice_reader.ReadExponentialGolomb();
  // pic_parameter_set_id: ue(v)
  uint32_t slice_pps_id = slice_reader.ReadExponentialGolomb();
  if (!slice_reader.Ok()) {
    return absl::nullopt;
  }
  return slice_pps_id;
}

// Parses a PPS from `buffer`, which must already have emulation bytes
// removed. Fields are read strictly in bitstream order; only the ones stored
// in PpsState are kept, the rest are skipped. Returns nullopt when the reader
// runs out of data, when pic_init_qp_minus26 is outside
// [kMinPicInitQpDeltaValue, kMaxPicInitQpDeltaValue], or when the
// slice_group_id array would be too large to skip.
absl::optional<PpsParser::PpsState> PpsParser::ParseInternal(
    rtc::ArrayView<const uint8_t> buffer) {
  BitstreamReader reader(buffer);
  PpsState pps;
  // pic_parameter_set_id: ue(v)
  pps.id = reader.ReadExponentialGolomb();
  // seq_parameter_set_id: ue(v)
  pps.sps_id = reader.ReadExponentialGolomb();

  // entropy_coding_mode_flag: u(1)
  pps.entropy_coding_mode_flag = reader.Read<bool>();
  // bottom_field_pic_order_in_frame_present_flag: u(1)
  pps.bottom_field_pic_order_in_frame_present_flag = reader.Read<bool>();

  // num_slice_groups_minus1: ue(v)
  uint32_t num_slice_groups_minus1 = reader.ReadExponentialGolomb();
  if (num_slice_groups_minus1 > 0) {
    // slice_group_map_type: ue(v)
    uint32_t slice_group_map_type = reader.ReadExponentialGolomb();
    if (slice_group_map_type == 0) {
      for (uint32_t i_group = 0;
           i_group <= num_slice_groups_minus1 && reader.Ok(); ++i_group) {
        // run_length_minus1[iGroup]: ue(v)
        reader.ReadExponentialGolomb();
      }
    } else if (slice_group_map_type == 1) {
      // TODO(sprang): Implement support for dispersed slice group map type.
      // See 8.2.2.2 Specification for dispersed slice group map type.
    } else if (slice_group_map_type == 2) {
      // NOTE(review): this loop runs num_slice_groups_minus1 + 1 times; the
      // spec's PPS syntax appears to use `iGroup < num_slice_groups_minus1`
      // for this map type - confirm against the standard. The unit test's
      // writer uses the same count as this loop.
      for (uint32_t i_group = 0;
           i_group <= num_slice_groups_minus1 && reader.Ok(); ++i_group) {
        // top_left[iGroup]: ue(v)
        reader.ReadExponentialGolomb();
        // bottom_right[iGroup]: ue(v)
        reader.ReadExponentialGolomb();
      }
    } else if (slice_group_map_type == 3 || slice_group_map_type == 4 ||
               slice_group_map_type == 5) {
      // slice_group_change_direction_flag: u(1)
      reader.ConsumeBits(1);
      // slice_group_change_rate_minus1: ue(v)
      reader.ReadExponentialGolomb();
    } else if (slice_group_map_type == 6) {
      // pic_size_in_map_units_minus1: ue(v)
      uint32_t pic_size_in_map_units = reader.ReadExponentialGolomb() + 1;
      // NOTE(review): 1 + bit_width(n) is one bit more than the
      // ceil(log2(n + 1)) quoted below (e.g. n == 1 gives 2 here, 1 by the
      // formula). The unit test's writer emits the same width, so changing
      // one side alone breaks the round trip - confirm against the spec and
      // fix both together if needed.
      int slice_group_id_bits = 1 + absl::bit_width(num_slice_groups_minus1);

      // slice_group_id: array of size pic_size_in_map_units, each element
      // is represented by ceil(log2(num_slice_groups_minus1 + 1)) bits.
      // Computed in 64 bits so the product cannot wrap before the range check.
      int64_t bits_to_consume =
          int64_t{slice_group_id_bits} * pic_size_in_map_units;
      if (!reader.Ok() || bits_to_consume > std::numeric_limits<int>::max()) {
        return absl::nullopt;
      }
      reader.ConsumeBits(bits_to_consume);
    }
  }
  // num_ref_idx_l0_default_active_minus1: ue(v)
  reader.ReadExponentialGolomb();
  // num_ref_idx_l1_default_active_minus1: ue(v)
  reader.ReadExponentialGolomb();
  // weighted_pred_flag: u(1)
  pps.weighted_pred_flag = reader.Read<bool>();
  // weighted_bipred_idc: u(2)
  pps.weighted_bipred_idc = reader.ReadBits(2);

  // pic_init_qp_minus26: se(v)
  pps.pic_init_qp_minus26 = reader.ReadSignedExponentialGolomb();
  // Sanity-check parsed value
  if (!reader.Ok() || pps.pic_init_qp_minus26 > kMaxPicInitQpDeltaValue ||
      pps.pic_init_qp_minus26 < kMinPicInitQpDeltaValue) {
    return absl::nullopt;
  }
  // pic_init_qs_minus26: se(v)
  // The value is discarded, so the unsigned reader is used to skip it.
  reader.ReadExponentialGolomb();
  // chroma_qp_index_offset: se(v)
  reader.ReadExponentialGolomb();
  // deblocking_filter_control_present_flag: u(1)
  // constrained_intra_pred_flag: u(1)
  reader.ConsumeBits(2);
  // redundant_pic_cnt_present_flag: u(1)
  pps.redundant_pic_cnt_present_flag = reader.ReadBit();
  if (!reader.Ok()) {
    return absl::nullopt;
  }

  return pps;
}

}  // namespace webrtc

// diff --git a/third_party/libwebrtc/common_video/h264/pps_parser.h b/third_party/libwebrtc/common_video/h264/pps_parser.h
// new file mode 100644
// index 0000000000..52717dcc26
// --- /dev/null
// +++ b/third_party/libwebrtc/common_video/h264/pps_parser.h
// @@ -0,0 +1,60 @@
/*
 *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef COMMON_VIDEO_H264_PPS_PARSER_H_ +#define COMMON_VIDEO_H264_PPS_PARSER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "absl/types/optional.h" +#include "api/array_view.h" + +namespace webrtc { + +// A class for parsing out picture parameter set (PPS) data from a H264 NALU. +class PpsParser { + public: + // The parsed state of the PPS. Only some select values are stored. + // Add more as they are actually needed. + struct PpsState { + PpsState() = default; + + bool bottom_field_pic_order_in_frame_present_flag = false; + bool weighted_pred_flag = false; + bool entropy_coding_mode_flag = false; + uint32_t weighted_bipred_idc = false; + uint32_t redundant_pic_cnt_present_flag = 0; + int pic_init_qp_minus26 = 0; + uint32_t id = 0; + uint32_t sps_id = 0; + }; + + // Unpack RBSP and parse PPS state from the supplied buffer. + static absl::optional<PpsState> ParsePps(const uint8_t* data, size_t length); + + static bool ParsePpsIds(const uint8_t* data, + size_t length, + uint32_t* pps_id, + uint32_t* sps_id); + + static absl::optional<uint32_t> ParsePpsIdFromSlice(const uint8_t* data, + size_t length); + + protected: + // Parse the PPS state, for a buffer where RBSP decoding has already been + // performed. + static absl::optional<PpsState> ParseInternal( + rtc::ArrayView<const uint8_t> buffer); +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_H264_PPS_PARSER_H_ diff --git a/third_party/libwebrtc/common_video/h264/pps_parser_unittest.cc b/third_party/libwebrtc/common_video/h264/pps_parser_unittest.cc new file mode 100644 index 0000000000..4fe742d2e6 --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/pps_parser_unittest.cc @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "common_video/h264/pps_parser.h"

#include <vector>

#include "common_video/h264/h264_common.h"
#include "rtc_base/bit_buffer.h"
#include "rtc_base/buffer.h"
#include "rtc_base/checks.h"
#include "test/gtest.h"

namespace webrtc {

namespace {
// Contains enough of the image slice to contain slice QP.
const uint8_t kH264BitstreamChunk[] = {
    0x00, 0x00, 0x00, 0x01, 0x67, 0x42, 0x80, 0x20, 0xda, 0x01, 0x40, 0x16,
    0xe8, 0x06, 0xd0, 0xa1, 0x35, 0x00, 0x00, 0x00, 0x01, 0x68, 0xce, 0x06,
    0xe2, 0x00, 0x00, 0x00, 0x01, 0x65, 0xb8, 0x40, 0xf0, 0x8c, 0x03, 0xf2,
    0x75, 0x67, 0xad, 0x41, 0x64, 0x24, 0x0e, 0xa0, 0xb2, 0x12, 0x1e, 0xf8,
};
// Size of the scratch buffer WritePps() serialises into.
const size_t kPpsBufferMaxSize = 256;
// Value written for every field the parser skips.
const uint32_t kIgnored = 0;
}  // namespace

// Serialises `pps` as a PPS payload and appends it, RBSP-escaped, to
// `out_buffer`. `slice_group_map_type` and `pic_size_in_map_units` are only
// used when `num_slice_groups` > 1. Fields that the parser does not store are
// written as kIgnored.
void WritePps(const PpsParser::PpsState& pps,
              int slice_group_map_type,
              int num_slice_groups,
              int pic_size_in_map_units,
              rtc::Buffer* out_buffer) {
  uint8_t data[kPpsBufferMaxSize] = {0};
  rtc::BitBufferWriter bit_buffer(data, kPpsBufferMaxSize);

  // pic_parameter_set_id: ue(v)
  bit_buffer.WriteExponentialGolomb(pps.id);
  // seq_parameter_set_id: ue(v)
  bit_buffer.WriteExponentialGolomb(pps.sps_id);
  // entropy_coding_mode_flag: u(1)
  bit_buffer.WriteBits(pps.entropy_coding_mode_flag, 1);
  // bottom_field_pic_order_in_frame_present_flag: u(1)
  bit_buffer.WriteBits(pps.bottom_field_pic_order_in_frame_present_flag ? 1 : 0,
                       1);
  // num_slice_groups_minus1: ue(v)
  RTC_CHECK_GT(num_slice_groups, 0);
  bit_buffer.WriteExponentialGolomb(num_slice_groups - 1);

  if (num_slice_groups > 1) {
    // slice_group_map_type: ue(v)
    bit_buffer.WriteExponentialGolomb(slice_group_map_type);
    switch (slice_group_map_type) {
      case 0:
        for (int i = 0; i < num_slice_groups; ++i) {
          // run_length_minus1[iGroup]: ue(v)
          bit_buffer.WriteExponentialGolomb(kIgnored);
        }
        break;
      case 2:
        for (int i = 0; i < num_slice_groups; ++i) {
          // top_left[iGroup]: ue(v)
          bit_buffer.WriteExponentialGolomb(kIgnored);
          // bottom_right[iGroup]: ue(v)
          bit_buffer.WriteExponentialGolomb(kIgnored);
        }
        break;
      case 3:
      case 4:
      case 5:
        // slice_group_change_direction_flag: u(1)
        bit_buffer.WriteBits(kIgnored, 1);
        // slice_group_change_rate_minus1: ue(v)
        bit_buffer.WriteExponentialGolomb(kIgnored);
        break;
      case 6: {
        bit_buffer.WriteExponentialGolomb(pic_size_in_map_units - 1);

        uint32_t slice_group_id_bits = 0;
        // If num_slice_groups is not a power of two an additional bit is
        // required
        // to account for the ceil() of log2() below.
        if ((num_slice_groups & (num_slice_groups - 1)) != 0)
          ++slice_group_id_bits;
        // Counts the bits of num_slice_groups; this consumes the by-value
        // parameter, which is not used again afterwards.
        // NOTE(review): the total is one bit more than
        // ceil(log2(num_slice_groups)) (e.g. 2 groups -> 2 bits). It matches
        // the width PpsParser::ParseInternal skips, so the round trip is
        // consistent - confirm against the spec before changing either side.
        while (num_slice_groups > 0) {
          num_slice_groups >>= 1;
          ++slice_group_id_bits;
        }

        for (int i = 0; i < pic_size_in_map_units; ++i) {
          // slice_group_id[i]: u(v)
          // Represented by ceil(log2(num_slice_groups_minus1 + 1)) bits.
          bit_buffer.WriteBits(kIgnored, slice_group_id_bits);
        }
        break;
      }
      default:
        RTC_DCHECK_NOTREACHED();
    }
  }

  // num_ref_idx_l0_default_active_minus1: ue(v)
  bit_buffer.WriteExponentialGolomb(kIgnored);
  // num_ref_idx_l1_default_active_minus1: ue(v)
  bit_buffer.WriteExponentialGolomb(kIgnored);
  // weighted_pred_flag: u(1)
  bit_buffer.WriteBits(pps.weighted_pred_flag ? 1 : 0, 1);
  // weighted_bipred_idc: u(2)
  bit_buffer.WriteBits(pps.weighted_bipred_idc, 2);

  // pic_init_qp_minus26: se(v)
  bit_buffer.WriteSignedExponentialGolomb(pps.pic_init_qp_minus26);
  // pic_init_qs_minus26: se(v)
  bit_buffer.WriteExponentialGolomb(kIgnored);
  // chroma_qp_index_offset: se(v)
  bit_buffer.WriteExponentialGolomb(kIgnored);
  // deblocking_filter_control_present_flag: u(1)
  // constrained_intra_pred_flag: u(1)
  bit_buffer.WriteBits(kIgnored, 2);
  // redundant_pic_cnt_present_flag: u(1)
  bit_buffer.WriteBits(pps.redundant_pic_cnt_present_flag, 1);

  // Pad with zero bits up to the next byte boundary so that a whole number
  // of bytes is handed to WriteRbsp().
  size_t byte_offset;
  size_t bit_offset;
  bit_buffer.GetCurrentOffset(&byte_offset, &bit_offset);
  if (bit_offset > 0) {
    bit_buffer.WriteBits(0, 8 - bit_offset);
    bit_buffer.GetCurrentOffset(&byte_offset, &bit_offset);
  }

  H264::WriteRbsp(data, byte_offset, out_buffer);
}

// Fixture that round-trips `generated_pps_` through WritePps() and
// PpsParser::ParsePps() for a range of slice group configurations.
class PpsParserTest : public ::testing::Test {
 public:
  PpsParserTest() {}
  ~PpsParserTest() override {}

  // Verifies the single-slice-group case, then every map type except 1 for
  // 2..16 slice groups.
  void RunTest() {
    VerifyParsing(generated_pps_, 0, 1, 0);
    const int kMaxSliceGroups = 17;  // Arbitrarily large.
    const int kMaxMapType = 6;
    // Incremented each time slice_group reaches a power of two (2, 4, 8, 16).
    int slice_group_bits = 0;
    for (int slice_group = 2; slice_group < kMaxSliceGroups; ++slice_group) {
      if ((slice_group & (slice_group - 1)) == 0) {
        // Slice group at a new power of two - increase slice_group_bits.
        ++slice_group_bits;
      }
      for (int map_type = 0; map_type <= kMaxMapType; ++map_type) {
        if (map_type == 1) {
          // TODO(sprang): Implement support for dispersed slice group map type.
          // See 8.2.2.2 Specification for dispersed slice group map type.
          continue;
        } else if (map_type == 6) {
          // Map type 6 also carries a per-map-unit array, so sweep its size.
          int max_pic_size = 1 << slice_group_bits;
          for (int pic_size = 1; pic_size < max_pic_size; ++pic_size)
            VerifyParsing(generated_pps_, map_type, slice_group, pic_size);
        } else {
          VerifyParsing(generated_pps_, map_type, slice_group, 0);
        }
      }
    }
  }

  // Writes `pps` with the given slice group parameters, parses it back and
  // expects every stored field to match.
  void VerifyParsing(const PpsParser::PpsState& pps,
                     int slice_group_map_type,
                     int num_slice_groups,
                     int pic_size_in_map_units) {
    buffer_.Clear();
    WritePps(pps, slice_group_map_type, num_slice_groups, pic_size_in_map_units,
             &buffer_);
    parsed_pps_ = PpsParser::ParsePps(buffer_.data(), buffer_.size());
    ASSERT_TRUE(parsed_pps_);
    EXPECT_EQ(pps.bottom_field_pic_order_in_frame_present_flag,
              parsed_pps_->bottom_field_pic_order_in_frame_present_flag);
    EXPECT_EQ(pps.weighted_pred_flag, parsed_pps_->weighted_pred_flag);
    EXPECT_EQ(pps.weighted_bipred_idc, parsed_pps_->weighted_bipred_idc);
    EXPECT_EQ(pps.entropy_coding_mode_flag,
              parsed_pps_->entropy_coding_mode_flag);
    EXPECT_EQ(pps.redundant_pic_cnt_present_flag,
              parsed_pps_->redundant_pic_cnt_present_flag);
    EXPECT_EQ(pps.pic_init_qp_minus26, parsed_pps_->pic_init_qp_minus26);
    EXPECT_EQ(pps.id, parsed_pps_->id);
    EXPECT_EQ(pps.sps_id, parsed_pps_->sps_id);
  }

  PpsParser::PpsState generated_pps_;
  rtc::Buffer buffer_;
  absl::optional<PpsParser::PpsState> parsed_pps_;
};

// Round trip with a default-constructed PpsState.
TEST_F(PpsParserTest, ZeroPps) {
  RunTest();
}

TEST_F(PpsParserTest, MaxPps) {
  generated_pps_.bottom_field_pic_order_in_frame_present_flag = true;
  generated_pps_.pic_init_qp_minus26 = 25;
  generated_pps_.redundant_pic_cnt_present_flag = 1;  // 1 bit value.
  generated_pps_.weighted_bipred_idc = (1 << 2) - 1;  // 2 bit value.
+ generated_pps_.weighted_pred_flag = true; + generated_pps_.entropy_coding_mode_flag = true; + generated_pps_.id = 2; + generated_pps_.sps_id = 1; + RunTest(); + + generated_pps_.pic_init_qp_minus26 = -25; + RunTest(); +} + +TEST_F(PpsParserTest, PpsIdFromSlice) { + std::vector<H264::NaluIndex> nalu_indices = + H264::FindNaluIndices(kH264BitstreamChunk, sizeof(kH264BitstreamChunk)); + EXPECT_EQ(nalu_indices.size(), 3ull); + for (const auto& index : nalu_indices) { + H264::NaluType nalu_type = + H264::ParseNaluType(kH264BitstreamChunk[index.payload_start_offset]); + if (nalu_type == H264::NaluType::kIdr) { + absl::optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice( + kH264BitstreamChunk + index.payload_start_offset, index.payload_size); + EXPECT_EQ(pps_id, 0u); + break; + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h264/sps_parser.cc b/third_party/libwebrtc/common_video/h264/sps_parser.cc new file mode 100644 index 0000000000..e14334249c --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/sps_parser.cc @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "common_video/h264/sps_parser.h" + +#include <cstdint> +#include <vector> + +#include "common_video/h264/h264_common.h" +#include "rtc_base/bitstream_reader.h" + +namespace { +constexpr int kScalingDeltaMin = -128; // Lowest delta_scale accepted. +constexpr int kScalingDeltaMax = 127; // Highest delta_scale accepted. +} // namespace + +namespace webrtc { + +SpsParser::SpsState::SpsState() = default; +SpsParser::SpsState::SpsState(const SpsState&) = default; +SpsParser::SpsState::~SpsState() = default; + +// General note: this is based off the 02/2014 version of the H.264 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.264 + +// Unpack RBSP and parse SPS state from the supplied buffer. +absl::optional<SpsParser::SpsState> SpsParser::ParseSps(const uint8_t* data, + size_t length) { + std::vector<uint8_t> unpacked_buffer = H264::ParseRbsp(data, length); + BitstreamReader reader(unpacked_buffer); + return ParseSpsUpToVui(reader); +} + +absl::optional<SpsParser::SpsState> SpsParser::ParseSpsUpToVui( + BitstreamReader& reader) { + // Now, we need to use a bitstream reader to parse through the actual AVC SPS + // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the + // H.264 standard for a complete description. + // Since we only care about resolution, we ignore the majority of fields, but + // we still have to actively parse through a lot of the data, since many of + // the fields have variable size. + // We're particularly interested in: + // chroma_format_idc -> affects crop units + // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16). + // frame_crop_*_offset -> crop information + + SpsState sps; + + // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is + // 0. It defaults to 1, when not specified. + uint32_t chroma_format_idc = 1; + + // profile_idc: u(8). We need it to determine if we need to read/skip chroma + // formats. 
+ uint8_t profile_idc = reader.Read<uint8_t>(); + // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits + // 1 bit each for the flags + 2 bits + 8 bits for level_idc = 16 bits. + reader.ConsumeBits(16); + // seq_parameter_set_id: ue(v) + sps.id = reader.ReadExponentialGolomb(); + sps.separate_colour_plane_flag = 0; + // See if profile_idc has chroma format information. + if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || + profile_idc == 244 || profile_idc == 44 || profile_idc == 83 || + profile_idc == 86 || profile_idc == 118 || profile_idc == 128 || + profile_idc == 138 || profile_idc == 139 || profile_idc == 134) { + // chroma_format_idc: ue(v) + chroma_format_idc = reader.ReadExponentialGolomb(); + if (chroma_format_idc == 3) { + // separate_colour_plane_flag: u(1) + sps.separate_colour_plane_flag = reader.ReadBit(); + } + // bit_depth_luma_minus8: ue(v) + reader.ReadExponentialGolomb(); + // bit_depth_chroma_minus8: ue(v) + reader.ReadExponentialGolomb(); + // qpprime_y_zero_transform_bypass_flag: u(1) + reader.ConsumeBits(1); + // seq_scaling_matrix_present_flag: u(1) + if (reader.Read<bool>()) { + // Process the scaling lists just enough to be able to properly + // skip over them, so we can still read the resolution on streams + // where this is included. + int scaling_list_count = (chroma_format_idc == 3 ? 12 : 8); + for (int i = 0; i < scaling_list_count; ++i) { + // seq_scaling_list_present_flag[i] : u(1) + if (reader.Read<bool>()) { + int last_scale = 8; + int next_scale = 8; + int size_of_scaling_list = i < 6 ? 
16 : 64; + for (int j = 0; j < size_of_scaling_list; j++) { + if (next_scale != 0) { + // delta_scale: se(v) + int delta_scale = reader.ReadSignedExponentialGolomb(); + if (!reader.Ok() || delta_scale < kScalingDeltaMin || + delta_scale > kScalingDeltaMax) { + return absl::nullopt; + } + next_scale = (last_scale + delta_scale + 256) % 256; + } + if (next_scale != 0) + last_scale = next_scale; + } + } + } + } + } + // log2_max_frame_num and log2_max_pic_order_cnt_lsb are used with + // BitstreamReader::ReadBits, which can read at most 64 bits at a time. We + // also have to avoid overflow when adding 4 to the on-wire golomb value, + // e.g., for evil input data, ReadExponentialGolomb might return 0xfffc. + const uint32_t kMaxLog2Minus4 = 12; + + // log2_max_frame_num_minus4: ue(v) + uint32_t log2_max_frame_num_minus4 = reader.ReadExponentialGolomb(); + if (!reader.Ok() || log2_max_frame_num_minus4 > kMaxLog2Minus4) { + return absl::nullopt; + } + sps.log2_max_frame_num = log2_max_frame_num_minus4 + 4; + + // pic_order_cnt_type: ue(v) + sps.pic_order_cnt_type = reader.ReadExponentialGolomb(); + if (sps.pic_order_cnt_type == 0) { + // log2_max_pic_order_cnt_lsb_minus4: ue(v) + uint32_t log2_max_pic_order_cnt_lsb_minus4 = reader.ReadExponentialGolomb(); + if (!reader.Ok() || log2_max_pic_order_cnt_lsb_minus4 > kMaxLog2Minus4) { + return absl::nullopt; + } + sps.log2_max_pic_order_cnt_lsb = log2_max_pic_order_cnt_lsb_minus4 + 4; + } else if (sps.pic_order_cnt_type == 1) { + // delta_pic_order_always_zero_flag: u(1) + sps.delta_pic_order_always_zero_flag = reader.ReadBit(); + // offset_for_non_ref_pic: se(v) + reader.ReadExponentialGolomb(); + // offset_for_top_to_bottom_field: se(v) + reader.ReadExponentialGolomb(); + // num_ref_frames_in_pic_order_cnt_cycle: ue(v) + uint32_t num_ref_frames_in_pic_order_cnt_cycle = + reader.ReadExponentialGolomb(); + for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) { + // offset_for_ref_frame[i]: se(v) + 
reader.ReadExponentialGolomb(); + if (!reader.Ok()) { + return absl::nullopt; + } + } + } + // max_num_ref_frames: ue(v) + sps.max_num_ref_frames = reader.ReadExponentialGolomb(); + // gaps_in_frame_num_value_allowed_flag: u(1) + reader.ConsumeBits(1); + // + // IMPORTANT ONES! Now we're getting to resolution. First we read the pic + // width/height in macroblocks (16x16), which gives us the base resolution, + // and then we continue on until we hit the frame crop offsets, which are used + // to signify resolutions that aren't multiples of 16. + // + // pic_width_in_mbs_minus1: ue(v) + sps.width = 16 * (reader.ReadExponentialGolomb() + 1); + // pic_height_in_map_units_minus1: ue(v) + uint32_t pic_height_in_map_units_minus1 = reader.ReadExponentialGolomb(); + // frame_mbs_only_flag: u(1) + sps.frame_mbs_only_flag = reader.ReadBit(); + if (!sps.frame_mbs_only_flag) { + // mb_adaptive_frame_field_flag: u(1) + reader.ConsumeBits(1); + } + sps.height = + 16 * (2 - sps.frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1); + // direct_8x8_inference_flag: u(1) + reader.ConsumeBits(1); + // + // MORE IMPORTANT ONES! Now we're at the frame crop information. + // + uint32_t frame_crop_left_offset = 0; + uint32_t frame_crop_right_offset = 0; + uint32_t frame_crop_top_offset = 0; + uint32_t frame_crop_bottom_offset = 0; + // frame_cropping_flag: u(1) + if (reader.Read<bool>()) { + // frame_crop_{left, right, top, bottom}_offset: ue(v) + frame_crop_left_offset = reader.ReadExponentialGolomb(); + frame_crop_right_offset = reader.ReadExponentialGolomb(); + frame_crop_top_offset = reader.ReadExponentialGolomb(); + frame_crop_bottom_offset = reader.ReadExponentialGolomb(); + } + // vui_parameters_present_flag: u(1) + sps.vui_params_present = reader.ReadBit(); + + // Far enough! We don't use the rest of the SPS. + if (!reader.Ok()) { + return absl::nullopt; + } + + // Figure out the crop units in pixels. 
That's based on the chroma format's + // sampling, which is indicated by chroma_format_idc. + if (sps.separate_colour_plane_flag || chroma_format_idc == 0) { + frame_crop_bottom_offset *= (2 - sps.frame_mbs_only_flag); + frame_crop_top_offset *= (2 - sps.frame_mbs_only_flag); + } else if (!sps.separate_colour_plane_flag && chroma_format_idc > 0) { + // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2). + if (chroma_format_idc == 1 || chroma_format_idc == 2) { + frame_crop_left_offset *= 2; + frame_crop_right_offset *= 2; + } + // Height multipliers for format 1 (4:2:0). + if (chroma_format_idc == 1) { + frame_crop_top_offset *= 2; + frame_crop_bottom_offset *= 2; + } + } + // Subtract the crop for each dimension. + sps.width -= (frame_crop_left_offset + frame_crop_right_offset); + sps.height -= (frame_crop_top_offset + frame_crop_bottom_offset); + + return sps; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h264/sps_parser.h b/third_party/libwebrtc/common_video/h264/sps_parser.h new file mode 100644 index 0000000000..da328b48b0 --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/sps_parser.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H264_SPS_PARSER_H_ +#define COMMON_VIDEO_H264_SPS_PARSER_H_ + +#include "absl/types/optional.h" +#include "rtc_base/bitstream_reader.h" + +namespace webrtc { + +// A class for parsing out sequence parameter set (SPS) data from an H264 NALU. +class SpsParser { + public: + // The parsed state of the SPS. Only some select values are stored. + // Add more as they are actually needed. 
+ struct SpsState { + SpsState(); + SpsState(const SpsState&); + ~SpsState(); + + uint32_t width = 0; + uint32_t height = 0; + uint32_t delta_pic_order_always_zero_flag = 0; + uint32_t separate_colour_plane_flag = 0; + uint32_t frame_mbs_only_flag = 0; + uint32_t log2_max_frame_num = 4; // Smallest valid value. + uint32_t log2_max_pic_order_cnt_lsb = 4; // Smallest valid value. + uint32_t pic_order_cnt_type = 0; + uint32_t max_num_ref_frames = 0; + uint32_t vui_params_present = 0; + uint32_t id = 0; + }; + + // Unpack RBSP and parse SPS state from the supplied buffer. + static absl::optional<SpsState> ParseSps(const uint8_t* data, size_t length); + + protected: + // Parse the SPS state, up till the VUI part, for a buffer where RBSP + // decoding has already been performed. + static absl::optional<SpsState> ParseSpsUpToVui(BitstreamReader& reader); +}; + +} // namespace webrtc +#endif // COMMON_VIDEO_H264_SPS_PARSER_H_ diff --git a/third_party/libwebrtc/common_video/h264/sps_parser_unittest.cc b/third_party/libwebrtc/common_video/h264/sps_parser_unittest.cc new file mode 100644 index 0000000000..c9326e4b28 --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/sps_parser_unittest.cc @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h264/sps_parser.h" + +#include "common_video/h264/h264_common.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/buffer.h" +#include "test/gtest.h" + +namespace webrtc { + +// Example SPS can be generated with ffmpeg. 
Here's an example set of commands, +// runnable on OS X: +// 1) Generate a video, from the camera: +// ffmpeg -f avfoundation -i "0" -video_size 640x360 camera.mov +// +// 2) Scale the video to the desired size: +// ffmpeg -i camera.mov -vf scale=640x360 scaled.mov +// +// 3) Get just the H.264 bitstream in AnnexB: +// ffmpeg -i scaled.mov -vcodec copy -vbsf h264_mp4toannexb -an out.h264 +// +// 4) Open out.h264 and find the SPS, generally everything between the first +// two start codes (0 0 0 1 or 0 0 1). The first byte should be 0x67, +// which should be stripped out before being passed to the parser. + +static const size_t kSpsBufferMaxSize = 256; + +// Generates a fake SPS with basically everything empty but the width/height. +// Pass in a buffer of at least kSpsBufferMaxSize. +// The fake SPS that this generates also always has at least one emulation byte +// at offset 2, since the first two bytes are always 0, and has a 0x3 as the +// level_idc, to make sure the parser doesn't eat all 0x3 bytes. +void GenerateFakeSps(uint16_t width, + uint16_t height, + int id, + uint32_t log2_max_frame_num_minus4, + uint32_t log2_max_pic_order_cnt_lsb_minus4, + rtc::Buffer* out_buffer) { + uint8_t rbsp[kSpsBufferMaxSize] = {0}; + rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize); + // Profile byte. + writer.WriteUInt8(0); + // Constraint sets and reserved zero bits. + writer.WriteUInt8(0); + // level_idc. + writer.WriteUInt8(0x3u); + // seq_parameter_set_id. + writer.WriteExponentialGolomb(id); + // Profile is not special, so we skip all the chroma format settings. + + // Now some bit magic. + // log2_max_frame_num_minus4: ue(v). + writer.WriteExponentialGolomb(log2_max_frame_num_minus4); + // pic_order_cnt_type: ue(v). 0 is the type we want. + writer.WriteExponentialGolomb(0); + // log2_max_pic_order_cnt_lsb_minus4: ue(v). Supplied by the caller. + writer.WriteExponentialGolomb(log2_max_pic_order_cnt_lsb_minus4); + // max_num_ref_frames: ue(v). 0 is fine. 
+ writer.WriteExponentialGolomb(0); + // gaps_in_frame_num_value_allowed_flag: u(1). + writer.WriteBits(0, 1); + // Next are width/height. First, calculate the mbs/map_units versions. + uint16_t width_in_mbs_minus1 = (width + 15) / 16 - 1; + + // For the height, we're going to define frame_mbs_only_flag, so we need to + // divide by 2. See the parser for the full calculation. + uint16_t height_in_map_units_minus1 = ((height + 15) / 16 - 1) / 2; + // Write each as ue(v). + writer.WriteExponentialGolomb(width_in_mbs_minus1); + writer.WriteExponentialGolomb(height_in_map_units_minus1); + // frame_mbs_only_flag: u(1). Needs to be false. + writer.WriteBits(0, 1); + // mb_adaptive_frame_field_flag: u(1). + writer.WriteBits(0, 1); + // direct_8x8_inference_flag: u(1). + writer.WriteBits(0, 1); + // frame_cropping_flag: u(1). 1, so we can supply crop. + writer.WriteBits(1, 1); + // Now we write the left/right/top/bottom crop. For simplicity, we'll put all + // the crop at the left/top. + // We picked a 4:2:0 format, so the crops are 1/2 the pixel crop values. + // Left/right. + writer.WriteExponentialGolomb(((16 - (width % 16)) % 16) / 2); + writer.WriteExponentialGolomb(0); + // Top/bottom. + writer.WriteExponentialGolomb(((16 - (height % 16)) % 16) / 2); + writer.WriteExponentialGolomb(0); + + // vui_parameters_present_flag: u(1) + writer.WriteBits(0, 1); + + // Get the number of bytes written (including the last partial byte). + size_t byte_count, bit_offset; + writer.GetCurrentOffset(&byte_count, &bit_offset); + if (bit_offset > 0) { + byte_count++; + } + + out_buffer->Clear(); + H264::WriteRbsp(rbsp, byte_count, out_buffer); +} + +TEST(H264SpsParserTest, TestSampleSPSHdLandscape) { + // SPS for a 1280x720 camera capture from ffmpeg on osx. Contains + // emulation bytes but no cropping. 
+ const uint8_t buffer[] = {0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05, + 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00, + 0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60}; + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer, arraysize(buffer)); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(1280u, sps->width); + EXPECT_EQ(720u, sps->height); +} + +TEST(H264SpsParserTest, TestSampleSPSVgaLandscape) { + // SPS for a 640x360 camera capture from ffmpeg on osx. Contains emulation + // bytes and cropping (360 isn't divisible by 16). + const uint8_t buffer[] = {0x7A, 0x00, 0x1E, 0xBC, 0xD9, 0x40, 0xA0, 0x2F, + 0xF8, 0x98, 0x40, 0x00, 0x00, 0x03, 0x01, 0x80, + 0x00, 0x00, 0x56, 0x83, 0xC5, 0x8B, 0x65, 0x80}; + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer, arraysize(buffer)); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(640u, sps->width); + EXPECT_EQ(360u, sps->height); +} + +TEST(H264SpsParserTest, TestSampleSPSWeirdResolution) { + // SPS for a 200x400 camera capture from ffmpeg on osx. Horizontal and + // vertical crop (neither dimension is divisible by 16). 
+ const uint8_t buffer[] = {0x7A, 0x00, 0x0D, 0xBC, 0xD9, 0x43, 0x43, 0x3E, + 0x5E, 0x10, 0x00, 0x00, 0x03, 0x00, 0x60, 0x00, + 0x00, 0x15, 0xA0, 0xF1, 0x42, 0x99, 0x60}; + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer, arraysize(buffer)); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(200u, sps->width); + EXPECT_EQ(400u, sps->height); +} + +TEST(H264SpsParserTest, TestSyntheticSPSQvgaLandscape) { + rtc::Buffer buffer; + GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer); + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->id); +} + +TEST(H264SpsParserTest, TestSyntheticSPSWeirdResolution) { + rtc::Buffer buffer; + GenerateFakeSps(156u, 122u, 2, 0, 0, &buffer); + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(156u, sps->width); + EXPECT_EQ(122u, sps->height); + EXPECT_EQ(2u, sps->id); +} + +TEST(H264SpsParserTest, TestSampleSPSWithScalingLists) { + // SPS from a 1920x1080 video. Contains scaling lists (and vertical cropping). 
+ const uint8_t buffer[] = {0x64, 0x00, 0x2a, 0xad, 0x84, 0x01, 0x0c, 0x20, + 0x08, 0x61, 0x00, 0x43, 0x08, 0x02, 0x18, 0x40, + 0x10, 0xc2, 0x00, 0x84, 0x3b, 0x50, 0x3c, 0x01, + 0x13, 0xf2, 0xcd, 0xc0, 0x40, 0x40, 0x50, 0x00, + 0x00, 0x00, 0x10, 0x00, 0x00, 0x01, 0xe8, 0x40}; + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer, arraysize(buffer)); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(1920u, sps->width); + EXPECT_EQ(1080u, sps->height); +} + +TEST(H264SpsParserTest, TestLog2MaxFrameNumMinus4) { + rtc::Buffer buffer; + GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer); + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->id); + EXPECT_EQ(4u, sps->log2_max_frame_num); + + GenerateFakeSps(320u, 180u, 1, 12, 0, &buffer); + sps = SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->id); + EXPECT_EQ(16u, sps->log2_max_frame_num); + + GenerateFakeSps(320u, 180u, 1, 13, 0, &buffer); + EXPECT_FALSE(SpsParser::ParseSps(buffer.data(), buffer.size())); +} + +TEST(H264SpsParserTest, TestLog2MaxPicOrderCntMinus4) { + rtc::Buffer buffer; + GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer); + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->id); + EXPECT_EQ(4u, sps->log2_max_pic_order_cnt_lsb); + + GenerateFakeSps(320u, 180u, 1, 0, 12, &buffer); + sps = SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); // Fatal: sps is dereferenced just below. + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->id); + EXPECT_EQ(16u, sps->log2_max_pic_order_cnt_lsb); + + GenerateFakeSps(320u, 180u, 1, 0, 13, &buffer); + 
EXPECT_FALSE(SpsParser::ParseSps(buffer.data(), buffer.size())); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h264/sps_vui_rewriter.cc b/third_party/libwebrtc/common_video/h264/sps_vui_rewriter.cc new file mode 100644 index 0000000000..117e92a1e5 --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/sps_vui_rewriter.cc @@ -0,0 +1,611 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#include "common_video/h264/sps_vui_rewriter.h" + +#include <string.h> + +#include <algorithm> +#include <cstdint> +#include <vector> + +#include "api/video/color_space.h" +#include "common_video/h264/h264_common.h" +#include "common_video/h264/sps_parser.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/bitstream_reader.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +namespace { + +// The maximum expected growth from adding a VUI to the SPS. It's actually +// closer to 24 or so, but better safe than sorry. 
+const size_t kMaxVuiSpsIncrease = 64; + +const char* kSpsValidHistogramName = "WebRTC.Video.H264.SpsValid"; +enum SpsValidEvent { + kReceivedSpsVuiOk = 1, + kReceivedSpsRewritten = 2, + kReceivedSpsParseFailure = 3, + kSentSpsPocOk = 4, + kSentSpsVuiOk = 5, + kSentSpsRewritten = 6, + kSentSpsParseFailure = 7, + kSpsRewrittenMax = 8 +}; + +#define RETURN_FALSE_ON_FAIL(x) \ + do { \ + if (!(x)) { \ + RTC_LOG_F(LS_ERROR) << " (line:" << __LINE__ << ") FAILED: " #x; \ + return false; \ + } \ + } while (0) + +uint8_t CopyUInt8(BitstreamReader& source, rtc::BitBufferWriter& destination) { + uint8_t tmp = source.Read<uint8_t>(); + if (!destination.WriteUInt8(tmp)) { + source.Invalidate(); + } + return tmp; +} + +uint32_t CopyExpGolomb(BitstreamReader& source, + rtc::BitBufferWriter& destination) { + uint32_t tmp = source.ReadExponentialGolomb(); + if (!destination.WriteExponentialGolomb(tmp)) { + source.Invalidate(); + } + return tmp; +} + +uint32_t CopyBits(int bits, + BitstreamReader& source, + rtc::BitBufferWriter& destination) { + RTC_DCHECK_GT(bits, 0); + RTC_DCHECK_LE(bits, 32); + uint64_t tmp = source.ReadBits(bits); + if (!destination.WriteBits(tmp, bits)) { + source.Invalidate(); + } + return tmp; +} + +bool CopyAndRewriteVui(const SpsParser::SpsState& sps, + BitstreamReader& source, + rtc::BitBufferWriter& destination, + const webrtc::ColorSpace* color_space, + SpsVuiRewriter::ParseResult& out_vui_rewritten); + +void CopyHrdParameters(BitstreamReader& source, + rtc::BitBufferWriter& destination); +bool AddBitstreamRestriction(rtc::BitBufferWriter* destination, + uint32_t max_num_ref_frames); +bool IsDefaultColorSpace(const ColorSpace& color_space); +bool AddVideoSignalTypeInfo(rtc::BitBufferWriter& destination, + const ColorSpace& color_space); +bool CopyOrRewriteVideoSignalTypeInfo( + BitstreamReader& source, + rtc::BitBufferWriter& destination, + const ColorSpace* color_space, + SpsVuiRewriter::ParseResult& out_vui_rewritten); +bool 
CopyRemainingBits(BitstreamReader& source, + rtc::BitBufferWriter& destination); +} // namespace + +void SpsVuiRewriter::UpdateStats(ParseResult result, Direction direction) { + switch (result) { + case SpsVuiRewriter::ParseResult::kVuiRewritten: + RTC_HISTOGRAM_ENUMERATION( + kSpsValidHistogramName, + direction == SpsVuiRewriter::Direction::kIncoming + ? SpsValidEvent::kReceivedSpsRewritten + : SpsValidEvent::kSentSpsRewritten, + SpsValidEvent::kSpsRewrittenMax); + break; + case SpsVuiRewriter::ParseResult::kVuiOk: + RTC_HISTOGRAM_ENUMERATION( + kSpsValidHistogramName, + direction == SpsVuiRewriter::Direction::kIncoming + ? SpsValidEvent::kReceivedSpsVuiOk + : SpsValidEvent::kSentSpsVuiOk, + SpsValidEvent::kSpsRewrittenMax); + break; + case SpsVuiRewriter::ParseResult::kFailure: + RTC_HISTOGRAM_ENUMERATION( + kSpsValidHistogramName, + direction == SpsVuiRewriter::Direction::kIncoming + ? SpsValidEvent::kReceivedSpsParseFailure + : SpsValidEvent::kSentSpsParseFailure, + SpsValidEvent::kSpsRewrittenMax); + break; + } +} + +SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps( + const uint8_t* buffer, + size_t length, + absl::optional<SpsParser::SpsState>* sps, + const webrtc::ColorSpace* color_space, + rtc::Buffer* destination) { + // Create temporary RBSP decoded buffer of the payload (excluding the + // leading nalu type header byte; the SpsParser uses only the payload). + std::vector<uint8_t> rbsp_buffer = H264::ParseRbsp(buffer, length); + BitstreamReader source_buffer(rbsp_buffer); + absl::optional<SpsParser::SpsState> sps_state = + SpsParser::ParseSpsUpToVui(source_buffer); + if (!sps_state) + return ParseResult::kFailure; + + *sps = sps_state; + + // We're going to completely muck up alignment, so we need a BitBufferWriter + // to write with. + rtc::Buffer out_buffer(length + kMaxVuiSpsIncrease); + rtc::BitBufferWriter sps_writer(out_buffer.data(), out_buffer.size()); + + // Check how far the SpsParser has read, and copy that data in bulk. 
+ RTC_DCHECK(source_buffer.Ok()); + size_t total_bit_offset = + rbsp_buffer.size() * 8 - source_buffer.RemainingBitCount(); + size_t byte_offset = total_bit_offset / 8; + size_t bit_offset = total_bit_offset % 8; + memcpy(out_buffer.data(), rbsp_buffer.data(), + byte_offset + (bit_offset > 0 ? 1 : 0)); // OK to copy the last bits. + + // SpsParser will have read the vui_params_present flag, which we want to + // modify, so back off a bit; + if (bit_offset == 0) { + --byte_offset; + bit_offset = 7; + } else { + --bit_offset; + } + sps_writer.Seek(byte_offset, bit_offset); + + ParseResult vui_updated; + if (!CopyAndRewriteVui(*sps_state, source_buffer, sps_writer, color_space, + vui_updated)) { + RTC_LOG(LS_ERROR) << "Failed to parse/copy SPS VUI."; + return ParseResult::kFailure; + } + + if (vui_updated == ParseResult::kVuiOk) { + // No update necessary after all, just return. + return vui_updated; + } + + if (!CopyRemainingBits(source_buffer, sps_writer)) { + RTC_LOG(LS_ERROR) << "Failed to parse/copy SPS VUI."; + return ParseResult::kFailure; + } + + // Pad up to next byte with zero bits. 
+ sps_writer.GetCurrentOffset(&byte_offset, &bit_offset); + if (bit_offset > 0) { + sps_writer.WriteBits(0, 8 - bit_offset); + ++byte_offset; + bit_offset = 0; + } + + RTC_DCHECK(byte_offset <= length + kMaxVuiSpsIncrease); + RTC_CHECK(destination != nullptr); + + out_buffer.SetSize(byte_offset); + + // Write updates SPS to destination with added RBSP + H264::WriteRbsp(out_buffer.data(), out_buffer.size(), destination); + + return ParseResult::kVuiRewritten; +} + +SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps( + const uint8_t* buffer, + size_t length, + absl::optional<SpsParser::SpsState>* sps, + const webrtc::ColorSpace* color_space, + rtc::Buffer* destination, + Direction direction) { + ParseResult result = + ParseAndRewriteSps(buffer, length, sps, color_space, destination); + UpdateStats(result, direction); + return result; +} + +rtc::Buffer SpsVuiRewriter::ParseOutgoingBitstreamAndRewrite( + rtc::ArrayView<const uint8_t> buffer, + const webrtc::ColorSpace* color_space) { + std::vector<H264::NaluIndex> nalus = + H264::FindNaluIndices(buffer.data(), buffer.size()); + + // Allocate some extra space for potentially adding a missing VUI. + rtc::Buffer output_buffer(/*size=*/0, /*capacity=*/buffer.size() + + nalus.size() * kMaxVuiSpsIncrease); + + for (const H264::NaluIndex& nalu : nalus) { + // Copy NAL unit start code. + const uint8_t* start_code_ptr = buffer.data() + nalu.start_offset; + const size_t start_code_length = + nalu.payload_start_offset - nalu.start_offset; + const uint8_t* nalu_ptr = buffer.data() + nalu.payload_start_offset; + const size_t nalu_length = nalu.payload_size; + + if (H264::ParseNaluType(nalu_ptr[0]) == H264::NaluType::kSps) { + // Check if stream uses picture order count type 0, and if so rewrite it + // to enable faster decoding. Streams in that format incur additional + // delay because it allows decode order to differ from render order. 
+ // The mechanism used is to rewrite (edit or add) the SPS's VUI to contain + // restrictions on the maximum number of reordered pictures. This reduces + // latency significantly, though it still adds about a frame of latency to + // decoding. + // Note that we do this rewriting both here (send side, in order to + // protect legacy receive clients) and in RtpDepacketizerH264::ParseSingleNalu + // (receive side, in order to protect us from unknown or legacy send + // clients). + absl::optional<SpsParser::SpsState> sps; + rtc::Buffer output_nalu; + + // Add the type header to the output buffer first, so that the rewriter + // can append modified payload on top of that. + output_nalu.AppendData(nalu_ptr[0]); + + ParseResult result = ParseAndRewriteSps( + nalu_ptr + H264::kNaluTypeSize, nalu_length - H264::kNaluTypeSize, + &sps, color_space, &output_nalu, Direction::kOutgoing); + if (result == ParseResult::kVuiRewritten) { + output_buffer.AppendData(start_code_ptr, start_code_length); + output_buffer.AppendData(output_nalu.data(), output_nalu.size()); + continue; + } + } else if (H264::ParseNaluType(nalu_ptr[0]) == H264::NaluType::kAud) { + // Skip the access unit delimiter copy. + continue; + } + + // vui wasn't rewritten and it is not aud, copy the nal unit as is. + output_buffer.AppendData(start_code_ptr, start_code_length); + output_buffer.AppendData(nalu_ptr, nalu_length); + } + return output_buffer; +} + +namespace { +bool CopyAndRewriteVui(const SpsParser::SpsState& sps, + BitstreamReader& source, + rtc::BitBufferWriter& destination, + const webrtc::ColorSpace* color_space, + SpsVuiRewriter::ParseResult& out_vui_rewritten) { + out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiOk; + + // + // vui_parameters_present_flag: u(1) + // + RETURN_FALSE_ON_FAIL(destination.WriteBits(1, 1)); + + // ********* IMPORTANT! 
********** + // Now we're at the VUI, so we want to (1) add it if it isn't present, and + // (2) rewrite frame reordering values so no reordering is allowed. + if (!sps.vui_params_present) { + // Write a simple VUI with the parameters we want and 0 for all other flags. + + // aspect_ratio_info_present_flag, overscan_info_present_flag. Both u(1). + RETURN_FALSE_ON_FAIL(destination.WriteBits(0, 2)); + + uint32_t video_signal_type_present_flag = + (color_space && !IsDefaultColorSpace(*color_space)) ? 1 : 0; + RETURN_FALSE_ON_FAIL( + destination.WriteBits(video_signal_type_present_flag, 1)); + if (video_signal_type_present_flag) { + RETURN_FALSE_ON_FAIL(AddVideoSignalTypeInfo(destination, *color_space)); + } + // chroma_loc_info_present_flag, timing_info_present_flag, + // nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag, + // pic_struct_present_flag, All u(1) + RETURN_FALSE_ON_FAIL(destination.WriteBits(0, 5)); + // bitstream_restriction_flag: u(1) + RETURN_FALSE_ON_FAIL(destination.WriteBits(1, 1)); + RETURN_FALSE_ON_FAIL( + AddBitstreamRestriction(&destination, sps.max_num_ref_frames)); + + out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiRewritten; + } else { + // Parse out the full VUI. + // aspect_ratio_info_present_flag: u(1) + uint32_t aspect_ratio_info_present_flag = CopyBits(1, source, destination); + if (aspect_ratio_info_present_flag) { + // aspect_ratio_idc: u(8) + uint8_t aspect_ratio_idc = CopyUInt8(source, destination); + if (aspect_ratio_idc == 255u) { // Extended_SAR + // sar_width/sar_height: u(16) each. 
+ CopyBits(32, source, destination); + } + } + // overscan_info_present_flag: u(1) + uint32_t overscan_info_present_flag = CopyBits(1, source, destination); + if (overscan_info_present_flag) { + // overscan_appropriate_flag: u(1) + CopyBits(1, source, destination); + } + + CopyOrRewriteVideoSignalTypeInfo(source, destination, color_space, + out_vui_rewritten); + + // chroma_loc_info_present_flag: u(1) + uint32_t chroma_loc_info_present_flag = CopyBits(1, source, destination); + if (chroma_loc_info_present_flag == 1) { + // chroma_sample_loc_type_(top|bottom)_field: ue(v) each. + CopyExpGolomb(source, destination); + CopyExpGolomb(source, destination); + } + // timing_info_present_flag: u(1) + uint32_t timing_info_present_flag = CopyBits(1, source, destination); + if (timing_info_present_flag == 1) { + // num_units_in_tick, time_scale: u(32) each + CopyBits(32, source, destination); + CopyBits(32, source, destination); + // fixed_frame_rate_flag: u(1) + CopyBits(1, source, destination); + } + // nal_hrd_parameters_present_flag: u(1) + uint32_t nal_hrd_parameters_present_flag = CopyBits(1, source, destination); + if (nal_hrd_parameters_present_flag == 1) { + CopyHrdParameters(source, destination); + } + // vcl_hrd_parameters_present_flag: u(1) + uint32_t vcl_hrd_parameters_present_flag = CopyBits(1, source, destination); + if (vcl_hrd_parameters_present_flag == 1) { + CopyHrdParameters(source, destination); + } + if (nal_hrd_parameters_present_flag == 1 || + vcl_hrd_parameters_present_flag == 1) { + // low_delay_hrd_flag: u(1) + CopyBits(1, source, destination); + } + // pic_struct_present_flag: u(1) + CopyBits(1, source, destination); + + // bitstream_restriction_flag: u(1) + uint32_t bitstream_restriction_flag = source.ReadBit(); + RETURN_FALSE_ON_FAIL(destination.WriteBits(1, 1)); + if (bitstream_restriction_flag == 0) { + // We're adding one from scratch. 
+ RETURN_FALSE_ON_FAIL( + AddBitstreamRestriction(&destination, sps.max_num_ref_frames)); + out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiRewritten; + } else { + // We're replacing. + // motion_vectors_over_pic_boundaries_flag: u(1) + CopyBits(1, source, destination); + // max_bytes_per_pic_denom: ue(v) + CopyExpGolomb(source, destination); + // max_bits_per_mb_denom: ue(v) + CopyExpGolomb(source, destination); + // log2_max_mv_length_horizontal: ue(v) + CopyExpGolomb(source, destination); + // log2_max_mv_length_vertical: ue(v) + CopyExpGolomb(source, destination); + // ********* IMPORTANT! ********** + // The next two are the ones we need to set to low numbers: + // max_num_reorder_frames: ue(v) + // max_dec_frame_buffering: ue(v) + // However, if they are already set to no greater than the numbers we + // want, then we don't need to be rewriting. + uint32_t max_num_reorder_frames = source.ReadExponentialGolomb(); + uint32_t max_dec_frame_buffering = source.ReadExponentialGolomb(); + RETURN_FALSE_ON_FAIL(destination.WriteExponentialGolomb(0)); + RETURN_FALSE_ON_FAIL( + destination.WriteExponentialGolomb(sps.max_num_ref_frames)); + if (max_num_reorder_frames != 0 || + max_dec_frame_buffering > sps.max_num_ref_frames) { + out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiRewritten; + } + } + } + return source.Ok(); +} + +// Copies a VUI HRD parameters segment. 
+void CopyHrdParameters(BitstreamReader& source, + rtc::BitBufferWriter& destination) { + // cbp_cnt_minus1: ue(v) + uint32_t cbp_cnt_minus1 = CopyExpGolomb(source, destination); + // bit_rate_scale and cbp_size_scale: u(4) each + CopyBits(8, source, destination); + for (size_t i = 0; source.Ok() && i <= cbp_cnt_minus1; ++i) { + // bit_rate_value_minus1 and cbp_size_value_minus1: ue(v) each + CopyExpGolomb(source, destination); + CopyExpGolomb(source, destination); + // cbr_flag: u(1) + CopyBits(1, source, destination); + } + // initial_cbp_removal_delay_length_minus1: u(5) + // cbp_removal_delay_length_minus1: u(5) + // dbp_output_delay_length_minus1: u(5) + // time_offset_length: u(5) + CopyBits(5 * 4, source, destination); +} + +// These functions are similar to webrtc::H264SpsParser::Parse, and based on the +// same version of the H.264 standard. You can find it here: +// http://www.itu.int/rec/T-REC-H.264 + +// Adds a bitstream restriction VUI segment. +bool AddBitstreamRestriction(rtc::BitBufferWriter* destination, + uint32_t max_num_ref_frames) { + // motion_vectors_over_pic_boundaries_flag: u(1) + // Default is 1 when not present. + RETURN_FALSE_ON_FAIL(destination->WriteBits(1, 1)); + // max_bytes_per_pic_denom: ue(v) + // Default is 2 when not present. + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(2)); + // max_bits_per_mb_denom: ue(v) + // Default is 1 when not present. + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(1)); + // log2_max_mv_length_horizontal: ue(v) + // log2_max_mv_length_vertical: ue(v) + // Both default to 16 when not present. + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(16)); + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(16)); + + // ********* IMPORTANT! 
********** + // max_num_reorder_frames: ue(v) + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(0)); + // max_dec_frame_buffering: ue(v) + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(max_num_ref_frames)); + return true; +} + +bool IsDefaultColorSpace(const ColorSpace& color_space) { + return color_space.range() != ColorSpace::RangeID::kFull && + color_space.primaries() == ColorSpace::PrimaryID::kUnspecified && + color_space.transfer() == ColorSpace::TransferID::kUnspecified && + color_space.matrix() == ColorSpace::MatrixID::kUnspecified; +} + +bool AddVideoSignalTypeInfo(rtc::BitBufferWriter& destination, + const ColorSpace& color_space) { + // video_format: u(3). + RETURN_FALSE_ON_FAIL(destination.WriteBits(5, 3)); // 5 = Unspecified + // video_full_range_flag: u(1) + RETURN_FALSE_ON_FAIL(destination.WriteBits( + color_space.range() == ColorSpace::RangeID::kFull ? 1 : 0, 1)); + // colour_description_present_flag: u(1) + RETURN_FALSE_ON_FAIL(destination.WriteBits(1, 1)); + // colour_primaries: u(8) + RETURN_FALSE_ON_FAIL( + destination.WriteUInt8(static_cast<uint8_t>(color_space.primaries()))); + // transfer_characteristics: u(8) + RETURN_FALSE_ON_FAIL( + destination.WriteUInt8(static_cast<uint8_t>(color_space.transfer()))); + // matrix_coefficients: u(8) + RETURN_FALSE_ON_FAIL( + destination.WriteUInt8(static_cast<uint8_t>(color_space.matrix()))); + return true; +} + +bool CopyOrRewriteVideoSignalTypeInfo( + BitstreamReader& source, + rtc::BitBufferWriter& destination, + const ColorSpace* color_space, + SpsVuiRewriter::ParseResult& out_vui_rewritten) { + // Read. 
+ uint32_t video_format = 5; // H264 default: unspecified + uint32_t video_full_range_flag = 0; // H264 default: limited + uint32_t colour_description_present_flag = 0; + uint8_t colour_primaries = 3; // H264 default: unspecified + uint8_t transfer_characteristics = 3; // H264 default: unspecified + uint8_t matrix_coefficients = 3; // H264 default: unspecified + uint32_t video_signal_type_present_flag = source.ReadBit(); + if (video_signal_type_present_flag) { + video_format = source.ReadBits(3); + video_full_range_flag = source.ReadBit(); + colour_description_present_flag = source.ReadBit(); + if (colour_description_present_flag) { + colour_primaries = source.Read<uint8_t>(); + transfer_characteristics = source.Read<uint8_t>(); + matrix_coefficients = source.Read<uint8_t>(); + } + } + RETURN_FALSE_ON_FAIL(source.Ok()); + + // Update. + uint32_t video_signal_type_present_flag_override = + video_signal_type_present_flag; + uint32_t video_format_override = video_format; + uint32_t video_full_range_flag_override = video_full_range_flag; + uint32_t colour_description_present_flag_override = + colour_description_present_flag; + uint8_t colour_primaries_override = colour_primaries; + uint8_t transfer_characteristics_override = transfer_characteristics; + uint8_t matrix_coefficients_override = matrix_coefficients; + if (color_space) { + if (IsDefaultColorSpace(*color_space)) { + video_signal_type_present_flag_override = 0; + } else { + video_signal_type_present_flag_override = 1; + video_format_override = 5; // unspecified + + if (color_space->range() == ColorSpace::RangeID::kFull) { + video_full_range_flag_override = 1; + } else { + // ColorSpace::RangeID::kInvalid and kDerived are treated as limited. 
+ video_full_range_flag_override = 0; + } + + colour_description_present_flag_override = + color_space->primaries() != ColorSpace::PrimaryID::kUnspecified || + color_space->transfer() != ColorSpace::TransferID::kUnspecified || + color_space->matrix() != ColorSpace::MatrixID::kUnspecified; + colour_primaries_override = + static_cast<uint8_t>(color_space->primaries()); + transfer_characteristics_override = + static_cast<uint8_t>(color_space->transfer()); + matrix_coefficients_override = + static_cast<uint8_t>(color_space->matrix()); + } + } + + // Write. + RETURN_FALSE_ON_FAIL( + destination.WriteBits(video_signal_type_present_flag_override, 1)); + if (video_signal_type_present_flag_override) { + RETURN_FALSE_ON_FAIL(destination.WriteBits(video_format_override, 3)); + RETURN_FALSE_ON_FAIL( + destination.WriteBits(video_full_range_flag_override, 1)); + RETURN_FALSE_ON_FAIL( + destination.WriteBits(colour_description_present_flag_override, 1)); + if (colour_description_present_flag_override) { + RETURN_FALSE_ON_FAIL(destination.WriteUInt8(colour_primaries_override)); + RETURN_FALSE_ON_FAIL( + destination.WriteUInt8(transfer_characteristics_override)); + RETURN_FALSE_ON_FAIL( + destination.WriteUInt8(matrix_coefficients_override)); + } + } + + if (video_signal_type_present_flag_override != + video_signal_type_present_flag || + video_format_override != video_format || + video_full_range_flag_override != video_full_range_flag || + colour_description_present_flag_override != + colour_description_present_flag || + colour_primaries_override != colour_primaries || + transfer_characteristics_override != transfer_characteristics || + matrix_coefficients_override != matrix_coefficients) { + out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiRewritten; + } + + return true; +} + +bool CopyRemainingBits(BitstreamReader& source, + rtc::BitBufferWriter& destination) { + // Try to get at least the destination aligned. 
+ if (source.RemainingBitCount() > 0 && source.RemainingBitCount() % 8 != 0) { + size_t misaligned_bits = source.RemainingBitCount() % 8; + CopyBits(misaligned_bits, source, destination); + } + while (source.RemainingBitCount() > 0) { + int count = std::min(32, source.RemainingBitCount()); + CopyBits(count, source, destination); + } + // TODO(noahric): The last byte could be all zeroes now, which we should just + // strip. + return source.Ok(); +} + +} // namespace + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h264/sps_vui_rewriter.h b/third_party/libwebrtc/common_video/h264/sps_vui_rewriter.h new file mode 100644 index 0000000000..ef80d5b60e --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/sps_vui_rewriter.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#ifndef COMMON_VIDEO_H264_SPS_VUI_REWRITER_H_ +#define COMMON_VIDEO_H264_SPS_VUI_REWRITER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "absl/types/optional.h" +#include "api/video/color_space.h" +#include "common_video/h264/sps_parser.h" +#include "rtc_base/buffer.h" + +namespace webrtc { + +// A class that can parse an SPS+VUI and if necessary creates a copy with +// updated parameters. +// The rewriter disables frame buffering. This should force decoders to deliver +// decoded frame immediately and, thus, reduce latency. +// The rewriter updates video signal type parameters if external parameters are +// provided. 
+class SpsVuiRewriter : private SpsParser { + public: + enum class ParseResult { kFailure, kVuiOk, kVuiRewritten }; + enum class Direction { kIncoming, kOutgoing }; + + // Parses an SPS block and if necessary copies it and rewrites the VUI. + // Returns kFailure on failure, kVuiOk if parsing succeeded and no update + // was necessary and kVuiRewritten if an updated copy of buffer was + // written to destination. destination may be populated with some data even if + // no rewrite was necessary, but the end offset should remain unchanged. + // Unless parsing fails, the sps parameter will be populated with the parsed + // SPS state. This function assumes that any previous headers + // (NALU start, type, Stap-A, etc) have already been parsed and that RBSP + // decoding has been performed. + static ParseResult ParseAndRewriteSps( + const uint8_t* buffer, + size_t length, + absl::optional<SpsParser::SpsState>* sps, + const ColorSpace* color_space, + rtc::Buffer* destination, + Direction Direction); + + // Parses NAL units from `buffer`, strips AUD blocks and rewrites VUI in SPS + // blocks if necessary. + static rtc::Buffer ParseOutgoingBitstreamAndRewrite( + rtc::ArrayView<const uint8_t> buffer, + const ColorSpace* color_space); + + private: + static ParseResult ParseAndRewriteSps( + const uint8_t* buffer, + size_t length, + absl::optional<SpsParser::SpsState>* sps, + const ColorSpace* color_space, + rtc::Buffer* destination); + + static void UpdateStats(ParseResult result, Direction direction); +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_H264_SPS_VUI_REWRITER_H_ diff --git a/third_party/libwebrtc/common_video/h264/sps_vui_rewriter_unittest.cc b/third_party/libwebrtc/common_video/h264/sps_vui_rewriter_unittest.cc new file mode 100644 index 0000000000..2907949e6c --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/sps_vui_rewriter_unittest.cc @@ -0,0 +1,463 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h264/sps_vui_rewriter.h" + +#include <cstdint> +#include <vector> + +#include "api/video/color_space.h" +#include "common_video/h264/h264_common.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/buffer.h" +#include "rtc_base/logging.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { +enum SpsMode { + kNoRewriteRequired_VuiOptimal, + kRewriteRequired_NoVui, + kRewriteRequired_NoBitstreamRestriction, + kRewriteRequired_VuiSuboptimal, +}; + +static const size_t kSpsBufferMaxSize = 256; +static const size_t kWidth = 640; +static const size_t kHeight = 480; + +static const uint8_t kStartSequence[] = {0x00, 0x00, 0x00, 0x01}; +static const uint8_t kAud[] = {H264::NaluType::kAud, 0x09, 0x10}; +static const uint8_t kSpsNaluType[] = {H264::NaluType::kSps}; +static const uint8_t kIdr1[] = {H264::NaluType::kIdr, 0xFF, 0x00, 0x00, 0x04}; +static const uint8_t kIdr2[] = {H264::NaluType::kIdr, 0xFF, 0x00, 0x11}; + +struct VuiHeader { + uint32_t vui_parameters_present_flag; + uint32_t bitstream_restriction_flag; + uint32_t max_num_reorder_frames; + uint32_t max_dec_frame_buffering; + uint32_t video_signal_type_present_flag; + uint32_t video_full_range_flag; + uint32_t colour_description_present_flag; + uint8_t colour_primaries; + uint8_t transfer_characteristics; + uint8_t matrix_coefficients; +}; + +static const VuiHeader kVuiNotPresent = { + /* vui_parameters_present_flag= */ 0, + /* bitstream_restriction_flag= */ 0, + /* max_num_reorder_frames= */ 0, + /* max_dec_frame_buffering= */ 0, + /* video_signal_type_present_flag= */ 0, + /* video_full_range_flag= */ 0, + /* 
colour_description_present_flag= */ 0, + /* colour_primaries= */ 0, + /* transfer_characteristics= */ 0, + /* matrix_coefficients= */ 0}; + +static const VuiHeader kVuiNoBitstreamRestriction = { + /* vui_parameters_present_flag= */ 1, + /* bitstream_restriction_flag= */ 0, + /* max_num_reorder_frames= */ 0, + /* max_dec_frame_buffering= */ 0, + /* video_signal_type_present_flag= */ 0, + /* video_full_range_flag= */ 0, + /* colour_description_present_flag= */ 0, + /* colour_primaries= */ 0, + /* transfer_characteristics= */ 0, + /* matrix_coefficients= */ 0}; + +static const VuiHeader kVuiNoFrameBuffering = { + /* vui_parameters_present_flag= */ 1, + /* bitstream_restriction_flag= */ 1, + /* max_num_reorder_frames= */ 0, + /* max_dec_frame_buffering= */ 1, + /* video_signal_type_present_flag= */ 0, + /* video_full_range_flag= */ 0, + /* colour_description_present_flag= */ 0, + /* colour_primaries= */ 0, + /* transfer_characteristics= */ 0, + /* matrix_coefficients= */ 0}; + +static const VuiHeader kVuiFrameBuffering = { + /* vui_parameters_present_flag= */ 1, + /* bitstream_restriction_flag= */ 1, + /* max_num_reorder_frames= */ 3, + /* max_dec_frame_buffering= */ 3, + /* video_signal_type_present_flag= */ 0, + /* video_full_range_flag= */ 0, + /* colour_description_present_flag= */ 0, + /* colour_primaries= */ 0, + /* transfer_characteristics= */ 0, + /* matrix_coefficients= */ 0}; + +static const VuiHeader kVuiNoVideoSignalType = { + /* vui_parameters_present_flag= */ 1, + /* bitstream_restriction_flag= */ 1, + /* max_num_reorder_frames= */ 0, + /* max_dec_frame_buffering= */ 1, + /* video_signal_type_present_flag= */ 0, + /* video_full_range_flag= */ 0, + /* colour_description_present_flag= */ 0, + /* colour_primaries= */ 0, + /* transfer_characteristics= */ 0, + /* matrix_coefficients= */ 0}; + +static const VuiHeader kVuiLimitedRangeNoColourDescription = { + /* vui_parameters_present_flag= */ 1, + /* bitstream_restriction_flag= */ 1, + /* 
max_num_reorder_frames= */ 0, + /* max_dec_frame_buffering= */ 1, + /* video_signal_type_present_flag= */ 1, + /* video_full_range_flag= */ 0, + /* colour_description_present_flag= */ 0, + /* colour_primaries= */ 0, + /* transfer_characteristics= */ 0, + /* matrix_coefficients= */ 0}; + +static const VuiHeader kVuiFullRangeNoColourDescription = { + /* vui_parameters_present_flag= */ 1, + /* bitstream_restriction_flag= */ 1, + /* max_num_reorder_frames= */ 0, + /* max_dec_frame_buffering= */ 1, + /* video_signal_type_present_flag= */ 1, + /* video_full_range_flag= */ 1, + /* colour_description_present_flag= */ 0, + /* colour_primaries= */ 0, + /* transfer_characteristics= */ 0, + /* matrix_coefficients= */ 0}; + +static const VuiHeader kVuiLimitedRangeBt709Color = { + /* vui_parameters_present_flag= */ 1, + /* bitstream_restriction_flag= */ 1, + /* max_num_reorder_frames= */ 0, + /* max_dec_frame_buffering= */ 1, + /* video_signal_type_present_flag= */ 1, + /* video_full_range_flag= */ 0, + /* colour_description_present_flag= */ 1, + /* colour_primaries= */ 1, + /* transfer_characteristics= */ 1, + /* matrix_coefficients= */ 1}; + +static const webrtc::ColorSpace kColorSpaceH264Default( + ColorSpace::PrimaryID::kUnspecified, + ColorSpace::TransferID::kUnspecified, + ColorSpace::MatrixID::kUnspecified, + ColorSpace::RangeID::kLimited); + +static const webrtc::ColorSpace kColorSpacePrimariesBt709( + ColorSpace::PrimaryID::kBT709, + ColorSpace::TransferID::kUnspecified, + ColorSpace::MatrixID::kUnspecified, + ColorSpace::RangeID::kLimited); + +static const webrtc::ColorSpace kColorSpaceTransferBt709( + ColorSpace::PrimaryID::kUnspecified, + ColorSpace::TransferID::kBT709, + ColorSpace::MatrixID::kUnspecified, + ColorSpace::RangeID::kLimited); + +static const webrtc::ColorSpace kColorSpaceMatrixBt709( + ColorSpace::PrimaryID::kUnspecified, + ColorSpace::TransferID::kUnspecified, + ColorSpace::MatrixID::kBT709, + ColorSpace::RangeID::kLimited); + +static const 
webrtc::ColorSpace kColorSpaceFullRange( + ColorSpace::PrimaryID::kBT709, + ColorSpace::TransferID::kUnspecified, + ColorSpace::MatrixID::kUnspecified, + ColorSpace::RangeID::kFull); + +static const webrtc::ColorSpace kColorSpaceBt709LimitedRange( + ColorSpace::PrimaryID::kBT709, + ColorSpace::TransferID::kBT709, + ColorSpace::MatrixID::kBT709, + ColorSpace::RangeID::kLimited); +} // namespace + +// Generates a fake SPS with basically everything empty and with characteristics +// based off SpsMode. +// Pass in a buffer of at least kSpsBufferMaxSize. +// The fake SPS that this generates also always has at least one emulation byte +// at offset 2, since the first two bytes are always 0, and has a 0x3 as the +// level_idc, to make sure the parser doesn't eat all 0x3 bytes. +void GenerateFakeSps(const VuiHeader& vui, rtc::Buffer* out_buffer) { + uint8_t rbsp[kSpsBufferMaxSize] = {0}; + rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize); + // Profile byte. + writer.WriteUInt8(0); + // Constraint sets and reserved zero bits. + writer.WriteUInt8(0); + // level_idc. + writer.WriteUInt8(3); + // seq_paramter_set_id. + writer.WriteExponentialGolomb(0); + // Profile is not special, so we skip all the chroma format settings. + + // Now some bit magic. + // log2_max_frame_num_minus4: ue(v). 0 is fine. + writer.WriteExponentialGolomb(0); + // pic_order_cnt_type: ue(v). + writer.WriteExponentialGolomb(0); + // log2_max_pic_order_cnt_lsb_minus4: ue(v). 0 is fine. + writer.WriteExponentialGolomb(0); + + // max_num_ref_frames: ue(v). Use 1, to make optimal/suboptimal more obvious. + writer.WriteExponentialGolomb(1); + // gaps_in_frame_num_value_allowed_flag: u(1). + writer.WriteBits(0, 1); + // Next are width/height. First, calculate the mbs/map_units versions. + uint16_t width_in_mbs_minus1 = (kWidth + 15) / 16 - 1; + + // For the height, we're going to define frame_mbs_only_flag, so we need to + // divide by 2. See the parser for the full calculation. 
+ uint16_t height_in_map_units_minus1 = ((kHeight + 15) / 16 - 1) / 2; + // Write each as ue(v). + writer.WriteExponentialGolomb(width_in_mbs_minus1); + writer.WriteExponentialGolomb(height_in_map_units_minus1); + // frame_mbs_only_flag: u(1). Needs to be false. + writer.WriteBits(0, 1); + // mb_adaptive_frame_field_flag: u(1). + writer.WriteBits(0, 1); + // direct_8x8_inferene_flag: u(1). + writer.WriteBits(0, 1); + // frame_cropping_flag: u(1). 1, so we can supply crop. + writer.WriteBits(1, 1); + // Now we write the left/right/top/bottom crop. For simplicity, we'll put all + // the crop at the left/top. + // We picked a 4:2:0 format, so the crops are 1/2 the pixel crop values. + // Left/right. + writer.WriteExponentialGolomb(((16 - (kWidth % 16)) % 16) / 2); + writer.WriteExponentialGolomb(0); + // Top/bottom. + writer.WriteExponentialGolomb(((16 - (kHeight % 16)) % 16) / 2); + writer.WriteExponentialGolomb(0); + + // Finally! The VUI. + // vui_parameters_present_flag: u(1) + writer.WriteBits(vui.vui_parameters_present_flag, 1); + if (vui.vui_parameters_present_flag) { + // aspect_ratio_info_present_flag, overscan_info_present_flag. Both u(1). + writer.WriteBits(0, 2); + + writer.WriteBits(vui.video_signal_type_present_flag, 1); + if (vui.video_signal_type_present_flag) { + // video_format: u(3). 5 = Unspecified + writer.WriteBits(5, 3); + writer.WriteBits(vui.video_full_range_flag, 1); + writer.WriteBits(vui.colour_description_present_flag, 1); + if (vui.colour_description_present_flag) { + writer.WriteUInt8(vui.colour_primaries); + writer.WriteUInt8(vui.transfer_characteristics); + writer.WriteUInt8(vui.matrix_coefficients); + } + } + + // chroma_loc_info_present_flag, timing_info_present_flag, + // nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag, + // pic_struct_present_flag, All u(1) + writer.WriteBits(0, 5); + + writer.WriteBits(vui.bitstream_restriction_flag, 1); + if (vui.bitstream_restriction_flag) { + // Write some defaults. 
Shouldn't matter for parsing, though. + // motion_vectors_over_pic_boundaries_flag: u(1) + writer.WriteBits(1, 1); + // max_bytes_per_pic_denom: ue(v) + writer.WriteExponentialGolomb(2); + // max_bits_per_mb_denom: ue(v) + writer.WriteExponentialGolomb(1); + // log2_max_mv_length_horizontal: ue(v) + // log2_max_mv_length_vertical: ue(v) + writer.WriteExponentialGolomb(16); + writer.WriteExponentialGolomb(16); + + // Next are the limits we care about. + writer.WriteExponentialGolomb(vui.max_num_reorder_frames); + writer.WriteExponentialGolomb(vui.max_dec_frame_buffering); + } + } + + // Get the number of bytes written (including the last partial byte). + size_t byte_count, bit_offset; + writer.GetCurrentOffset(&byte_count, &bit_offset); + if (bit_offset > 0) { + byte_count++; + } + + H264::WriteRbsp(rbsp, byte_count, out_buffer); +} + +void TestSps(const VuiHeader& vui, + const ColorSpace* color_space, + SpsVuiRewriter::ParseResult expected_parse_result) { + rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); + rtc::Buffer original_sps; + GenerateFakeSps(vui, &original_sps); + + absl::optional<SpsParser::SpsState> sps; + rtc::Buffer rewritten_sps; + SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( + original_sps.data(), original_sps.size(), &sps, color_space, + &rewritten_sps, SpsVuiRewriter::Direction::kIncoming); + EXPECT_EQ(expected_parse_result, result); + ASSERT_TRUE(sps); + EXPECT_EQ(sps->width, kWidth); + EXPECT_EQ(sps->height, kHeight); + if (vui.vui_parameters_present_flag) { + EXPECT_EQ(sps->vui_params_present, 1u); + } + + if (result == SpsVuiRewriter::ParseResult::kVuiRewritten) { + // Ensure that added/rewritten SPS is parsable. 
+ rtc::Buffer tmp; + result = SpsVuiRewriter::ParseAndRewriteSps( + rewritten_sps.data(), rewritten_sps.size(), &sps, nullptr, &tmp, + SpsVuiRewriter::Direction::kIncoming); + EXPECT_EQ(SpsVuiRewriter::ParseResult::kVuiOk, result); + ASSERT_TRUE(sps); + EXPECT_EQ(sps->width, kWidth); + EXPECT_EQ(sps->height, kHeight); + EXPECT_EQ(sps->vui_params_present, 1u); + } +} + +class SpsVuiRewriterTest : public ::testing::Test, + public ::testing::WithParamInterface< + ::testing::tuple<VuiHeader, + const ColorSpace*, + SpsVuiRewriter::ParseResult>> { +}; + +TEST_P(SpsVuiRewriterTest, RewriteVui) { + VuiHeader vui = ::testing::get<0>(GetParam()); + const ColorSpace* color_space = ::testing::get<1>(GetParam()); + SpsVuiRewriter::ParseResult expected_parse_result = + ::testing::get<2>(GetParam()); + TestSps(vui, color_space, expected_parse_result); +} + +INSTANTIATE_TEST_SUITE_P( + All, + SpsVuiRewriterTest, + ::testing::Values( + std::make_tuple(kVuiNoFrameBuffering, + nullptr, + SpsVuiRewriter::ParseResult::kVuiOk), + std::make_tuple(kVuiNoVideoSignalType, + &kColorSpaceH264Default, + SpsVuiRewriter::ParseResult::kVuiOk), + std::make_tuple(kVuiLimitedRangeBt709Color, + &kColorSpaceBt709LimitedRange, + SpsVuiRewriter::ParseResult::kVuiOk), + std::make_tuple(kVuiNotPresent, + nullptr, + SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiNoBitstreamRestriction, + nullptr, + SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiFrameBuffering, + nullptr, + SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiLimitedRangeNoColourDescription, + &kColorSpaceFullRange, + SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiNoVideoSignalType, + &kColorSpacePrimariesBt709, + SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiNoVideoSignalType, + &kColorSpaceTransferBt709, + SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiNoVideoSignalType, + &kColorSpaceMatrixBt709, + 
SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiFullRangeNoColourDescription, + &kColorSpaceH264Default, + SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiLimitedRangeBt709Color, + &kColorSpaceH264Default, + SpsVuiRewriter::ParseResult::kVuiRewritten))); + +TEST(SpsVuiRewriterOutgoingVuiTest, ParseOutgoingBitstreamOptimalVui) { + rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); + + rtc::Buffer optimal_sps; + GenerateFakeSps(kVuiNoFrameBuffering, &optimal_sps); + + rtc::Buffer buffer; + buffer.AppendData(kStartSequence); + buffer.AppendData(optimal_sps); + buffer.AppendData(kStartSequence); + buffer.AppendData(kIdr1); + + EXPECT_THAT(SpsVuiRewriter::ParseOutgoingBitstreamAndRewrite(buffer, nullptr), + ::testing::ElementsAreArray(buffer)); +} + +TEST(SpsVuiRewriterOutgoingVuiTest, ParseOutgoingBitstreamNoVui) { + rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); + + rtc::Buffer sps; + GenerateFakeSps(kVuiNotPresent, &sps); + + rtc::Buffer buffer; + buffer.AppendData(kStartSequence); + buffer.AppendData(kIdr1); + buffer.AppendData(kStartSequence); + buffer.AppendData(kSpsNaluType); + buffer.AppendData(sps); + buffer.AppendData(kStartSequence); + buffer.AppendData(kIdr2); + + rtc::Buffer optimal_sps; + GenerateFakeSps(kVuiNoFrameBuffering, &optimal_sps); + + rtc::Buffer expected_buffer; + expected_buffer.AppendData(kStartSequence); + expected_buffer.AppendData(kIdr1); + expected_buffer.AppendData(kStartSequence); + expected_buffer.AppendData(kSpsNaluType); + expected_buffer.AppendData(optimal_sps); + expected_buffer.AppendData(kStartSequence); + expected_buffer.AppendData(kIdr2); + + EXPECT_THAT(SpsVuiRewriter::ParseOutgoingBitstreamAndRewrite(buffer, nullptr), + ::testing::ElementsAreArray(expected_buffer)); +} + +TEST(SpsVuiRewriterOutgoingAudTest, ParseOutgoingBitstreamWithAud) { + rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); + + rtc::Buffer optimal_sps; + GenerateFakeSps(kVuiNoFrameBuffering, &optimal_sps); + + rtc::Buffer buffer; + 
buffer.AppendData(kStartSequence); + buffer.AppendData(kAud); + buffer.AppendData(kStartSequence); + buffer.AppendData(optimal_sps); + buffer.AppendData(kStartSequence); + buffer.AppendData(kIdr1); + + rtc::Buffer expected_buffer; + expected_buffer.AppendData(kStartSequence); + expected_buffer.AppendData(optimal_sps); + expected_buffer.AppendData(kStartSequence); + expected_buffer.AppendData(kIdr1); + + EXPECT_THAT(SpsVuiRewriter::ParseOutgoingBitstreamAndRewrite(buffer, nullptr), + ::testing::ElementsAreArray(expected_buffer)); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_bitstream_parser.cc b/third_party/libwebrtc/common_video/h265/h265_bitstream_parser.cc new file mode 100644 index 0000000000..1093add102 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_bitstream_parser.cc @@ -0,0 +1,543 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "common_video/h265/h265_bitstream_parser.h" + +#include <stdlib.h> + +#include <cstdint> +#include <vector> + +#include "common_video/h265/h265_common.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/bitstream_reader.h" +#include "rtc_base/logging.h" + +#define IN_RANGE_OR_RETURN(val, min, max) \ + do { \ + if (!slice_reader.Ok() || (val) < (min) || (val) > (max)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \ + " to be" \ + << " in range [" << (min) << ":" << (max) << "]" \ + << " found " << (val) << " instead"; \ + return kInvalidStream; \ + } \ + } while (0) + +#define IN_RANGE_OR_RETURN_NULL(val, min, max) \ + do { \ + if (!slice_reader.Ok() || (val) < (min) || (val) > (max)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \ + " to be" \ + << " in range [" << (min) << ":" << (max) << "]" \ + << " found " << (val) << " instead"; \ + return absl::nullopt; \ + } \ + } while (0) + +#define IN_RANGE_OR_RETURN_VOID(val, min, max) \ + do { \ + if (!slice_reader.Ok() || (val) < (min) || (val) > (max)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \ + " to be" \ + << " in range [" << (min) << ":" << (max) << "]" \ + << " found " << (val) << " instead"; \ + return; \ + } \ + } while (0) + +#define TRUE_OR_RETURN(a) \ + do { \ + if (!slice_reader.Ok() || !(a)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " \ + << #a; \ + return kInvalidStream; \ + } \ + } while (0) + +namespace { + +constexpr int kMaxAbsQpDeltaValue = 51; +constexpr int kMinQpValue = 0; +constexpr int kMaxQpValue = 51; +constexpr int kMaxRefIdxActive = 15; + +} // namespace + +namespace webrtc { + +H265BitstreamParser::H265BitstreamParser() = default; +H265BitstreamParser::~H265BitstreamParser() = default; + +// General note: this is based off the 08/2021 version of the H.265 standard, +// section 7.3.6.1. 
You can find it on this page: +// http://www.itu.int/rec/T-REC-H.265 +H265BitstreamParser::Result H265BitstreamParser::ParseNonParameterSetNalu( + const uint8_t* source, + size_t source_length, + uint8_t nalu_type) { + last_slice_qp_delta_ = absl::nullopt; + last_slice_pps_id_ = absl::nullopt; + const std::vector<uint8_t> slice_rbsp = + H265::ParseRbsp(source, source_length); + if (slice_rbsp.size() < H265::kNaluHeaderSize) + return kInvalidStream; + + BitstreamReader slice_reader(slice_rbsp); + slice_reader.ConsumeBits(H265::kNaluHeaderSize * 8); + + // first_slice_segment_in_pic_flag: u(1) + bool first_slice_segment_in_pic_flag = slice_reader.Read<bool>(); + bool irap_pic = (H265::NaluType::kBlaWLp <= nalu_type && + nalu_type <= H265::NaluType::kRsvIrapVcl23); + if (irap_pic) { + // no_output_of_prior_pics_flag: u(1) + slice_reader.ConsumeBits(1); + } + // slice_pic_parameter_set_id: ue(v) + uint32_t pps_id = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN(pps_id, 0, 63); + const H265PpsParser::PpsState* pps = GetPPS(pps_id); + TRUE_OR_RETURN(pps); + const H265SpsParser::SpsState* sps = GetSPS(pps->sps_id); + TRUE_OR_RETURN(sps); + bool dependent_slice_segment_flag = 0; + if (!first_slice_segment_in_pic_flag) { + if (pps->dependent_slice_segments_enabled_flag) { + // dependent_slice_segment_flag: u(1) + dependent_slice_segment_flag = slice_reader.Read<bool>(); + } + + // slice_segment_address: u(v) + int32_t log2_ctb_size_y = sps->log2_min_luma_coding_block_size_minus3 + 3 + + sps->log2_diff_max_min_luma_coding_block_size; + uint32_t ctb_size_y = 1 << log2_ctb_size_y; + uint32_t pic_width_in_ctbs_y = sps->pic_width_in_luma_samples / ctb_size_y; + if (sps->pic_width_in_luma_samples % ctb_size_y) + pic_width_in_ctbs_y++; + + uint32_t pic_height_in_ctbs_y = + sps->pic_height_in_luma_samples / ctb_size_y; + if (sps->pic_height_in_luma_samples % ctb_size_y) + pic_height_in_ctbs_y++; + + uint32_t slice_segment_address_bits = + 
H265::Log2Ceiling(pic_height_in_ctbs_y * pic_width_in_ctbs_y); + slice_reader.ConsumeBits(slice_segment_address_bits); + } + + if (dependent_slice_segment_flag == 0) { + for (uint32_t i = 0; i < pps->num_extra_slice_header_bits; i++) { + // slice_reserved_flag: u(1) + slice_reader.ConsumeBits(1); + } + // slice_type: ue(v) + uint32_t slice_type = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN(slice_type, 0, 2); + if (pps->output_flag_present_flag) { + // pic_output_flag: u(1) + slice_reader.ConsumeBits(1); + } + if (sps->separate_colour_plane_flag) { + // colour_plane_id: u(2) + slice_reader.ConsumeBits(2); + } + uint32_t num_long_term_sps = 0; + uint32_t num_long_term_pics = 0; + std::vector<bool> used_by_curr_pic_lt_flag; + bool short_term_ref_pic_set_sps_flag = false; + uint32_t short_term_ref_pic_set_idx = 0; + H265SpsParser::ShortTermRefPicSet short_term_ref_pic_set; + bool slice_temporal_mvp_enabled_flag = 0; + if (nalu_type != H265::NaluType::kIdrWRadl && + nalu_type != H265::NaluType::kIdrNLp) { + // slice_pic_order_cnt_lsb: u(v) + uint32_t slice_pic_order_cnt_lsb_bits = + sps->log2_max_pic_order_cnt_lsb_minus4 + 4; + slice_reader.ConsumeBits(slice_pic_order_cnt_lsb_bits); + // short_term_ref_pic_set_sps_flag: u(1) + short_term_ref_pic_set_sps_flag = slice_reader.Read<bool>(); + if (!short_term_ref_pic_set_sps_flag) { + absl::optional<H265SpsParser::ShortTermRefPicSet> ref_pic_set = + H265SpsParser::ParseShortTermRefPicSet( + sps->num_short_term_ref_pic_sets, + sps->num_short_term_ref_pic_sets, sps->short_term_ref_pic_set, + sps->sps_max_dec_pic_buffering_minus1 + [sps->sps_max_sub_layers_minus1], + slice_reader); + TRUE_OR_RETURN(ref_pic_set); + short_term_ref_pic_set = *ref_pic_set; + + } else if (sps->num_short_term_ref_pic_sets > 1) { + // short_term_ref_pic_set_idx: u(v) + uint32_t short_term_ref_pic_set_idx_bits = + H265::Log2Ceiling(sps->num_short_term_ref_pic_sets); + if ((1 << short_term_ref_pic_set_idx_bits) < + 
sps->num_short_term_ref_pic_sets) { + short_term_ref_pic_set_idx_bits++; + } + if (short_term_ref_pic_set_idx_bits > 0) { + short_term_ref_pic_set_idx = + slice_reader.ReadBits(short_term_ref_pic_set_idx_bits); + IN_RANGE_OR_RETURN(short_term_ref_pic_set_idx, 0, + sps->num_short_term_ref_pic_sets - 1); + } + } + if (sps->long_term_ref_pics_present_flag) { + if (sps->num_long_term_ref_pics_sps > 0) { + // num_long_term_sps: ue(v) + num_long_term_sps = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN(num_long_term_sps, 0, + sps->num_long_term_ref_pics_sps); + } + // num_long_term_pics: ue(v) + num_long_term_pics = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN(num_long_term_pics, 0, + kMaxLongTermRefPicSets - num_long_term_sps); + used_by_curr_pic_lt_flag.resize(num_long_term_sps + num_long_term_pics, + 0); + for (uint32_t i = 0; i < num_long_term_sps + num_long_term_pics; i++) { + if (i < num_long_term_sps) { + uint32_t lt_idx_sps = 0; + if (sps->num_long_term_ref_pics_sps > 1) { + // lt_idx_sps: u(v) + uint32_t lt_idx_sps_bits = + H265::Log2Ceiling(sps->num_long_term_ref_pics_sps); + lt_idx_sps = slice_reader.ReadBits(lt_idx_sps_bits); + IN_RANGE_OR_RETURN(lt_idx_sps, 0, + sps->num_long_term_ref_pics_sps - 1); + } + used_by_curr_pic_lt_flag[i] = + sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps]; + } else { + // poc_lsb_lt: u(v) + uint32_t poc_lsb_lt_bits = + sps->log2_max_pic_order_cnt_lsb_minus4 + 4; + slice_reader.ConsumeBits(poc_lsb_lt_bits); + // used_by_curr_pic_lt_flag: u(1) + used_by_curr_pic_lt_flag[i] = slice_reader.Read<bool>(); + } + // delta_poc_msb_present_flag: u(1) + bool delta_poc_msb_present_flag = slice_reader.Read<bool>(); + if (delta_poc_msb_present_flag) { + // delta_poc_msb_cycle_lt: ue(v) + int delta_poc_msb_cycle_lt = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN( + delta_poc_msb_cycle_lt, 0, + std::pow(2, 32 - sps->log2_max_pic_order_cnt_lsb_minus4 - 4)); + } + } + } + if (sps->sps_temporal_mvp_enabled_flag) 
{ + // slice_temporal_mvp_enabled_flag: u(1) + slice_temporal_mvp_enabled_flag = slice_reader.Read<bool>(); + } + } + + if (sps->sample_adaptive_offset_enabled_flag) { + // slice_sao_luma_flag: u(1) + slice_reader.ConsumeBits(1); + uint32_t chroma_array_type = + sps->separate_colour_plane_flag == 0 ? sps->chroma_format_idc : 0; + if (chroma_array_type != 0) { + // slice_sao_chroma_flag: u(1) + slice_reader.ConsumeBits(1); + } + } + + if (slice_type == H265::SliceType::kP || + slice_type == H265::SliceType::kB) { + // num_ref_idx_active_override_flag: u(1) + bool num_ref_idx_active_override_flag = slice_reader.Read<bool>(); + uint32_t num_ref_idx_l0_active_minus1 = + pps->num_ref_idx_l0_default_active_minus1; + uint32_t num_ref_idx_l1_active_minus1 = + pps->num_ref_idx_l1_default_active_minus1; + if (num_ref_idx_active_override_flag) { + // num_ref_idx_l0_active_minus1: ue(v) + num_ref_idx_l0_active_minus1 = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN(num_ref_idx_l0_active_minus1, 0, + kMaxRefIdxActive - 1); + if (slice_type == H265::SliceType::kB) { + // num_ref_idx_l1_active_minus1: ue(v) + num_ref_idx_l1_active_minus1 = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN(num_ref_idx_l1_active_minus1, 0, + kMaxRefIdxActive - 1); + } + } + + uint32_t num_pic_total_curr = 0; + uint32_t curr_sps_idx = 0; + if (short_term_ref_pic_set_sps_flag) { + curr_sps_idx = short_term_ref_pic_set_idx; + } else { + curr_sps_idx = sps->num_short_term_ref_pic_sets; + } + if (sps->short_term_ref_pic_set.size() <= curr_sps_idx) { + TRUE_OR_RETURN(!(curr_sps_idx != 0 || short_term_ref_pic_set_sps_flag)); + } + const H265SpsParser::ShortTermRefPicSet* ref_pic_set; + if (curr_sps_idx < sps->short_term_ref_pic_set.size()) { + ref_pic_set = &(sps->short_term_ref_pic_set[curr_sps_idx]); + } else { + ref_pic_set = &short_term_ref_pic_set; + } + + // Equation 7-57 + IN_RANGE_OR_RETURN(ref_pic_set->num_negative_pics, 0, + kMaxShortTermRefPicSets); + 
IN_RANGE_OR_RETURN(ref_pic_set->num_positive_pics, 0, + kMaxShortTermRefPicSets); + for (uint32_t i = 0; i < ref_pic_set->num_negative_pics; i++) { + if (ref_pic_set->used_by_curr_pic_s0[i]) { + num_pic_total_curr++; + } + } + for (uint32_t i = 0; i < ref_pic_set->num_positive_pics; i++) { + if (ref_pic_set->used_by_curr_pic_s1[i]) { + num_pic_total_curr++; + } + } + for (uint32_t i = 0; i < num_long_term_sps + num_long_term_pics; i++) { + if (used_by_curr_pic_lt_flag[i]) { + num_pic_total_curr++; + } + } + + if (pps->lists_modification_present_flag && num_pic_total_curr > 1) { + // ref_pic_lists_modification() + uint32_t list_entry_bits = H265::Log2Ceiling(num_pic_total_curr); + if ((1 << list_entry_bits) < num_pic_total_curr) { + list_entry_bits++; + } + // ref_pic_list_modification_flag_l0: u(1) + bool ref_pic_list_modification_flag_l0 = slice_reader.Read<bool>(); + if (ref_pic_list_modification_flag_l0) { + for (uint32_t i = 0; i < num_ref_idx_l0_active_minus1; i++) { + // list_entry_l0: u(v) + slice_reader.ConsumeBits(list_entry_bits); + } + } + if (slice_type == H265::SliceType::kB) { + // ref_pic_list_modification_flag_l1: u(1) + bool ref_pic_list_modification_flag_l1 = slice_reader.Read<bool>(); + if (ref_pic_list_modification_flag_l1) { + for (uint32_t i = 0; i < num_ref_idx_l1_active_minus1; i++) { + // list_entry_l1: u(v) + slice_reader.ConsumeBits(list_entry_bits); + } + } + } + } + if (slice_type == H265::SliceType::kB) { + // mvd_l1_zero_flag: u(1) + slice_reader.ConsumeBits(1); + } + if (pps->cabac_init_present_flag) { + // cabac_init_flag: u(1) + slice_reader.ConsumeBits(1); + } + if (slice_temporal_mvp_enabled_flag) { + bool collocated_from_l0_flag = false; + if (slice_type == H265::SliceType::kB) { + // collocated_from_l0_flag: u(1) + collocated_from_l0_flag = slice_reader.Read<bool>(); + } + if ((collocated_from_l0_flag && num_ref_idx_l0_active_minus1 > 0) || + (!collocated_from_l0_flag && num_ref_idx_l1_active_minus1 > 0)) { + // 
collocated_ref_idx: ue(v) + uint32_t collocated_ref_idx = slice_reader.ReadExponentialGolomb(); + if ((slice_type == H265::SliceType::kP || + slice_type == H265::SliceType::kB) && + collocated_from_l0_flag) { + IN_RANGE_OR_RETURN(collocated_ref_idx, 0, + num_ref_idx_l0_active_minus1); + } + if (slice_type == H265::SliceType::kB && !collocated_from_l0_flag) { + IN_RANGE_OR_RETURN(collocated_ref_idx, 0, + num_ref_idx_l1_active_minus1); + } + } + } + if (!slice_reader.Ok() || + ((pps->weighted_pred_flag && slice_type == H265::SliceType::kP) || + (pps->weighted_bipred_flag && slice_type == H265::SliceType::kB))) { + // pred_weight_table() + RTC_LOG(LS_ERROR) << "Streams with pred_weight_table unsupported."; + return kUnsupportedStream; + } + // five_minus_max_num_merge_cand: ue(v) + uint32_t five_minus_max_num_merge_cand = + slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN(5 - five_minus_max_num_merge_cand, 1, 5); + } + } + + // slice_qp_delta: se(v) + int32_t last_slice_qp_delta = slice_reader.ReadSignedExponentialGolomb(); + if (!slice_reader.Ok() || (abs(last_slice_qp_delta) > kMaxAbsQpDeltaValue)) { + // Something has gone wrong, and the parsed value is invalid. + RTC_LOG(LS_ERROR) << "Parsed QP value out of range."; + return kInvalidStream; + } + // 7-54 in H265 spec. 
+ IN_RANGE_OR_RETURN(26 + pps->init_qp_minus26 + last_slice_qp_delta, + -pps->qp_bd_offset_y, 51); + + last_slice_qp_delta_ = last_slice_qp_delta; + last_slice_pps_id_ = pps_id; + if (!slice_reader.Ok()) { + return kInvalidStream; + } + + return kOk; +} + +const H265PpsParser::PpsState* H265BitstreamParser::GetPPS(uint32_t id) const { + auto it = pps_.find(id); + if (it == pps_.end()) { + RTC_LOG(LS_WARNING) << "Requested a nonexistent PPS id " << id; + return nullptr; + } + + return &it->second; +} + +const H265SpsParser::SpsState* H265BitstreamParser::GetSPS(uint32_t id) const { + auto it = sps_.find(id); + if (it == sps_.end()) { + RTC_LOG(LS_WARNING) << "Requested a nonexistent SPS id " << id; + return nullptr; + } + + return &it->second; +} + +void H265BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) { + H265::NaluType nalu_type = H265::ParseNaluType(slice[0]); + switch (nalu_type) { + case H265::NaluType::kVps: { + absl::optional<H265VpsParser::VpsState> vps_state; + if (length >= H265::kNaluHeaderSize) { + vps_state = H265VpsParser::ParseVps(slice + H265::kNaluHeaderSize, + length - H265::kNaluHeaderSize); + } + + if (!vps_state) { + RTC_LOG(LS_WARNING) << "Unable to parse VPS from H265 bitstream."; + } else { + vps_[vps_state->id] = *vps_state; + } + break; + } + case H265::NaluType::kSps: { + absl::optional<H265SpsParser::SpsState> sps_state; + if (length >= H265::kNaluHeaderSize) { + sps_state = H265SpsParser::ParseSps(slice + H265::kNaluHeaderSize, + length - H265::kNaluHeaderSize); + } + if (!sps_state) { + RTC_LOG(LS_WARNING) << "Unable to parse SPS from H265 bitstream."; + } else { + sps_[sps_state->sps_id] = *sps_state; + } + break; + } + case H265::NaluType::kPps: { + absl::optional<H265PpsParser::PpsState> pps_state; + if (length >= H265::kNaluHeaderSize) { + std::vector<uint8_t> unpacked_buffer = H265::ParseRbsp( + slice + H265::kNaluHeaderSize, length - H265::kNaluHeaderSize); + BitstreamReader slice_reader(unpacked_buffer); + // 
pic_parameter_set_id: ue(v) + uint32_t pps_id = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_VOID(pps_id, 0, 63); + // seq_parameter_set_id: ue(v) + uint32_t sps_id = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_VOID(sps_id, 0, 15); + const H265SpsParser::SpsState* sps = GetSPS(sps_id); + pps_state = H265PpsParser::ParsePps( + slice + H265::kNaluHeaderSize, length - H265::kNaluHeaderSize, sps); + } + if (!pps_state) { + RTC_LOG(LS_WARNING) << "Unable to parse PPS from H265 bitstream."; + } else { + pps_[pps_state->pps_id] = *pps_state; + } + break; + } + case H265::NaluType::kAud: + case H265::NaluType::kPrefixSei: + case H265::NaluType::kSuffixSei: + case H265::NaluType::kAP: + case H265::NaluType::kFU: + break; + default: + Result res = ParseNonParameterSetNalu(slice, length, nalu_type); + if (res != kOk) { + RTC_LOG(LS_INFO) << "Failed to parse bitstream. Error: " << res; + } + break; + } +} + +absl::optional<uint32_t> +H265BitstreamParser::ParsePpsIdFromSliceSegmentLayerRbsp(const uint8_t* data, + size_t length, + uint8_t nalu_type) { + std::vector<uint8_t> unpacked_buffer = H265::ParseRbsp(data, length); + BitstreamReader slice_reader(unpacked_buffer); + + // first_slice_segment_in_pic_flag: u(1) + slice_reader.ConsumeBits(1); + if (!slice_reader.Ok()) { + return absl::nullopt; + } + + if (nalu_type >= H265::NaluType::kBlaWLp && + nalu_type <= H265::NaluType::kRsvIrapVcl23) { + // no_output_of_prior_pics_flag: u(1) + slice_reader.ConsumeBits(1); + } + + // slice_pic_parameter_set_id: ue(v) + uint32_t slice_pic_parameter_set_id = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(slice_pic_parameter_set_id, 0, 63); + if (!slice_reader.Ok()) { + return absl::nullopt; + } + + return slice_pic_parameter_set_id; +} + +void H265BitstreamParser::ParseBitstream( + rtc::ArrayView<const uint8_t> bitstream) { + std::vector<H265::NaluIndex> nalu_indices = + H265::FindNaluIndices(bitstream.data(), bitstream.size()); + for (const 
H265::NaluIndex& index : nalu_indices) + ParseSlice(&bitstream[index.payload_start_offset], index.payload_size); +} + +absl::optional<int> H265BitstreamParser::GetLastSliceQp() const { + if (!last_slice_qp_delta_ || !last_slice_pps_id_) { + return absl::nullopt; + } + uint32_t pps_id = 0; + const H265PpsParser::PpsState* pps = GetPPS(pps_id); + if (!pps) + return absl::nullopt; + const int parsed_qp = 26 + pps->init_qp_minus26 + *last_slice_qp_delta_; + if (parsed_qp < kMinQpValue || parsed_qp > kMaxQpValue) { + RTC_LOG(LS_ERROR) << "Parsed invalid QP from bitstream."; + return absl::nullopt; + } + return parsed_qp; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_bitstream_parser.h b/third_party/libwebrtc/common_video/h265/h265_bitstream_parser.h new file mode 100644 index 0000000000..3c0883c7a1 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_bitstream_parser.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H265_H265_BITSTREAM_PARSER_H_ +#define COMMON_VIDEO_H265_H265_BITSTREAM_PARSER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <vector> + +#include "absl/types/optional.h" +#include "api/video_codecs/bitstream_parser.h" +#include "common_video/h265/h265_pps_parser.h" +#include "common_video/h265/h265_sps_parser.h" +#include "common_video/h265/h265_vps_parser.h" +#include "rtc_base/containers/flat_map.h" + +namespace webrtc { + +// Stateful H265 bitstream parser (due to VPS/SPS/PPS). Used to parse out QP +// values from the bitstream. 
+class H265BitstreamParser : public BitstreamParser { + public: + H265BitstreamParser(); + ~H265BitstreamParser() override; + + // New interface. + void ParseBitstream(rtc::ArrayView<const uint8_t> bitstream) override; + absl::optional<int> GetLastSliceQp() const override; + + static absl::optional<uint32_t> ParsePpsIdFromSliceSegmentLayerRbsp( + const uint8_t* data, + size_t length, + uint8_t nalu_type); + + protected: + enum Result { + kOk, + kInvalidStream, + kUnsupportedStream, + }; + void ParseSlice(const uint8_t* slice, size_t length); + Result ParseNonParameterSetNalu(const uint8_t* source, + size_t source_length, + uint8_t nalu_type); + + const H265PpsParser::PpsState* GetPPS(uint32_t id) const; + const H265SpsParser::SpsState* GetSPS(uint32_t id) const; + + // VPS/SPS/PPS state, updated when parsing new VPS/SPS/PPS, used to parse + // slices. + flat_map<uint32_t, H265VpsParser::VpsState> vps_; + flat_map<uint32_t, H265SpsParser::SpsState> sps_; + flat_map<uint32_t, H265PpsParser::PpsState> pps_; + + // Last parsed slice QP. + absl::optional<int32_t> last_slice_qp_delta_; + absl::optional<uint32_t> last_slice_pps_id_; +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_H265_H265_BITSTREAM_PARSER_H_ diff --git a/third_party/libwebrtc/common_video/h265/h265_bitstream_parser_unittest.cc b/third_party/libwebrtc/common_video/h265/h265_bitstream_parser_unittest.cc new file mode 100644 index 0000000000..7ca979433a --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_bitstream_parser_unittest.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "common_video/h265/h265_bitstream_parser.h" + +#include "common_video/h265/h265_common.h" +#include "test/gtest.h" + +namespace webrtc { + +// VPS/SPS/PPS part of below chunk. +const uint8_t kH265VpsSpsPps[] = { + 0x00, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0x95, 0x98, 0x09, 0x00, 0x00, 0x00, 0x01, 0x42, 0x01, 0x01, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0xb0, 0x03, 0xc0, 0x80, 0x10, 0xe5, 0x96, 0x56, 0x69, 0x24, 0xca, 0xe0, + 0x10, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x03, 0x01, 0xe0, 0x80, + 0x00, 0x00, 0x00, 0x01, 0x44, 0x01, 0xc1, 0x72, 0xb4, 0x62, 0x40}; + +// Contains enough of the image slice to contain slice QP. +const uint8_t kH265BitstreamChunk[] = { + 0x00, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0x95, 0x98, 0x09, 0x00, 0x00, 0x00, 0x01, 0x42, 0x01, 0x01, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0xb0, 0x03, 0xc0, 0x80, 0x10, 0xe5, 0x96, 0x56, 0x69, 0x24, 0xca, 0xe0, + 0x10, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x03, 0x01, 0xe0, 0x80, + 0x00, 0x00, 0x00, 0x01, 0x44, 0x01, 0xc1, 0x72, 0xb4, 0x62, 0x40, 0x00, + 0x00, 0x01, 0x26, 0x01, 0xaf, 0x08, 0x42, 0x23, 0x10, 0x5d, 0x2b, 0x51, + 0xf9, 0x7a, 0x55, 0x15, 0x0d, 0x10, 0x40, 0xe8, 0x10, 0x05, 0x30, 0x95, + 0x09, 0x9a, 0xa5, 0xb6, 0x6a, 0x66, 0x6d, 0xde, 0xe0, 0xf9, +}; + +// Contains enough of the image slice to contain slice QP. +const uint8_t kH265BitstreamNextImageSliceChunk[] = { + 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0xe0, 0x24, 0xbf, 0x82, 0x05, + 0x21, 0x12, 0x22, 0xa3, 0x29, 0xb4, 0x21, 0x91, 0xa1, 0xaa, 0x40, +}; + +// Contains enough of the image slice to contain slice QP. 
+const uint8_t kH265SliceChunk[] = { + 0xa4, 0x04, 0x55, 0xa2, 0x6d, 0xce, 0xc0, 0xc3, 0xed, 0x0b, 0xac, 0xbc, + 0x00, 0xc4, 0x44, 0x2e, 0xf7, 0x55, 0xfd, 0x05, 0x86, 0x92, 0x19, 0xdf, + 0x58, 0xec, 0x38, 0x36, 0xb7, 0x7c, 0x00, 0x15, 0x33, 0x78, 0x03, 0x67, + 0x26, 0x0f, 0x7b, 0x30, 0x1c, 0xd7, 0xd4, 0x3a, 0xec, 0xad, 0xef, 0x73, +}; + +// Contains short term ref pic set slice to verify Log2Ceiling path. +const uint8_t kH265SliceStrChunk[] = { + 0x00, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x01, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, + 0x00, 0x99, 0x94, 0x90, 0x24, 0x00, 0x00, 0x00, 0x01, 0x42, 0x01, 0x01, + 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x99, 0xa0, 0x01, 0x40, 0x20, 0x06, 0x41, 0xfe, 0x59, + 0x49, 0x26, 0x4d, 0x86, 0x16, 0x22, 0xaa, 0x4c, 0x4c, 0x32, 0xfb, 0x3e, + 0xbc, 0xdf, 0x96, 0x7d, 0x78, 0x51, 0x18, 0x9c, 0xbb, 0x20, 0x00, 0x00, + 0x00, 0x01, 0x44, 0x01, 0xc1, 0xa5, 0x58, 0x11, 0x20, 0x00, 0x00, 0x01, + 0x02, 0x01, 0xe1, 0x18, 0xfe, 0x47, 0x60, 0xd2, 0x74, 0xd6, 0x9f, 0xfc, + 0xbe, 0x6b, 0x15, 0x48, 0x59, 0x1f, 0xf7, 0xc1, 0x7c, 0xe2, 0xe8, 0x10, +}; + +// Contains enough of the image slice to contain invalid slice QP -52. +const uint8_t kH265BitstreamInvalidQPChunk[] = { + 0x00, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0x95, 0x98, 0x09, 0x00, 0x00, 0x00, 0x01, 0x42, 0x01, 0x01, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0xb0, 0x03, 0xc0, 0x80, 0x10, 0xe5, 0x96, 0x56, 0x69, 0x24, 0xca, 0xe0, + 0x10, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x03, 0x01, 0xe0, 0x80, + 0x00, 0x00, 0x00, 0x01, 0x44, 0x01, 0xc1, 0x72, 0xb4, 0x62, 0x40, 0x00, + 0x00, 0x01, 0x26, 0x01, 0xaf, 0x03, 0x4c, +}; + +// Contains enough of the image slice to contain invalid slice QP 52. 
+const uint8_t kH265BitstreamInvalidQPChunk52[] = { + 0x00, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0x95, 0x98, 0x09, 0x00, 0x00, 0x00, 0x01, 0x42, 0x01, 0x01, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0xb0, 0x03, 0xc0, 0x80, 0x10, 0xe5, 0x96, 0x56, 0x69, 0x24, 0xca, 0xe0, + 0x10, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x03, 0x01, 0xe0, 0x80, + 0x00, 0x00, 0x00, 0x01, 0x44, 0x01, 0xc1, 0x72, 0xb4, 0x62, 0x40, 0x00, + 0x00, 0x01, 0x26, 0x01, 0xaf, 0x03, 0x44, +}; + +TEST(H265BitstreamParserTest, ReportsNoQpWithoutParsedSlices) { + H265BitstreamParser h265_parser; + EXPECT_FALSE(h265_parser.GetLastSliceQp().has_value()); +} + +TEST(H265BitstreamParserTest, ReportsNoQpWithOnlyParsedPpsAndSpsSlices) { + H265BitstreamParser h265_parser; + h265_parser.ParseBitstream(kH265VpsSpsPps); + EXPECT_FALSE(h265_parser.GetLastSliceQp().has_value()); +} + +TEST(H265BitstreamParserTest, ReportsLastSliceQpForImageSlices) { + H265BitstreamParser h265_parser; + h265_parser.ParseBitstream(kH265BitstreamChunk); + absl::optional<int> qp = h265_parser.GetLastSliceQp(); + ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(34, *qp); + + // Parse an additional image slice. 
+ h265_parser.ParseBitstream(kH265BitstreamNextImageSliceChunk); + qp = h265_parser.GetLastSliceQp(); + ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(36, *qp); +} + +TEST(H265BitstreamParserTest, ReportsLastSliceQpFromShortTermReferenceSlices) { + H265BitstreamParser h265_parser; + h265_parser.ParseBitstream(kH265SliceStrChunk); + absl::optional<int> qp = h265_parser.GetLastSliceQp(); + ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(33, *qp); +} + +TEST(H265BitstreamParserTest, PpsIdFromSlice) { + H265BitstreamParser h265_parser; + absl::optional<uint32_t> pps_id = + h265_parser.ParsePpsIdFromSliceSegmentLayerRbsp( + kH265SliceChunk, sizeof(kH265SliceChunk), H265::NaluType::kTrailR); + ASSERT_TRUE(pps_id); + EXPECT_EQ(1u, *pps_id); +} + +TEST(H265BitstreamParserTest, ReportsLastSliceQpInvalidQPSlices) { + H265BitstreamParser h265_parser; + h265_parser.ParseBitstream(kH265BitstreamInvalidQPChunk); + absl::optional<int> qp = h265_parser.GetLastSliceQp(); + ASSERT_FALSE(qp.has_value()); + + h265_parser.ParseBitstream(kH265BitstreamInvalidQPChunk52); + qp = h265_parser.GetLastSliceQp(); + ASSERT_FALSE(qp.has_value()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_common.cc b/third_party/libwebrtc/common_video/h265/h265_common.cc new file mode 100644 index 0000000000..70864495bc --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_common.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "common_video/h265/h265_common.h" + +#include "common_video/h264/h264_common.h" + +namespace webrtc { +namespace H265 { + +constexpr uint8_t kNaluTypeMask = 0x7E; + +std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer, + size_t buffer_size) { + std::vector<H264::NaluIndex> indices = + H264::FindNaluIndices(buffer, buffer_size); + std::vector<NaluIndex> results; + for (auto& index : indices) { + results.push_back( + {index.start_offset, index.payload_start_offset, index.payload_size}); + } + return results; +} + +NaluType ParseNaluType(uint8_t data) { + return static_cast<NaluType>((data & kNaluTypeMask) >> 1); +} + +std::vector<uint8_t> ParseRbsp(const uint8_t* data, size_t length) { + return H264::ParseRbsp(data, length); +} + +void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination) { + H264::WriteRbsp(bytes, length, destination); +} + +uint32_t Log2Ceiling(uint32_t value) { + // When n == 0, we want the function to return -1. + // When n == 0, (n - 1) will underflow to 0xFFFFFFFF, which is + // why the statement below starts with (n ? 32 : -1). + return (value ? 32 : -1) - WebRtcVideo_CountLeadingZeros32(value - 1); +} + +} // namespace H265 +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_common.h b/third_party/libwebrtc/common_video/h265/h265_common.h new file mode 100644 index 0000000000..fcb97815ff --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_common.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef COMMON_VIDEO_H265_H265_COMMON_H_ +#define COMMON_VIDEO_H265_H265_COMMON_H_ + +#include <memory> +#include <vector> + +#include "common_video/h265/h265_inline.h" +#include "rtc_base/buffer.h" + +namespace webrtc { + +namespace H265 { +// The size of a full NALU start sequence {0 0 0 1}, used for the first NALU +// of an access unit, and for SPS and PPS blocks. +constexpr size_t kNaluLongStartSequenceSize = 4; + +// The size of a shortened NALU start sequence {0 0 1}, that may be used if +// not the first NALU of an access unit or an SPS or PPS block. +constexpr size_t kNaluShortStartSequenceSize = 3; + +// The size of the NALU header byte (2). +constexpr size_t kNaluHeaderSize = 2; + +// Type description of 0-40 is defined in Table7-1 of the H.265 spec +// Type desciption of 48-49 is defined in section 4.4.2 and 4.4.3 of RFC7798 +enum NaluType : uint8_t { + kTrailN = 0, + kTrailR = 1, + kTsaN = 2, + kTsaR = 3, + kStsaN = 4, + kStsaR = 5, + kRadlN = 6, + kRadlR = 7, + kBlaWLp = 16, + kBlaWRadl = 17, + kBlaNLp = 18, + kIdrWRadl = 19, + kIdrNLp = 20, + kCra = 21, + kRsvIrapVcl23 = 23, + kVps = 32, + kSps = 33, + kPps = 34, + kAud = 35, + kPrefixSei = 39, + kSuffixSei = 40, + kAP = 48, + kFU = 49 +}; + +// Slice type definition. See table 7-7 of the H265 spec +enum SliceType : uint8_t { kB = 0, kP = 1, kI = 2 }; + +struct NaluIndex { + // Start index of NALU, including start sequence. + size_t start_offset = 0; + // Start index of NALU payload, typically type header. + size_t payload_start_offset = 0; + // Length of NALU payload, in bytes, counting from payload_start_offset. + size_t payload_size = 0; +}; + +// Returns a vector of the NALU indices in the given buffer. +std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer, + size_t buffer_size); + +// Get the NAL type from the header byte immediately following start sequence. +NaluType ParseNaluType(uint8_t data); + +// Methods for parsing and writing RBSP. See section 7.4.2 of the H265 spec. 
+// +// The following sequences are illegal, and need to be escaped when encoding: +// 00 00 00 -> 00 00 03 00 +// 00 00 01 -> 00 00 03 01 +// 00 00 02 -> 00 00 03 02 +// And things in the source that look like the emulation byte pattern (00 00 03) +// need to have an extra emulation byte added, so it's removed when decoding: +// 00 00 03 -> 00 00 03 03 +// +// Decoding is simply a matter of finding any 00 00 03 sequence and removing +// the 03 emulation byte. + +// Parse the given data and remove any emulation byte escaping. +std::vector<uint8_t> ParseRbsp(const uint8_t* data, size_t length); + +// Write the given data to the destination buffer, inserting emulation +// bytes in order to escape any data that could be interpreted as a start +// sequence. +void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination); + +uint32_t Log2Ceiling(uint32_t value); + +} // namespace H265 +} // namespace webrtc + +#endif // COMMON_VIDEO_H265_H265_COMMON_H_ diff --git a/third_party/libwebrtc/common_video/h265/h265_inline.cc b/third_party/libwebrtc/common_video/h265/h265_inline.cc new file mode 100644 index 0000000000..3943a7a41e --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_inline.cc @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h265/h265_inline.h" + +#include <stdint.h> + +// Table used by WebRtcVideo_CountLeadingZeros32_NotBuiltin. For each uint32_t n +// that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at +// index (n * 0x8c0b2891) >> 26 in this table gives the number of zero bits in +// n.
// --- h265_inline.cc ---------------------------------------------------------

// Lookup table for WebRtcVideo_CountLeadingZeros32_NotBuiltin(). For every
// 32-bit value n made of a run of 0 bits followed by a run of 1 bits, the
// entry at index (n * 0x8c0b2891) >> 26 holds the number of zero bits in n.
// Slots that no such n maps to hold -1.
const int8_t kWebRtcVideo_CountLeadingZeros32_Table[64] = {
    32, 8,  17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0,  26, 25, 24,
    4,  11, 23, 31, 3,  7,  10, 16, 22, 30, -1, -1, 2,  6,  13, 9,
    -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1,  27, 5,  12,
};

// --- h265_inline.h ----------------------------------------------------------

extern const int8_t kWebRtcVideo_CountLeadingZeros32_Table[64];

// Portable leading-zero count that relies only on integer arithmetic and the
// table above. Returns 32 for n == 0.
static inline int WebRtcVideo_CountLeadingZeros32_NotBuiltin(uint32_t n) {
  // Smear the highest set bit into every lower position. The result is the
  // nearest value of the form 0...01...1 and has the same number of leading
  // zeros as the original n. There are exactly 33 such values.
  n |= n >> 1;
  n |= n >> 2;
  n |= n >> 4;
  n |= n >> 8;
  n |= n >> 16;

  // The multiplier was selected (by exhaustive search) so that each of the 33
  // possible values yields a product whose top six bits are unique; those six
  // bits index the table.
  return kWebRtcVideo_CountLeadingZeros32_Table[(n * 0x8c0b2891u) >> 26];
}

// Returns the number of leading zero bits in `n` (32 when n == 0).
static inline int WebRtcVideo_CountLeadingZeros32(uint32_t n) {
#ifdef __GNUC__
  // __builtin_clz() takes an unsigned int; make sure that is 32 bits wide.
  static_assert(sizeof(unsigned int) == sizeof(uint32_t),
                "__builtin_clz requires a 32-bit unsigned int");
  // The builtin is undefined for 0, so handle that input explicitly.
  return n == 0 ? 32 : __builtin_clz(n);
#else
  return WebRtcVideo_CountLeadingZeros32_NotBuiltin(n);
#endif
}
+ */ + +#include "common_video/h265/h265_pps_parser.h" + +#include <memory> +#include <vector> + +#include "absl/types/optional.h" +#include "common_video/h265/h265_common.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/bitstream_reader.h" +#include "rtc_base/logging.h" + +#define IN_RANGE_OR_RETURN_NULL(val, min, max) \ + do { \ + if (!reader.Ok() || (val) < (min) || (val) > (max)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \ + " to be" \ + << " in range [" << (min) << ":" << (max) << "]" \ + << " found " << (val) << " instead"; \ + return absl::nullopt; \ + } \ + } while (0) + +#define IN_RANGE_OR_RETURN_FALSE(val, min, max) \ + do { \ + if (!reader.Ok() || (val) < (min) || (val) > (max)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \ + " to be" \ + << " in range [" << (min) << ":" << (max) << "]" \ + << " found " << (val) << " instead"; \ + return false; \ + } \ + } while (0) + +#define TRUE_OR_RETURN(a) \ + do { \ + if (!reader.Ok() || !(a)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " \ + << #a; \ + return absl::nullopt; \ + } \ + } while (0) + +namespace { +constexpr int kMaxNumTileColumnWidth = 19; +constexpr int kMaxNumTileRowHeight = 21; +constexpr int kMaxRefIdxActive = 15; +} // namespace + +namespace webrtc { + +// General note: this is based off the 08/2021 version of the H.265 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.265 + +absl::optional<H265PpsParser::PpsState> H265PpsParser::ParsePps( + const uint8_t* data, + size_t length, + const H265SpsParser::SpsState* sps) { + // First, parse out rbsp, which is basically the source buffer minus emulation + // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in + // section 7.3.1.1 of the H.265 standard. 
+ return ParseInternal(H265::ParseRbsp(data, length), sps); +} + +bool H265PpsParser::ParsePpsIds(const uint8_t* data, + size_t length, + uint32_t* pps_id, + uint32_t* sps_id) { + RTC_DCHECK(pps_id); + RTC_DCHECK(sps_id); + // First, parse out rbsp, which is basically the source buffer minus emulation + // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in + // section 7.3.1.1 of the H.265 standard. + std::vector<uint8_t> unpacked_buffer = H265::ParseRbsp(data, length); + BitstreamReader reader(unpacked_buffer); + *pps_id = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_FALSE(*pps_id, 0, 63); + *sps_id = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_FALSE(*sps_id, 0, 15); + return reader.Ok(); +} + +absl::optional<H265PpsParser::PpsState> H265PpsParser::ParseInternal( + rtc::ArrayView<const uint8_t> buffer, + const H265SpsParser::SpsState* sps) { + BitstreamReader reader(buffer); + PpsState pps; + + if (!sps) { + return absl::nullopt; + } + + if (!ParsePpsIdsInternal(reader, pps.pps_id, pps.sps_id)) { + return absl::nullopt; + } + + // dependent_slice_segments_enabled_flag: u(1) + pps.dependent_slice_segments_enabled_flag = reader.Read<bool>(); + // output_flag_present_flag: u(1) + pps.output_flag_present_flag = reader.Read<bool>(); + // num_extra_slice_header_bits: u(3) + pps.num_extra_slice_header_bits = reader.ReadBits(3); + IN_RANGE_OR_RETURN_NULL(pps.num_extra_slice_header_bits, 0, 2); + // sign_data_hiding_enabled_flag: u(1) + reader.ConsumeBits(1); + // cabac_init_present_flag: u(1) + pps.cabac_init_present_flag = reader.Read<bool>(); + // num_ref_idx_l0_default_active_minus1: ue(v) + pps.num_ref_idx_l0_default_active_minus1 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(pps.num_ref_idx_l0_default_active_minus1, 0, + kMaxRefIdxActive - 1); + // num_ref_idx_l1_default_active_minus1: ue(v) + pps.num_ref_idx_l1_default_active_minus1 = reader.ReadExponentialGolomb(); + 
IN_RANGE_OR_RETURN_NULL(pps.num_ref_idx_l1_default_active_minus1, 0, + kMaxRefIdxActive - 1); + // init_qp_minus26: se(v) + pps.init_qp_minus26 = reader.ReadSignedExponentialGolomb(); + pps.qp_bd_offset_y = 6 * sps->bit_depth_luma_minus8; + // Sanity-check parsed value + IN_RANGE_OR_RETURN_NULL(pps.init_qp_minus26, -(26 + pps.qp_bd_offset_y), 25); + // constrained_intra_pred_flag: u(1)log2_min_pcm_luma_coding_block_size_minus3 + reader.ConsumeBits(1); + // transform_skip_enabled_flag: u(1) + reader.ConsumeBits(1); + // cu_qp_delta_enabled_flag: u(1) + bool cu_qp_delta_enabled_flag = reader.Read<bool>(); + if (cu_qp_delta_enabled_flag) { + // diff_cu_qp_delta_depth: ue(v) + uint32_t diff_cu_qp_delta_depth = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(diff_cu_qp_delta_depth, 0, + sps->log2_diff_max_min_luma_coding_block_size); + } + // pps_cb_qp_offset: se(v) + int32_t pps_cb_qp_offset = reader.ReadSignedExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(pps_cb_qp_offset, -12, 12); + // pps_cr_qp_offset: se(v) + int32_t pps_cr_qp_offset = reader.ReadSignedExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(pps_cr_qp_offset, -12, 12); + // pps_slice_chroma_qp_offsets_present_flag: u(1) + reader.ConsumeBits(1); + // weighted_pred_flag: u(1) + pps.weighted_pred_flag = reader.Read<bool>(); + // weighted_bipred_flag: u(1) + pps.weighted_bipred_flag = reader.Read<bool>(); + // transquant_bypass_enabled_flag: u(1) + reader.ConsumeBits(1); + // tiles_enabled_flag: u(1) + bool tiles_enabled_flag = reader.Read<bool>(); + // entropy_coding_sync_enabled_flag: u(1) + reader.ConsumeBits(1); + if (tiles_enabled_flag) { + // num_tile_columns_minus1: ue(v) + uint32_t num_tile_columns_minus1 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(num_tile_columns_minus1, 0, + sps->pic_width_in_ctbs_y - 1); + TRUE_OR_RETURN(num_tile_columns_minus1 < kMaxNumTileColumnWidth); + // num_tile_rows_minus1: ue(v) + uint32_t num_tile_rows_minus1 = reader.ReadExponentialGolomb(); + 
IN_RANGE_OR_RETURN_NULL(num_tile_rows_minus1, 0, + sps->pic_height_in_ctbs_y - 1); + TRUE_OR_RETURN((num_tile_columns_minus1 != 0) || + (num_tile_rows_minus1 != 0)); + TRUE_OR_RETURN(num_tile_rows_minus1 < kMaxNumTileRowHeight); + // uniform_spacing_flag: u(1) + bool uniform_spacing_flag = reader.Read<bool>(); + if (!uniform_spacing_flag) { + int column_width_minus1[kMaxNumTileColumnWidth]; + column_width_minus1[num_tile_columns_minus1] = + sps->pic_width_in_ctbs_y - 1; + for (uint32_t i = 0; i < num_tile_columns_minus1; i++) { + // column_width_minus1: ue(v) + column_width_minus1[i] = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL( + column_width_minus1[i], 0, + column_width_minus1[num_tile_columns_minus1] - 1); + column_width_minus1[num_tile_columns_minus1] -= + column_width_minus1[i] + 1; + } + int row_height_minus1[kMaxNumTileRowHeight]; + row_height_minus1[num_tile_rows_minus1] = sps->pic_height_in_ctbs_y - 1; + for (uint32_t i = 0; i < num_tile_rows_minus1; i++) { + // row_height_minus1: ue(v) + row_height_minus1[i] = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(row_height_minus1[i], 0, + row_height_minus1[num_tile_rows_minus1] - 1); + row_height_minus1[num_tile_rows_minus1] -= row_height_minus1[i] + 1; + } + // loop_filter_across_tiles_enabled_flag: u(1) + reader.ConsumeBits(1); + } + } + // pps_loop_filter_across_slices_enabled_flag: u(1) + reader.ConsumeBits(1); + // deblocking_filter_control_present_flag: u(1) + bool deblocking_filter_control_present_flag = reader.Read<bool>(); + if (deblocking_filter_control_present_flag) { + // deblocking_filter_override_enabled_flag: u(1) + reader.ConsumeBits(1); + // pps_deblocking_filter_disabled_flag: u(1) + bool pps_deblocking_filter_disabled_flag = reader.Read<bool>(); + if (!pps_deblocking_filter_disabled_flag) { + // pps_beta_offset_div2: se(v) + int pps_beta_offset_div2 = reader.ReadSignedExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(pps_beta_offset_div2, -6, 6); + // 
pps_tc_offset_div2: se(v) + int pps_tc_offset_div2 = reader.ReadSignedExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(pps_tc_offset_div2, -6, 6); + } + } + // pps_scaling_list_data_present_flag: u(1) + bool pps_scaling_list_data_present_flag = 0; + pps_scaling_list_data_present_flag = reader.Read<bool>(); + if (pps_scaling_list_data_present_flag) { + // scaling_list_data() + if (!H265SpsParser::ParseScalingListData(reader)) { + return absl::nullopt; + } + } + // lists_modification_present_flag: u(1) + pps.lists_modification_present_flag = reader.Read<bool>(); + + if (!reader.Ok()) { + return absl::nullopt; + } + + return pps; +} + +bool H265PpsParser::ParsePpsIdsInternal(BitstreamReader& reader, + uint32_t& pps_id, + uint32_t& sps_id) { + // pic_parameter_set_id: ue(v) + pps_id = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_FALSE(pps_id, 0, 63); + // seq_parameter_set_id: ue(v) + sps_id = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_FALSE(sps_id, 0, 15); + return true; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_pps_parser.h b/third_party/libwebrtc/common_video/h265/h265_pps_parser.h new file mode 100644 index 0000000000..625869d8d5 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_pps_parser.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
namespace webrtc {

// A class for parsing out picture parameter set (PPS) data from an H265 NALU.
// Every member function is static; the class only groups the entry points and
// the PpsState result type.
class H265PpsParser {
 public:
  // The parsed state of the PPS. Only some select values are stored.
  // Add more as they are actually needed.
  struct PpsState {
    PpsState() = default;

    bool dependent_slice_segments_enabled_flag = false;
    bool cabac_init_present_flag = false;
    bool output_flag_present_flag = false;
    // Parsed from a 3-bit field; the parser rejects values above 2.
    uint32_t num_extra_slice_header_bits = 0;
    uint32_t num_ref_idx_l0_default_active_minus1 = 0;
    uint32_t num_ref_idx_l1_default_active_minus1 = 0;
    int init_qp_minus26 = 0;
    bool weighted_pred_flag = false;
    bool weighted_bipred_flag = false;
    bool lists_modification_present_flag = false;
    // Identifier of this PPS; the parser accepts 0..63.
    uint32_t pps_id = 0;
    // Identifier of the SPS this PPS refers to; the parser accepts 0..15.
    uint32_t sps_id = 0;
    // Not read from the PPS itself: the parser sets it to
    // 6 * bit_depth_luma_minus8 of the SPS it was given.
    int qp_bd_offset_y = 0;
  };

  // Unpack RBSP and parse PPS state from the supplied buffer.
  // `sps` is the already-parsed SPS used for range checks; parsing fails
  // (absl::nullopt) when it is null or the data is invalid.
  static absl::optional<PpsState> ParsePps(const uint8_t* data,
                                           size_t length,
                                           const H265SpsParser::SpsState* sps);

  // Unpack RBSP and extract only the PPS id and the referenced SPS id.
  // Both output pointers must be non-null. Returns false on invalid data.
  static bool ParsePpsIds(const uint8_t* data,
                          size_t length,
                          uint32_t* pps_id,
                          uint32_t* sps_id);

 protected:
  // Parse the PPS state, for a bit buffer where RBSP decoding has already been
  // performed.
  static absl::optional<PpsState> ParseInternal(
      rtc::ArrayView<const uint8_t> buffer,
      const H265SpsParser::SpsState* sps);
  // Reads the PPS id followed by the SPS id from `reader`, range-checking
  // each; returns false if the reader failed or an id is out of range.
  static bool ParsePpsIdsInternal(BitstreamReader& reader,
                                  uint32_t& pps_id,
                                  uint32_t& sps_id);
};

}  // namespace webrtc
+ */ + +#include "common_video/h265/h265_pps_parser.h" + +#include <algorithm> + +#include "common_video/h265/h265_common.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/buffer.h" +#include "rtc_base/checks.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +constexpr size_t kPpsBufferMaxSize = 256; +constexpr uint32_t kIgnored = 0; +} // namespace + +void WritePps(const H265PpsParser::PpsState& pps, + bool cu_qp_delta_enabled_flag, + bool tiles_enabled_flag, + bool uniform_spacing_flag, + bool deblocking_filter_control_present_flag, + bool pps_deblocking_filter_disabled_flag, + bool pps_scaling_list_data_present_flag, + bool scaling_list_pred_mode_flag, + rtc::Buffer* out_buffer) { + uint8_t data[kPpsBufferMaxSize] = {0}; + rtc::BitBufferWriter bit_buffer(data, kPpsBufferMaxSize); + + // pic_parameter_set_id: ue(v) + bit_buffer.WriteExponentialGolomb(pps.pps_id); + // seq_parameter_set_id: ue(v) + bit_buffer.WriteExponentialGolomb(pps.sps_id); + // dependent_slice_segments_enabled_flag: u(1) + bit_buffer.WriteBits(pps.dependent_slice_segments_enabled_flag, 1); + // output_flag_present_flag: u(1) + bit_buffer.WriteBits(pps.output_flag_present_flag, 1); + // num_extra_slice_header_bits: u(3) + bit_buffer.WriteBits(pps.num_extra_slice_header_bits, 3); + // sign_data_hiding_enabled_flag: u(1) + bit_buffer.WriteBits(1, 1); + // cabac_init_present_flag: u(1) + bit_buffer.WriteBits(pps.cabac_init_present_flag, 1); + // num_ref_idx_l0_default_active_minus1: ue(v) + bit_buffer.WriteExponentialGolomb(pps.num_ref_idx_l0_default_active_minus1); + // num_ref_idx_l1_default_active_minus1: ue(v) + bit_buffer.WriteExponentialGolomb(pps.num_ref_idx_l1_default_active_minus1); + // init_qp_minus26: se(v) + bit_buffer.WriteSignedExponentialGolomb(pps.init_qp_minus26); + // constrained_intra_pred_flag: u(1) + bit_buffer.WriteBits(0, 1); + // transform_skip_enabled_flag: u(1) + bit_buffer.WriteBits(0, 1); + // 
cu_qp_delta_enabled_flag: u(1) + bit_buffer.WriteBits(cu_qp_delta_enabled_flag, 1); + if (cu_qp_delta_enabled_flag) { + // diff_cu_qp_delta_depth: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + } + // pps_cb_qp_offset: se(v) + bit_buffer.WriteSignedExponentialGolomb(kIgnored); + // pps_cr_qp_offset: se(v) + bit_buffer.WriteSignedExponentialGolomb(kIgnored); + // pps_slice_chroma_qp_offsets_present_flag: u(1) + bit_buffer.WriteBits(0, 1); + // weighted_pred_flag: u(1) + bit_buffer.WriteBits(pps.weighted_pred_flag, 1); + // weighted_bipred_flag: u(1) + bit_buffer.WriteBits(pps.weighted_bipred_flag, 1); + // transquant_bypass_enabled_flag: u(1) + bit_buffer.WriteBits(0, 1); + // tiles_enabled_flag: u(1) + bit_buffer.WriteBits(tiles_enabled_flag, 1); + // entropy_coding_sync_enabled_flag: u(1) + bit_buffer.WriteBits(1, 1); + if (tiles_enabled_flag) { + // num_tile_columns_minus1: ue(v) + bit_buffer.WriteExponentialGolomb(6); + // num_tile_rows_minus1: ue(v) + bit_buffer.WriteExponentialGolomb(1); + // uniform_spacing_flag: u(1) + bit_buffer.WriteBits(0, 1); + if (!uniform_spacing_flag) { + for (uint32_t i = 0; i < 6; i++) { + // column_width_minus1: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + } + for (uint32_t i = 0; i < 1; i++) { + // row_height_minus1: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + } + // loop_filter_across_tiles_enabled_flag: u(1) + bit_buffer.WriteBits(0, 1); + } + } + // pps_loop_filter_across_slices_enabled_flag: u(1) + bit_buffer.WriteBits(1, 1); + // deblocking_filter_control_present_flag: u(1) + bit_buffer.WriteBits(deblocking_filter_control_present_flag, 1); + if (deblocking_filter_control_present_flag) { + // deblocking_filter_override_enabled_flag: u(1) + bit_buffer.WriteBits(0, 1); + // pps_deblocking_filter_disabled_flag: u(1) + bit_buffer.WriteBits(pps_deblocking_filter_disabled_flag, 1); + if (!pps_deblocking_filter_disabled_flag) { + // pps_beta_offset_div2: se(v) + 
bit_buffer.WriteSignedExponentialGolomb(kIgnored); + // pps_tc_offset_div2: se(v) + bit_buffer.WriteSignedExponentialGolomb(kIgnored); + } + } + // pps_scaling_list_data_present_flag: u(1) + bit_buffer.WriteBits(pps_scaling_list_data_present_flag, 1); + if (pps_scaling_list_data_present_flag) { + for (int size_id = 0; size_id < 4; size_id++) { + for (int matrix_id = 0; matrix_id < 6; + matrix_id += (size_id == 3) ? 3 : 1) { + // scaling_list_pred_mode_flag: u(1) + bit_buffer.WriteBits(scaling_list_pred_mode_flag, 1); + if (!scaling_list_pred_mode_flag) { + // scaling_list_pred_matrix_id_delta: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + } else { + uint32_t coef_num = std::min(64, 1 << (4 + (size_id << 1))); + if (size_id > 1) { + // scaling_list_dc_coef_minus8: se(v) + bit_buffer.WriteSignedExponentialGolomb(kIgnored); + } + for (uint32_t i = 0; i < coef_num; i++) { + // scaling_list_delta_coef: se(v) + bit_buffer.WriteSignedExponentialGolomb(kIgnored); + } + } + } + } + } + // lists_modification_present_flag: u(1) + bit_buffer.WriteBits(pps.lists_modification_present_flag, 1); + // log2_parallel_merge_level_minus2: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + // slice_segment_header_extension_present_flag: u(1) + bit_buffer.WriteBits(0, 1); + + size_t byte_offset; + size_t bit_offset; + bit_buffer.GetCurrentOffset(&byte_offset, &bit_offset); + if (bit_offset > 0) { + bit_buffer.WriteBits(0, 8 - bit_offset); + bit_buffer.GetCurrentOffset(&byte_offset, &bit_offset); + } + + H265::WriteRbsp(data, byte_offset, out_buffer); +} + +class H265PpsParserTest : public ::testing::Test { + public: + H265PpsParserTest() {} + ~H265PpsParserTest() override {} + + void RunTest() { + VerifyParsing(generated_pps_, false, false, false, false, false, false, + false); + // Enable flags to cover more path + VerifyParsing(generated_pps_, true, true, false, true, true, true, false); + } + + void VerifyParsing(const H265PpsParser::PpsState& pps, + bool 
cu_qp_delta_enabled_flag, + bool tiles_enabled_flag, + bool uniform_spacing_flag, + bool deblocking_filter_control_present_flag, + bool pps_deblocking_filter_disabled_flag, + bool pps_scaling_list_data_present_flag, + bool scaling_list_pred_mode_flag) { + buffer_.Clear(); + WritePps(pps, cu_qp_delta_enabled_flag, tiles_enabled_flag, + uniform_spacing_flag, deblocking_filter_control_present_flag, + pps_deblocking_filter_disabled_flag, + pps_scaling_list_data_present_flag, scaling_list_pred_mode_flag, + &buffer_); + const uint8_t sps_buffer[] = { + 0x01, 0x04, 0x08, 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x5d, 0xb0, 0x02, 0x80, 0x80, 0x2d, + 0x16, 0x59, 0x59, 0xa4, 0x93, 0x2b, 0x80, 0x40, 0x00, 0x00, + 0x03, 0x00, 0x40, 0x00, 0x00, 0x07, 0x82}; + H265SpsParser::SpsState parsed_sps = + H265SpsParser::ParseSps(sps_buffer, arraysize(sps_buffer)).value(); + parsed_pps_ = + H265PpsParser::ParsePps(buffer_.data(), buffer_.size(), &parsed_sps); + ASSERT_TRUE(parsed_pps_); + EXPECT_EQ(pps.dependent_slice_segments_enabled_flag, + parsed_pps_->dependent_slice_segments_enabled_flag); + EXPECT_EQ(pps.cabac_init_present_flag, + parsed_pps_->cabac_init_present_flag); + EXPECT_EQ(pps.output_flag_present_flag, + parsed_pps_->output_flag_present_flag); + EXPECT_EQ(pps.num_extra_slice_header_bits, + parsed_pps_->num_extra_slice_header_bits); + EXPECT_EQ(pps.num_ref_idx_l0_default_active_minus1, + parsed_pps_->num_ref_idx_l0_default_active_minus1); + EXPECT_EQ(pps.num_ref_idx_l1_default_active_minus1, + parsed_pps_->num_ref_idx_l1_default_active_minus1); + EXPECT_EQ(pps.init_qp_minus26, parsed_pps_->init_qp_minus26); + EXPECT_EQ(pps.weighted_pred_flag, parsed_pps_->weighted_pred_flag); + EXPECT_EQ(pps.weighted_bipred_flag, parsed_pps_->weighted_bipred_flag); + EXPECT_EQ(pps.lists_modification_present_flag, + parsed_pps_->lists_modification_present_flag); + EXPECT_EQ(pps.pps_id, parsed_pps_->pps_id); + EXPECT_EQ(pps.sps_id, parsed_pps_->sps_id); + } + + 
H265PpsParser::PpsState generated_pps_; + rtc::Buffer buffer_; + absl::optional<H265PpsParser::PpsState> parsed_pps_; + absl::optional<H265SpsParser::SpsState> parsed_sps_; +}; + +TEST_F(H265PpsParserTest, ZeroPps) { + RunTest(); +} + +TEST_F(H265PpsParserTest, MaxPps) { + generated_pps_.dependent_slice_segments_enabled_flag = true; + generated_pps_.init_qp_minus26 = 25; + generated_pps_.num_extra_slice_header_bits = 1; // 1 bit value. + generated_pps_.weighted_bipred_flag = true; + generated_pps_.weighted_pred_flag = true; + generated_pps_.cabac_init_present_flag = true; + generated_pps_.pps_id = 2; + generated_pps_.sps_id = 1; + RunTest(); + + generated_pps_.init_qp_minus26 = -25; + RunTest(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_sps_parser.cc b/third_party/libwebrtc/common_video/h265/h265_sps_parser.cc new file mode 100644 index 0000000000..a2da4b9b7b --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_sps_parser.cc @@ -0,0 +1,686 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "common_video/h265/h265_sps_parser.h" + +#include <algorithm> +#include <memory> +#include <vector> + +#include "common_video/h265/h265_common.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/logging.h" + +#define IN_RANGE_OR_RETURN_NULL(val, min, max) \ + do { \ + if (!reader.Ok() || (val) < (min) || (val) > (max)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \ + " to be" \ + << " in range [" << (min) << ":" << (max) << "]" \ + << " found " << (val) << " instead"; \ + return absl::nullopt; \ + } \ + } while (0) + +#define IN_RANGE_OR_RETURN_FALSE(val, min, max) \ + do { \ + if (!reader.Ok() || (val) < (min) || (val) > (max)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \ + " to be" \ + << " in range [" << (min) << ":" << (max) << "]" \ + << " found " << (val) << " instead"; \ + return false; \ + } \ + } while (0) + +#define TRUE_OR_RETURN(a) \ + do { \ + if (!reader.Ok() || !(a)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " \ + << #a; \ + return absl::nullopt; \ + } \ + } while (0) + +namespace { +using OptionalSps = absl::optional<webrtc::H265SpsParser::SpsState>; +using OptionalShortTermRefPicSet = + absl::optional<webrtc::H265SpsParser::ShortTermRefPicSet>; +using OptionalProfileTierLevel = + absl::optional<webrtc::H265SpsParser::ProfileTierLevel>; + +constexpr int kMaxNumSizeIds = 4; +constexpr int kMaxNumMatrixIds = 6; +constexpr int kMaxNumCoefs = 64; +} // namespace + +namespace webrtc { + +H265SpsParser::ShortTermRefPicSet::ShortTermRefPicSet() = default; + +H265SpsParser::ProfileTierLevel::ProfileTierLevel() = default; + +int H265SpsParser::GetMaxLumaPs(int general_level_idc) { + // From Table A.8 - General tier and level limits. + // |general_level_idc| is 30x the actual level. 
+ if (general_level_idc <= 30) // level 1 + return 36864; + if (general_level_idc <= 60) // level 2 + return 122880; + if (general_level_idc <= 63) // level 2.1 + return 245760; + if (general_level_idc <= 90) // level 3 + return 552960; + if (general_level_idc <= 93) // level 3.1 + return 983040; + if (general_level_idc <= 123) // level 4, 4.1 + return 2228224; + if (general_level_idc <= 156) // level 5, 5.1, 5.2 + return 8912896; + // level 6, 6.1, 6.2 - beyond that there's no actual limit. + return 35651584; +} + +size_t H265SpsParser::GetDpbMaxPicBuf(int general_profile_idc) { + // From A.4.2 - Profile-specific level limits for the video profiles. + // If sps_curr_pic_ref_enabled_flag is required to be zero, than this is 6 + // otherwise it is 7. + return (general_profile_idc >= kProfileIdcMain && + general_profile_idc <= kProfileIdcHighThroughput) + ? 6 + : 7; +} + +// General note: this is based off the 08/2021 version of the H.265 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.265 + +// Unpack RBSP and parse SPS state from the supplied buffer. +absl::optional<H265SpsParser::SpsState> H265SpsParser::ParseSps( + const uint8_t* data, + size_t length) { + RTC_DCHECK(data); + return ParseSpsInternal(H265::ParseRbsp(data, length)); +} + +bool H265SpsParser::ParseScalingListData(BitstreamReader& reader) { + int32_t scaling_list_dc_coef_minus8[kMaxNumSizeIds][kMaxNumMatrixIds] = {}; + for (int size_id = 0; size_id < kMaxNumSizeIds; size_id++) { + for (int matrix_id = 0; matrix_id < kMaxNumMatrixIds; + matrix_id += (size_id == 3) ? 
3 : 1) { + // scaling_list_pred_mode_flag: u(1) + bool scaling_list_pred_mode_flag = reader.Read<bool>(); + if (!scaling_list_pred_mode_flag) { + // scaling_list_pred_matrix_id_delta: ue(v) + int scaling_list_pred_matrix_id_delta = reader.ReadExponentialGolomb(); + if (size_id <= 2) { + IN_RANGE_OR_RETURN_FALSE(scaling_list_pred_matrix_id_delta, 0, + matrix_id); + } else { // size_id == 3 + IN_RANGE_OR_RETURN_FALSE(scaling_list_pred_matrix_id_delta, 0, + matrix_id / 3); + } + } else { + uint32_t coef_num = std::min(kMaxNumCoefs, 1 << (4 + (size_id << 1))); + if (size_id > 1) { + // scaling_list_dc_coef_minus8: se(v) + scaling_list_dc_coef_minus8[size_id - 2][matrix_id] = + reader.ReadSignedExponentialGolomb(); + IN_RANGE_OR_RETURN_FALSE( + scaling_list_dc_coef_minus8[size_id - 2][matrix_id], -7, 247); + } + for (uint32_t i = 0; i < coef_num; i++) { + // scaling_list_delta_coef: se(v) + int32_t scaling_list_delta_coef = + reader.ReadSignedExponentialGolomb(); + IN_RANGE_OR_RETURN_FALSE(scaling_list_delta_coef, -128, 127); + } + } + } + } + return reader.Ok(); +} + +absl::optional<H265SpsParser::ShortTermRefPicSet> +H265SpsParser::ParseShortTermRefPicSet( + uint32_t st_rps_idx, + uint32_t num_short_term_ref_pic_sets, + const std::vector<H265SpsParser::ShortTermRefPicSet>& + short_term_ref_pic_set, + uint32_t sps_max_dec_pic_buffering_minus1, + BitstreamReader& reader) { + H265SpsParser::ShortTermRefPicSet st_ref_pic_set; + + bool inter_ref_pic_set_prediction_flag = false; + if (st_rps_idx != 0) { + // inter_ref_pic_set_prediction_flag: u(1) + inter_ref_pic_set_prediction_flag = reader.Read<bool>(); + } + + if (inter_ref_pic_set_prediction_flag) { + uint32_t delta_idx_minus1 = 0; + if (st_rps_idx == num_short_term_ref_pic_sets) { + // delta_idx_minus1: ue(v) + delta_idx_minus1 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(delta_idx_minus1, 0, st_rps_idx - 1); + } + // delta_rps_sign: u(1) + int delta_rps_sign = reader.ReadBits(1); + // 
abs_delta_rps_minus1: ue(v) + int abs_delta_rps_minus1 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(abs_delta_rps_minus1, 0, 0x7FFF); + int delta_rps = (1 - 2 * delta_rps_sign) * (abs_delta_rps_minus1 + 1); + uint32_t ref_rps_idx = st_rps_idx - (delta_idx_minus1 + 1); + uint32_t num_delta_pocs = + short_term_ref_pic_set[ref_rps_idx].num_delta_pocs; + IN_RANGE_OR_RETURN_NULL(num_delta_pocs, 0, kMaxShortTermRefPicSets); + const ShortTermRefPicSet& ref_set = short_term_ref_pic_set[ref_rps_idx]; + bool used_by_curr_pic_flag[kMaxShortTermRefPicSets] = {}; + bool use_delta_flag[kMaxShortTermRefPicSets] = {}; + // 7.4.8 - use_delta_flag defaults to 1 if not present. + std::fill_n(use_delta_flag, kMaxShortTermRefPicSets, true); + + for (uint32_t j = 0; j <= num_delta_pocs; j++) { + // used_by_curr_pic_flag: u(1) + used_by_curr_pic_flag[j] = reader.Read<bool>(); + if (!used_by_curr_pic_flag[j]) { + // use_delta_flag: u(1) + use_delta_flag[j] = reader.Read<bool>(); + } + } + + // Calculate delta_poc_s{0,1}, used_by_curr_pic_s{0,1}, num_negative_pics + // and num_positive_pics. 
+ // Equation 7-61 + int i = 0; + IN_RANGE_OR_RETURN_NULL( + ref_set.num_negative_pics + ref_set.num_positive_pics, 0, + kMaxShortTermRefPicSets); + for (int j = ref_set.num_positive_pics - 1; j >= 0; --j) { + int d_poc = ref_set.delta_poc_s1[j] + delta_rps; + if (d_poc < 0 && use_delta_flag[ref_set.num_negative_pics + j]) { + st_ref_pic_set.delta_poc_s0[i] = d_poc; + st_ref_pic_set.used_by_curr_pic_s0[i++] = + used_by_curr_pic_flag[ref_set.num_negative_pics + j]; + } + } + if (delta_rps < 0 && use_delta_flag[ref_set.num_delta_pocs]) { + st_ref_pic_set.delta_poc_s0[i] = delta_rps; + st_ref_pic_set.used_by_curr_pic_s0[i++] = + used_by_curr_pic_flag[ref_set.num_delta_pocs]; + } + for (uint32_t j = 0; j < ref_set.num_negative_pics; ++j) { + int d_poc = ref_set.delta_poc_s0[j] + delta_rps; + if (d_poc < 0 && use_delta_flag[j]) { + st_ref_pic_set.delta_poc_s0[i] = d_poc; + st_ref_pic_set.used_by_curr_pic_s0[i++] = used_by_curr_pic_flag[j]; + } + } + st_ref_pic_set.num_negative_pics = i; + // Equation 7-62 + i = 0; + for (int j = ref_set.num_negative_pics - 1; j >= 0; --j) { + int d_poc = ref_set.delta_poc_s0[j] + delta_rps; + if (d_poc > 0 && use_delta_flag[j]) { + st_ref_pic_set.delta_poc_s1[i] = d_poc; + st_ref_pic_set.used_by_curr_pic_s1[i++] = used_by_curr_pic_flag[j]; + } + } + if (delta_rps > 0 && use_delta_flag[ref_set.num_delta_pocs]) { + st_ref_pic_set.delta_poc_s1[i] = delta_rps; + st_ref_pic_set.used_by_curr_pic_s1[i++] = + used_by_curr_pic_flag[ref_set.num_delta_pocs]; + } + for (uint32_t j = 0; j < ref_set.num_positive_pics; ++j) { + int d_poc = ref_set.delta_poc_s1[j] + delta_rps; + if (d_poc > 0 && use_delta_flag[ref_set.num_negative_pics + j]) { + st_ref_pic_set.delta_poc_s1[i] = d_poc; + st_ref_pic_set.used_by_curr_pic_s1[i++] = + used_by_curr_pic_flag[ref_set.num_negative_pics + j]; + } + } + st_ref_pic_set.num_positive_pics = i; + IN_RANGE_OR_RETURN_NULL(st_ref_pic_set.num_negative_pics, 0, + sps_max_dec_pic_buffering_minus1); + 
IN_RANGE_OR_RETURN_NULL( + st_ref_pic_set.num_positive_pics, 0, + sps_max_dec_pic_buffering_minus1 - st_ref_pic_set.num_negative_pics); + + } else { + // num_negative_pics: ue(v) + st_ref_pic_set.num_negative_pics = reader.ReadExponentialGolomb(); + // num_positive_pics: ue(v) + st_ref_pic_set.num_positive_pics = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(st_ref_pic_set.num_negative_pics, 0, + sps_max_dec_pic_buffering_minus1); + IN_RANGE_OR_RETURN_NULL( + st_ref_pic_set.num_positive_pics, 0, + sps_max_dec_pic_buffering_minus1 - st_ref_pic_set.num_negative_pics); + + for (uint32_t i = 0; i < st_ref_pic_set.num_negative_pics; i++) { + // delta_poc_s0_minus1: ue(v) + int delta_poc_s0_minus1 = 0; + delta_poc_s0_minus1 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(delta_poc_s0_minus1, 0, 0x7FFF); + if (i == 0) { + st_ref_pic_set.delta_poc_s0[i] = -(delta_poc_s0_minus1 + 1); + } else { + st_ref_pic_set.delta_poc_s0[i] = + st_ref_pic_set.delta_poc_s0[i - 1] - (delta_poc_s0_minus1 + 1); + } + // used_by_curr_pic_s0_flag: u(1) + st_ref_pic_set.used_by_curr_pic_s0[i] = reader.Read<bool>(); + } + + for (uint32_t i = 0; i < st_ref_pic_set.num_positive_pics; i++) { + // delta_poc_s1_minus1: ue(v) + int delta_poc_s1_minus1 = 0; + delta_poc_s1_minus1 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(delta_poc_s1_minus1, 0, 0x7FFF); + if (i == 0) { + st_ref_pic_set.delta_poc_s1[i] = delta_poc_s1_minus1 + 1; + } else { + st_ref_pic_set.delta_poc_s1[i] = + st_ref_pic_set.delta_poc_s1[i - 1] + delta_poc_s1_minus1 + 1; + } + // used_by_curr_pic_s1_flag: u(1) + st_ref_pic_set.used_by_curr_pic_s1[i] = reader.Read<bool>(); + } + } + + st_ref_pic_set.num_delta_pocs = + st_ref_pic_set.num_negative_pics + st_ref_pic_set.num_positive_pics; + + if (!reader.Ok()) { + return absl::nullopt; + } + + return OptionalShortTermRefPicSet(st_ref_pic_set); +} + +absl::optional<H265SpsParser::ProfileTierLevel> +H265SpsParser::ParseProfileTierLevel(bool 
profile_present, + int max_num_sub_layers_minus1, + BitstreamReader& reader) { + H265SpsParser::ProfileTierLevel pf_tier_level; + // 7.4.4 + if (profile_present) { + int general_profile_space; + general_profile_space = reader.ReadBits(2); + TRUE_OR_RETURN(general_profile_space == 0); + // general_tier_flag or reserved 0: u(1) + reader.ConsumeBits(1); + pf_tier_level.general_profile_idc = reader.ReadBits(5); + IN_RANGE_OR_RETURN_NULL(pf_tier_level.general_profile_idc, 0, 11); + uint16_t general_profile_compatibility_flag_high16 = reader.ReadBits(16); + uint16_t general_profile_compatibility_flag_low16 = reader.ReadBits(16); + pf_tier_level.general_profile_compatibility_flags = + (general_profile_compatibility_flag_high16 << 16) + + general_profile_compatibility_flag_low16; + pf_tier_level.general_progressive_source_flag = reader.ReadBits(1); + pf_tier_level.general_interlaced_source_flag = reader.ReadBits(1); + if (!reader.Ok() || (!pf_tier_level.general_progressive_source_flag && + pf_tier_level.general_interlaced_source_flag)) { + RTC_LOG(LS_WARNING) << "Interlaced streams not supported"; + return absl::nullopt; + } + pf_tier_level.general_non_packed_constraint_flag = reader.ReadBits(1); + pf_tier_level.general_frame_only_constraint_flag = reader.ReadBits(1); + // general_reserved_zero_7bits + reader.ConsumeBits(7); + pf_tier_level.general_one_picture_only_constraint_flag = reader.ReadBits(1); + // general_reserved_zero_35bits + reader.ConsumeBits(35); + // general_inbld_flag + reader.ConsumeBits(1); + } + pf_tier_level.general_level_idc = reader.ReadBits(8); + bool sub_layer_profile_present_flag[8] = {}; + bool sub_layer_level_present_flag[8] = {}; + for (int i = 0; i < max_num_sub_layers_minus1; ++i) { + sub_layer_profile_present_flag[i] = reader.ReadBits(1); + sub_layer_level_present_flag[i] = reader.ReadBits(1); + } + if (max_num_sub_layers_minus1 > 0) { + for (int i = max_num_sub_layers_minus1; i < 8; i++) { + reader.ConsumeBits(2); + } + } + for (int i = 0; 
i < max_num_sub_layers_minus1; i++) { + if (sub_layer_profile_present_flag[i]) { + // sub_layer_profile_space + reader.ConsumeBits(2); + // sub_layer_tier_flag + reader.ConsumeBits(1); + // sub_layer_profile_idc + reader.ConsumeBits(5); + // sub_layer_profile_compatibility_flag + reader.ConsumeBits(32); + // sub_layer_{progressive,interlaced}_source_flag + reader.ConsumeBits(2); + // Ignore sub_layer_non_packed_constraint_flag and + // sub_layer_frame_only_constraint_flag. + reader.ConsumeBits(2); + // Skip the compatibility flags, they are always 43 bits. + reader.ConsumeBits(43); + // sub_layer_inbld_flag + reader.ConsumeBits(1); + } + if (sub_layer_level_present_flag[i]) { + // sub_layer_level_idc + reader.ConsumeBits(8); + } + } + + if (!reader.Ok()) { + return absl::nullopt; + } + + return OptionalProfileTierLevel(pf_tier_level); +} + +absl::optional<H265SpsParser::SpsState> H265SpsParser::ParseSpsInternal( + rtc::ArrayView<const uint8_t> buffer) { + BitstreamReader reader(buffer); + + // Now, we need to use a bit buffer to parse through the actual H265 SPS + // format. See Section 7.3.2.2.1 ("General sequence parameter set data + // syntax") of the H.265 standard for a complete description. + // Since we only care about resolution, we ignore the majority of fields, but + // we still have to actively parse through a lot of the data, since many of + // the fields have variable size. + // We're particularly interested in: + // chroma_format_idc -> affects crop units + // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16). 
+ // frame_crop_*_offset -> crop information + SpsState sps; + + // sps_video_parameter_set_id: u(4) + uint32_t sps_video_parameter_set_id = 0; + sps_video_parameter_set_id = reader.ReadBits(4); + IN_RANGE_OR_RETURN_NULL(sps_video_parameter_set_id, 0, 15); + + // sps_max_sub_layers_minus1: u(3) + uint32_t sps_max_sub_layers_minus1 = 0; + sps_max_sub_layers_minus1 = reader.ReadBits(3); + IN_RANGE_OR_RETURN_NULL(sps_max_sub_layers_minus1, 0, kMaxSubLayers - 1); + sps.sps_max_sub_layers_minus1 = sps_max_sub_layers_minus1; + // sps_temporal_id_nesting_flag: u(1) + reader.ConsumeBits(1); + // profile_tier_level(1, sps_max_sub_layers_minus1). + OptionalProfileTierLevel profile_tier_level = + ParseProfileTierLevel(true, sps.sps_max_sub_layers_minus1, reader); + if (!profile_tier_level) { + return absl::nullopt; + } + // sps_seq_parameter_set_id: ue(v) + sps.sps_id = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.sps_id, 0, 15); + // chrome_format_idc: ue(v) + sps.chroma_format_idc = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.chroma_format_idc, 0, 3); + if (sps.chroma_format_idc == 3) { + // seperate_colour_plane_flag: u(1) + sps.separate_colour_plane_flag = reader.Read<bool>(); + } + uint32_t pic_width_in_luma_samples = 0; + uint32_t pic_height_in_luma_samples = 0; + // pic_width_in_luma_samples: ue(v) + pic_width_in_luma_samples = reader.ReadExponentialGolomb(); + TRUE_OR_RETURN(pic_width_in_luma_samples != 0); + // pic_height_in_luma_samples: ue(v) + pic_height_in_luma_samples = reader.ReadExponentialGolomb(); + TRUE_OR_RETURN(pic_height_in_luma_samples != 0); + + // Equation A-2: Calculate max_dpb_size. 
+ uint32_t max_luma_ps = GetMaxLumaPs(profile_tier_level->general_level_idc); + uint32_t max_dpb_size = 0; + uint32_t pic_size_in_samples_y = pic_height_in_luma_samples; + pic_size_in_samples_y *= pic_width_in_luma_samples; + size_t max_dpb_pic_buf = + GetDpbMaxPicBuf(profile_tier_level->general_profile_idc); + if (pic_size_in_samples_y <= (max_luma_ps >> 2)) + max_dpb_size = std::min(4 * max_dpb_pic_buf, size_t{16}); + else if (pic_size_in_samples_y <= (max_luma_ps >> 1)) + max_dpb_size = std::min(2 * max_dpb_pic_buf, size_t{16}); + else if (pic_size_in_samples_y <= ((3 * max_luma_ps) >> 2)) + max_dpb_size = std::min((4 * max_dpb_pic_buf) / 3, size_t{16}); + else + max_dpb_size = max_dpb_pic_buf; + + // conformance_window_flag: u(1) + bool conformance_window_flag = reader.Read<bool>(); + + uint32_t conf_win_left_offset = 0; + uint32_t conf_win_right_offset = 0; + uint32_t conf_win_top_offset = 0; + uint32_t conf_win_bottom_offset = 0; + int sub_width_c = + ((1 == sps.chroma_format_idc) || (2 == sps.chroma_format_idc)) && + (0 == sps.separate_colour_plane_flag) + ? 2 + : 1; + int sub_height_c = + (1 == sps.chroma_format_idc) && (0 == sps.separate_colour_plane_flag) ? 
2 + : 1; + if (conformance_window_flag) { + // conf_win_left_offset: ue(v) + conf_win_left_offset = reader.ReadExponentialGolomb(); + // conf_win_right_offset: ue(v) + conf_win_right_offset = reader.ReadExponentialGolomb(); + // conf_win_top_offset: ue(v) + conf_win_top_offset = reader.ReadExponentialGolomb(); + // conf_win_bottom_offset: ue(v) + conf_win_bottom_offset = reader.ReadExponentialGolomb(); + uint32_t width_crop = conf_win_left_offset; + width_crop += conf_win_right_offset; + width_crop *= sub_width_c; + TRUE_OR_RETURN(width_crop < pic_width_in_luma_samples); + uint32_t height_crop = conf_win_top_offset; + height_crop += conf_win_bottom_offset; + height_crop *= sub_height_c; + TRUE_OR_RETURN(height_crop < pic_height_in_luma_samples); + } + + // bit_depth_luma_minus8: ue(v) + sps.bit_depth_luma_minus8 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.bit_depth_luma_minus8, 0, 8); + // bit_depth_chroma_minus8: ue(v) + uint32_t bit_depth_chroma_minus8 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(bit_depth_chroma_minus8, 0, 8); + // log2_max_pic_order_cnt_lsb_minus4: ue(v) + sps.log2_max_pic_order_cnt_lsb_minus4 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.log2_max_pic_order_cnt_lsb_minus4, 0, 12); + uint32_t sps_sub_layer_ordering_info_present_flag = 0; + // sps_sub_layer_ordering_info_present_flag: u(1) + sps_sub_layer_ordering_info_present_flag = reader.Read<bool>(); + uint32_t sps_max_num_reorder_pics[kMaxSubLayers] = {}; + for (uint32_t i = (sps_sub_layer_ordering_info_present_flag != 0) + ? 
0 + : sps_max_sub_layers_minus1; + i <= sps_max_sub_layers_minus1; i++) { + // sps_max_dec_pic_buffering_minus1: ue(v) + sps.sps_max_dec_pic_buffering_minus1[i] = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.sps_max_dec_pic_buffering_minus1[i], 0, + max_dpb_size - 1); + // sps_max_num_reorder_pics: ue(v) + sps_max_num_reorder_pics[i] = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps_max_num_reorder_pics[i], 0, + sps.sps_max_dec_pic_buffering_minus1[i]); + if (i > 0) { + TRUE_OR_RETURN(sps.sps_max_dec_pic_buffering_minus1[i] >= + sps.sps_max_dec_pic_buffering_minus1[i - 1]); + TRUE_OR_RETURN(sps_max_num_reorder_pics[i] >= + sps_max_num_reorder_pics[i - 1]); + } + // sps_max_latency_increase_plus1: ue(v) + reader.ReadExponentialGolomb(); + } + if (!sps_sub_layer_ordering_info_present_flag) { + // Fill in the default values for the other sublayers. + for (uint32_t i = 0; i < sps_max_sub_layers_minus1; ++i) { + sps.sps_max_dec_pic_buffering_minus1[i] = + sps.sps_max_dec_pic_buffering_minus1[sps_max_sub_layers_minus1]; + } + } + // log2_min_luma_coding_block_size_minus3: ue(v) + sps.log2_min_luma_coding_block_size_minus3 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.log2_min_luma_coding_block_size_minus3, 0, 27); + // log2_diff_max_min_luma_coding_block_size: ue(v) + sps.log2_diff_max_min_luma_coding_block_size = reader.ReadExponentialGolomb(); + int min_cb_log2_size_y = sps.log2_min_luma_coding_block_size_minus3 + 3; + int ctb_log2_size_y = min_cb_log2_size_y; + ctb_log2_size_y += sps.log2_diff_max_min_luma_coding_block_size; + IN_RANGE_OR_RETURN_NULL(ctb_log2_size_y, 0, 30); + int min_cb_size_y = 1 << min_cb_log2_size_y; + int ctb_size_y = 1 << ctb_log2_size_y; + sps.pic_width_in_ctbs_y = + std::ceil(static_cast<float>(pic_width_in_luma_samples) / ctb_size_y); + sps.pic_height_in_ctbs_y = + std::ceil(static_cast<float>(pic_height_in_luma_samples) / ctb_size_y); + TRUE_OR_RETURN(pic_width_in_luma_samples % min_cb_size_y 
== 0); + TRUE_OR_RETURN(pic_height_in_luma_samples % min_cb_size_y == 0); + // log2_min_luma_transform_block_size_minus2: ue(v) + int log2_min_luma_transform_block_size_minus2 = + reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(log2_min_luma_transform_block_size_minus2, 0, + min_cb_log2_size_y - 3); + int min_tb_log2_size_y = log2_min_luma_transform_block_size_minus2 + 2; + // log2_diff_max_min_luma_transform_block_size: ue(v) + int log2_diff_max_min_luma_transform_block_size = + reader.ReadExponentialGolomb(); + TRUE_OR_RETURN(log2_diff_max_min_luma_transform_block_size <= + std::min(ctb_log2_size_y, 5) - min_tb_log2_size_y); + // max_transform_hierarchy_depth_inter: ue(v) + int max_transform_hierarchy_depth_inter = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(max_transform_hierarchy_depth_inter, 0, + ctb_log2_size_y - min_tb_log2_size_y); + // max_transform_hierarchy_depth_intra: ue(v) + int max_transform_hierarchy_depth_intra = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(max_transform_hierarchy_depth_intra, 0, + ctb_log2_size_y - min_tb_log2_size_y); + // scaling_list_enabled_flag: u(1) + bool scaling_list_enabled_flag = reader.Read<bool>(); + if (scaling_list_enabled_flag) { + // sps_scaling_list_data_present_flag: u(1) + bool sps_scaling_list_data_present_flag = reader.Read<bool>(); + if (sps_scaling_list_data_present_flag) { + // scaling_list_data() + if (!ParseScalingListData(reader)) { + return absl::nullopt; + } + } + } + + // amp_enabled_flag: u(1) + reader.ConsumeBits(1); + // sample_adaptive_offset_enabled_flag: u(1) + sps.sample_adaptive_offset_enabled_flag = reader.Read<bool>(); + // pcm_enabled_flag: u(1) + bool pcm_enabled_flag = reader.Read<bool>(); + if (pcm_enabled_flag) { + // pcm_sample_bit_depth_luma_minus1: u(4) + reader.ConsumeBits(4); + // pcm_sample_bit_depth_chroma_minus1: u(4) + reader.ConsumeBits(4); + // log2_min_pcm_luma_coding_block_size_minus3: ue(v) + int 
log2_min_pcm_luma_coding_block_size_minus3 = + reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(log2_min_pcm_luma_coding_block_size_minus3, 0, 2); + int log2_min_ipcm_cb_size_y = + log2_min_pcm_luma_coding_block_size_minus3 + 3; + IN_RANGE_OR_RETURN_NULL(log2_min_ipcm_cb_size_y, + std::min(min_cb_log2_size_y, 5), + std::min(ctb_log2_size_y, 5)); + // log2_diff_max_min_pcm_luma_coding_block_size: ue(v) + int log2_diff_max_min_pcm_luma_coding_block_size = + reader.ReadExponentialGolomb(); + TRUE_OR_RETURN(log2_diff_max_min_pcm_luma_coding_block_size <= + std::min(ctb_log2_size_y, 5) - log2_min_ipcm_cb_size_y); + // pcm_loop_filter_disabled_flag: u(1) + reader.ConsumeBits(1); + } + + // num_short_term_ref_pic_sets: ue(v) + sps.num_short_term_ref_pic_sets = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.num_short_term_ref_pic_sets, 0, + kMaxShortTermRefPicSets); + sps.short_term_ref_pic_set.resize(sps.num_short_term_ref_pic_sets); + for (uint32_t st_rps_idx = 0; st_rps_idx < sps.num_short_term_ref_pic_sets; + st_rps_idx++) { + uint32_t sps_max_dec_pic_buffering_minus1 = + sps.sps_max_dec_pic_buffering_minus1[sps.sps_max_sub_layers_minus1]; + // st_ref_pic_set() + OptionalShortTermRefPicSet ref_pic_set = ParseShortTermRefPicSet( + st_rps_idx, sps.num_short_term_ref_pic_sets, sps.short_term_ref_pic_set, + sps_max_dec_pic_buffering_minus1, reader); + if (ref_pic_set) { + sps.short_term_ref_pic_set[st_rps_idx] = *ref_pic_set; + } else { + return absl::nullopt; + } + } + + // long_term_ref_pics_present_flag: u(1) + sps.long_term_ref_pics_present_flag = reader.Read<bool>(); + if (sps.long_term_ref_pics_present_flag) { + // num_long_term_ref_pics_sps: ue(v) + sps.num_long_term_ref_pics_sps = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.num_long_term_ref_pics_sps, 0, + kMaxLongTermRefPicSets); + sps.used_by_curr_pic_lt_sps_flag.resize(sps.num_long_term_ref_pics_sps, 0); + for (uint32_t i = 0; i < sps.num_long_term_ref_pics_sps; i++) { + 
// lt_ref_pic_poc_lsb_sps: u(v) + uint32_t lt_ref_pic_poc_lsb_sps_bits = + sps.log2_max_pic_order_cnt_lsb_minus4 + 4; + reader.ConsumeBits(lt_ref_pic_poc_lsb_sps_bits); + // used_by_curr_pic_lt_sps_flag: u(1) + sps.used_by_curr_pic_lt_sps_flag[i] = reader.Read<bool>(); + } + } + + // sps_temporal_mvp_enabled_flag: u(1) + sps.sps_temporal_mvp_enabled_flag = reader.Read<bool>(); + + // Far enough! We don't use the rest of the SPS. + + sps.vps_id = sps_video_parameter_set_id; + + sps.pic_width_in_luma_samples = pic_width_in_luma_samples; + sps.pic_height_in_luma_samples = pic_height_in_luma_samples; + + // Start with the resolution determined by the pic_width/pic_height fields. + sps.width = pic_width_in_luma_samples; + sps.height = pic_height_in_luma_samples; + + if (conformance_window_flag) { + int sub_width_c = + ((1 == sps.chroma_format_idc) || (2 == sps.chroma_format_idc)) && + (0 == sps.separate_colour_plane_flag) + ? 2 + : 1; + int sub_height_c = + (1 == sps.chroma_format_idc) && (0 == sps.separate_colour_plane_flag) + ? 2 + : 1; + // the offset includes the pixel within conformance window. so don't need to + // +1 as per spec + sps.width -= sub_width_c * (conf_win_right_offset + conf_win_left_offset); + sps.height -= sub_height_c * (conf_win_top_offset + conf_win_bottom_offset); + } + + if (!reader.Ok()) { + return absl::nullopt; + } + + return OptionalSps(sps); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_sps_parser.h b/third_party/libwebrtc/common_video/h265/h265_sps_parser.h new file mode 100644 index 0000000000..854c0f29eb --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_sps_parser.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H265_H265_SPS_PARSER_H_ +#define COMMON_VIDEO_H265_H265_SPS_PARSER_H_ + +#include <vector> + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "rtc_base/bitstream_reader.h" + +namespace webrtc { + +// For explanations of each struct and its members, see H.265 specification +// at http://www.itu.int/rec/T-REC-H.265. +enum { + kMaxLongTermRefPicSets = 32, // 7.4.3.2.1 + kMaxShortTermRefPicSets = 64, // 7.4.3.2.1 + kMaxSubLayers = 7, // 7.4.3.1 & 7.4.3.2.1 [v|s]ps_max_sub_layers_minus1 + 1 +}; + +enum H265ProfileIdc { + kProfileIdcMain = 1, + kProfileIdcMain10 = 2, + kProfileIdcMainStill = 3, + kProfileIdcRangeExtensions = 4, + kProfileIdcHighThroughput = 5, + kProfileIdcMultiviewMain = 6, + kProfileIdcScalableMain = 7, + kProfileIdc3dMain = 8, + kProfileIdcScreenContentCoding = 9, + kProfileIdcScalableRangeExtensions = 10, + kProfileIdcHighThroughputScreenContentCoding = 11, +}; + +// A class for parsing out sequence parameter set (SPS) data from an H265 NALU. +class H265SpsParser { + public: + struct ProfileTierLevel { + ProfileTierLevel(); + // Syntax elements. + int general_profile_idc = 0; + int general_level_idc = 0; // 30x the actual level. + uint32_t general_profile_compatibility_flags = 0; + bool general_progressive_source_flag = false; + bool general_interlaced_source_flag = false; + bool general_non_packed_constraint_flag = false; + bool general_frame_only_constraint_flag = false; + bool general_one_picture_only_constraint_flag = false; + }; + + struct ShortTermRefPicSet { + ShortTermRefPicSet(); + + // Syntax elements. 
+ uint32_t num_negative_pics = 0; + uint32_t num_positive_pics = 0; + uint32_t delta_poc_s0[kMaxShortTermRefPicSets] = {}; + uint32_t used_by_curr_pic_s0[kMaxShortTermRefPicSets] = {}; + uint32_t delta_poc_s1[kMaxShortTermRefPicSets] = {}; + uint32_t used_by_curr_pic_s1[kMaxShortTermRefPicSets] = {}; + + // Calculated fields. + uint32_t num_delta_pocs = 0; + }; + + // The parsed state of the SPS. Only some select values are stored. + // Add more as they are actually needed. + struct SpsState { + SpsState() = default; + + uint32_t sps_max_sub_layers_minus1 = 0; + uint32_t chroma_format_idc = 0; + uint32_t separate_colour_plane_flag = 0; + uint32_t pic_width_in_luma_samples = 0; + uint32_t pic_height_in_luma_samples = 0; + uint32_t log2_max_pic_order_cnt_lsb_minus4 = 0; + uint32_t sps_max_dec_pic_buffering_minus1[kMaxSubLayers] = {}; + uint32_t log2_min_luma_coding_block_size_minus3 = 0; + uint32_t log2_diff_max_min_luma_coding_block_size = 0; + uint32_t sample_adaptive_offset_enabled_flag = 0; + uint32_t num_short_term_ref_pic_sets = 0; + std::vector<H265SpsParser::ShortTermRefPicSet> short_term_ref_pic_set; + uint32_t long_term_ref_pics_present_flag = 0; + uint32_t num_long_term_ref_pics_sps = 0; + std::vector<uint32_t> used_by_curr_pic_lt_sps_flag; + uint32_t sps_temporal_mvp_enabled_flag = 0; + uint32_t width = 0; + uint32_t height = 0; + uint32_t sps_id = 0; + uint32_t vps_id = 0; + uint32_t pic_width_in_ctbs_y = 0; + uint32_t pic_height_in_ctbs_y = 0; + uint32_t bit_depth_luma_minus8 = 0; + }; + + // Unpack RBSP and parse SPS state from the supplied buffer. 
+ static absl::optional<SpsState> ParseSps(const uint8_t* data, size_t length); + + static bool ParseScalingListData(BitstreamReader& reader); + + static absl::optional<ShortTermRefPicSet> ParseShortTermRefPicSet( + uint32_t st_rps_idx, + uint32_t num_short_term_ref_pic_sets, + const std::vector<ShortTermRefPicSet>& ref_pic_sets, + uint32_t sps_max_dec_pic_buffering_minus1, + BitstreamReader& reader); + + static absl::optional<H265SpsParser::ProfileTierLevel> ParseProfileTierLevel( + bool profile_present, + int max_num_sub_layers_minus1, + BitstreamReader& reader); + + protected: + // Parse the SPS state, for a bit buffer where RBSP decoding has already been + // performed. + static absl::optional<SpsState> ParseSpsInternal( + rtc::ArrayView<const uint8_t> buffer); + static bool ParseProfileTierLevel(BitstreamReader& reader, + uint32_t sps_max_sub_layers_minus1); + + // From Table A.8 - General tier and level limits. + static int GetMaxLumaPs(int general_level_idc); + // From A.4.2 - Profile-specific level limits for the video profiles. + static size_t GetDpbMaxPicBuf(int general_profile_idc); +}; + +} // namespace webrtc +#endif // COMMON_VIDEO_H265_H265_SPS_PARSER_H_ diff --git a/third_party/libwebrtc/common_video/h265/h265_sps_parser_unittest.cc b/third_party/libwebrtc/common_video/h265/h265_sps_parser_unittest.cc new file mode 100644 index 0000000000..26af4b1170 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_sps_parser_unittest.cc @@ -0,0 +1,546 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "common_video/h265/h265_sps_parser.h" + +#include "common_video/h265/h265_common.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/buffer.h" +#include "test/gtest.h" + +namespace webrtc { + +static constexpr size_t kSpsBufferMaxSize = 256; + +// Generates a fake SPS with basically everything empty but the width/height, +// max_num_sublayer_minus1 and num_short_term_ref_pic_sets. +// Pass in a buffer of at least kSpsBufferMaxSize. +// The fake SPS that this generates also always has at least one emulation byte +// at offset 2, since the first two bytes are always 0, and has a 0x3 as the +// level_idc, to make sure the parser doesn't eat all 0x3 bytes. +// num_short_term_ref_pic_sets is set to 11 followed with 11 +// short_term_ref_pic_set data in this fake sps. +void WriteSps(uint16_t width, + uint16_t height, + int id, + uint32_t max_num_sublayer_minus1, + bool sub_layer_ordering_info_present_flag, + bool long_term_ref_pics_present_flag, + rtc::Buffer* out_buffer) { + uint8_t rbsp[kSpsBufferMaxSize] = {0}; + rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize); + // sps_video_parameter_set_id + writer.WriteBits(0, 4); + // sps_max_sub_layers_minus1 + writer.WriteBits(max_num_sublayer_minus1, 3); + // sps_temporal_id_nesting_flag + writer.WriteBits(1, 1); + // profile_tier_level(profilePresentFlag=1, maxNumSublayersMinus1=0) + // profile-space=0, tier=0, profile-idc=1 + writer.WriteBits(0, 2); + writer.WriteBits(0, 1); + writer.WriteBits(1, 5); + // general_prfile_compatibility_flag[32] + writer.WriteBits(0, 32); + // general_progressive_source_flag + writer.WriteBits(1, 1); + // general_interlace_source_flag + writer.WriteBits(0, 1); + // general_non_packed_constraint_flag + writer.WriteBits(0, 1); + // general_frame_only_constraint_flag + writer.WriteBits(1, 1); + // general_reserved_zero_7bits + writer.WriteBits(0, 7); + // general_one_picture_only_flag + writer.WriteBits(0, 1); + // 
general_reserved_zero_35bits + writer.WriteBits(0, 35); + // general_inbld_flag + writer.WriteBits(0, 1); + // general_level_idc + writer.WriteBits(93, 8); + // if max_sub_layers_minus1 >=1, read the sublayer profile information + std::vector<uint32_t> sub_layer_profile_present_flags; + std::vector<uint32_t> sub_layer_level_present_flags; + for (uint32_t i = 0; i < max_num_sublayer_minus1; i++) { + // sublayer_profile_present_flag and sublayer_level_presnet_flag: u(2) + writer.WriteBits(1, 1); + writer.WriteBits(1, 1); + sub_layer_profile_present_flags.push_back(1); + sub_layer_level_present_flags.push_back(1); + } + if (max_num_sublayer_minus1 > 0) { + for (uint32_t j = max_num_sublayer_minus1; j < 8; j++) { + // reserved 2 bits: u(2) + writer.WriteBits(0, 2); + } + } + for (uint32_t k = 0; k < max_num_sublayer_minus1; k++) { + if (sub_layer_profile_present_flags[k]) { // + // sub_layer profile_space/tier_flag/profile_idc. ignored. u(8) + writer.WriteBits(0, 8); + // profile_compatability_flag: u(32) + writer.WriteBits(0, 32); + // sub_layer progressive_source_flag/interlaced_source_flag/ + // non_packed_constraint_flag/frame_only_constraint_flag: u(4) + writer.WriteBits(0, 4); + // following 43-bits are profile_idc specific. We simply read/skip it. + // u(43) + writer.WriteBits(0, 43); + // 1-bit profile_idc specific inbld flag. We simply read/skip it. 
u(1) + writer.WriteBits(0, 1); + } + if (sub_layer_level_present_flags[k]) { + // sub_layer_level_idc: u(8) + writer.WriteBits(0, 8); + } + } + + // seq_parameter_set_id + writer.WriteExponentialGolomb(id); + // chroma_format_idc + writer.WriteExponentialGolomb(2); + if (width % 8 != 0 || height % 8 != 0) { + int width_delta = 8 - width % 8; + int height_delta = 8 - height % 8; + if (width_delta != 8) { + // pic_width_in_luma_samples + writer.WriteExponentialGolomb(width + width_delta); + } else { + writer.WriteExponentialGolomb(width); + } + if (height_delta != 8) { + // pic_height_in_luma_samples + writer.WriteExponentialGolomb(height + height_delta); + } else { + writer.WriteExponentialGolomb(height); + } + // conformance_window_flag + writer.WriteBits(1, 1); + // conf_win_left_offset + writer.WriteExponentialGolomb((width % 8) / 2); + // conf_win_right_offset + writer.WriteExponentialGolomb(0); + // conf_win_top_offset + writer.WriteExponentialGolomb(height_delta); + // conf_win_bottom_offset + writer.WriteExponentialGolomb(0); + } else { + // pic_width_in_luma_samples + writer.WriteExponentialGolomb(width); + // pic_height_in_luma_samples + writer.WriteExponentialGolomb(height); + // conformance_window_flag + writer.WriteBits(0, 1); + } + // bit_depth_luma_minus8 + writer.WriteExponentialGolomb(0); + // bit_depth_chroma_minus8 + writer.WriteExponentialGolomb(0); + // log2_max_pic_order_cnt_lsb_minus4 + writer.WriteExponentialGolomb(4); + // sps_sub_layer_ordering_info_present_flag + writer.WriteBits(sub_layer_ordering_info_present_flag, 1); + for (uint32_t i = (sub_layer_ordering_info_present_flag != 0) + ? 
0 + : max_num_sublayer_minus1; + i <= max_num_sublayer_minus1; i++) { + // sps_max_dec_pic_buffering_minus1: ue(v) + writer.WriteExponentialGolomb(4); + // sps_max_num_reorder_pics: ue(v) + writer.WriteExponentialGolomb(3); + // sps_max_latency_increase_plus1: ue(v) + writer.WriteExponentialGolomb(0); + } + // log2_min_luma_coding_block_size_minus3 + writer.WriteExponentialGolomb(0); + // log2_diff_max_min_luma_coding_block_size + writer.WriteExponentialGolomb(3); + // log2_min_luma_transform_block_size_minus2 + writer.WriteExponentialGolomb(0); + // log2_diff_max_min_luma_transform_block_size + writer.WriteExponentialGolomb(3); + // max_transform_hierarchy_depth_inter + writer.WriteExponentialGolomb(0); + // max_transform_hierarchy_depth_intra + writer.WriteExponentialGolomb(0); + // scaling_list_enabled_flag + writer.WriteBits(0, 1); + // apm_enabled_flag + writer.WriteBits(0, 1); + // sample_adaptive_offset_enabled_flag + writer.WriteBits(1, 1); + // pcm_enabled_flag + writer.WriteBits(0, 1); + // num_short_term_ref_pic_sets + writer.WriteExponentialGolomb(11); + // short_term_ref_pic_set[0] + // num_negative_pics + writer.WriteExponentialGolomb(4); + // num_positive_pics + writer.WriteExponentialGolomb(0); + // delta_poc_s0_minus1 + writer.WriteExponentialGolomb(7); + // used_by_curr_pic_s0_flag + writer.WriteBits(1, 1); + for (int i = 0; i < 2; i++) { + // delta_poc_s0_minus1 + writer.WriteExponentialGolomb(1); + // used_by_curr_pic_s0_flag + writer.WriteBits(1, 1); + } + // delta_poc_s0_minus1 + writer.WriteExponentialGolomb(3); + // used_by_curr_pic_s0_flag + writer.WriteBits(1, 1); + // short_term_ref_pic_set[1] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(0, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(3); + for (int i = 0; i < 2; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + for (int i = 0; i < 2; i++) { + // used_by_curr_pic_flag + writer.WriteBits(0, 1); + // 
use_delta_flag + writer.WriteBits(0, 1); + } + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + // short_term_ref_pic_set[2] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(0, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(1); + for (int i = 0; i < 4; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // short_term_ref_pic_set[3] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(0, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(0); + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + // used_by_curr_pic_flag + writer.WriteBits(0, 1); + // use_delta_flag + writer.WriteBits(0, 1); + for (int i = 0; i < 3; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // short_term_ref_pic_set[4] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(1, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(1); + for (int i = 0; i < 4; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // used_by_curr_pic_flag + writer.WriteBits(0, 1); + // use_delta_flag + writer.WriteBits(0, 1); + // short_term_ref_pic_set[5] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(1, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(2); + for (int i = 0; i < 4; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // used_by_curr_pic_flag + writer.WriteBits(0, 1); + // use_delta_flag + writer.WriteBits(0, 1); + // short_term_ref_pic_set[6] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(0, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(0); + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + // used_by_curr_pic_flag + writer.WriteBits(0, 1); + // use_delta_flag + writer.WriteBits(0, 1); + for (int i = 0; i < 
3; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // short_term_ref_pic_set[7] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(1, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(1); + for (int i = 0; i < 4; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // used_by_curr_pic_flag + writer.WriteBits(0, 1); + // use_delta_flag + writer.WriteBits(0, 1); + // short_term_ref_pic_set[8] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(0, 1); + // num_negative_pics + writer.WriteExponentialGolomb(1); + // num_positive_pics + writer.WriteExponentialGolomb(0); + // delta_poc_s0_minus1 + writer.WriteExponentialGolomb(7); + // used_by_curr_pic_s0_flag + writer.WriteBits(1, 1); + // short_term_ref_pic_set[9] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(0, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(3); + for (int i = 0; i < 2; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // short_term_ref_pic_set[10] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(0, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(1); + for (int i = 0; i < 3; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // long_term_ref_pics_present_flag + writer.WriteBits(long_term_ref_pics_present_flag, 1); + if (long_term_ref_pics_present_flag) { + // num_long_term_ref_pics_sps + writer.WriteExponentialGolomb(1); + // lt_ref_pic_poc_lsb_sps + writer.WriteExponentialGolomb(1); + // used_by_curr_pic_lt_sps_flag + writer.WriteBits(1, 8); + } + // sps_temporal_mvp_enabled_flag + writer.WriteBits(1, 1); + + // Get the number of bytes written (including the last partial byte).
+ size_t byte_count, bit_offset; + writer.GetCurrentOffset(&byte_count, &bit_offset); + if (bit_offset > 0) { + byte_count++; + } + + out_buffer->Clear(); + H265::WriteRbsp(rbsp, byte_count, out_buffer); +} + +class H265SpsParserTest : public ::testing::Test { + public: + H265SpsParserTest() {} + ~H265SpsParserTest() override {} +}; + +TEST_F(H265SpsParserTest, TestSampleSPSHdLandscape) { + // SPS for a 1280x720 camera capture from ffmpeg on Linux. Contains + // emulation bytes but no cropping. This buffer is generated + // with following command: + // 1) ffmpeg -i /dev/video0 -r 30 -c:v libx265 -s 1280x720 camera.h265 + // + // 2) Open camera.h265 and find the SPS, generally everything between the + // second and third start codes (0 0 0 1 or 0 0 1). The first two bytes should + // be 0x42 and 0x01, which should be stripped out before being passed to the + // parser. + const uint8_t buffer[] = {0x01, 0x04, 0x08, 0x00, 0x00, 0x03, 0x00, 0x9d, + 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x5d, 0xb0, + 0x02, 0x80, 0x80, 0x2d, 0x16, 0x59, 0x59, 0xa4, + 0x93, 0x2b, 0x80, 0x40, 0x00, 0x00, 0x03, 0x00, + 0x40, 0x00, 0x00, 0x07, 0x82}; + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer, arraysize(buffer)); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(1280u, sps->width); + EXPECT_EQ(720u, sps->height); +} + +TEST_F(H265SpsParserTest, TestSampleSPSVerticalCropLandscape) { + // SPS for a 640x260 camera capture from ffmpeg on Linux. Contains emulation + // bytes and vertical cropping (crop from 640x264).
The buffer is generated + // with following command: + // 1) Generate a video, from the camera: + // ffmpeg -i /dev/video0 -r 30 -c:v libx265 -s 640x264 camera.h265 + // + // 2) Crop the video to expected size(for example, 640x260 which will crop + // from 640x264): + // ffmpeg -i camera.h265 -filter:v crop=640:260:200:200 -c:v libx265 + // cropped.h265 + // + // 3) Open cropped.h265 and find the SPS, generally everything between the + // second and third start codes (0 0 0 1 or 0 0 1). The first two bytes should + // be 0x42 and 0x01, which should be stripped out before being passed to the + // parser. + const uint8_t buffer[] = {0x01, 0x04, 0x08, 0x00, 0x00, 0x03, 0x00, 0x9d, + 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x3f, 0xb0, + 0x05, 0x02, 0x01, 0x09, 0xf2, 0xe5, 0x95, 0x9a, + 0x49, 0x32, 0xb8, 0x04, 0x00, 0x00, 0x03, 0x00, + 0x04, 0x00, 0x00, 0x03, 0x00, 0x78, 0x20}; + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer, arraysize(buffer)); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(640u, sps->width); + EXPECT_EQ(260u, sps->height); +} + +TEST_F(H265SpsParserTest, TestSampleSPSHorizontalAndVerticalCrop) { + // SPS for a 260x260 camera capture from ffmpeg on Linux. Contains emulation + // bytes. Horizontal and vertical crop (Crop from 264x264). The buffer is + // generated with following command: + // 1) Generate a video, from the camera: + // ffmpeg -i /dev/video0 -r 30 -c:v libx265 -s 264x264 camera.h265 + // + // 2) Crop the video to expected size(for example, 260x260 which will crop + // from 264x264): + // ffmpeg -i camera.h265 -filter:v crop=260:260:200:200 -c:v libx265 + // cropped.h265 + // + // 3) Open cropped.h265 and find the SPS, generally everything between the + // second and third start codes (0 0 0 1 or 0 0 1). The first two bytes should + // be 0x42 and 0x01, which should be stripped out before being passed to the + // parser.
+ const uint8_t buffer[] = {0x01, 0x04, 0x08, 0x00, 0x00, 0x03, 0x00, 0x9d, + 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x3c, 0xb0, + 0x08, 0x48, 0x04, 0x27, 0x72, 0xe5, 0x95, 0x9a, + 0x49, 0x32, 0xb8, 0x04, 0x00, 0x00, 0x03, 0x00, + 0x04, 0x00, 0x00, 0x03, 0x00, 0x78, 0x20}; + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer, arraysize(buffer)); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(260u, sps->width); + EXPECT_EQ(260u, sps->height); +} + +TEST_F(H265SpsParserTest, TestSyntheticSPSQvgaLandscape) { + rtc::Buffer buffer; + WriteSps(320u, 180u, 1, 0, 1, 0, &buffer); + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->sps_id); +} + +TEST_F(H265SpsParserTest, TestSyntheticSPSWeirdResolution) { + rtc::Buffer buffer; + WriteSps(156u, 122u, 2, 0, 1, 0, &buffer); + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(156u, sps->width); + EXPECT_EQ(122u, sps->height); + EXPECT_EQ(2u, sps->sps_id); +} + +TEST_F(H265SpsParserTest, TestLog2MaxSubLayersMinus1) { + rtc::Buffer buffer; + WriteSps(320u, 180u, 1, 0, 1, 0, &buffer); + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->sps_id); + EXPECT_EQ(0u, sps->sps_max_sub_layers_minus1); + + WriteSps(320u, 180u, 1, 6, 1, 0, &buffer); + absl::optional<H265SpsParser::SpsState> sps1 = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps1.has_value()); + EXPECT_EQ(320u, sps1->width); + EXPECT_EQ(180u, sps1->height); + EXPECT_EQ(1u, sps1->sps_id); + EXPECT_EQ(6u, sps1->sps_max_sub_layers_minus1); + + WriteSps(320u, 180u, 1, 7, 1, 0, &buffer); + 
absl::optional<H265SpsParser::SpsState> result = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + EXPECT_FALSE(result.has_value()); +} + +TEST_F(H265SpsParserTest, TestSubLayerOrderingInfoPresentFlag) { + rtc::Buffer buffer; + WriteSps(320u, 180u, 1, 6, 1, 0, &buffer); + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->sps_id); + EXPECT_EQ(6u, sps->sps_max_sub_layers_minus1); + + WriteSps(320u, 180u, 1, 6, 1, 0, &buffer); + absl::optional<H265SpsParser::SpsState> sps1 = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps1.has_value()); + EXPECT_EQ(320u, sps1->width); + EXPECT_EQ(180u, sps1->height); + EXPECT_EQ(1u, sps1->sps_id); + EXPECT_EQ(6u, sps1->sps_max_sub_layers_minus1); +} + +TEST_F(H265SpsParserTest, TestLongTermRefPicsPresentFlag) { + rtc::Buffer buffer; + WriteSps(320u, 180u, 1, 0, 1, 0, &buffer); + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->sps_id); + EXPECT_EQ(0u, sps->long_term_ref_pics_present_flag); + + WriteSps(320u, 180u, 1, 6, 1, 1, &buffer); + absl::optional<H265SpsParser::SpsState> sps1 = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps1.has_value()); + EXPECT_EQ(320u, sps1->width); + EXPECT_EQ(180u, sps1->height); + EXPECT_EQ(1u, sps1->sps_id); + EXPECT_EQ(1u, sps1->long_term_ref_pics_present_flag); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_vps_parser.cc b/third_party/libwebrtc/common_video/h265/h265_vps_parser.cc new file mode 100644 index 0000000000..16b967dad4 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_vps_parser.cc @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2023 The WebRTC project 
authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h265/h265_vps_parser.h" + +#include "common_video/h265/h265_common.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/bitstream_reader.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +H265VpsParser::VpsState::VpsState() = default; + +// General note: this is based off the 08/2021 version of the H.265 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.265 + +// Unpack RBSP and parse VPS state from the supplied buffer. +absl::optional<H265VpsParser::VpsState> H265VpsParser::ParseVps( + const uint8_t* data, + size_t length) { + RTC_DCHECK(data); + return ParseInternal(H265::ParseRbsp(data, length)); +} + +absl::optional<H265VpsParser::VpsState> H265VpsParser::ParseInternal( + rtc::ArrayView<const uint8_t> buffer) { + BitstreamReader reader(buffer); + + // Now, we need to use a bit buffer to parse through the actual H265 VPS + // format. See Section 7.3.2.1 ("Video parameter set RBSP syntax") of the + // H.265 standard for a complete description. + VpsState vps; + + // vps_video_parameter_set_id: u(4) + vps.id = reader.ReadBits(4); + + if (!reader.Ok()) { + return absl::nullopt; + } + + return vps; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_vps_parser.h b/third_party/libwebrtc/common_video/h265/h265_vps_parser.h new file mode 100644 index 0000000000..e391d47401 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_vps_parser.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H265_H265_VPS_PARSER_H_ +#define COMMON_VIDEO_H265_H265_VPS_PARSER_H_ + +#include "absl/types/optional.h" +#include "api/array_view.h" + +namespace webrtc { + +// A class for parsing out video parameter set (VPS) data from an H265 NALU. +class H265VpsParser { + public: + // The parsed state of the VPS. Only some select values are stored. + // Add more as they are actually needed. + struct VpsState { + VpsState(); + + uint32_t id = 0; + }; + + // Unpack RBSP and parse VPS state from the supplied buffer. + static absl::optional<VpsState> ParseVps(const uint8_t* data, size_t length); + + protected: + // Parse the VPS state, for a bit buffer where RBSP decoding has already been + // performed. + static absl::optional<VpsState> ParseInternal( + rtc::ArrayView<const uint8_t> buffer); +}; + +} // namespace webrtc +#endif // COMMON_VIDEO_H265_H265_VPS_PARSER_H_ diff --git a/third_party/libwebrtc/common_video/h265/h265_vps_parser_unittest.cc b/third_party/libwebrtc/common_video/h265/h265_vps_parser_unittest.cc new file mode 100644 index 0000000000..24e8a77154 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_vps_parser_unittest.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "common_video/h265/h265_vps_parser.h" + +#include "common_video/h265/h265_common.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/buffer.h" +#include "test/gtest.h" + +namespace webrtc { + +// Example VPS can be generated with ffmpeg. Here's an example set of commands, +// runnable on Linux: +// 1) Generate a video, from the camera: +// ffmpeg -i /dev/video0 -r 30 -c:v libx265 -s 1280x720 camera.h265 +// +// 2) Open camera.h265 and find the VPS, generally everything between the first +// and second start codes (0 0 0 1 or 0 0 1). The first two bytes should be 0x40 +// and 0x01, which should be stripped out before being passed to the parser. + +class H265VpsParserTest : public ::testing::Test { + public: + H265VpsParserTest() {} + ~H265VpsParserTest() override {} + + absl::optional<H265VpsParser::VpsState> vps_; +}; + +TEST_F(H265VpsParserTest, TestSampleVPSId) { + // VPS id 1 + const uint8_t buffer[] = { + 0x1c, 0x01, 0xff, 0xff, 0x04, 0x08, 0x00, 0x00, 0x03, 0x00, 0x9d, + 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, 0x95, 0x98, 0x09, + }; + EXPECT_TRUE(static_cast<bool>( + vps_ = H265VpsParser::ParseVps(buffer, arraysize(buffer)))); + EXPECT_EQ(1u, vps_->id); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/include/bitrate_adjuster.h b/third_party/libwebrtc/common_video/include/bitrate_adjuster.h new file mode 100644 index 0000000000..4b208307a1 --- /dev/null +++ b/third_party/libwebrtc/common_video/include/bitrate_adjuster.h @@ -0,0 +1,92 @@ +/* + * Copyright 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef COMMON_VIDEO_INCLUDE_BITRATE_ADJUSTER_H_ +#define COMMON_VIDEO_INCLUDE_BITRATE_ADJUSTER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "absl/types/optional.h" +#include "rtc_base/rate_statistics.h" +#include "rtc_base/synchronization/mutex.h" +#include "rtc_base/system/rtc_export.h" +#include "rtc_base/thread_annotations.h" + +namespace webrtc { + +// Certain hardware encoders tend to consistently overshoot the bitrate that +// they are configured to encode at. This class estimates an adjusted bitrate +// that when set on the encoder will produce the desired bitrate. +class RTC_EXPORT BitrateAdjuster { + public: + // min_adjusted_bitrate_pct and max_adjusted_bitrate_pct are the lower and + // upper bound outputted adjusted bitrates as a percentage of the target + // bitrate. + BitrateAdjuster(float min_adjusted_bitrate_pct, + float max_adjusted_bitrate_pct); + virtual ~BitrateAdjuster() {} + + static const uint32_t kBitrateUpdateIntervalMs; + static const uint32_t kBitrateUpdateFrameInterval; + static const float kBitrateTolerancePct; + static const float kBytesPerMsToBitsPerSecond; + + // Sets the desired bitrate in bps (bits per second). + // Should be called at least once before Update. + void SetTargetBitrateBps(uint32_t bitrate_bps); + uint32_t GetTargetBitrateBps() const; + + // Returns the adjusted bitrate in bps. + uint32_t GetAdjustedBitrateBps() const; + + // Returns what we think the current bitrate is. + absl::optional<uint32_t> GetEstimatedBitrateBps(); + + // This should be called after each frame is encoded. The timestamp at which + // it is called is used to estimate the output bitrate of the encoder. + // Should be called from only one thread. + void Update(size_t frame_size); + + private: + // Returns true if the bitrate is within kBitrateTolerancePct of bitrate_bps. + bool IsWithinTolerance(uint32_t bitrate_bps, uint32_t target_bitrate_bps); + + // Returns smallest possible adjusted value. 
+ uint32_t GetMinAdjustedBitrateBps() const + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + // Returns largest possible adjusted value. + uint32_t GetMaxAdjustedBitrateBps() const + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + void Reset(); + void UpdateBitrate(uint32_t current_time_ms) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + mutable Mutex mutex_; + const float min_adjusted_bitrate_pct_; + const float max_adjusted_bitrate_pct_; + // The bitrate we want. + volatile uint32_t target_bitrate_bps_ RTC_GUARDED_BY(mutex_); + // The bitrate we use to get what we want. + volatile uint32_t adjusted_bitrate_bps_ RTC_GUARDED_BY(mutex_); + // The target bitrate that the adjusted bitrate was computed from. + volatile uint32_t last_adjusted_target_bitrate_bps_ RTC_GUARDED_BY(mutex_); + // Used to estimate bitrate. + RateStatistics bitrate_tracker_ RTC_GUARDED_BY(mutex_); + // The last time we tried to adjust the bitrate. + uint32_t last_bitrate_update_time_ms_ RTC_GUARDED_BY(mutex_); + // The number of frames since the last time we tried to adjust the bitrate. + uint32_t frames_since_last_update_ RTC_GUARDED_BY(mutex_); +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_INCLUDE_BITRATE_ADJUSTER_H_ diff --git a/third_party/libwebrtc/common_video/include/quality_limitation_reason.h b/third_party/libwebrtc/common_video/include/quality_limitation_reason.h new file mode 100644 index 0000000000..068136a4b2 --- /dev/null +++ b/third_party/libwebrtc/common_video/include/quality_limitation_reason.h @@ -0,0 +1,26 @@ +/* + * Copyright 2019 The WebRTC Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef COMMON_VIDEO_INCLUDE_QUALITY_LIMITATION_REASON_H_ +#define COMMON_VIDEO_INCLUDE_QUALITY_LIMITATION_REASON_H_ + +namespace webrtc { + +// https://w3c.github.io/webrtc-stats/#rtcqualitylimitationreason-enum +enum class QualityLimitationReason { + kNone, + kCpu, + kBandwidth, + kOther, +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_INCLUDE_QUALITY_LIMITATION_REASON_H_ diff --git a/third_party/libwebrtc/common_video/include/video_frame_buffer.h b/third_party/libwebrtc/common_video/include/video_frame_buffer.h new file mode 100644 index 0000000000..1f6331b94d --- /dev/null +++ b/third_party/libwebrtc/common_video/include/video_frame_buffer.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef COMMON_VIDEO_INCLUDE_VIDEO_FRAME_BUFFER_H_ +#define COMMON_VIDEO_INCLUDE_VIDEO_FRAME_BUFFER_H_ + +#include <stdint.h> + +#include <functional> + +#include "api/scoped_refptr.h" +#include "api/video/video_frame_buffer.h" + +namespace webrtc { + +rtc::scoped_refptr<I420BufferInterface> WrapI420Buffer( + int width, + int height, + const uint8_t* y_plane, + int y_stride, + const uint8_t* u_plane, + int u_stride, + const uint8_t* v_plane, + int v_stride, + std::function<void()> no_longer_used); + +rtc::scoped_refptr<I422BufferInterface> WrapI422Buffer( + int width, + int height, + const uint8_t* y_plane, + int y_stride, + const uint8_t* u_plane, + int u_stride, + const uint8_t* v_plane, + int v_stride, + std::function<void()> no_longer_used); + +rtc::scoped_refptr<I444BufferInterface> WrapI444Buffer( + int width, + int height, + const uint8_t* y_plane, + int y_stride, + const uint8_t* u_plane, + int u_stride, + const uint8_t* v_plane, + int v_stride, + std::function<void()> no_longer_used); + +rtc::scoped_refptr<I420ABufferInterface> WrapI420ABuffer( + int width, + int height, + const uint8_t* y_plane, + int y_stride, + const uint8_t* u_plane, + int u_stride, + const uint8_t* v_plane, + int v_stride, + const uint8_t* a_plane, + int a_stride, + std::function<void()> no_longer_used); + +rtc::scoped_refptr<PlanarYuvBuffer> WrapYuvBuffer( + VideoFrameBuffer::Type type, + int width, + int height, + const uint8_t* y_plane, + int y_stride, + const uint8_t* u_plane, + int u_stride, + const uint8_t* v_plane, + int v_stride, + std::function<void()> no_longer_used); + +rtc::scoped_refptr<I010BufferInterface> WrapI010Buffer( + int width, + int height, + const uint16_t* y_plane, + int y_stride, + const uint16_t* u_plane, + int u_stride, + const uint16_t* v_plane, + int v_stride, + std::function<void()> no_longer_used); + +rtc::scoped_refptr<I210BufferInterface> WrapI210Buffer( + int width, + int height, + const uint16_t* y_plane, + int y_stride, + const uint16_t* 
u_plane, + int u_stride, + const uint16_t* v_plane, + int v_stride, + std::function<void()> no_longer_used); + +rtc::scoped_refptr<I410BufferInterface> WrapI410Buffer( + int width, + int height, + const uint16_t* y_plane, + int y_stride, + const uint16_t* u_plane, + int u_stride, + const uint16_t* v_plane, + int v_stride, + std::function<void()> no_longer_used); +} // namespace webrtc + +#endif // COMMON_VIDEO_INCLUDE_VIDEO_FRAME_BUFFER_H_ diff --git a/third_party/libwebrtc/common_video/include/video_frame_buffer_pool.h b/third_party/libwebrtc/common_video/include/video_frame_buffer_pool.h new file mode 100644 index 0000000000..3d94bc5669 --- /dev/null +++ b/third_party/libwebrtc/common_video/include/video_frame_buffer_pool.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_INCLUDE_VIDEO_FRAME_BUFFER_POOL_H_ +#define COMMON_VIDEO_INCLUDE_VIDEO_FRAME_BUFFER_POOL_H_ + +#include <stddef.h> + +#include <list> + +#include "api/scoped_refptr.h" +#include "api/video/i010_buffer.h" +#include "api/video/i210_buffer.h" +#include "api/video/i410_buffer.h" +#include "api/video/i420_buffer.h" +#include "api/video/i422_buffer.h" +#include "api/video/i444_buffer.h" +#include "api/video/nv12_buffer.h" +#include "rtc_base/race_checker.h" + +namespace webrtc { + +// Simple buffer pool to avoid unnecessary allocations of video frame buffers. +// The pool manages the memory of the I420Buffer/NV12Buffer returned from +// Create(I420|NV12)Buffer. When the buffer is destructed, the memory is +// returned to the pool for use by subsequent calls to Create(I420|NV12)Buffer. 
+// If the resolution passed to Create(I420|NV12)Buffer changes or requested +// pixel format changes, old buffers will be purged from the pool. +// Note that Create(I420|NV12)Buffer will crash if more than +// kMaxNumberOfFramesBeforeCrash are created. This is to prevent memory leaks +// where frames are not returned. +class VideoFrameBufferPool { + public: + VideoFrameBufferPool(); + explicit VideoFrameBufferPool(bool zero_initialize); + VideoFrameBufferPool(bool zero_initialize, size_t max_number_of_buffers); + ~VideoFrameBufferPool(); + + // Returns a buffer from the pool. If no suitable buffer exist in the pool + // and there are less than `max_number_of_buffers` pending, a buffer is + // created. Returns null otherwise. + rtc::scoped_refptr<I420Buffer> CreateI420Buffer(int width, int height); + rtc::scoped_refptr<I422Buffer> CreateI422Buffer(int width, int height); + rtc::scoped_refptr<I444Buffer> CreateI444Buffer(int width, int height); + rtc::scoped_refptr<I010Buffer> CreateI010Buffer(int width, int height); + rtc::scoped_refptr<I210Buffer> CreateI210Buffer(int width, int height); + rtc::scoped_refptr<I410Buffer> CreateI410Buffer(int width, int height); + rtc::scoped_refptr<NV12Buffer> CreateNV12Buffer(int width, int height); + + // Changes the max amount of buffers in the pool to the new value. + // Returns true if change was successful and false if the amount of already + // allocated buffers is bigger than new value. + bool Resize(size_t max_number_of_buffers); + + // Clears buffers_ and detaches the thread checker so that it can be reused + // later from another thread. + void Release(); + + private: + rtc::scoped_refptr<VideoFrameBuffer> + GetExistingBuffer(int width, int height, VideoFrameBuffer::Type type); + + rtc::RaceChecker race_checker_; + std::list<rtc::scoped_refptr<VideoFrameBuffer>> buffers_; + // If true, newly allocated buffers are zero-initialized. Note that recycled + // buffers are not zero'd before reuse. 
This is required of buffers used by + // FFmpeg according to http://crbug.com/390941, which only requires it for the + // initial allocation (as shown by FFmpeg's own buffer allocation code). It + // has to do with "Use-of-uninitialized-value" on "Linux_msan_chrome". + const bool zero_initialize_; + // Max number of buffers this pool can have pending. + size_t max_number_of_buffers_; +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_INCLUDE_VIDEO_FRAME_BUFFER_POOL_H_ diff --git a/third_party/libwebrtc/common_video/libyuv/include/webrtc_libyuv.h b/third_party/libwebrtc/common_video/libyuv/include/webrtc_libyuv.h new file mode 100644 index 0000000000..253a33294d --- /dev/null +++ b/third_party/libwebrtc/common_video/libyuv/include/webrtc_libyuv.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * WebRTC's wrapper to libyuv. + */ + +#ifndef COMMON_VIDEO_LIBYUV_INCLUDE_WEBRTC_LIBYUV_H_ +#define COMMON_VIDEO_LIBYUV_INCLUDE_WEBRTC_LIBYUV_H_ + +#include <stdint.h> +#include <stdio.h> + +#include <vector> + +#include "api/scoped_refptr.h" +#include "api/video/video_frame.h" +#include "api/video/video_frame_buffer.h" +#include "rtc_base/system/rtc_export.h" + +namespace webrtc { + +enum class VideoType { + kUnknown, + kI420, + kIYUV, + kRGB24, + kBGR24, + kARGB, + kABGR, + kARGB4444, + kRGB565, + kARGB1555, + kYUY2, + kYV12, + kUYVY, + kMJPEG, + kNV21, + kBGRA, + kNV12, +}; + +// This is the max PSNR value our algorithms can return. +const double kPerfectPSNR = 48.0f; + +// Calculate the required buffer size. +// Input: +// - type :The type of the designated video frame. 
+// - width :frame width in pixels. +// - height :frame height in pixels. +// Return value: :The required size in bytes to accommodate the specified +// video frame. +size_t CalcBufferSize(VideoType type, int width, int height); + +// Extract buffer from VideoFrame or I420BufferInterface (consecutive +// planes, no stride) +// Input: +// - input_frame : Reference to video frame or I420 buffer. +// - size : Size of the allocated buffer. If size is +// insufficient, an error will be returned. +// - buffer : Pointer to buffer +// Return value: length of buffer if OK, < 0 otherwise. +int ExtractBuffer(const rtc::scoped_refptr<I420BufferInterface>& input_frame, + size_t size, + uint8_t* buffer); +int ExtractBuffer(const VideoFrame& input_frame, size_t size, uint8_t* buffer); +// Convert From I420 +// Input: +// - src_frame : Reference to a source frame. +// - dst_video_type : Type of output video. +// - dst_sample_size : Required only for the parsing of MJPG. +// - dst_frame : Pointer to a destination frame. +// Return value: 0 if OK, < 0 otherwise. +// It is assumed that source and destination have equal height. +int ConvertFromI420(const VideoFrame& src_frame, + VideoType dst_video_type, + int dst_sample_size, + uint8_t* dst_frame); + +rtc::scoped_refptr<I420BufferInterface> ScaleVideoFrameBuffer( + const I420BufferInterface& source, + int dst_width, + int dst_height); + +double I420SSE(const I420BufferInterface& ref_buffer, + const I420BufferInterface& test_buffer); + +// Compute PSNR for an I420 frame (all planes). +// Returns the PSNR in decibel, to a maximum of kPerfectPSNR. +double I420PSNR(const VideoFrame* ref_frame, const VideoFrame* test_frame); +double I420PSNR(const I420BufferInterface& ref_buffer, + const I420BufferInterface& test_buffer); + +// Computes the weighted PSNR-YUV for an I420 buffer. +// +// For the definition and motivation, see +// J. Ohm, G. J. Sullivan, H. Schwarz, T. K. Tan and T.
Wiegand, +// "Comparison of the Coding Efficiency of Video Coding Standards—Including +// High Efficiency Video Coding (HEVC)," in IEEE Transactions on Circuits and +// Systems for Video Technology, vol. 22, no. 12, pp. 1669-1684, Dec. 2012 +// doi: 10.1109/TCSVT.2012.2221192. +// +// Returns the PSNR-YUV in decibel, to a maximum of kPerfectPSNR. +double I420WeightedPSNR(const I420BufferInterface& ref_buffer, + const I420BufferInterface& test_buffer); + +// Compute SSIM for an I420 frame (all planes). +double I420SSIM(const VideoFrame* ref_frame, const VideoFrame* test_frame); +double I420SSIM(const I420BufferInterface& ref_buffer, + const I420BufferInterface& test_buffer); + +// Helper function for scaling NV12 to NV12. +// If the `src_width` and `src_height` matches the `dst_width` and `dst_height`, +// then `tmp_buffer` is not used. In other cases, the minimum size of +// `tmp_buffer` should be: +// (src_width/2) * (src_height/2) * 2 + (dst_width/2) * (dst_height/2) * 2 +void NV12Scale(uint8_t* tmp_buffer, + const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + int src_width, + int src_height, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int dst_width, + int dst_height); + +// Helper class for directly converting and scaling NV12 to I420. The Y-plane +// will be scaled directly to the I420 destination, which makes this faster +// than separate NV12->I420 + I420->I420 scaling. 
+class RTC_EXPORT NV12ToI420Scaler { + public: + NV12ToI420Scaler(); + ~NV12ToI420Scaler(); + void NV12ToI420Scale(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + int src_width, + int src_height, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height); + + private: + std::vector<uint8_t> tmp_uv_planes_; +}; + +// Convert VideoType to libyuv FourCC type +int ConvertVideoType(VideoType video_type); + +} // namespace webrtc + +#endif // COMMON_VIDEO_LIBYUV_INCLUDE_WEBRTC_LIBYUV_H_ diff --git a/third_party/libwebrtc/common_video/libyuv/libyuv_unittest.cc b/third_party/libwebrtc/common_video/libyuv/libyuv_unittest.cc new file mode 100644 index 0000000000..f9c82f6284 --- /dev/null +++ b/third_party/libwebrtc/common_video/libyuv/libyuv_unittest.cc @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "third_party/libyuv/include/libyuv.h" + +#include <math.h> +#include <string.h> + +#include <memory> + +#include "api/video/i420_buffer.h" +#include "api/video/video_frame.h" +#include "common_video/libyuv/include/webrtc_libyuv.h" +#include "test/frame_utils.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +namespace { +void Calc16ByteAlignedStride(int width, int* stride_y, int* stride_uv) { + *stride_y = 16 * ((width + 15) / 16); + *stride_uv = 16 * ((width + 31) / 32); +} + +int PrintPlane(const uint8_t* buf, + int width, + int height, + int stride, + FILE* file) { + for (int i = 0; i < height; i++, buf += stride) { + if (fwrite(buf, 1, width, file) != static_cast<unsigned int>(width)) + return -1; + } + return 0; +} + +int PrintVideoFrame(const I420BufferInterface& frame, FILE* file) { + int width = frame.width(); + int height = frame.height(); + int chroma_width = frame.ChromaWidth(); + int chroma_height = frame.ChromaHeight(); + + if (PrintPlane(frame.DataY(), width, height, frame.StrideY(), file) < 0) { + return -1; + } + if (PrintPlane(frame.DataU(), chroma_width, chroma_height, frame.StrideU(), + file) < 0) { + return -1; + } + if (PrintPlane(frame.DataV(), chroma_width, chroma_height, frame.StrideV(), + file) < 0) { + return -1; + } + return 0; +} + +} // Anonymous namespace + +class TestLibYuv : public ::testing::Test { + protected: + TestLibYuv(); + void SetUp() override; + void TearDown() override; + + FILE* source_file_; + std::unique_ptr<VideoFrame> orig_frame_; + const int width_; + const int height_; + const int size_y_; + const int size_uv_; + const size_t frame_length_; +}; + +TestLibYuv::TestLibYuv() + : source_file_(NULL), + orig_frame_(), + width_(352), + height_(288), + size_y_(width_ * height_), + size_uv_(((width_ + 1) / 2) * ((height_ + 1) / 2)), + frame_length_(CalcBufferSize(VideoType::kI420, 352, 288)) {} + +void TestLibYuv::SetUp() { + const 
std::string input_file_name = + webrtc::test::ResourcePath("foreman_cif", "yuv"); + source_file_ = fopen(input_file_name.c_str(), "rb"); + ASSERT_TRUE(source_file_ != NULL) + << "Cannot read file: " << input_file_name << "\n"; + + rtc::scoped_refptr<I420BufferInterface> buffer( + test::ReadI420Buffer(width_, height_, source_file_)); + + orig_frame_ = + std::make_unique<VideoFrame>(VideoFrame::Builder() + .set_video_frame_buffer(buffer) + .set_rotation(webrtc::kVideoRotation_0) + .set_timestamp_us(0) + .build()); +} + +void TestLibYuv::TearDown() { + if (source_file_ != NULL) { + ASSERT_EQ(0, fclose(source_file_)); + } + source_file_ = NULL; +} + +TEST_F(TestLibYuv, ConvertTest) { + // Reading YUV frame - testing on the first frame of the foreman sequence + int j = 0; + std::string output_file_name = + webrtc::test::OutputPath() + "LibYuvTest_conversion.yuv"; + FILE* output_file = fopen(output_file_name.c_str(), "wb"); + ASSERT_TRUE(output_file != NULL); + + double psnr = 0.0; + + rtc::scoped_refptr<I420Buffer> res_i420_buffer = + I420Buffer::Create(width_, height_); + + printf("\nConvert #%d I420 <-> I420 \n", j); + std::unique_ptr<uint8_t[]> out_i420_buffer(new uint8_t[frame_length_]); + EXPECT_EQ(0, ConvertFromI420(*orig_frame_, VideoType::kI420, 0, + out_i420_buffer.get())); + int y_size = width_ * height_; + int u_size = res_i420_buffer->ChromaWidth() * res_i420_buffer->ChromaHeight(); + int ret = libyuv::I420Copy( + out_i420_buffer.get(), width_, out_i420_buffer.get() + y_size, + width_ >> 1, out_i420_buffer.get() + y_size + u_size, width_ >> 1, + res_i420_buffer.get()->MutableDataY(), res_i420_buffer.get()->StrideY(), + res_i420_buffer.get()->MutableDataU(), res_i420_buffer.get()->StrideU(), + res_i420_buffer.get()->MutableDataV(), res_i420_buffer.get()->StrideV(), + width_, height_); + EXPECT_EQ(0, ret); + + if (PrintVideoFrame(*res_i420_buffer, output_file) < 0) { + return; + } + psnr = + I420PSNR(*orig_frame_->video_frame_buffer()->GetI420(), 
*res_i420_buffer); + EXPECT_EQ(48.0, psnr); + j++; + + printf("\nConvert #%d I420 <-> RGB24\n", j); + std::unique_ptr<uint8_t[]> res_rgb_buffer2(new uint8_t[width_ * height_ * 3]); + // Align the stride values for the output frame. + int stride_y = 0; + int stride_uv = 0; + Calc16ByteAlignedStride(width_, &stride_y, &stride_uv); + res_i420_buffer = + I420Buffer::Create(width_, height_, stride_y, stride_uv, stride_uv); + EXPECT_EQ(0, ConvertFromI420(*orig_frame_, VideoType::kRGB24, 0, + res_rgb_buffer2.get())); + + ret = libyuv::ConvertToI420( + res_rgb_buffer2.get(), 0, res_i420_buffer.get()->MutableDataY(), + res_i420_buffer.get()->StrideY(), res_i420_buffer.get()->MutableDataU(), + res_i420_buffer.get()->StrideU(), res_i420_buffer.get()->MutableDataV(), + res_i420_buffer.get()->StrideV(), 0, 0, width_, height_, + res_i420_buffer->width(), res_i420_buffer->height(), libyuv::kRotate0, + ConvertVideoType(VideoType::kRGB24)); + + EXPECT_EQ(0, ret); + if (PrintVideoFrame(*res_i420_buffer, output_file) < 0) { + return; + } + psnr = + I420PSNR(*orig_frame_->video_frame_buffer()->GetI420(), *res_i420_buffer); + + // Optimization Speed- quality trade-off => 45 dB only (platform dependant). 
+ EXPECT_GT(ceil(psnr), 44); + j++; + + printf("\nConvert #%d I420 <-> UYVY\n", j); + std::unique_ptr<uint8_t[]> out_uyvy_buffer(new uint8_t[width_ * height_ * 2]); + EXPECT_EQ(0, ConvertFromI420(*orig_frame_, VideoType::kUYVY, 0, + out_uyvy_buffer.get())); + + ret = libyuv::ConvertToI420( + out_uyvy_buffer.get(), 0, res_i420_buffer.get()->MutableDataY(), + res_i420_buffer.get()->StrideY(), res_i420_buffer.get()->MutableDataU(), + res_i420_buffer.get()->StrideU(), res_i420_buffer.get()->MutableDataV(), + res_i420_buffer.get()->StrideV(), 0, 0, width_, height_, + res_i420_buffer->width(), res_i420_buffer->height(), libyuv::kRotate0, + ConvertVideoType(VideoType::kUYVY)); + + EXPECT_EQ(0, ret); + psnr = + I420PSNR(*orig_frame_->video_frame_buffer()->GetI420(), *res_i420_buffer); + EXPECT_EQ(48.0, psnr); + if (PrintVideoFrame(*res_i420_buffer, output_file) < 0) { + return; + } + j++; + + printf("\nConvert #%d I420 <-> YUY2\n", j); + std::unique_ptr<uint8_t[]> out_yuy2_buffer(new uint8_t[width_ * height_ * 2]); + EXPECT_EQ(0, ConvertFromI420(*orig_frame_, VideoType::kYUY2, 0, + out_yuy2_buffer.get())); + + ret = libyuv::ConvertToI420( + out_yuy2_buffer.get(), 0, res_i420_buffer.get()->MutableDataY(), + res_i420_buffer.get()->StrideY(), res_i420_buffer.get()->MutableDataU(), + res_i420_buffer.get()->StrideU(), res_i420_buffer.get()->MutableDataV(), + res_i420_buffer.get()->StrideV(), 0, 0, width_, height_, + res_i420_buffer->width(), res_i420_buffer->height(), libyuv::kRotate0, + ConvertVideoType(VideoType::kYUY2)); + + EXPECT_EQ(0, ret); + + if (PrintVideoFrame(*res_i420_buffer, output_file) < 0) { + return; + } + + psnr = + I420PSNR(*orig_frame_->video_frame_buffer()->GetI420(), *res_i420_buffer); + EXPECT_EQ(48.0, psnr); + + printf("\nConvert #%d I420 <-> RGB565\n", j); + std::unique_ptr<uint8_t[]> out_rgb565_buffer( + new uint8_t[width_ * height_ * 2]); + EXPECT_EQ(0, ConvertFromI420(*orig_frame_, VideoType::kRGB565, 0, + out_rgb565_buffer.get())); + + ret = 
libyuv::ConvertToI420( + out_rgb565_buffer.get(), 0, res_i420_buffer.get()->MutableDataY(), + res_i420_buffer.get()->StrideY(), res_i420_buffer.get()->MutableDataU(), + res_i420_buffer.get()->StrideU(), res_i420_buffer.get()->MutableDataV(), + res_i420_buffer.get()->StrideV(), 0, 0, width_, height_, + res_i420_buffer->width(), res_i420_buffer->height(), libyuv::kRotate0, + ConvertVideoType(VideoType::kRGB565)); + + EXPECT_EQ(0, ret); + if (PrintVideoFrame(*res_i420_buffer, output_file) < 0) { + return; + } + j++; + + psnr = + I420PSNR(*orig_frame_->video_frame_buffer()->GetI420(), *res_i420_buffer); + // TODO(leozwang) Investigate the right psnr should be set for I420ToRGB565, + // Another example is I420ToRGB24, the psnr is 44 + // TODO(mikhal): Add psnr for RGB565, 1555, 4444, convert to ARGB. + EXPECT_GT(ceil(psnr), 40); + + printf("\nConvert #%d I420 <-> ARGB8888\n", j); + std::unique_ptr<uint8_t[]> out_argb8888_buffer( + new uint8_t[width_ * height_ * 4]); + EXPECT_EQ(0, ConvertFromI420(*orig_frame_, VideoType::kARGB, 0, + out_argb8888_buffer.get())); + + ret = libyuv::ConvertToI420( + out_argb8888_buffer.get(), 0, res_i420_buffer.get()->MutableDataY(), + res_i420_buffer.get()->StrideY(), res_i420_buffer.get()->MutableDataU(), + res_i420_buffer.get()->StrideU(), res_i420_buffer.get()->MutableDataV(), + res_i420_buffer.get()->StrideV(), 0, 0, width_, height_, + res_i420_buffer->width(), res_i420_buffer->height(), libyuv::kRotate0, + ConvertVideoType(VideoType::kARGB)); + + EXPECT_EQ(0, ret); + + if (PrintVideoFrame(*res_i420_buffer, output_file) < 0) { + return; + } + + psnr = + I420PSNR(*orig_frame_->video_frame_buffer()->GetI420(), *res_i420_buffer); + // TODO(leozwang) Investigate the right psnr should be set for + // I420ToARGB8888, + EXPECT_GT(ceil(psnr), 42); + + ASSERT_EQ(0, fclose(output_file)); +} + +TEST_F(TestLibYuv, ConvertAlignedFrame) { + // Reading YUV frame - testing on the first frame of the foreman sequence + std::string output_file_name = + 
webrtc::test::OutputPath() + "LibYuvTest_conversion.yuv"; + FILE* output_file = fopen(output_file_name.c_str(), "wb"); + ASSERT_TRUE(output_file != NULL); + + double psnr = 0.0; + + int stride_y = 0; + int stride_uv = 0; + Calc16ByteAlignedStride(width_, &stride_y, &stride_uv); + + rtc::scoped_refptr<I420Buffer> res_i420_buffer = + I420Buffer::Create(width_, height_, stride_y, stride_uv, stride_uv); + std::unique_ptr<uint8_t[]> out_i420_buffer(new uint8_t[frame_length_]); + EXPECT_EQ(0, ConvertFromI420(*orig_frame_, VideoType::kI420, 0, + out_i420_buffer.get())); + int y_size = width_ * height_; + int u_size = res_i420_buffer->ChromaWidth() * res_i420_buffer->ChromaHeight(); + int ret = libyuv::I420Copy( + out_i420_buffer.get(), width_, out_i420_buffer.get() + y_size, + width_ >> 1, out_i420_buffer.get() + y_size + u_size, width_ >> 1, + res_i420_buffer.get()->MutableDataY(), res_i420_buffer.get()->StrideY(), + res_i420_buffer.get()->MutableDataU(), res_i420_buffer.get()->StrideU(), + res_i420_buffer.get()->MutableDataV(), res_i420_buffer.get()->StrideV(), + width_, height_); + + EXPECT_EQ(0, ret); + + if (PrintVideoFrame(*res_i420_buffer, output_file) < 0) { + return; + } + psnr = + I420PSNR(*orig_frame_->video_frame_buffer()->GetI420(), *res_i420_buffer); + EXPECT_EQ(48.0, psnr); +} + +static uint8_t Average(int a, int b, int c, int d) { + return (a + b + c + d + 2) / 4; +} + +TEST_F(TestLibYuv, NV12Scale2x2to2x2) { + const std::vector<uint8_t> src_y = {0, 1, 2, 3}; + const std::vector<uint8_t> src_uv = {0, 1}; + std::vector<uint8_t> dst_y(4); + std::vector<uint8_t> dst_uv(2); + + uint8_t* tmp_buffer = nullptr; + + NV12Scale(tmp_buffer, src_y.data(), 2, src_uv.data(), 2, 2, 2, dst_y.data(), + 2, dst_uv.data(), 2, 2, 2); + + EXPECT_THAT(dst_y, ::testing::ContainerEq(src_y)); + EXPECT_THAT(dst_uv, ::testing::ContainerEq(src_uv)); +} + +TEST_F(TestLibYuv, NV12Scale4x4to2x2) { + const uint8_t src_y[] = {0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}; + const 
uint8_t src_uv[] = {0, 1, 2, 3, 4, 5, 6, 7}; + std::vector<uint8_t> dst_y(4); + std::vector<uint8_t> dst_uv(2); + + std::vector<uint8_t> tmp_buffer; + const int src_chroma_width = (4 + 1) / 2; + const int src_chroma_height = (4 + 1) / 2; + const int dst_chroma_width = (2 + 1) / 2; + const int dst_chroma_height = (2 + 1) / 2; + tmp_buffer.resize(src_chroma_width * src_chroma_height * 2 + + dst_chroma_width * dst_chroma_height * 2); + tmp_buffer.shrink_to_fit(); + + NV12Scale(tmp_buffer.data(), src_y, 4, src_uv, 4, 4, 4, dst_y.data(), 2, + dst_uv.data(), 2, 2, 2); + + EXPECT_THAT(dst_y, ::testing::ElementsAre( + Average(0, 1, 4, 5), Average(2, 3, 6, 7), + Average(8, 9, 12, 13), Average(10, 11, 14, 15))); + EXPECT_THAT(dst_uv, + ::testing::ElementsAre(Average(0, 2, 4, 6), Average(1, 3, 5, 7))); +} + +TEST(I420WeightedPSNRTest, SmokeTest) { + uint8_t ref_y[] = {0, 0, 0, 0}; + uint8_t ref_uv[] = {0}; + rtc::scoped_refptr<I420Buffer> ref_buffer = + I420Buffer::Copy(/*width=*/2, /*height=*/2, ref_y, /*stride_y=*/2, ref_uv, + /*stride_u=*/1, ref_uv, /*stride_v=*/1); + + uint8_t test_y[] = {1, 1, 1, 1}; + uint8_t test_uv[] = {2}; + rtc::scoped_refptr<I420Buffer> test_buffer = I420Buffer::Copy( + /*width=*/2, /*height=*/2, test_y, /*stride_y=*/2, test_uv, + /*stride_u=*/1, test_uv, /*stride_v=*/1); + + auto psnr = [](double mse) { return 10.0 * log10(255.0 * 255.0 / mse); }; + EXPECT_NEAR(I420WeightedPSNR(*ref_buffer, *test_buffer), + (6.0 * psnr(1.0) + psnr(4.0) + psnr(4.0)) / 8.0, + /*abs_error=*/0.001); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/libyuv/webrtc_libyuv.cc b/third_party/libwebrtc/common_video/libyuv/webrtc_libyuv.cc new file mode 100644 index 0000000000..05a4b184c2 --- /dev/null +++ b/third_party/libwebrtc/common_video/libyuv/webrtc_libyuv.cc @@ -0,0 +1,470 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/libyuv/include/webrtc_libyuv.h" + +#include <cstdint> + +#include "api/video/i420_buffer.h" +#include "common_video/include/video_frame_buffer.h" +#include "rtc_base/checks.h" +#include "third_party/libyuv/include/libyuv.h" + +namespace webrtc { + +size_t CalcBufferSize(VideoType type, int width, int height) { + RTC_DCHECK_GE(width, 0); + RTC_DCHECK_GE(height, 0); + switch (type) { + case VideoType::kI420: + case VideoType::kNV21: + case VideoType::kIYUV: + case VideoType::kYV12: + case VideoType::kNV12: { + int half_width = (width + 1) >> 1; + int half_height = (height + 1) >> 1; + return width * height + half_width * half_height * 2; + } + case VideoType::kARGB4444: + case VideoType::kRGB565: + case VideoType::kARGB1555: + case VideoType::kYUY2: + case VideoType::kUYVY: + return width * height * 2; + case VideoType::kRGB24: + case VideoType::kBGR24: + return width * height * 3; + case VideoType::kBGRA: + case VideoType::kARGB: + case VideoType::kABGR: + return width * height * 4; + case VideoType::kMJPEG: + case VideoType::kUnknown: + break; + } + RTC_DCHECK_NOTREACHED() << "Unexpected pixel format " << type; + return 0; +} + +int ExtractBuffer(const rtc::scoped_refptr<I420BufferInterface>& input_frame, + size_t size, + uint8_t* buffer) { + RTC_DCHECK(buffer); + if (!input_frame) + return -1; + int width = input_frame->width(); + int height = input_frame->height(); + size_t length = CalcBufferSize(VideoType::kI420, width, height); + if (size < length) { + return -1; + } + + int chroma_width = input_frame->ChromaWidth(); + int chroma_height = input_frame->ChromaHeight(); + + 
libyuv::I420Copy(input_frame->DataY(), input_frame->StrideY(), + input_frame->DataU(), input_frame->StrideU(), + input_frame->DataV(), input_frame->StrideV(), buffer, width, + buffer + width * height, chroma_width, + buffer + width * height + chroma_width * chroma_height, + chroma_width, width, height); + + return static_cast<int>(length); +} + +int ExtractBuffer(const VideoFrame& input_frame, size_t size, uint8_t* buffer) { + return ExtractBuffer(input_frame.video_frame_buffer()->ToI420(), size, + buffer); +} + +int ConvertVideoType(VideoType video_type) { + switch (video_type) { + case VideoType::kUnknown: + return libyuv::FOURCC_ANY; + case VideoType::kI420: + return libyuv::FOURCC_I420; + case VideoType::kIYUV: // same as VideoType::kYV12 + case VideoType::kYV12: + return libyuv::FOURCC_YV12; + case VideoType::kRGB24: + return libyuv::FOURCC_24BG; + case VideoType::kBGR24: + return libyuv::FOURCC_RAW; + case VideoType::kABGR: + return libyuv::FOURCC_ABGR; + case VideoType::kRGB565: + return libyuv::FOURCC_RGBP; + case VideoType::kYUY2: + return libyuv::FOURCC_YUY2; + case VideoType::kUYVY: + return libyuv::FOURCC_UYVY; + case VideoType::kMJPEG: + return libyuv::FOURCC_MJPG; + case VideoType::kNV21: + return libyuv::FOURCC_NV21; + case VideoType::kARGB: + return libyuv::FOURCC_ARGB; + case VideoType::kBGRA: + return libyuv::FOURCC_BGRA; + case VideoType::kARGB4444: + return libyuv::FOURCC_R444; + case VideoType::kARGB1555: + return libyuv::FOURCC_RGBO; + case VideoType::kNV12: + return libyuv::FOURCC_NV12; + } + RTC_DCHECK_NOTREACHED() << "Unexpected pixel format " << video_type; + return libyuv::FOURCC_ANY; +} + +int ConvertFromI420(const VideoFrame& src_frame, + VideoType dst_video_type, + int dst_sample_size, + uint8_t* dst_frame) { + rtc::scoped_refptr<I420BufferInterface> i420_buffer = + src_frame.video_frame_buffer()->ToI420(); + return libyuv::ConvertFromI420( + i420_buffer->DataY(), i420_buffer->StrideY(), i420_buffer->DataU(), + i420_buffer->StrideU(), 
i420_buffer->DataV(), i420_buffer->StrideV(), + dst_frame, dst_sample_size, src_frame.width(), src_frame.height(), + ConvertVideoType(dst_video_type)); +} + +rtc::scoped_refptr<I420ABufferInterface> ScaleI420ABuffer( + const I420ABufferInterface& buffer, + int target_width, + int target_height) { + rtc::scoped_refptr<I420Buffer> yuv_buffer = + I420Buffer::Create(target_width, target_height); + yuv_buffer->ScaleFrom(buffer); + rtc::scoped_refptr<I420Buffer> axx_buffer = + I420Buffer::Create(target_width, target_height); + libyuv::ScalePlane(buffer.DataA(), buffer.StrideA(), buffer.width(), + buffer.height(), axx_buffer->MutableDataY(), + axx_buffer->StrideY(), target_width, target_height, + libyuv::kFilterBox); + rtc::scoped_refptr<I420ABufferInterface> merged_buffer = WrapI420ABuffer( + yuv_buffer->width(), yuv_buffer->height(), yuv_buffer->DataY(), + yuv_buffer->StrideY(), yuv_buffer->DataU(), yuv_buffer->StrideU(), + yuv_buffer->DataV(), yuv_buffer->StrideV(), axx_buffer->DataY(), + axx_buffer->StrideY(), + // To keep references alive. 
+ [yuv_buffer, axx_buffer] {}); + return merged_buffer; +} + +rtc::scoped_refptr<I420BufferInterface> ScaleVideoFrameBuffer( + const I420BufferInterface& source, + int dst_width, + int dst_height) { + rtc::scoped_refptr<I420Buffer> scaled_buffer = + I420Buffer::Create(dst_width, dst_height); + scaled_buffer->ScaleFrom(source); + return scaled_buffer; +} + +double I420SSE(const I420BufferInterface& ref_buffer, + const I420BufferInterface& test_buffer) { + RTC_DCHECK_EQ(ref_buffer.width(), test_buffer.width()); + RTC_DCHECK_EQ(ref_buffer.height(), test_buffer.height()); + const uint64_t width = test_buffer.width(); + const uint64_t height = test_buffer.height(); + const uint64_t sse_y = libyuv::ComputeSumSquareErrorPlane( + ref_buffer.DataY(), ref_buffer.StrideY(), test_buffer.DataY(), + test_buffer.StrideY(), width, height); + const int width_uv = (width + 1) >> 1; + const int height_uv = (height + 1) >> 1; + const uint64_t sse_u = libyuv::ComputeSumSquareErrorPlane( + ref_buffer.DataU(), ref_buffer.StrideU(), test_buffer.DataU(), + test_buffer.StrideU(), width_uv, height_uv); + const uint64_t sse_v = libyuv::ComputeSumSquareErrorPlane( + ref_buffer.DataV(), ref_buffer.StrideV(), test_buffer.DataV(), + test_buffer.StrideV(), width_uv, height_uv); + const double samples = width * height + 2 * (width_uv * height_uv); + const double sse = sse_y + sse_u + sse_v; + return sse / (samples * 255.0 * 255.0); +} + +// Compute PSNR for an I420A frame (all planes). Can upscale test frame. 
+double I420APSNR(const I420ABufferInterface& ref_buffer, + const I420ABufferInterface& test_buffer) { + RTC_DCHECK_GE(ref_buffer.width(), test_buffer.width()); + RTC_DCHECK_GE(ref_buffer.height(), test_buffer.height()); + if ((ref_buffer.width() != test_buffer.width()) || + (ref_buffer.height() != test_buffer.height())) { + rtc::scoped_refptr<I420ABufferInterface> scaled_buffer = + ScaleI420ABuffer(test_buffer, ref_buffer.width(), ref_buffer.height()); + return I420APSNR(ref_buffer, *scaled_buffer); + } + const int width = test_buffer.width(); + const int height = test_buffer.height(); + const uint64_t sse_y = libyuv::ComputeSumSquareErrorPlane( + ref_buffer.DataY(), ref_buffer.StrideY(), test_buffer.DataY(), + test_buffer.StrideY(), width, height); + const int width_uv = (width + 1) >> 1; + const int height_uv = (height + 1) >> 1; + const uint64_t sse_u = libyuv::ComputeSumSquareErrorPlane( + ref_buffer.DataU(), ref_buffer.StrideU(), test_buffer.DataU(), + test_buffer.StrideU(), width_uv, height_uv); + const uint64_t sse_v = libyuv::ComputeSumSquareErrorPlane( + ref_buffer.DataV(), ref_buffer.StrideV(), test_buffer.DataV(), + test_buffer.StrideV(), width_uv, height_uv); + const uint64_t sse_a = libyuv::ComputeSumSquareErrorPlane( + ref_buffer.DataA(), ref_buffer.StrideA(), test_buffer.DataA(), + test_buffer.StrideA(), width, height); + const uint64_t samples = 2 * (uint64_t)width * (uint64_t)height + + 2 * ((uint64_t)width_uv * (uint64_t)height_uv); + const uint64_t sse = sse_y + sse_u + sse_v + sse_a; + const double psnr = libyuv::SumSquareErrorToPsnr(sse, samples); + return (psnr > kPerfectPSNR) ? 
kPerfectPSNR : psnr; +} + +// Compute PSNR for an I420A frame (all planes) +double I420APSNR(const VideoFrame* ref_frame, const VideoFrame* test_frame) { + if (!ref_frame || !test_frame) + return -1; + RTC_DCHECK(ref_frame->video_frame_buffer()->type() == + VideoFrameBuffer::Type::kI420A); + RTC_DCHECK(test_frame->video_frame_buffer()->type() == + VideoFrameBuffer::Type::kI420A); + return I420APSNR(*ref_frame->video_frame_buffer()->GetI420A(), + *test_frame->video_frame_buffer()->GetI420A()); +} + +// Compute PSNR for an I420 frame (all planes). Can upscale test frame. +double I420PSNR(const I420BufferInterface& ref_buffer, + const I420BufferInterface& test_buffer) { + RTC_DCHECK_GE(ref_buffer.width(), test_buffer.width()); + RTC_DCHECK_GE(ref_buffer.height(), test_buffer.height()); + if ((ref_buffer.width() != test_buffer.width()) || + (ref_buffer.height() != test_buffer.height())) { + rtc::scoped_refptr<I420Buffer> scaled_buffer = + I420Buffer::Create(ref_buffer.width(), ref_buffer.height()); + scaled_buffer->ScaleFrom(test_buffer); + return I420PSNR(ref_buffer, *scaled_buffer); + } + double psnr = libyuv::I420Psnr( + ref_buffer.DataY(), ref_buffer.StrideY(), ref_buffer.DataU(), + ref_buffer.StrideU(), ref_buffer.DataV(), ref_buffer.StrideV(), + test_buffer.DataY(), test_buffer.StrideY(), test_buffer.DataU(), + test_buffer.StrideU(), test_buffer.DataV(), test_buffer.StrideV(), + test_buffer.width(), test_buffer.height()); + // LibYuv sets the max psnr value to 128, we restrict it here. + // In case of 0 mse in one frame, 128 can skew the results significantly. + return (psnr > kPerfectPSNR) ? 
kPerfectPSNR : psnr; +} + +// Compute PSNR for an I420 frame (all planes) +double I420PSNR(const VideoFrame* ref_frame, const VideoFrame* test_frame) { + if (!ref_frame || !test_frame) + return -1; + return I420PSNR(*ref_frame->video_frame_buffer()->ToI420(), + *test_frame->video_frame_buffer()->ToI420()); +} + +double I420WeightedPSNR(const I420BufferInterface& ref_buffer, + const I420BufferInterface& test_buffer) { + RTC_DCHECK_GE(ref_buffer.width(), test_buffer.width()); + RTC_DCHECK_GE(ref_buffer.height(), test_buffer.height()); + if ((ref_buffer.width() != test_buffer.width()) || + (ref_buffer.height() != test_buffer.height())) { + rtc::scoped_refptr<I420Buffer> scaled_ref_buffer = + I420Buffer::Create(test_buffer.width(), test_buffer.height()); + scaled_ref_buffer->ScaleFrom(ref_buffer); + return I420WeightedPSNR(*scaled_ref_buffer, test_buffer); + } + + // Luma. + int width_y = test_buffer.width(); + int height_y = test_buffer.height(); + uint64_t sse_y = libyuv::ComputeSumSquareErrorPlane( + ref_buffer.DataY(), ref_buffer.StrideY(), test_buffer.DataY(), + test_buffer.StrideY(), width_y, height_y); + uint64_t num_samples_y = (uint64_t)width_y * (uint64_t)height_y; + double psnr_y = libyuv::SumSquareErrorToPsnr(sse_y, num_samples_y); + + // Chroma. + int width_uv = (width_y + 1) >> 1; + int height_uv = (height_y + 1) >> 1; + uint64_t sse_u = libyuv::ComputeSumSquareErrorPlane( + ref_buffer.DataU(), ref_buffer.StrideU(), test_buffer.DataU(), + test_buffer.StrideU(), width_uv, height_uv); + uint64_t num_samples_uv = (uint64_t)width_uv * (uint64_t)height_uv; + double psnr_u = libyuv::SumSquareErrorToPsnr(sse_u, num_samples_uv); + uint64_t sse_v = libyuv::ComputeSumSquareErrorPlane( + ref_buffer.DataV(), ref_buffer.StrideV(), test_buffer.DataV(), + test_buffer.StrideV(), width_uv, height_uv); + double psnr_v = libyuv::SumSquareErrorToPsnr(sse_v, num_samples_uv); + + // Weights from Ohm et. al 2012. 
+ double psnr_yuv = (6.0 * psnr_y + psnr_u + psnr_v) / 8.0; + return (psnr_yuv > kPerfectPSNR) ? kPerfectPSNR : psnr_yuv; +} + +// Compute SSIM for an I420A frame (all planes). Can upscale test frame. +double I420ASSIM(const I420ABufferInterface& ref_buffer, + const I420ABufferInterface& test_buffer) { + RTC_DCHECK_GE(ref_buffer.width(), test_buffer.width()); + RTC_DCHECK_GE(ref_buffer.height(), test_buffer.height()); + if ((ref_buffer.width() != test_buffer.width()) || + (ref_buffer.height() != test_buffer.height())) { + rtc::scoped_refptr<I420ABufferInterface> scaled_buffer = + ScaleI420ABuffer(test_buffer, ref_buffer.width(), ref_buffer.height()); + return I420ASSIM(ref_buffer, *scaled_buffer); + } + const double yuv_ssim = libyuv::I420Ssim( + ref_buffer.DataY(), ref_buffer.StrideY(), ref_buffer.DataU(), + ref_buffer.StrideU(), ref_buffer.DataV(), ref_buffer.StrideV(), + test_buffer.DataY(), test_buffer.StrideY(), test_buffer.DataU(), + test_buffer.StrideU(), test_buffer.DataV(), test_buffer.StrideV(), + test_buffer.width(), test_buffer.height()); + const double a_ssim = libyuv::CalcFrameSsim( + ref_buffer.DataA(), ref_buffer.StrideA(), test_buffer.DataA(), + test_buffer.StrideA(), test_buffer.width(), test_buffer.height()); + return (yuv_ssim + (a_ssim * 0.8)) / 1.8; +} + +// Compute SSIM for an I420A frame (all planes) +double I420ASSIM(const VideoFrame* ref_frame, const VideoFrame* test_frame) { + if (!ref_frame || !test_frame) + return -1; + RTC_DCHECK(ref_frame->video_frame_buffer()->type() == + VideoFrameBuffer::Type::kI420A); + RTC_DCHECK(test_frame->video_frame_buffer()->type() == + VideoFrameBuffer::Type::kI420A); + return I420ASSIM(*ref_frame->video_frame_buffer()->GetI420A(), + *test_frame->video_frame_buffer()->GetI420A()); +} + +// Compute SSIM for an I420 frame (all planes). Can upscale test_buffer. 
+double I420SSIM(const I420BufferInterface& ref_buffer, + const I420BufferInterface& test_buffer) { + RTC_DCHECK_GE(ref_buffer.width(), test_buffer.width()); + RTC_DCHECK_GE(ref_buffer.height(), test_buffer.height()); + if ((ref_buffer.width() != test_buffer.width()) || + (ref_buffer.height() != test_buffer.height())) { + rtc::scoped_refptr<I420Buffer> scaled_buffer = + I420Buffer::Create(ref_buffer.width(), ref_buffer.height()); + scaled_buffer->ScaleFrom(test_buffer); + return I420SSIM(ref_buffer, *scaled_buffer); + } + return libyuv::I420Ssim( + ref_buffer.DataY(), ref_buffer.StrideY(), ref_buffer.DataU(), + ref_buffer.StrideU(), ref_buffer.DataV(), ref_buffer.StrideV(), + test_buffer.DataY(), test_buffer.StrideY(), test_buffer.DataU(), + test_buffer.StrideU(), test_buffer.DataV(), test_buffer.StrideV(), + test_buffer.width(), test_buffer.height()); +} + +double I420SSIM(const VideoFrame* ref_frame, const VideoFrame* test_frame) { + if (!ref_frame || !test_frame) + return -1; + return I420SSIM(*ref_frame->video_frame_buffer()->ToI420(), + *test_frame->video_frame_buffer()->ToI420()); +} + +void NV12Scale(uint8_t* tmp_buffer, + const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + int src_width, + int src_height, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int dst_width, + int dst_height) { + const int src_chroma_width = (src_width + 1) / 2; + const int src_chroma_height = (src_height + 1) / 2; + + if (src_width == dst_width && src_height == dst_height) { + // No scaling. + libyuv::CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, src_width, + src_height); + libyuv::CopyPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, + src_chroma_width * 2, src_chroma_height); + return; + } + + // Scaling. + // Allocate temporary memory for spitting UV planes and scaling them. 
+ const int dst_chroma_width = (dst_width + 1) / 2; + const int dst_chroma_height = (dst_height + 1) / 2; + + uint8_t* const src_u = tmp_buffer; + uint8_t* const src_v = src_u + src_chroma_width * src_chroma_height; + uint8_t* const dst_u = src_v + src_chroma_width * src_chroma_height; + uint8_t* const dst_v = dst_u + dst_chroma_width * dst_chroma_height; + + // Split source UV plane into separate U and V plane using the temporary data. + libyuv::SplitUVPlane(src_uv, src_stride_uv, src_u, src_chroma_width, src_v, + src_chroma_width, src_chroma_width, src_chroma_height); + + // Scale the planes. + libyuv::I420Scale( + src_y, src_stride_y, src_u, src_chroma_width, src_v, src_chroma_width, + src_width, src_height, dst_y, dst_stride_y, dst_u, dst_chroma_width, + dst_v, dst_chroma_width, dst_width, dst_height, libyuv::kFilterBox); + + // Merge the UV planes into the destination. + libyuv::MergeUVPlane(dst_u, dst_chroma_width, dst_v, dst_chroma_width, dst_uv, + dst_stride_uv, dst_chroma_width, dst_chroma_height); +} + +NV12ToI420Scaler::NV12ToI420Scaler() = default; +NV12ToI420Scaler::~NV12ToI420Scaler() = default; + +void NV12ToI420Scaler::NV12ToI420Scale(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + int src_width, + int src_height, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height) { + if (src_width == dst_width && src_height == dst_height) { + // No scaling. + tmp_uv_planes_.clear(); + tmp_uv_planes_.shrink_to_fit(); + libyuv::NV12ToI420(src_y, src_stride_y, src_uv, src_stride_uv, dst_y, + dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, + src_width, src_height); + return; + } + + // Scaling. + // Allocate temporary memory for spitting UV planes. 
+ const int src_uv_width = (src_width + 1) / 2; + const int src_uv_height = (src_height + 1) / 2; + tmp_uv_planes_.resize(src_uv_width * src_uv_height * 2); + tmp_uv_planes_.shrink_to_fit(); + + // Split source UV plane into separate U and V plane using the temporary data. + uint8_t* const src_u = tmp_uv_planes_.data(); + uint8_t* const src_v = tmp_uv_planes_.data() + src_uv_width * src_uv_height; + libyuv::SplitUVPlane(src_uv, src_stride_uv, src_u, src_uv_width, src_v, + src_uv_width, src_uv_width, src_uv_height); + + // Scale the planes into the destination. + libyuv::I420Scale(src_y, src_stride_y, src_u, src_uv_width, src_v, + src_uv_width, src_width, src_height, dst_y, dst_stride_y, + dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width, + dst_height, libyuv::kFilterBox); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/test/BUILD.gn b/third_party/libwebrtc/common_video/test/BUILD.gn new file mode 100644 index 0000000000..10ebbaaae3 --- /dev/null +++ b/third_party/libwebrtc/common_video/test/BUILD.gn @@ -0,0 +1,24 @@ +# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +import("../../webrtc.gni") + +if (rtc_include_tests) { + rtc_library("utilities") { + testonly = true + sources = [ + "utilities.cc", + "utilities.h", + ] + deps = [ + "../../api:rtp_packet_info", + "../../api/video:video_frame", + "../../api/video:video_rtp_headers", + ] + } +} diff --git a/third_party/libwebrtc/common_video/test/utilities.cc b/third_party/libwebrtc/common_video/test/utilities.cc new file mode 100644 index 0000000000..c2a3266dc1 --- /dev/null +++ b/third_party/libwebrtc/common_video/test/utilities.cc @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "common_video/test/utilities.h" + +#include <utility> + +namespace webrtc { + +HdrMetadata CreateTestHdrMetadata() { + // Random but reasonable (in the sense of a valid range) HDR metadata. 
+ HdrMetadata hdr_metadata; + hdr_metadata.mastering_metadata.luminance_max = 2000.0; + hdr_metadata.mastering_metadata.luminance_min = 2.0001; + hdr_metadata.mastering_metadata.primary_r.x = 0.3003; + hdr_metadata.mastering_metadata.primary_r.y = 0.4004; + hdr_metadata.mastering_metadata.primary_g.x = 0.3201; + hdr_metadata.mastering_metadata.primary_g.y = 0.4604; + hdr_metadata.mastering_metadata.primary_b.x = 0.3409; + hdr_metadata.mastering_metadata.primary_b.y = 0.4907; + hdr_metadata.mastering_metadata.white_point.x = 0.4103; + hdr_metadata.mastering_metadata.white_point.y = 0.4806; + hdr_metadata.max_content_light_level = 2345; + hdr_metadata.max_frame_average_light_level = 1789; + return hdr_metadata; +} + +ColorSpace CreateTestColorSpace(bool with_hdr_metadata) { + HdrMetadata hdr_metadata = CreateTestHdrMetadata(); + return ColorSpace( + ColorSpace::PrimaryID::kBT709, ColorSpace::TransferID::kGAMMA22, + ColorSpace::MatrixID::kSMPTE2085, ColorSpace::RangeID::kFull, + ColorSpace::ChromaSiting::kCollocated, + ColorSpace::ChromaSiting::kCollocated, + with_hdr_metadata ? &hdr_metadata : nullptr); +} + +RtpPacketInfos CreatePacketInfos(size_t count) { + return RtpPacketInfos(RtpPacketInfos::vector_type(count)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/test/utilities.h b/third_party/libwebrtc/common_video/test/utilities.h new file mode 100644 index 0000000000..7e15cf947c --- /dev/null +++ b/third_party/libwebrtc/common_video/test/utilities.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef COMMON_VIDEO_TEST_UTILITIES_H_ +#define COMMON_VIDEO_TEST_UTILITIES_H_ + +#include <initializer_list> + +#include "api/rtp_packet_infos.h" +#include "api/video/color_space.h" + +namespace webrtc { + +HdrMetadata CreateTestHdrMetadata(); +ColorSpace CreateTestColorSpace(bool with_hdr_metadata); +RtpPacketInfos CreatePacketInfos(size_t count); + +} // namespace webrtc +#endif // COMMON_VIDEO_TEST_UTILITIES_H_ diff --git a/third_party/libwebrtc/common_video/video_frame_buffer.cc b/third_party/libwebrtc/common_video/video_frame_buffer.cc new file mode 100644 index 0000000000..ca2916e580 --- /dev/null +++ b/third_party/libwebrtc/common_video/video_frame_buffer.cc @@ -0,0 +1,388 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "common_video/include/video_frame_buffer.h" + +#include "api/make_ref_counted.h" +#include "api/video/i420_buffer.h" +#include "rtc_base/checks.h" +#include "third_party/libyuv/include/libyuv/convert.h" + +namespace webrtc { + +namespace { + +// Template to implement a wrapped buffer for a I4??BufferInterface. 
+template <typename Base>
+class WrappedYuvBuffer : public Base {
+ public:
+  // Stores the plane pointers and strides as given; the pixel data is not
+  // copied and is never freed by this class. `no_longer_used` is kept and
+  // invoked once from the destructor.
+  WrappedYuvBuffer(int width,
+                   int height,
+                   const uint8_t* y_plane,
+                   int y_stride,
+                   const uint8_t* u_plane,
+                   int u_stride,
+                   const uint8_t* v_plane,
+                   int v_stride,
+                   std::function<void()> no_longer_used)
+      : width_(width),
+        height_(height),
+        y_plane_(y_plane),
+        u_plane_(u_plane),
+        v_plane_(v_plane),
+        y_stride_(y_stride),
+        u_stride_(u_stride),
+        v_stride_(v_stride),
+        no_longer_used_cb_(no_longer_used) {}
+
+  // Tells the provider of the planes that this wrapper is done with them.
+  // An empty callback is skipped: calling an empty std::function throws
+  // std::bad_function_call (or aborts when exceptions are disabled), which
+  // must not happen inside a destructor.
+  ~WrappedYuvBuffer() override {
+    if (no_longer_used_cb_) {
+      no_longer_used_cb_();
+    }
+  }
+
+  int width() const override { return width_; }
+
+  int height() const override { return height_; }
+
+  const uint8_t* DataY() const override { return y_plane_; }
+
+  const uint8_t* DataU() const override { return u_plane_; }
+
+  const uint8_t* DataV() const override { return v_plane_; }
+
+  int StrideY() const override { return y_stride_; }
+
+  int StrideU() const override { return u_stride_; }
+
+  int StrideV() const override { return v_stride_; }
+
+ private:
+  friend class rtc::RefCountedObject<WrappedYuvBuffer>;
+
+  const int width_;
+  const int height_;
+  // Borrowed plane pointers; see the constructor comment.
+  const uint8_t* const y_plane_;
+  const uint8_t* const u_plane_;
+  const uint8_t* const v_plane_;
+  const int y_stride_;
+  const int u_stride_;
+  const int v_stride_;
+  // Release notification supplied by the caller; may be empty.
+  std::function<void()> no_longer_used_cb_;
+};
+
+// Template to implement a wrapped buffer for a I4??BufferInterface that also
+// carries an alpha plane.
+template <typename BaseWithA>
+class WrappedYuvaBuffer : public WrappedYuvBuffer<BaseWithA> {
+  // Short name for the base that already stores the Y, U and V planes.
+  using YuvPlanes = WrappedYuvBuffer<BaseWithA>;
+
+ public:
+  // Hands the Y/U/V description and the release callback to the base class
+  // and keeps the pointer and stride of the alpha plane here.
+  WrappedYuvaBuffer(int width,
+                    int height,
+                    const uint8_t* y_plane,
+                    int y_stride,
+                    const uint8_t* u_plane,
+                    int u_stride,
+                    const uint8_t* v_plane,
+                    int v_stride,
+                    const uint8_t* a_plane,
+                    int a_stride,
+                    std::function<void()> no_longer_used)
+      : YuvPlanes(width,
+                  height,
+                  y_plane,
+                  y_stride,
+                  u_plane,
+                  u_stride,
+                  v_plane,
+                  v_stride,
+                  no_longer_used),
+        a_plane_(a_plane),
+        a_stride_(a_stride) {}
+
+  const uint8_t* DataA() const override { return a_plane_; }
+
+  int StrideA() const override { return a_stride_; }
+
+ private:
+  const uint8_t* const a_plane_;
+  const int a_stride_;
+};
+
+// I444 buffer interface with the I420 conversion filled in.
+class I444BufferBase : public I444BufferInterface {
+ public:
+  rtc::scoped_refptr<I420BufferInterface> ToI420() final;
+};
+
+// Allocates an I420 buffer of the same dimensions and lets libyuv convert the
+// three planes of this buffer into it.
+rtc::scoped_refptr<I420BufferInterface> I444BufferBase::ToI420() {
+  const int frame_width = width();
+  const int frame_height = height();
+  rtc::scoped_refptr<I420Buffer> converted =
+      I420Buffer::Create(frame_width, frame_height);
+  libyuv::I444ToI420(DataY(), StrideY(), DataU(), StrideU(), DataV(), StrideV(),
+                     converted->MutableDataY(), converted->StrideY(),
+                     converted->MutableDataU(), converted->StrideU(),
+                     converted->MutableDataV(), converted->StrideV(),
+                     frame_width, frame_height);
+  return converted;
+}
+
+// I422 buffer interface with the I420 conversion filled in.
+class I422BufferBase : public I422BufferInterface {
+ public:
+  rtc::scoped_refptr<I420BufferInterface> ToI420() final;
+};
+
+// Allocates an I420 buffer of the same dimensions and lets libyuv convert the
+// three planes of this buffer into it.
+rtc::scoped_refptr<I420BufferInterface> I422BufferBase::ToI420() {
+  const int frame_width = width();
+  const int frame_height = height();
+  rtc::scoped_refptr<I420Buffer> converted =
+      I420Buffer::Create(frame_width, frame_height);
+  libyuv::I422ToI420(DataY(), StrideY(), DataU(), StrideU(), DataV(), StrideV(),
+                     converted->MutableDataY(), converted->StrideY(),
+                     converted->MutableDataU(), converted->StrideU(),
+                     converted->MutableDataV(), converted->StrideV(),
+                     frame_width, frame_height);
+  return converted;
+}
+
+// Template to implement a wrapped buffer for a PlanarYuv16BBuffer.
+template <typename Base> +class WrappedYuv16BBuffer : public Base { + public: + WrappedYuv16BBuffer(int width, + int height, + const uint16_t* y_plane, + int y_stride, + const uint16_t* u_plane, + int u_stride, + const uint16_t* v_plane, + int v_stride, + std::function<void()> no_longer_used) + : width_(width), + height_(height), + y_plane_(y_plane), + u_plane_(u_plane), + v_plane_(v_plane), + y_stride_(y_stride), + u_stride_(u_stride), + v_stride_(v_stride), + no_longer_used_cb_(no_longer_used) {} + + ~WrappedYuv16BBuffer() override { no_longer_used_cb_(); } + + int width() const override { return width_; } + + int height() const override { return height_; } + + const uint16_t* DataY() const override { return y_plane_; } + + const uint16_t* DataU() const override { return u_plane_; } + + const uint16_t* DataV() const override { return v_plane_; } + + int StrideY() const override { return y_stride_; } + + int StrideU() const override { return u_stride_; } + + int StrideV() const override { return v_stride_; } + + private: + friend class rtc::RefCountedObject<WrappedYuv16BBuffer>; + + const int width_; + const int height_; + const uint16_t* const y_plane_; + const uint16_t* const u_plane_; + const uint16_t* const v_plane_; + const int y_stride_; + const int u_stride_; + const int v_stride_; + std::function<void()> no_longer_used_cb_; +}; + +class I010BufferBase : public I010BufferInterface { + public: + rtc::scoped_refptr<I420BufferInterface> ToI420() final; +}; + +rtc::scoped_refptr<I420BufferInterface> I010BufferBase::ToI420() { + rtc::scoped_refptr<I420Buffer> i420_buffer = + I420Buffer::Create(width(), height()); + libyuv::I010ToI420(DataY(), StrideY(), DataU(), StrideU(), DataV(), StrideV(), + i420_buffer->MutableDataY(), i420_buffer->StrideY(), + i420_buffer->MutableDataU(), i420_buffer->StrideU(), + i420_buffer->MutableDataV(), i420_buffer->StrideV(), + width(), height()); + return i420_buffer; +} + +class I210BufferBase : public I210BufferInterface { + 
public: + rtc::scoped_refptr<I420BufferInterface> ToI420() final; +}; + +rtc::scoped_refptr<I420BufferInterface> I210BufferBase::ToI420() { + rtc::scoped_refptr<I420Buffer> i420_buffer = + I420Buffer::Create(width(), height()); + libyuv::I210ToI420(DataY(), StrideY(), DataU(), StrideU(), DataV(), StrideV(), + i420_buffer->MutableDataY(), i420_buffer->StrideY(), + i420_buffer->MutableDataU(), i420_buffer->StrideU(), + i420_buffer->MutableDataV(), i420_buffer->StrideV(), + width(), height()); + return i420_buffer; +} + +class I410BufferBase : public I410BufferInterface { + public: + rtc::scoped_refptr<I420BufferInterface> ToI420() final; +}; + +rtc::scoped_refptr<I420BufferInterface> I410BufferBase::ToI420() { + rtc::scoped_refptr<I420Buffer> i420_buffer = + I420Buffer::Create(width(), height()); + libyuv::I410ToI420(DataY(), StrideY(), DataU(), StrideU(), DataV(), StrideV(), + i420_buffer->MutableDataY(), i420_buffer->StrideY(), + i420_buffer->MutableDataU(), i420_buffer->StrideU(), + i420_buffer->MutableDataV(), i420_buffer->StrideV(), + width(), height()); + return i420_buffer; +} + +} // namespace + +rtc::scoped_refptr<I420BufferInterface> WrapI420Buffer( + int width, + int height, + const uint8_t* y_plane, + int y_stride, + const uint8_t* u_plane, + int u_stride, + const uint8_t* v_plane, + int v_stride, + std::function<void()> no_longer_used) { + return rtc::scoped_refptr<I420BufferInterface>( + rtc::make_ref_counted<WrappedYuvBuffer<I420BufferInterface>>( + width, height, y_plane, y_stride, u_plane, u_stride, v_plane, + v_stride, no_longer_used)); +} + +rtc::scoped_refptr<I420ABufferInterface> WrapI420ABuffer( + int width, + int height, + const uint8_t* y_plane, + int y_stride, + const uint8_t* u_plane, + int u_stride, + const uint8_t* v_plane, + int v_stride, + const uint8_t* a_plane, + int a_stride, + std::function<void()> no_longer_used) { + return rtc::scoped_refptr<I420ABufferInterface>( + rtc::make_ref_counted<WrappedYuvaBuffer<I420ABufferInterface>>( + 
width, height, y_plane, y_stride, u_plane, u_stride, v_plane, + v_stride, a_plane, a_stride, no_longer_used)); +} + +rtc::scoped_refptr<I422BufferInterface> WrapI422Buffer( + int width, + int height, + const uint8_t* y_plane, + int y_stride, + const uint8_t* u_plane, + int u_stride, + const uint8_t* v_plane, + int v_stride, + std::function<void()> no_longer_used) { + return rtc::scoped_refptr<I422BufferBase>( + rtc::make_ref_counted<WrappedYuvBuffer<I422BufferBase>>( + width, height, y_plane, y_stride, u_plane, u_stride, v_plane, + v_stride, no_longer_used)); +} + +rtc::scoped_refptr<I444BufferInterface> WrapI444Buffer( + int width, + int height, + const uint8_t* y_plane, + int y_stride, + const uint8_t* u_plane, + int u_stride, + const uint8_t* v_plane, + int v_stride, + std::function<void()> no_longer_used) { + return rtc::scoped_refptr<I444BufferInterface>( + rtc::make_ref_counted<WrappedYuvBuffer<I444BufferBase>>( + width, height, y_plane, y_stride, u_plane, u_stride, v_plane, + v_stride, no_longer_used)); +} + +rtc::scoped_refptr<PlanarYuvBuffer> WrapYuvBuffer( + VideoFrameBuffer::Type type, + int width, + int height, + const uint8_t* y_plane, + int y_stride, + const uint8_t* u_plane, + int u_stride, + const uint8_t* v_plane, + int v_stride, + std::function<void()> no_longer_used) { + switch (type) { + case VideoFrameBuffer::Type::kI420: + return WrapI420Buffer(width, height, y_plane, y_stride, u_plane, u_stride, + v_plane, v_stride, no_longer_used); + case VideoFrameBuffer::Type::kI422: + return WrapI422Buffer(width, height, y_plane, y_stride, u_plane, u_stride, + v_plane, v_stride, no_longer_used); + case VideoFrameBuffer::Type::kI444: + return WrapI444Buffer(width, height, y_plane, y_stride, u_plane, u_stride, + v_plane, v_stride, no_longer_used); + default: + RTC_CHECK_NOTREACHED(); + } +} + +rtc::scoped_refptr<I010BufferInterface> WrapI010Buffer( + int width, + int height, + const uint16_t* y_plane, + int y_stride, + const uint16_t* u_plane, + int 
u_stride, + const uint16_t* v_plane, + int v_stride, + std::function<void()> no_longer_used) { + return rtc::scoped_refptr<I010BufferInterface>( + rtc::make_ref_counted<WrappedYuv16BBuffer<I010BufferBase>>( + width, height, y_plane, y_stride, u_plane, u_stride, v_plane, + v_stride, no_longer_used)); +} + +rtc::scoped_refptr<I210BufferInterface> WrapI210Buffer( + int width, + int height, + const uint16_t* y_plane, + int y_stride, + const uint16_t* u_plane, + int u_stride, + const uint16_t* v_plane, + int v_stride, + std::function<void()> no_longer_used) { + return rtc::scoped_refptr<I210BufferInterface>( + rtc::make_ref_counted<WrappedYuv16BBuffer<I210BufferBase>>( + width, height, y_plane, y_stride, u_plane, u_stride, v_plane, + v_stride, no_longer_used)); +} + +rtc::scoped_refptr<I410BufferInterface> WrapI410Buffer( + int width, + int height, + const uint16_t* y_plane, + int y_stride, + const uint16_t* u_plane, + int u_stride, + const uint16_t* v_plane, + int v_stride, + std::function<void()> no_longer_used) { + return rtc::scoped_refptr<I410BufferInterface>( + rtc::make_ref_counted<WrappedYuv16BBuffer<I410BufferBase>>( + width, height, y_plane, y_stride, u_plane, u_stride, v_plane, + v_stride, no_longer_used)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/video_frame_buffer_pool.cc b/third_party/libwebrtc/common_video/video_frame_buffer_pool.cc new file mode 100644 index 0000000000..c0215110fd --- /dev/null +++ b/third_party/libwebrtc/common_video/video_frame_buffer_pool.cc @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "common_video/include/video_frame_buffer_pool.h"
+
+#include <limits>
+
+#include "api/make_ref_counted.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+// Downcasts `buffer` to the ref-counted wrapper of `BufferT` and asks it
+// whether exactly one reference is outstanding.
+template <typename BufferT>
+bool SingleRefAs(VideoFrameBuffer* buffer) {
+  return static_cast<rtc::RefCountedObject<BufferT>*>(buffer)->HasOneRef();
+}
+
+// Reports whether `buffer` currently has a single reference.
+//
+// The static downcast is only valid for buffers that really are
+// `rtc::RefCountedObject<T>` of the concrete class matching their type();
+// per the original author's note this function is only called on buffers the
+// pool created itself. Types without a case below hit the debug check and
+// yield false.
+bool HasOneRef(const rtc::scoped_refptr<VideoFrameBuffer>& buffer) {
+  VideoFrameBuffer* const raw = buffer.get();
+  switch (buffer->type()) {
+    case VideoFrameBuffer::Type::kI420:
+      return SingleRefAs<I420Buffer>(raw);
+    case VideoFrameBuffer::Type::kI444:
+      return SingleRefAs<I444Buffer>(raw);
+    case VideoFrameBuffer::Type::kI422:
+      return SingleRefAs<I422Buffer>(raw);
+    case VideoFrameBuffer::Type::kI010:
+      return SingleRefAs<I010Buffer>(raw);
+    case VideoFrameBuffer::Type::kI210:
+      return SingleRefAs<I210Buffer>(raw);
+    case VideoFrameBuffer::Type::kI410:
+      return SingleRefAs<I410Buffer>(raw);
+    case VideoFrameBuffer::Type::kNV12:
+      return SingleRefAs<NV12Buffer>(raw);
+    default:
+      RTC_DCHECK_NOTREACHED();
+  }
+  return false;
+}
+
+}  // namespace
+
+// Default pool: no zero-initialization and the largest representable limit.
+VideoFrameBufferPool::VideoFrameBufferPool() : VideoFrameBufferPool(false) {}
+
+// Pool with a caller-chosen zero-initialization flag and the largest
+// representable limit.
+VideoFrameBufferPool::VideoFrameBufferPool(bool zero_initialize)
+    : VideoFrameBufferPool(zero_initialize,
+                           std::numeric_limits<size_t>::max()) {}
+
+// Pool with both the zero-initialization flag and the buffer limit given.
+VideoFrameBufferPool::VideoFrameBufferPool(bool zero_initialize,
+                                           size_t max_number_of_buffers)
+    : zero_initialize_(zero_initialize),
+      max_number_of_buffers_(max_number_of_buffers) {}
+
+VideoFrameBufferPool::~VideoFrameBufferPool() = default;
+
+// Drops every reference the pool holds. Buffers still referenced elsewhere
+// stay alive through those references.
+void VideoFrameBufferPool::Release() {
+  buffers_.clear();
+}
+
+// Changes the upper bound on the number of buffers kept by the pool.
+//
+// Returns false, leaving the limit and the pool untouched, when more buffers
+// are currently referenced outside the pool than `max_number_of_buffers`
+// allows. Otherwise stores the new limit, evicts idle buffers until the pool
+// fits within it, and returns true.
+bool VideoFrameBufferPool::Resize(size_t max_number_of_buffers) {
+  RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
+  size_t used_buffers_count = 0;
+  for (const rtc::scoped_refptr<VideoFrameBuffer>& buffer : buffers_) {
+    // If the buffer is in use, the ref count will be >= 2, one from the list we
+    // are looping over and one from the application. If the ref count is 1,
+    // then the list we are looping over holds the only reference and it's safe
+    // to reuse.
+    if (!HasOneRef(buffer)) {
+      used_buffers_count++;
+    }
+  }
+  if (used_buffers_count > max_number_of_buffers) {
+    return false;
+  }
+  max_number_of_buffers_ = max_number_of_buffers;
+
+  // Nothing to evict when the pool already fits. Without this check the
+  // size_t subtraction below wraps around whenever the pool holds fewer
+  // buffers than the new limit, and the loop would then evict every idle
+  // buffer.
+  if (buffers_.size() <= max_number_of_buffers_) {
+    return true;
+  }
+
+  // Only idle buffers are evicted. Enough of them exist because the in-use
+  // count was checked against the limit above.
+  size_t buffers_to_purge = buffers_.size() - max_number_of_buffers_;
+  auto iter = buffers_.begin();
+  while (iter != buffers_.end() && buffers_to_purge > 0) {
+    if (HasOneRef(*iter)) {
+      iter = buffers_.erase(iter);
+      buffers_to_purge--;
+    } else {
+      ++iter;
+    }
+  }
+  return true;
+}
+
+rtc::scoped_refptr<I420Buffer> VideoFrameBufferPool::CreateI420Buffer(
+    int width,
+    int height) {
+  RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
+
+  rtc::scoped_refptr<VideoFrameBuffer> existing_buffer =
+      GetExistingBuffer(width, height, VideoFrameBuffer::Type::kI420);
+  if (existing_buffer) {
+    // Cast is safe because the only way kI420 buffer is created is
+    // in the same function below, where `RefCountedObject<I420Buffer>` is
+    // created.
+    rtc::RefCountedObject<I420Buffer>* raw_buffer =
+        static_cast<rtc::RefCountedObject<I420Buffer>*>(existing_buffer.get());
+    // Creates a new scoped_refptr, which is also pointing to the same
+    // RefCountedObject as buffer, increasing ref count.
+    return rtc::scoped_refptr<I420Buffer>(raw_buffer);
+  }
+
+  if (buffers_.size() >= max_number_of_buffers_)
+    return nullptr;
+  // Allocate new buffer.
+ rtc::scoped_refptr<I420Buffer> buffer = + rtc::make_ref_counted<I420Buffer>(width, height); + + if (zero_initialize_) + buffer->InitializeData(); + + buffers_.push_back(buffer); + return buffer; +} + +rtc::scoped_refptr<I444Buffer> VideoFrameBufferPool::CreateI444Buffer( + int width, + int height) { + RTC_DCHECK_RUNS_SERIALIZED(&race_checker_); + + rtc::scoped_refptr<VideoFrameBuffer> existing_buffer = + GetExistingBuffer(width, height, VideoFrameBuffer::Type::kI444); + if (existing_buffer) { + // Cast is safe because the only way kI444 buffer is created is + // in the same function below, where |RefCountedObject<I444Buffer>| + // is created. + rtc::RefCountedObject<I444Buffer>* raw_buffer = + static_cast<rtc::RefCountedObject<I444Buffer>*>(existing_buffer.get()); + // Creates a new scoped_refptr, which is also pointing to the same + // RefCountedObject as buffer, increasing ref count. + return rtc::scoped_refptr<I444Buffer>(raw_buffer); + } + + if (buffers_.size() >= max_number_of_buffers_) + return nullptr; + // Allocate new buffer. + rtc::scoped_refptr<I444Buffer> buffer = + rtc::make_ref_counted<I444Buffer>(width, height); + + if (zero_initialize_) + buffer->InitializeData(); + + buffers_.push_back(buffer); + return buffer; +} + +rtc::scoped_refptr<I422Buffer> VideoFrameBufferPool::CreateI422Buffer( + int width, + int height) { + RTC_DCHECK_RUNS_SERIALIZED(&race_checker_); + + rtc::scoped_refptr<VideoFrameBuffer> existing_buffer = + GetExistingBuffer(width, height, VideoFrameBuffer::Type::kI422); + if (existing_buffer) { + // Cast is safe because the only way kI422 buffer is created is + // in the same function below, where |RefCountedObject<I422Buffer>| + // is created. + rtc::RefCountedObject<I422Buffer>* raw_buffer = + static_cast<rtc::RefCountedObject<I422Buffer>*>(existing_buffer.get()); + // Creates a new scoped_refptr, which is also pointing to the same + // RefCountedObject as buffer, increasing ref count. 
+    return rtc::scoped_refptr<I422Buffer>(raw_buffer);
+  }
+
+  if (buffers_.size() >= max_number_of_buffers_)
+    return nullptr;
+  // Allocate new buffer.
+  rtc::scoped_refptr<I422Buffer> buffer =
+      rtc::make_ref_counted<I422Buffer>(width, height);
+
+  if (zero_initialize_)
+    buffer->InitializeData();
+
+  buffers_.push_back(buffer);
+  return buffer;
+}
+
+// Returns an NV12 buffer of the requested size: an idle one from the pool
+// when GetExistingBuffer() finds one, otherwise a newly allocated buffer that
+// is also added to the pool. Returns nullptr when nothing can be reused and
+// the pool already holds `max_number_of_buffers_` buffers.
+rtc::scoped_refptr<NV12Buffer> VideoFrameBufferPool::CreateNV12Buffer(
+    int width,
+    int height) {
+  RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
+
+  rtc::scoped_refptr<VideoFrameBuffer> existing_buffer =
+      GetExistingBuffer(width, height, VideoFrameBuffer::Type::kNV12);
+  if (existing_buffer) {
+    // Cast is safe because the only way kNV12 buffer is created is
+    // in the same function below, where `RefCountedObject<NV12Buffer>` is
+    // created.
+    rtc::RefCountedObject<NV12Buffer>* raw_buffer =
+        static_cast<rtc::RefCountedObject<NV12Buffer>*>(existing_buffer.get());
+    // Creates a new scoped_refptr, which is also pointing to the same
+    // RefCountedObject as buffer, increasing ref count.
+    return rtc::scoped_refptr<NV12Buffer>(raw_buffer);
+  }
+
+  if (buffers_.size() >= max_number_of_buffers_)
+    return nullptr;
+  // Allocate new buffer.
+  rtc::scoped_refptr<NV12Buffer> buffer =
+      rtc::make_ref_counted<NV12Buffer>(width, height);
+
+  // Only done when the pool was constructed with zero_initialize == true.
+  // NOTE(review): InitializeData() presumably clears the pixel data - confirm
+  // against NV12Buffer.
+  if (zero_initialize_)
+    buffer->InitializeData();
+
+  buffers_.push_back(buffer);
+  return buffer;
+}
+
+rtc::scoped_refptr<I010Buffer> VideoFrameBufferPool::CreateI010Buffer(
+    int width,
+    int height) {
+  RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
+
+  rtc::scoped_refptr<VideoFrameBuffer> existing_buffer =
+      GetExistingBuffer(width, height, VideoFrameBuffer::Type::kI010);
+  if (existing_buffer) {
+    // Cast is safe because the only way kI010 buffer is created is
+    // in the same function below, where |RefCountedObject<I010Buffer>|
+    // is created.
+ rtc::RefCountedObject<I010Buffer>* raw_buffer = + static_cast<rtc::RefCountedObject<I010Buffer>*>(existing_buffer.get()); + // Creates a new scoped_refptr, which is also pointing to the same + // RefCountedObject as buffer, increasing ref count. + return rtc::scoped_refptr<I010Buffer>(raw_buffer); + } + + if (buffers_.size() >= max_number_of_buffers_) + return nullptr; + // Allocate new buffer. + rtc::scoped_refptr<I010Buffer> buffer = I010Buffer::Create(width, height); + + buffers_.push_back(buffer); + return buffer; +} + +rtc::scoped_refptr<I210Buffer> VideoFrameBufferPool::CreateI210Buffer( + int width, + int height) { + RTC_DCHECK_RUNS_SERIALIZED(&race_checker_); + + rtc::scoped_refptr<VideoFrameBuffer> existing_buffer = + GetExistingBuffer(width, height, VideoFrameBuffer::Type::kI210); + if (existing_buffer) { + // Cast is safe because the only way kI210 buffer is created is + // in the same function below, where |RefCountedObject<I210Buffer>| + // is created. + rtc::RefCountedObject<I210Buffer>* raw_buffer = + static_cast<rtc::RefCountedObject<I210Buffer>*>(existing_buffer.get()); + // Creates a new scoped_refptr, which is also pointing to the same + // RefCountedObject as buffer, increasing ref count. + return rtc::scoped_refptr<I210Buffer>(raw_buffer); + } + + if (buffers_.size() >= max_number_of_buffers_) + return nullptr; + // Allocate new buffer. + rtc::scoped_refptr<I210Buffer> buffer = I210Buffer::Create(width, height); + + buffers_.push_back(buffer); + return buffer; +} + +rtc::scoped_refptr<I410Buffer> VideoFrameBufferPool::CreateI410Buffer( + int width, + int height) { + RTC_DCHECK_RUNS_SERIALIZED(&race_checker_); + + rtc::scoped_refptr<VideoFrameBuffer> existing_buffer = + GetExistingBuffer(width, height, VideoFrameBuffer::Type::kI410); + if (existing_buffer) { + // Cast is safe because the only way kI410 buffer is created is + // in the same function below, where |RefCountedObject<I410Buffer>| + // is created. 
+    rtc::RefCountedObject<I410Buffer>* raw_buffer =
+        static_cast<rtc::RefCountedObject<I410Buffer>*>(existing_buffer.get());
+    // Creates a new scoped_refptr, which is also pointing to the same
+    // RefCountedObject as buffer, increasing ref count.
+    return rtc::scoped_refptr<I410Buffer>(raw_buffer);
+  }
+
+  if (buffers_.size() >= max_number_of_buffers_)
+    return nullptr;
+  // Allocate new buffer.
+  rtc::scoped_refptr<I410Buffer> buffer = I410Buffer::Create(width, height);
+
+  buffers_.push_back(buffer);
+  return buffer;
+}
+
+// Finds an idle pooled buffer matching `width`, `height` and `type`.
+//
+// As a side effect, every pooled buffer that does not match all three is
+// removed from the pool first, whether or not it is still referenced
+// elsewhere. Returns nullptr when no matching buffer is idle.
+rtc::scoped_refptr<VideoFrameBuffer> VideoFrameBufferPool::GetExistingBuffer(
+    int width,
+    int height,
+    VideoFrameBuffer::Type type) {
+  // Pass 1: keep only buffers of the requested resolution and type.
+  auto candidate = buffers_.begin();
+  while (candidate != buffers_.end()) {
+    const bool matches_request = (*candidate)->width() == width &&
+                                 (*candidate)->height() == height &&
+                                 (*candidate)->type() == type;
+    if (matches_request) {
+      ++candidate;
+      continue;
+    }
+    candidate = buffers_.erase(candidate);
+  }
+
+  // Pass 2: hand out the first buffer nobody else references. A ref count of
+  // one means the pool's own list holds the only reference, so the buffer can
+  // be reused; two or more means the application still uses it.
+  for (auto it = buffers_.begin(); it != buffers_.end(); ++it) {
+    if (!HasOneRef(*it)) {
+      continue;
+    }
+    RTC_CHECK((*it)->type() == type);
+    return *it;
+  }
+  return nullptr;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/common_video/video_frame_buffer_pool_unittest.cc b/third_party/libwebrtc/common_video/video_frame_buffer_pool_unittest.cc
new file mode 100644
index 0000000000..f177468617
--- /dev/null
+++ b/third_party/libwebrtc/common_video/video_frame_buffer_pool_unittest.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree.
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/include/video_frame_buffer_pool.h" + +#include <stdint.h> +#include <string.h> + +#include "api/scoped_refptr.h" +#include "api/video/i420_buffer.h" +#include "api/video/video_frame_buffer.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(TestVideoFrameBufferPool, SimpleFrameReuse) { + VideoFrameBufferPool pool; + auto buffer = pool.CreateI420Buffer(16, 16); + EXPECT_EQ(16, buffer->width()); + EXPECT_EQ(16, buffer->height()); + // Extract non-refcounted pointers for testing. + const uint8_t* y_ptr = buffer->DataY(); + const uint8_t* u_ptr = buffer->DataU(); + const uint8_t* v_ptr = buffer->DataV(); + // Release buffer so that it is returned to the pool. + buffer = nullptr; + // Check that the memory is resued. + buffer = pool.CreateI420Buffer(16, 16); + EXPECT_EQ(y_ptr, buffer->DataY()); + EXPECT_EQ(u_ptr, buffer->DataU()); + EXPECT_EQ(v_ptr, buffer->DataV()); +} + +TEST(TestVideoFrameBufferPool, FailToReuseWrongSize) { + // Set max frames to 1, just to make sure the first buffer is being released. + VideoFrameBufferPool pool(/*zero_initialize=*/false, 1); + auto buffer = pool.CreateI420Buffer(16, 16); + EXPECT_EQ(16, buffer->width()); + EXPECT_EQ(16, buffer->height()); + // Release buffer so that it is returned to the pool. + buffer = nullptr; + // Check that the pool doesn't try to reuse buffers of incorrect size. 
+ buffer = pool.CreateI420Buffer(32, 16); + ASSERT_TRUE(buffer); + EXPECT_EQ(32, buffer->width()); + EXPECT_EQ(16, buffer->height()); +} + +TEST(TestVideoFrameBufferPool, FrameValidAfterPoolDestruction) { + rtc::scoped_refptr<I420Buffer> buffer; + { + VideoFrameBufferPool pool; + buffer = pool.CreateI420Buffer(16, 16); + } + EXPECT_EQ(16, buffer->width()); + EXPECT_EQ(16, buffer->height()); + // Access buffer, so that ASAN could find any issues if buffer + // doesn't outlive the buffer pool. + memset(buffer->MutableDataY(), 0xA5, 16 * buffer->StrideY()); +} + +TEST(TestVideoFrameBufferPool, MaxNumberOfBuffers) { + VideoFrameBufferPool pool(false, 1); + auto buffer = pool.CreateI420Buffer(16, 16); + EXPECT_NE(nullptr, buffer.get()); + EXPECT_EQ(nullptr, pool.CreateI420Buffer(16, 16).get()); +} + +TEST(TestVideoFrameBufferPool, ProducesNv12) { + VideoFrameBufferPool pool(false, 1); + auto buffer = pool.CreateNV12Buffer(16, 16); + EXPECT_NE(nullptr, buffer.get()); +} + +TEST(TestVideoFrameBufferPool, ProducesI422) { + VideoFrameBufferPool pool(false, 1); + auto buffer = pool.CreateI422Buffer(16, 16); + EXPECT_NE(nullptr, buffer.get()); +} + +TEST(TestVideoFrameBufferPool, ProducesI444) { + VideoFrameBufferPool pool(false, 1); + auto buffer = pool.CreateI444Buffer(16, 16); + EXPECT_NE(nullptr, buffer.get()); +} + +TEST(TestVideoFrameBufferPool, ProducesI010) { + VideoFrameBufferPool pool(false, 1); + auto buffer = pool.CreateI010Buffer(16, 16); + EXPECT_NE(nullptr, buffer.get()); +} + +TEST(TestVideoFrameBufferPool, ProducesI210) { + VideoFrameBufferPool pool(false, 1); + auto buffer = pool.CreateI210Buffer(16, 16); + EXPECT_NE(nullptr, buffer.get()); +} + +TEST(TestVideoFrameBufferPool, SwitchingPixelFormat) { + VideoFrameBufferPool pool(false, 1); + auto buffeNV12 = pool.CreateNV12Buffer(16, 16); + EXPECT_EQ(nullptr, pool.CreateNV12Buffer(16, 16).get()); + + auto bufferI420 = pool.CreateI420Buffer(16, 16); + EXPECT_NE(nullptr, bufferI420.get()); + EXPECT_EQ(nullptr, 
pool.CreateI420Buffer(16, 16).get()); + + auto bufferI444 = pool.CreateI444Buffer(16, 16); + EXPECT_NE(nullptr, bufferI444.get()); + EXPECT_EQ(nullptr, pool.CreateI444Buffer(16, 16).get()); + + auto bufferI422 = pool.CreateI422Buffer(16, 16); + EXPECT_NE(nullptr, bufferI422.get()); + EXPECT_EQ(nullptr, pool.CreateI422Buffer(16, 16).get()); + + auto bufferI010 = pool.CreateI010Buffer(16, 16); + EXPECT_NE(nullptr, bufferI010.get()); + EXPECT_EQ(nullptr, pool.CreateI010Buffer(16, 16).get()); + + auto bufferI210 = pool.CreateI210Buffer(16, 16); + EXPECT_NE(nullptr, bufferI210.get()); + EXPECT_EQ(nullptr, pool.CreateI210Buffer(16, 16).get()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/video_frame_unittest.cc b/third_party/libwebrtc/common_video/video_frame_unittest.cc new file mode 100644 index 0000000000..ae8e54e7d3 --- /dev/null +++ b/third_party/libwebrtc/common_video/video_frame_unittest.cc @@ -0,0 +1,616 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "api/video/video_frame.h" + +#include <math.h> +#include <string.h> + +#include "api/video/i010_buffer.h" +#include "api/video/i210_buffer.h" +#include "api/video/i410_buffer.h" +#include "api/video/i420_buffer.h" +#include "api/video/i422_buffer.h" +#include "api/video/i444_buffer.h" +#include "api/video/nv12_buffer.h" +#include "rtc_base/time_utils.h" +#include "test/fake_texture_frame.h" +#include "test/frame_utils.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +struct SubSampling { + int x; + int y; +}; + +SubSampling SubSamplingForType(VideoFrameBuffer::Type type) { + switch (type) { + case VideoFrameBuffer::Type::kI420: + return {.x = 2, .y = 2}; + case VideoFrameBuffer::Type::kI420A: + return {.x = 2, .y = 2}; + case VideoFrameBuffer::Type::kI422: + return {.x = 2, .y = 1}; + case VideoFrameBuffer::Type::kI444: + return {.x = 1, .y = 1}; + case VideoFrameBuffer::Type::kI010: + return {.x = 2, .y = 2}; + case VideoFrameBuffer::Type::kI210: + return {.x = 2, .y = 1}; + case VideoFrameBuffer::Type::kI410: + return {.x = 1, .y = 1}; + default: + return {}; + } +} + +// Helper function to create a buffer and fill it with a gradient for +// PlanarYuvBuffer based buffers. 
+template <class T> +rtc::scoped_refptr<T> CreateGradient(int width, int height) { + rtc::scoped_refptr<T> buffer(T::Create(width, height)); + // Initialize with gradient, Y = 128(x/w + y/h), U = 256 x/w, V = 256 y/h + for (int x = 0; x < width; x++) { + for (int y = 0; y < height; y++) { + buffer->MutableDataY()[x + y * width] = + 128 * (x * height + y * width) / (width * height); + } + } + int chroma_width = buffer->ChromaWidth(); + int chroma_height = buffer->ChromaHeight(); + for (int x = 0; x < chroma_width; x++) { + for (int y = 0; y < chroma_height; y++) { + buffer->MutableDataU()[x + y * chroma_width] = + 255 * x / (chroma_width - 1); + buffer->MutableDataV()[x + y * chroma_width] = + 255 * y / (chroma_height - 1); + } + } + return buffer; +} + +// Helper function to create a buffer and fill it with a gradient. +rtc::scoped_refptr<NV12BufferInterface> CreateNV12Gradient(int width, + int height) { + rtc::scoped_refptr<NV12Buffer> buffer(NV12Buffer::Create(width, height)); + // Initialize with gradient, Y = 128(x/w + y/h), U = 256 x/w, V = 256 y/h + for (int x = 0; x < width; x++) { + for (int y = 0; y < height; y++) { + buffer->MutableDataY()[x + y * width] = + 128 * (x * height + y * width) / (width * height); + } + } + int chroma_width = buffer->ChromaWidth(); + int chroma_height = buffer->ChromaHeight(); + for (int x = 0; x < chroma_width; x++) { + for (int y = 0; y < chroma_height; y++) { + buffer->MutableDataUV()[x * 2 + y * buffer->StrideUV()] = + 255 * x / (chroma_width - 1); + buffer->MutableDataUV()[x * 2 + 1 + y * buffer->StrideUV()] = + 255 * y / (chroma_height - 1); + } + } + return buffer; +} + +// The offsets and sizes describe the rectangle extracted from the +// original (gradient) frame, in relative coordinates where the +// original frame correspond to the unit square, 0.0 <= x, y < 1.0. 
+template <class T> +void CheckCrop(const T& frame, + double offset_x, + double offset_y, + double rel_width, + double rel_height) { + int width = frame.width(); + int height = frame.height(); + + SubSampling plane_divider = SubSamplingForType(frame.type()); + + // Check that pixel values in the corners match the gradient used + // for initialization. + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + // Pixel coordinates of the corner. + int x = i * (width - 1); + int y = j * (height - 1); + // Relative coordinates, range 0.0 - 1.0 correspond to the + // size of the uncropped input frame. + double orig_x = offset_x + i * rel_width; + double orig_y = offset_y + j * rel_height; + + EXPECT_NEAR(frame.DataY()[x + y * frame.StrideY()] / 256.0, + (orig_x + orig_y) / 2, 0.02); + EXPECT_NEAR(frame.DataU()[x / plane_divider.x + + (y / plane_divider.y) * frame.StrideU()] / + 256.0, + orig_x, 0.02); + EXPECT_NEAR(frame.DataV()[x / plane_divider.x + + (y / plane_divider.y) * frame.StrideV()] / + 256.0, + orig_y, 0.02); + } + } +} + +template <class T> +void CheckRotate(int width, + int height, + webrtc::VideoRotation rotation, + const T& rotated) { + int rotated_width = width; + int rotated_height = height; + + if (rotation == kVideoRotation_90 || rotation == kVideoRotation_270) { + std::swap(rotated_width, rotated_height); + } + EXPECT_EQ(rotated_width, rotated.width()); + EXPECT_EQ(rotated_height, rotated.height()); + + // Clock-wise order (with 0,0 at top-left) + const struct { + int x; + int y; + } corners[] = {{0, 0}, {1, 0}, {1, 1}, {0, 1}}; + // Corresponding corner colors of the frame produced by CreateGradient. 
+ const struct { + int y; + int u; + int v; + } colors[] = {{0, 0, 0}, {127, 255, 0}, {255, 255, 255}, {127, 0, 255}}; + int corner_offset = static_cast<int>(rotation) / 90; + + SubSampling plane_divider = SubSamplingForType(rotated.type()); + + for (int i = 0; i < 4; i++) { + int j = (i + corner_offset) % 4; + int x = corners[j].x * (rotated_width - 1); + int y = corners[j].y * (rotated_height - 1); + EXPECT_EQ(colors[i].y, rotated.DataY()[x + y * rotated.StrideY()]); + if (rotated.type() == VideoFrameBuffer::Type::kI422 || + rotated.type() == VideoFrameBuffer::Type::kI210) { + EXPECT_NEAR(colors[i].u, + rotated.DataU()[(x / plane_divider.x) + + (y / plane_divider.y) * rotated.StrideU()], + 1); + EXPECT_NEAR(colors[i].v, + rotated.DataV()[(x / plane_divider.x) + + (y / plane_divider.y) * rotated.StrideV()], + 1); + } else { + EXPECT_EQ(colors[i].u, + rotated.DataU()[(x / plane_divider.x) + + (y / plane_divider.y) * rotated.StrideU()]); + EXPECT_EQ(colors[i].v, + rotated.DataV()[(x / plane_divider.x) + + (y / plane_divider.y) * rotated.StrideV()]); + } + } +} + +} // namespace + +TEST(TestVideoFrame, WidthHeightValues) { + VideoFrame frame = + VideoFrame::Builder() + .set_video_frame_buffer(I420Buffer::Create(10, 10, 10, 14, 90)) + .set_rotation(webrtc::kVideoRotation_0) + .set_timestamp_ms(789) + .build(); + const int valid_value = 10; + EXPECT_EQ(valid_value, frame.width()); + EXPECT_EQ(valid_value, frame.height()); + frame.set_timestamp(123u); + EXPECT_EQ(123u, frame.timestamp()); + frame.set_ntp_time_ms(456); + EXPECT_EQ(456, frame.ntp_time_ms()); + EXPECT_EQ(789, frame.render_time_ms()); +} + +TEST(TestVideoFrame, ShallowCopy) { + uint32_t timestamp = 1; + int64_t ntp_time_ms = 2; + int64_t timestamp_us = 3; + int stride_y = 15; + int stride_u = 10; + int stride_v = 10; + int width = 15; + int height = 15; + + const int kSizeY = 400; + const int kSizeU = 100; + const int kSizeV = 100; + const VideoRotation kRotation = kVideoRotation_270; + uint8_t 
buffer_y[kSizeY]; + uint8_t buffer_u[kSizeU]; + uint8_t buffer_v[kSizeV]; + memset(buffer_y, 16, kSizeY); + memset(buffer_u, 8, kSizeU); + memset(buffer_v, 4, kSizeV); + + VideoFrame frame1 = VideoFrame::Builder() + .set_video_frame_buffer(I420Buffer::Copy( + width, height, buffer_y, stride_y, buffer_u, + stride_u, buffer_v, stride_v)) + .set_rotation(kRotation) + .set_timestamp_us(0) + .build(); + frame1.set_timestamp(timestamp); + frame1.set_ntp_time_ms(ntp_time_ms); + frame1.set_timestamp_us(timestamp_us); + VideoFrame frame2(frame1); + + EXPECT_EQ(frame1.video_frame_buffer(), frame2.video_frame_buffer()); + const webrtc::I420BufferInterface* yuv1 = + frame1.video_frame_buffer()->GetI420(); + const webrtc::I420BufferInterface* yuv2 = + frame2.video_frame_buffer()->GetI420(); + EXPECT_EQ(yuv1->DataY(), yuv2->DataY()); + EXPECT_EQ(yuv1->DataU(), yuv2->DataU()); + EXPECT_EQ(yuv1->DataV(), yuv2->DataV()); + + EXPECT_EQ(frame2.timestamp(), frame1.timestamp()); + EXPECT_EQ(frame2.ntp_time_ms(), frame1.ntp_time_ms()); + EXPECT_EQ(frame2.timestamp_us(), frame1.timestamp_us()); + EXPECT_EQ(frame2.rotation(), frame1.rotation()); + + frame2.set_timestamp(timestamp + 1); + frame2.set_ntp_time_ms(ntp_time_ms + 1); + frame2.set_timestamp_us(timestamp_us + 1); + frame2.set_rotation(kVideoRotation_90); + + EXPECT_NE(frame2.timestamp(), frame1.timestamp()); + EXPECT_NE(frame2.ntp_time_ms(), frame1.ntp_time_ms()); + EXPECT_NE(frame2.timestamp_us(), frame1.timestamp_us()); + EXPECT_NE(frame2.rotation(), frame1.rotation()); +} + +TEST(TestVideoFrame, TextureInitialValues) { + VideoFrame frame = test::FakeNativeBuffer::CreateFrame( + 640, 480, 100, 10, webrtc::kVideoRotation_0); + EXPECT_EQ(640, frame.width()); + EXPECT_EQ(480, frame.height()); + EXPECT_EQ(100u, frame.timestamp()); + EXPECT_EQ(10, frame.render_time_ms()); + ASSERT_TRUE(frame.video_frame_buffer() != nullptr); + EXPECT_TRUE(frame.video_frame_buffer()->type() == + VideoFrameBuffer::Type::kNative); + + 
frame.set_timestamp(200); + EXPECT_EQ(200u, frame.timestamp()); + frame.set_timestamp_us(20); + EXPECT_EQ(20, frame.timestamp_us()); +} + +template <typename T> +class TestPlanarYuvBuffer : public ::testing::Test {}; +TYPED_TEST_SUITE_P(TestPlanarYuvBuffer); + +template <class T> +rtc::scoped_refptr<T> CreateAndFillBuffer() { + auto buf = T::Create(20, 10); + memset(buf->MutableDataY(), 1, 200); + + if (buf->type() == VideoFrameBuffer::Type::kI444 || + buf->type() == VideoFrameBuffer::Type::kI410) { + memset(buf->MutableDataU(), 2, 200); + memset(buf->MutableDataV(), 3, 200); + } else if (buf->type() == VideoFrameBuffer::Type::kI422 || + buf->type() == VideoFrameBuffer::Type::kI210) { + memset(buf->MutableDataU(), 2, 100); + memset(buf->MutableDataV(), 3, 100); + } else { + memset(buf->MutableDataU(), 2, 50); + memset(buf->MutableDataV(), 3, 50); + } + + return buf; +} + +TYPED_TEST_P(TestPlanarYuvBuffer, Copy) { + rtc::scoped_refptr<TypeParam> buf1 = CreateAndFillBuffer<TypeParam>(); + rtc::scoped_refptr<TypeParam> buf2 = TypeParam::Copy(*buf1); + EXPECT_TRUE(test::FrameBufsEqual(buf1, buf2)); +} + +TYPED_TEST_P(TestPlanarYuvBuffer, CropXCenter) { + rtc::scoped_refptr<TypeParam> buf = CreateGradient<TypeParam>(200, 100); + + // Pure center cropping, no scaling. + rtc::scoped_refptr<TypeParam> scaled_buffer = TypeParam::Create(100, 100); + scaled_buffer->CropAndScaleFrom(*buf, 50, 0, 100, 100); + CheckCrop<TypeParam>(*scaled_buffer, 0.25, 0.0, 0.5, 1.0); +} + +TYPED_TEST_P(TestPlanarYuvBuffer, CropXNotCenter) { + rtc::scoped_refptr<TypeParam> buf = CreateGradient<TypeParam>(200, 100); + + // Non-center cropping, no scaling. 
+ rtc::scoped_refptr<TypeParam> scaled_buffer = TypeParam::Create(100, 100); + scaled_buffer->CropAndScaleFrom(*buf, 25, 0, 100, 100); + CheckCrop<TypeParam>(*scaled_buffer, 0.125, 0.0, 0.5, 1.0); +} + +TYPED_TEST_P(TestPlanarYuvBuffer, CropYCenter) { + rtc::scoped_refptr<TypeParam> buf = CreateGradient<TypeParam>(100, 200); + + // Pure center cropping, no scaling. + rtc::scoped_refptr<TypeParam> scaled_buffer = TypeParam::Create(100, 100); + scaled_buffer->CropAndScaleFrom(*buf, 0, 50, 100, 100); + CheckCrop<TypeParam>(*scaled_buffer, 0.0, 0.25, 1.0, 0.5); +} + +TYPED_TEST_P(TestPlanarYuvBuffer, CropYNotCenter) { + rtc::scoped_refptr<TypeParam> buf = CreateGradient<TypeParam>(100, 200); + + // Pure center cropping, no scaling. + rtc::scoped_refptr<TypeParam> scaled_buffer = TypeParam::Create(100, 100); + scaled_buffer->CropAndScaleFrom(*buf, 0, 25, 100, 100); + CheckCrop<TypeParam>(*scaled_buffer, 0.0, 0.125, 1.0, 0.5); +} + +TYPED_TEST_P(TestPlanarYuvBuffer, CropAndScale16x9) { + const int buffer_width = 640; + const int buffer_height = 480; + const int crop_width = 320; + const int crop_height = 180; + rtc::scoped_refptr<TypeParam> buf = CreateGradient<TypeParam>(640, 480); + + // Pure center cropping, no scaling. 
+ const int out_width = + std::min(buffer_width, crop_width * buffer_height / crop_height); + const int out_height = + std::min(buffer_height, crop_height * buffer_width / crop_width); + rtc::scoped_refptr<TypeParam> scaled_buffer = + TypeParam::Create(out_width, out_height); + scaled_buffer->CropAndScaleFrom(*buf, (buffer_width - out_width) / 2, + (buffer_height - out_height) / 2, out_width, + out_height); + CheckCrop<TypeParam>(*scaled_buffer, 0.0, 0.125, 1.0, 0.75); +} + +REGISTER_TYPED_TEST_SUITE_P(TestPlanarYuvBuffer, + Copy, + CropXCenter, + CropXNotCenter, + CropYCenter, + CropYNotCenter, + CropAndScale16x9); + +using TestTypesAll = ::testing::Types<I420Buffer, + I010Buffer, + I444Buffer, + I422Buffer, + I210Buffer, + I410Buffer>; +INSTANTIATE_TYPED_TEST_SUITE_P(All, TestPlanarYuvBuffer, TestTypesAll); + +template <class T> +class TestPlanarYuvBufferScale : public ::testing::Test {}; +TYPED_TEST_SUITE_P(TestPlanarYuvBufferScale); + +TYPED_TEST_P(TestPlanarYuvBufferScale, Scale) { + rtc::scoped_refptr<TypeParam> buf = CreateGradient<TypeParam>(200, 100); + + // Pure scaling, no cropping. 
+ rtc::scoped_refptr<TypeParam> scaled_buffer = TypeParam::Create(150, 75); + scaled_buffer->ScaleFrom(*buf); + CheckCrop<TypeParam>(*scaled_buffer, 0.0, 0.0, 1.0, 1.0); +} + +REGISTER_TYPED_TEST_SUITE_P(TestPlanarYuvBufferScale, Scale); + +using TestTypesScale = + ::testing::Types<I420Buffer, I010Buffer, I210Buffer, I410Buffer>; +INSTANTIATE_TYPED_TEST_SUITE_P(All, TestPlanarYuvBufferScale, TestTypesScale); + +template <class T> +class TestPlanarYuvBufferRotate : public ::testing::Test { + public: + std::vector<webrtc::VideoRotation> RotationParams = { + kVideoRotation_0, kVideoRotation_90, kVideoRotation_180, + kVideoRotation_270}; +}; + +TYPED_TEST_SUITE_P(TestPlanarYuvBufferRotate); + +TYPED_TEST_P(TestPlanarYuvBufferRotate, Rotates) { + for (const webrtc::VideoRotation& rotation : this->RotationParams) { + rtc::scoped_refptr<TypeParam> buffer = CreateGradient<TypeParam>(640, 480); + rtc::scoped_refptr<TypeParam> rotated_buffer = + TypeParam::Rotate(*buffer, rotation); + CheckRotate(640, 480, rotation, *rotated_buffer); + } +} + +REGISTER_TYPED_TEST_SUITE_P(TestPlanarYuvBufferRotate, Rotates); + +using TestTypesRotate = ::testing:: + Types<I420Buffer, I010Buffer, I444Buffer, I422Buffer, I210Buffer>; +INSTANTIATE_TYPED_TEST_SUITE_P(Rotate, + TestPlanarYuvBufferRotate, + TestTypesRotate); + +TEST(TestNV12Buffer, CropAndScale) { + const int kSourceWidth = 640; + const int kSourceHeight = 480; + const int kScaledWidth = 320; + const int kScaledHeight = 240; + const int kCropLeft = 40; + const int kCropTop = 30; + const int kCropRight = 0; + const int kCropBottom = 30; + + rtc::scoped_refptr<VideoFrameBuffer> buf = + CreateNV12Gradient(kSourceWidth, kSourceHeight); + + rtc::scoped_refptr<VideoFrameBuffer> scaled_buffer = buf->CropAndScale( + kCropLeft, kCropTop, kSourceWidth - kCropLeft - kCropRight, + kSourceHeight - kCropTop - kCropBottom, kScaledWidth, kScaledHeight); + + // Parameters to CheckCrop indicate what part of the source frame is in the + // scaled 
frame. + const float kOffsetX = (kCropLeft + 0.0) / kSourceWidth; + const float kOffsetY = (kCropTop + 0.0) / kSourceHeight; + const float kRelativeWidth = + (kSourceWidth - kCropLeft - kCropRight + 0.0) / kSourceWidth; + const float kRelativeHeight = + (kSourceHeight - kCropTop - kCropBottom + 0.0) / kSourceHeight; + CheckCrop(*scaled_buffer->ToI420(), kOffsetX, kOffsetY, kRelativeWidth, + kRelativeHeight); +} + +TEST(TestUpdateRect, CanCompare) { + VideoFrame::UpdateRect a = {0, 0, 100, 200}; + VideoFrame::UpdateRect b = {0, 0, 100, 200}; + VideoFrame::UpdateRect c = {1, 0, 100, 200}; + VideoFrame::UpdateRect d = {0, 1, 100, 200}; + EXPECT_TRUE(a == b); + EXPECT_FALSE(a == c); + EXPECT_FALSE(a == d); +} + +TEST(TestUpdateRect, ComputesIsEmpty) { + VideoFrame::UpdateRect a = {0, 0, 0, 0}; + VideoFrame::UpdateRect b = {0, 0, 100, 200}; + VideoFrame::UpdateRect c = {1, 100, 0, 0}; + VideoFrame::UpdateRect d = {1, 100, 100, 200}; + EXPECT_TRUE(a.IsEmpty()); + EXPECT_FALSE(b.IsEmpty()); + EXPECT_TRUE(c.IsEmpty()); + EXPECT_FALSE(d.IsEmpty()); +} + +TEST(TestUpdateRectUnion, NonIntersecting) { + VideoFrame::UpdateRect a = {0, 0, 10, 20}; + VideoFrame::UpdateRect b = {100, 200, 10, 20}; + a.Union(b); + EXPECT_EQ(a, VideoFrame::UpdateRect({0, 0, 110, 220})); +} + +TEST(TestUpdateRectUnion, Intersecting) { + VideoFrame::UpdateRect a = {0, 0, 10, 10}; + VideoFrame::UpdateRect b = {5, 5, 30, 20}; + a.Union(b); + EXPECT_EQ(a, VideoFrame::UpdateRect({0, 0, 35, 25})); +} + +TEST(TestUpdateRectUnion, OneInsideAnother) { + VideoFrame::UpdateRect a = {0, 0, 100, 100}; + VideoFrame::UpdateRect b = {5, 5, 30, 20}; + a.Union(b); + EXPECT_EQ(a, VideoFrame::UpdateRect({0, 0, 100, 100})); +} + +TEST(TestUpdateRectIntersect, NonIntersecting) { + VideoFrame::UpdateRect a = {0, 0, 10, 20}; + VideoFrame::UpdateRect b = {100, 200, 10, 20}; + a.Intersect(b); + EXPECT_EQ(a, VideoFrame::UpdateRect({0, 0, 0, 0})); +} + +TEST(TestUpdateRectIntersect, Intersecting) { + VideoFrame::UpdateRect a = 
{0, 0, 10, 10}; + VideoFrame::UpdateRect b = {5, 5, 30, 20}; + a.Intersect(b); + EXPECT_EQ(a, VideoFrame::UpdateRect({5, 5, 5, 5})); +} + +TEST(TestUpdateRectIntersect, OneInsideAnother) { + VideoFrame::UpdateRect a = {0, 0, 100, 100}; + VideoFrame::UpdateRect b = {5, 5, 30, 20}; + a.Intersect(b); + EXPECT_EQ(a, VideoFrame::UpdateRect({5, 5, 30, 20})); +} + +TEST(TestUpdateRectScale, NoScale) { + const int width = 640; + const int height = 480; + VideoFrame::UpdateRect a = {100, 50, 100, 200}; + VideoFrame::UpdateRect scaled = + a.ScaleWithFrame(width, height, 0, 0, width, height, width, height); + EXPECT_EQ(scaled, VideoFrame::UpdateRect({100, 50, 100, 200})); +} + +TEST(TestUpdateRectScale, CropOnly) { + const int width = 640; + const int height = 480; + VideoFrame::UpdateRect a = {100, 50, 100, 200}; + VideoFrame::UpdateRect scaled = a.ScaleWithFrame( + width, height, 10, 10, width - 20, height - 20, width - 20, height - 20); + EXPECT_EQ(scaled, VideoFrame::UpdateRect({90, 40, 100, 200})); +} + +TEST(TestUpdateRectScale, CropOnlyToOddOffset) { + const int width = 640; + const int height = 480; + VideoFrame::UpdateRect a = {100, 50, 100, 200}; + VideoFrame::UpdateRect scaled = a.ScaleWithFrame( + width, height, 5, 5, width - 10, height - 10, width - 10, height - 10); + EXPECT_EQ(scaled, VideoFrame::UpdateRect({94, 44, 102, 202})); +} + +TEST(TestUpdateRectScale, ScaleByHalf) { + const int width = 640; + const int height = 480; + VideoFrame::UpdateRect a = {100, 60, 100, 200}; + VideoFrame::UpdateRect scaled = a.ScaleWithFrame( + width, height, 0, 0, width, height, width / 2, height / 2); + // Scaled by half and +2 pixels in all directions. 
+ EXPECT_EQ(scaled, VideoFrame::UpdateRect({48, 28, 54, 104})); +} + +TEST(TestUpdateRectScale, CropToUnchangedRegionBelowUpdateRect) { + const int width = 640; + const int height = 480; + VideoFrame::UpdateRect a = {100, 60, 100, 200}; + VideoFrame::UpdateRect scaled = a.ScaleWithFrame( + width, height, (width - 10) / 2, (height - 10) / 2, 10, 10, 10, 10); + // Update is out of the cropped frame. + EXPECT_EQ(scaled, VideoFrame::UpdateRect({0, 0, 0, 0})); +} + +TEST(TestUpdateRectScale, CropToUnchangedRegionAboveUpdateRect) { + const int width = 640; + const int height = 480; + VideoFrame::UpdateRect a = {600, 400, 10, 10}; + VideoFrame::UpdateRect scaled = a.ScaleWithFrame( + width, height, (width - 10) / 2, (height - 10) / 2, 10, 10, 10, 10); + // Update is out of the cropped frame. + EXPECT_EQ(scaled, VideoFrame::UpdateRect({0, 0, 0, 0})); +} + +TEST(TestUpdateRectScale, CropInsideUpdate) { + const int width = 640; + const int height = 480; + VideoFrame::UpdateRect a = {300, 200, 100, 100}; + VideoFrame::UpdateRect scaled = a.ScaleWithFrame( + width, height, (width - 10) / 2, (height - 10) / 2, 10, 10, 10, 10); + // Cropped frame is inside the update rect. + EXPECT_EQ(scaled, VideoFrame::UpdateRect({0, 0, 10, 10})); +} + +TEST(TestUpdateRectScale, CropAndScaleByHalf) { + const int width = 640; + const int height = 480; + VideoFrame::UpdateRect a = {100, 60, 100, 200}; + VideoFrame::UpdateRect scaled = + a.ScaleWithFrame(width, height, 10, 10, width - 20, height - 20, + (width - 20) / 2, (height - 20) / 2); + // Scaled by half and +3 pixels in all directions, because of odd offset after + // crop and scale. + EXPECT_EQ(scaled, VideoFrame::UpdateRect({42, 22, 56, 106})); +} + +} // namespace webrtc |