diff options
Diffstat (limited to 'third_party/libwebrtc/common_video/h264')
15 files changed, 2750 insertions, 0 deletions
diff --git a/third_party/libwebrtc/common_video/h264/OWNERS b/third_party/libwebrtc/common_video/h264/OWNERS new file mode 100644 index 0000000000..361ed7e84a --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/OWNERS @@ -0,0 +1 @@ +ssilkin@webrtc.org diff --git a/third_party/libwebrtc/common_video/h264/h264_bitstream_parser.cc b/third_party/libwebrtc/common_video/h264/h264_bitstream_parser.cc new file mode 100644 index 0000000000..2311d0d2ee --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/h264_bitstream_parser.cc @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "common_video/h264/h264_bitstream_parser.h" + +#include <stdlib.h> + +#include <cstdint> +#include <vector> + +#include "common_video/h264/h264_common.h" +#include "rtc_base/bitstream_reader.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace { + +constexpr int kMaxAbsQpDeltaValue = 51; +constexpr int kMinQpValue = 0; +constexpr int kMaxQpValue = 51; + +} // namespace + +H264BitstreamParser::H264BitstreamParser() = default; +H264BitstreamParser::~H264BitstreamParser() = default; + +H264BitstreamParser::Result H264BitstreamParser::ParseNonParameterSetNalu( + const uint8_t* source, + size_t source_length, + uint8_t nalu_type) { + if (!sps_ || !pps_) + return kInvalidStream; + + last_slice_qp_delta_ = absl::nullopt; + const std::vector<uint8_t> slice_rbsp = + H264::ParseRbsp(source, source_length); + if (slice_rbsp.size() < H264::kNaluTypeSize) + return kInvalidStream; + + BitstreamReader slice_reader(slice_rbsp); + slice_reader.ConsumeBits(H264::kNaluTypeSize * 8); + + // Check to see if 
this is an IDR slice, which has an extra field to parse + // out. + bool is_idr = (source[0] & 0x0F) == H264::NaluType::kIdr; + uint8_t nal_ref_idc = (source[0] & 0x60) >> 5; + + // first_mb_in_slice: ue(v) + slice_reader.ReadExponentialGolomb(); + // slice_type: ue(v) + uint32_t slice_type = slice_reader.ReadExponentialGolomb(); + // slice_type's 5..9 range is used to indicate that all slices of a picture + // have the same value of slice_type % 5, we don't care about that, so we map + // to the corresponding 0..4 range. + slice_type %= 5; + // pic_parameter_set_id: ue(v) + slice_reader.ReadExponentialGolomb(); + if (sps_->separate_colour_plane_flag == 1) { + // colour_plane_id + slice_reader.ConsumeBits(2); + } + // frame_num: u(v) + // Represented by log2_max_frame_num bits. + slice_reader.ConsumeBits(sps_->log2_max_frame_num); + bool field_pic_flag = false; + if (sps_->frame_mbs_only_flag == 0) { + // field_pic_flag: u(1) + field_pic_flag = slice_reader.Read<bool>(); + if (field_pic_flag) { + // bottom_field_flag: u(1) + slice_reader.ConsumeBits(1); + } + } + if (is_idr) { + // idr_pic_id: ue(v) + slice_reader.ReadExponentialGolomb(); + } + // pic_order_cnt_lsb: u(v) + // Represented by sps_.log2_max_pic_order_cnt_lsb bits. 
+ if (sps_->pic_order_cnt_type == 0) { + slice_reader.ConsumeBits(sps_->log2_max_pic_order_cnt_lsb); + if (pps_->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) { + // delta_pic_order_cnt_bottom: se(v) + slice_reader.ReadExponentialGolomb(); + } + } + if (sps_->pic_order_cnt_type == 1 && + !sps_->delta_pic_order_always_zero_flag) { + // delta_pic_order_cnt[0]: se(v) + slice_reader.ReadExponentialGolomb(); + if (pps_->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) { + // delta_pic_order_cnt[1]: se(v) + slice_reader.ReadExponentialGolomb(); + } + } + if (pps_->redundant_pic_cnt_present_flag) { + // redundant_pic_cnt: ue(v) + slice_reader.ReadExponentialGolomb(); + } + if (slice_type == H264::SliceType::kB) { + // direct_spatial_mv_pred_flag: u(1) + slice_reader.ConsumeBits(1); + } + switch (slice_type) { + case H264::SliceType::kP: + case H264::SliceType::kB: + case H264::SliceType::kSp: + // num_ref_idx_active_override_flag: u(1) + if (slice_reader.Read<bool>()) { + // num_ref_idx_l0_active_minus1: ue(v) + slice_reader.ReadExponentialGolomb(); + if (slice_type == H264::SliceType::kB) { + // num_ref_idx_l1_active_minus1: ue(v) + slice_reader.ReadExponentialGolomb(); + } + } + break; + default: + break; + } + if (!slice_reader.Ok()) { + return kInvalidStream; + } + // assume nal_unit_type != 20 && nal_unit_type != 21: + if (nalu_type == 20 || nalu_type == 21) { + RTC_LOG(LS_ERROR) << "Unsupported nal unit type."; + return kUnsupportedStream; + } + // if (nal_unit_type == 20 || nal_unit_type == 21) + // ref_pic_list_mvc_modification() + // else + { + // ref_pic_list_modification(): + // `slice_type` checks here don't use named constants as they aren't named + // in the spec for this segment. Keeping them consistent makes it easier to + // verify that they are both the same. 
+ if (slice_type % 5 != 2 && slice_type % 5 != 4) { + // ref_pic_list_modification_flag_l0: u(1) + if (slice_reader.Read<bool>()) { + uint32_t modification_of_pic_nums_idc; + do { + // modification_of_pic_nums_idc: ue(v) + modification_of_pic_nums_idc = slice_reader.ReadExponentialGolomb(); + if (modification_of_pic_nums_idc == 0 || + modification_of_pic_nums_idc == 1) { + // abs_diff_pic_num_minus1: ue(v) + slice_reader.ReadExponentialGolomb(); + } else if (modification_of_pic_nums_idc == 2) { + // long_term_pic_num: ue(v) + slice_reader.ReadExponentialGolomb(); + } + } while (modification_of_pic_nums_idc != 3 && slice_reader.Ok()); + } + } + if (slice_type % 5 == 1) { + // ref_pic_list_modification_flag_l1: u(1) + if (slice_reader.Read<bool>()) { + uint32_t modification_of_pic_nums_idc; + do { + // modification_of_pic_nums_idc: ue(v) + modification_of_pic_nums_idc = slice_reader.ReadExponentialGolomb(); + if (modification_of_pic_nums_idc == 0 || + modification_of_pic_nums_idc == 1) { + // abs_diff_pic_num_minus1: ue(v) + slice_reader.ReadExponentialGolomb(); + } else if (modification_of_pic_nums_idc == 2) { + // long_term_pic_num: ue(v) + slice_reader.ReadExponentialGolomb(); + } + } while (modification_of_pic_nums_idc != 3 && slice_reader.Ok()); + } + } + } + if (!slice_reader.Ok()) { + return kInvalidStream; + } + // TODO(pbos): Do we need support for pred_weight_table()? 
+ if ((pps_->weighted_pred_flag && (slice_type == H264::SliceType::kP || + slice_type == H264::SliceType::kSp)) || + (pps_->weighted_bipred_idc == 1 && slice_type == H264::SliceType::kB)) { + RTC_LOG(LS_ERROR) << "Streams with pred_weight_table unsupported."; + return kUnsupportedStream; + } + // if ((weighted_pred_flag && (slice_type == P || slice_type == SP)) || + // (weighted_bipred_idc == 1 && slice_type == B)) { + // pred_weight_table() + // } + if (nal_ref_idc != 0) { + // dec_ref_pic_marking(): + if (is_idr) { + // no_output_of_prior_pics_flag: u(1) + // long_term_reference_flag: u(1) + slice_reader.ConsumeBits(2); + } else { + // adaptive_ref_pic_marking_mode_flag: u(1) + if (slice_reader.Read<bool>()) { + uint32_t memory_management_control_operation; + do { + // memory_management_control_operation: ue(v) + memory_management_control_operation = + slice_reader.ReadExponentialGolomb(); + if (memory_management_control_operation == 1 || + memory_management_control_operation == 3) { + // difference_of_pic_nums_minus1: ue(v) + slice_reader.ReadExponentialGolomb(); + } + if (memory_management_control_operation == 2) { + // long_term_pic_num: ue(v) + slice_reader.ReadExponentialGolomb(); + } + if (memory_management_control_operation == 3 || + memory_management_control_operation == 6) { + // long_term_frame_idx: ue(v) + slice_reader.ReadExponentialGolomb(); + } + if (memory_management_control_operation == 4) { + // max_long_term_frame_idx_plus1: ue(v) + slice_reader.ReadExponentialGolomb(); + } + } while (memory_management_control_operation != 0 && slice_reader.Ok()); + } + } + } + if (pps_->entropy_coding_mode_flag && slice_type != H264::SliceType::kI && + slice_type != H264::SliceType::kSi) { + // cabac_init_idc: ue(v) + slice_reader.ReadExponentialGolomb(); + } + + int last_slice_qp_delta = slice_reader.ReadSignedExponentialGolomb(); + if (!slice_reader.Ok()) { + return kInvalidStream; + } + if (abs(last_slice_qp_delta) > kMaxAbsQpDeltaValue) { + // Something has 
gone wrong, and the parsed value is invalid. + RTC_LOG(LS_WARNING) << "Parsed QP value out of range."; + return kInvalidStream; + } + + last_slice_qp_delta_ = last_slice_qp_delta; + return kOk; +} + +void H264BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) { + H264::NaluType nalu_type = H264::ParseNaluType(slice[0]); + switch (nalu_type) { + case H264::NaluType::kSps: { + sps_ = SpsParser::ParseSps(slice + H264::kNaluTypeSize, + length - H264::kNaluTypeSize); + if (!sps_) + RTC_DLOG(LS_WARNING) << "Unable to parse SPS from H264 bitstream."; + break; + } + case H264::NaluType::kPps: { + pps_ = PpsParser::ParsePps(slice + H264::kNaluTypeSize, + length - H264::kNaluTypeSize); + if (!pps_) + RTC_DLOG(LS_WARNING) << "Unable to parse PPS from H264 bitstream."; + break; + } + case H264::NaluType::kAud: + case H264::NaluType::kSei: + case H264::NaluType::kPrefix: + break; // Ignore these nalus, as we don't care about their contents. + default: + Result res = ParseNonParameterSetNalu(slice, length, nalu_type); + if (res != kOk) + RTC_DLOG(LS_INFO) << "Failed to parse bitstream. 
Error: " << res; + break; + } +} + +void H264BitstreamParser::ParseBitstream( + rtc::ArrayView<const uint8_t> bitstream) { + std::vector<H264::NaluIndex> nalu_indices = + H264::FindNaluIndices(bitstream.data(), bitstream.size()); + for (const H264::NaluIndex& index : nalu_indices) + ParseSlice(bitstream.data() + index.payload_start_offset, + index.payload_size); +} + +absl::optional<int> H264BitstreamParser::GetLastSliceQp() const { + if (!last_slice_qp_delta_ || !pps_) + return absl::nullopt; + const int qp = 26 + pps_->pic_init_qp_minus26 + *last_slice_qp_delta_; + if (qp < kMinQpValue || qp > kMaxQpValue) { + RTC_LOG(LS_ERROR) << "Parsed invalid QP from bitstream."; + return absl::nullopt; + } + return qp; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h264/h264_bitstream_parser.h b/third_party/libwebrtc/common_video/h264/h264_bitstream_parser.h new file mode 100644 index 0000000000..05427825ac --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/h264_bitstream_parser.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H264_H264_BITSTREAM_PARSER_H_ +#define COMMON_VIDEO_H264_H264_BITSTREAM_PARSER_H_ +#include <stddef.h> +#include <stdint.h> + +#include "absl/types/optional.h" +#include "api/video_codecs/bitstream_parser.h" +#include "common_video/h264/pps_parser.h" +#include "common_video/h264/sps_parser.h" + +namespace webrtc { + +// Stateful H264 bitstream parser (due to SPS/PPS). Used to parse out QP values +// from the bitstream. +// TODO(pbos): Unify with RTP SPS parsing and only use one H264 parser. 
// TODO(pbos): If/when this gets used on the receiver side CHECKs must be
// removed and gracefully abort as we have no control over receive-side
// bitstreams.
class H264BitstreamParser : public BitstreamParser {
 public:
  H264BitstreamParser();
  ~H264BitstreamParser() override;

  // Parses every NAL unit found in `bitstream`, updating the cached SPS/PPS
  // and the last slice QP delta.
  void ParseBitstream(rtc::ArrayView<const uint8_t> bitstream) override;
  // Returns the QP of the last successfully parsed slice, or nullopt if none
  // is available.
  absl::optional<int> GetLastSliceQp() const override;

 protected:
  // Outcome of parsing a single non-parameter-set NAL unit.
  enum Result {
    kOk,
    kInvalidStream,
    kUnsupportedStream,
  };
  // Parses one NAL unit; `slice` points at its header byte and `length` is the
  // unit's size in bytes including that header byte.
  void ParseSlice(const uint8_t* slice, size_t length);
  // Parses the slice header of a NAL unit that is not handled as SPS/PPS.
  Result ParseNonParameterSetNalu(const uint8_t* source,
                                  size_t source_length,
                                  uint8_t nalu_type);

  // SPS/PPS state, updated when parsing new SPS/PPS, used to parse slices.
  absl::optional<SpsParser::SpsState> sps_;
  absl::optional<PpsParser::PpsState> pps_;

  // slice_qp_delta of the last successfully parsed slice. This is the delta,
  // not the absolute QP; GetLastSliceQp() combines it with the PPS value.
  absl::optional<int32_t> last_slice_qp_delta_;
};
+uint8_t kH264SpsPps[] = {0x00, 0x00, 0x00, 0x01, 0x67, 0x42, 0x80, 0x20, 0xda, + 0x01, 0x40, 0x16, 0xe8, 0x06, 0xd0, 0xa1, 0x35, 0x00, + 0x00, 0x00, 0x01, 0x68, 0xce, 0x06, 0xe2}; + +// Contains enough of the image slice to contain slice QP. +uint8_t kH264BitstreamChunk[] = { + 0x00, 0x00, 0x00, 0x01, 0x67, 0x42, 0x80, 0x20, 0xda, 0x01, 0x40, 0x16, + 0xe8, 0x06, 0xd0, 0xa1, 0x35, 0x00, 0x00, 0x00, 0x01, 0x68, 0xce, 0x06, + 0xe2, 0x00, 0x00, 0x00, 0x01, 0x65, 0xb8, 0x40, 0xf0, 0x8c, 0x03, 0xf2, + 0x75, 0x67, 0xad, 0x41, 0x64, 0x24, 0x0e, 0xa0, 0xb2, 0x12, 0x1e, 0xf8, +}; + +uint8_t kH264BitstreamChunkCabac[] = { + 0x00, 0x00, 0x00, 0x01, 0x27, 0x64, 0x00, 0x0d, 0xac, 0x52, 0x30, + 0x50, 0x7e, 0xc0, 0x5a, 0x81, 0x01, 0x01, 0x18, 0x56, 0xbd, 0xef, + 0x80, 0x80, 0x00, 0x00, 0x00, 0x01, 0x28, 0xfe, 0x09, 0x8b, +}; + +// Contains enough of the image slice to contain slice QP. +uint8_t kH264BitstreamNextImageSliceChunk[] = { + 0x00, 0x00, 0x00, 0x01, 0x41, 0xe2, 0x01, 0x16, 0x0e, 0x3e, 0x2b, 0x86, +}; + +// Contains enough of the image slice to contain slice QP. +uint8_t kH264BitstreamNextImageSliceChunkCabac[] = { + 0x00, 0x00, 0x00, 0x01, 0x21, 0xe1, 0x05, 0x11, 0x3f, 0x9a, 0xae, 0x46, + 0x70, 0xbf, 0xc1, 0x4a, 0x16, 0x8f, 0x51, 0xf4, 0xca, 0xfb, 0xa3, 0x65, +}; + +TEST(H264BitstreamParserTest, ReportsNoQpWithoutParsedSlices) { + H264BitstreamParser h264_parser; + EXPECT_FALSE(h264_parser.GetLastSliceQp().has_value()); +} + +TEST(H264BitstreamParserTest, ReportsNoQpWithOnlyParsedPpsAndSpsSlices) { + H264BitstreamParser h264_parser; + h264_parser.ParseBitstream(kH264SpsPps); + EXPECT_FALSE(h264_parser.GetLastSliceQp().has_value()); +} + +TEST(H264BitstreamParserTest, ReportsLastSliceQpForImageSlices) { + H264BitstreamParser h264_parser; + h264_parser.ParseBitstream(kH264BitstreamChunk); + absl::optional<int> qp = h264_parser.GetLastSliceQp(); + ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(35, *qp); + + // Parse an additional image slice. 
+ h264_parser.ParseBitstream(kH264BitstreamNextImageSliceChunk); + qp = h264_parser.GetLastSliceQp(); + ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(37, *qp); +} + +TEST(H264BitstreamParserTest, ReportsLastSliceQpForCABACImageSlices) { + H264BitstreamParser h264_parser; + h264_parser.ParseBitstream(kH264BitstreamChunkCabac); + EXPECT_FALSE(h264_parser.GetLastSliceQp().has_value()); + + // Parse an additional image slice. + h264_parser.ParseBitstream(kH264BitstreamNextImageSliceChunkCabac); + absl::optional<int> qp = h264_parser.GetLastSliceQp(); + ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(24, *qp); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h264/h264_common.cc b/third_party/libwebrtc/common_video/h264/h264_common.cc new file mode 100644 index 0000000000..06d94e0305 --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/h264_common.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h264/h264_common.h" + +#include <cstdint> + +namespace webrtc { +namespace H264 { + +const uint8_t kNaluTypeMask = 0x1F; + +std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer, + size_t buffer_size) { + // This is sorta like Boyer-Moore, but with only the first optimization step: + // given a 3-byte sequence we're looking at, if the 3rd byte isn't 1 or 0, + // skip ahead to the next 3-byte sequence. 0s and 1s are relatively rare, so + // this will skip the majority of reads/checks. 
+ std::vector<NaluIndex> sequences; + if (buffer_size < kNaluShortStartSequenceSize) + return sequences; + + static_assert(kNaluShortStartSequenceSize >= 2, + "kNaluShortStartSequenceSize must be larger or equals to 2"); + const size_t end = buffer_size - kNaluShortStartSequenceSize; + for (size_t i = 0; i < end;) { + if (buffer[i + 2] > 1) { + i += 3; + } else if (buffer[i + 2] == 1) { + if (buffer[i + 1] == 0 && buffer[i] == 0) { + // We found a start sequence, now check if it was a 3 of 4 byte one. + NaluIndex index = {i, i + 3, 0}; + if (index.start_offset > 0 && buffer[index.start_offset - 1] == 0) + --index.start_offset; + + // Update length of previous entry. + auto it = sequences.rbegin(); + if (it != sequences.rend()) + it->payload_size = index.start_offset - it->payload_start_offset; + + sequences.push_back(index); + } + + i += 3; + } else { + ++i; + } + } + + // Update length of last entry, if any. + auto it = sequences.rbegin(); + if (it != sequences.rend()) + it->payload_size = buffer_size - it->payload_start_offset; + + return sequences; +} + +NaluType ParseNaluType(uint8_t data) { + return static_cast<NaluType>(data & kNaluTypeMask); +} + +std::vector<uint8_t> ParseRbsp(const uint8_t* data, size_t length) { + std::vector<uint8_t> out; + out.reserve(length); + + for (size_t i = 0; i < length;) { + // Be careful about over/underflow here. byte_length_ - 3 can underflow, and + // i + 3 can overflow, but byte_length_ - i can't, because i < byte_length_ + // above, and that expression will produce the number of bytes left in + // the stream including the byte at i. + if (length - i >= 3 && !data[i] && !data[i + 1] && data[i + 2] == 3) { + // Two rbsp bytes. + out.push_back(data[i++]); + out.push_back(data[i++]); + // Skip the emulation byte. + i++; + } else { + // Single rbsp byte. 
+ out.push_back(data[i++]); + } + } + return out; +} + +void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination) { + static const uint8_t kZerosInStartSequence = 2; + static const uint8_t kEmulationByte = 0x03u; + size_t num_consecutive_zeros = 0; + destination->EnsureCapacity(destination->size() + length); + + for (size_t i = 0; i < length; ++i) { + uint8_t byte = bytes[i]; + if (byte <= kEmulationByte && + num_consecutive_zeros >= kZerosInStartSequence) { + // Need to escape. + destination->AppendData(kEmulationByte); + num_consecutive_zeros = 0; + } + destination->AppendData(byte); + if (byte == 0) { + ++num_consecutive_zeros; + } else { + num_consecutive_zeros = 0; + } + } +} + +} // namespace H264 +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h264/h264_common.h b/third_party/libwebrtc/common_video/h264/h264_common.h new file mode 100644 index 0000000000..0b1843ee38 --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/h264_common.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H264_H264_COMMON_H_ +#define COMMON_VIDEO_H264_H264_COMMON_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <vector> + +#include "rtc_base/buffer.h" + +namespace webrtc { + +namespace H264 { +// The size of a full NALU start sequence {0 0 0 1}, used for the first NALU +// of an access unit, and for SPS and PPS blocks. +const size_t kNaluLongStartSequenceSize = 4; + +// The size of a shortened NALU start sequence {0 0 1}, that may be used if +// not the first NALU of an access unit or an SPS or PPS block. 
const size_t kNaluShortStartSequenceSize = 3;

// The size of the NALU type byte (1).
const size_t kNaluTypeSize = 1;

// Values of the type field of a NAL unit header byte, as returned by
// ParseNaluType().
enum NaluType : uint8_t {
  kSlice = 1,
  kIdr = 5,
  kSei = 6,
  kSps = 7,
  kPps = 8,
  kAud = 9,
  kEndOfSequence = 10,
  kEndOfStream = 11,
  kFiller = 12,
  kPrefix = 14,
  kStapA = 24,
  kFuA = 28
};

// Slice types in the 0..4 range.
// NOTE(review): presumably compared against slice_type reduced modulo 5 by
// the slice header parser - confirm against callers.
enum SliceType : uint8_t { kP = 0, kB = 1, kI = 2, kSp = 3, kSi = 4 };

// Location of one NALU inside a buffer, expressed as byte offsets.
struct NaluIndex {
  // Start index of NALU, including start sequence.
  size_t start_offset;
  // Start index of NALU payload, typically type header.
  size_t payload_start_offset;
  // Length of NALU payload, in bytes, counting from payload_start_offset.
  size_t payload_size;
};

// Returns a vector of the NALU indices in the given buffer.
std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer,
                                       size_t buffer_size);

// Get the NAL type from the header byte immediately following start sequence.
NaluType ParseNaluType(uint8_t data);

// Methods for parsing and writing RBSP. See section 7.4.1 of the H264 spec.
//
// The following sequences are illegal, and need to be escaped when encoding:
// 00 00 00 -> 00 00 03 00
// 00 00 01 -> 00 00 03 01
// 00 00 02 -> 00 00 03 02
// And things in the source that look like the emulation byte pattern (00 00 03)
// need to have an extra emulation byte added, so it's removed when decoding:
// 00 00 03 -> 00 00 03 03
//
// Decoding is simply a matter of finding any 00 00 03 sequence and removing
// the 03 emulation byte.

// Parse the given data and remove any emulation byte escaping.
std::vector<uint8_t> ParseRbsp(const uint8_t* data, size_t length);

// Write the given data to the destination buffer, inserting emulation
// bytes in order to escape any data that could be interpreted as a start
// sequence.
namespace webrtc {
namespace {
// Inclusive bounds accepted for the parsed pic_init_qp_minus26 value.
constexpr int kMaxPicInitQpDeltaValue = 25;
constexpr int kMinPicInitQpDeltaValue = -26;
}  // namespace

// General note: this is based off the 02/2014 version of the H.264 standard.
// You can find it on this page:
// http://www.itu.int/rec/T-REC-H.264

// Removes emulation bytes from `data` and parses the result as a PPS.
// Returns nullopt if parsing fails.
absl::optional<PpsParser::PpsState> PpsParser::ParsePps(const uint8_t* data,
                                                        size_t length) {
  // First, parse out rbsp, which is basically the source buffer minus emulation
  // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in
  // section 7.3.1 of the H.264 standard.
  return ParseInternal(H264::ParseRbsp(data, length));
}

// Reads only the two leading ids of a PPS into `*pps_id` and `*sps_id`.
// Returns whether both reads succeeded; the outputs are written either way.
bool PpsParser::ParsePpsIds(const uint8_t* data,
                            size_t length,
                            uint32_t* pps_id,
                            uint32_t* sps_id) {
  RTC_DCHECK(pps_id);
  RTC_DCHECK(sps_id);
  // First, parse out rbsp, which is basically the source buffer minus emulation
  // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in
  // section 7.3.1 of the H.264 standard.
  std::vector<uint8_t> unpacked_buffer = H264::ParseRbsp(data, length);
  BitstreamReader reader(unpacked_buffer);
  *pps_id = reader.ReadExponentialGolomb();
  *sps_id = reader.ReadExponentialGolomb();
  return reader.Ok();
}

// Reads pic_parameter_set_id from the start of a slice header. No NAL header
// byte is skipped here, so `data` is expected to start at first_mb_in_slice.
// Returns nullopt if the reader runs out of data.
absl::optional<uint32_t> PpsParser::ParsePpsIdFromSlice(const uint8_t* data,
                                                        size_t length) {
  std::vector<uint8_t> unpacked_buffer = H264::ParseRbsp(data, length);
  BitstreamReader slice_reader(unpacked_buffer);

  // first_mb_in_slice: ue(v)
  slice_reader.ReadExponentialGolomb();
  // slice_type: ue(v)
  slice_reader.ReadExponentialGolomb();
  // pic_parameter_set_id: ue(v)
  uint32_t slice_pps_id = slice_reader.ReadExponentialGolomb();
  if (!slice_reader.Ok()) {
    return absl::nullopt;
  }
  return slice_pps_id;
}

// Parses a PPS from `buffer`, which must already have emulation bytes removed.
// Fields that are not stored in PpsState are read and discarded only to keep
// the bit position correct. Returns nullopt if the reader runs out of data,
// if the slice_group_id array would exceed INT_MAX bits, or if
// pic_init_qp_minus26 lies outside
// [kMinPicInitQpDeltaValue, kMaxPicInitQpDeltaValue].
absl::optional<PpsParser::PpsState> PpsParser::ParseInternal(
    rtc::ArrayView<const uint8_t> buffer) {
  BitstreamReader reader(buffer);
  PpsState pps;
  // pic_parameter_set_id: ue(v)
  pps.id = reader.ReadExponentialGolomb();
  // seq_parameter_set_id: ue(v)
  pps.sps_id = reader.ReadExponentialGolomb();

  // entropy_coding_mode_flag: u(1)
  pps.entropy_coding_mode_flag = reader.Read<bool>();
  // bottom_field_pic_order_in_frame_present_flag: u(1)
  pps.bottom_field_pic_order_in_frame_present_flag = reader.Read<bool>();

  // num_slice_groups_minus1: ue(v)
  uint32_t num_slice_groups_minus1 = reader.ReadExponentialGolomb();
  if (num_slice_groups_minus1 > 0) {
    // slice_group_map_type: ue(v)
    uint32_t slice_group_map_type = reader.ReadExponentialGolomb();
    if (slice_group_map_type == 0) {
      // reader.Ok() in the condition bounds the loop on truncated input.
      for (uint32_t i_group = 0;
           i_group <= num_slice_groups_minus1 && reader.Ok(); ++i_group) {
        // run_length_minus1[iGroup]: ue(v)
        reader.ReadExponentialGolomb();
      }
    } else if (slice_group_map_type == 1) {
      // TODO(sprang): Implement support for dispersed slice group map type.
      // See 8.2.2.2 Specification for dispersed slice group map type.
    } else if (slice_group_map_type == 2) {
      // NOTE(review): the spec's syntax table appears to iterate
      // iGroup < num_slice_groups_minus1 for this map type, i.e. one pair
      // fewer than this loop reads - confirm against the standard. The
      // unit-test writer uses the same bound as this loop.
      for (uint32_t i_group = 0;
           i_group <= num_slice_groups_minus1 && reader.Ok(); ++i_group) {
        // top_left[iGroup]: ue(v)
        reader.ReadExponentialGolomb();
        // bottom_right[iGroup]: ue(v)
        reader.ReadExponentialGolomb();
      }
    } else if (slice_group_map_type == 3 || slice_group_map_type == 4 ||
               slice_group_map_type == 5) {
      // slice_group_change_direction_flag: u(1)
      reader.ConsumeBits(1);
      // slice_group_change_rate_minus1: ue(v)
      reader.ReadExponentialGolomb();
    } else if (slice_group_map_type == 6) {
      // pic_size_in_map_units_minus1: ue(v)
      uint32_t pic_size_in_map_units = reader.ReadExponentialGolomb() + 1;
      // NOTE(review): for n >= 1, ceil(log2(n + 1)) equals bit_width(n), so
      // this expression is one bit wider than the formula quoted below (e.g.
      // n == 1 gives 2 instead of 1) - confirm against the standard. The
      // unit-test writer encodes the same width, so both must change together.
      int slice_group_id_bits = 1 + absl::bit_width(num_slice_groups_minus1);

      // slice_group_id: array of size pic_size_in_map_units, each element
      // is represented by ceil(log2(num_slice_groups_minus1 + 1)) bits.
      // The product is formed in 64 bits and rejected if it does not fit an
      // int.
      int64_t bits_to_consume =
          int64_t{slice_group_id_bits} * pic_size_in_map_units;
      if (!reader.Ok() || bits_to_consume > std::numeric_limits<int>::max()) {
        return absl::nullopt;
      }
      reader.ConsumeBits(bits_to_consume);
    }
  }
  // num_ref_idx_l0_default_active_minus1: ue(v)
  reader.ReadExponentialGolomb();
  // num_ref_idx_l1_default_active_minus1: ue(v)
  reader.ReadExponentialGolomb();
  // weighted_pred_flag: u(1)
  pps.weighted_pred_flag = reader.Read<bool>();
  // weighted_bipred_idc: u(2)
  pps.weighted_bipred_idc = reader.ReadBits(2);

  // pic_init_qp_minus26: se(v)
  pps.pic_init_qp_minus26 = reader.ReadSignedExponentialGolomb();
  // Sanity-check parsed value
  if (!reader.Ok() || pps.pic_init_qp_minus26 > kMaxPicInitQpDeltaValue ||
      pps.pic_init_qp_minus26 < kMinPicInitQpDeltaValue) {
    return absl::nullopt;
  }
  // pic_init_qs_minus26: se(v)
  reader.ReadExponentialGolomb();
  // chroma_qp_index_offset: se(v)
  reader.ReadExponentialGolomb();
  // deblocking_filter_control_present_flag: u(1)
  // constrained_intra_pred_flag: u(1)
  reader.ConsumeBits(2);
  // redundant_pic_cnt_present_flag: u(1)
  pps.redundant_pic_cnt_present_flag = reader.ReadBit();
  if (!reader.Ok()) {
    return absl::nullopt;
  }

  return pps;
}

}  // namespace webrtc
+ */ + +#ifndef COMMON_VIDEO_H264_PPS_PARSER_H_ +#define COMMON_VIDEO_H264_PPS_PARSER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "absl/types/optional.h" +#include "api/array_view.h" + +namespace webrtc { + +// A class for parsing out picture parameter set (PPS) data from a H264 NALU. +class PpsParser { + public: + // The parsed state of the PPS. Only some select values are stored. + // Add more as they are actually needed. + struct PpsState { + PpsState() = default; + + bool bottom_field_pic_order_in_frame_present_flag = false; + bool weighted_pred_flag = false; + bool entropy_coding_mode_flag = false; + uint32_t weighted_bipred_idc = false; + uint32_t redundant_pic_cnt_present_flag = 0; + int pic_init_qp_minus26 = 0; + uint32_t id = 0; + uint32_t sps_id = 0; + }; + + // Unpack RBSP and parse PPS state from the supplied buffer. + static absl::optional<PpsState> ParsePps(const uint8_t* data, size_t length); + + static bool ParsePpsIds(const uint8_t* data, + size_t length, + uint32_t* pps_id, + uint32_t* sps_id); + + static absl::optional<uint32_t> ParsePpsIdFromSlice(const uint8_t* data, + size_t length); + + protected: + // Parse the PPS state, for a buffer where RBSP decoding has already been + // performed. + static absl::optional<PpsState> ParseInternal( + rtc::ArrayView<const uint8_t> buffer); +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_H264_PPS_PARSER_H_ diff --git a/third_party/libwebrtc/common_video/h264/pps_parser_unittest.cc b/third_party/libwebrtc/common_video/h264/pps_parser_unittest.cc new file mode 100644 index 0000000000..652f4c7ce0 --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/pps_parser_unittest.cc @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
// Test helper: serializes `pps` as a PPS bit sequence and appends it, with
// emulation bytes inserted by H264::WriteRbsp(), to `out_buffer`.
//
// `slice_group_map_type`, `num_slice_groups` and `pic_size_in_map_units`
// control the optional slice-group section, which is only written when
// `num_slice_groups > 1`. Syntax elements that have no counterpart in
// PpsState are written as kIgnored. Map type 1 is not handled and reaches
// RTC_DCHECK_NOTREACHED().
void WritePps(const PpsParser::PpsState& pps,
              int slice_group_map_type,
              int num_slice_groups,
              int pic_size_in_map_units,
              rtc::Buffer* out_buffer) {
  uint8_t data[kPpsBufferMaxSize] = {0};
  rtc::BitBufferWriter bit_buffer(data, kPpsBufferMaxSize);

  // pic_parameter_set_id: ue(v)
  bit_buffer.WriteExponentialGolomb(pps.id);
  // seq_parameter_set_id: ue(v)
  bit_buffer.WriteExponentialGolomb(pps.sps_id);
  // entropy_coding_mode_flag: u(1)
  bit_buffer.WriteBits(pps.entropy_coding_mode_flag, 1);
  // bottom_field_pic_order_in_frame_present_flag: u(1)
  bit_buffer.WriteBits(pps.bottom_field_pic_order_in_frame_present_flag ? 1 : 0,
                       1);
  // num_slice_groups_minus1: ue(v)
  RTC_CHECK_GT(num_slice_groups, 0);
  bit_buffer.WriteExponentialGolomb(num_slice_groups - 1);

  if (num_slice_groups > 1) {
    // slice_group_map_type: ue(v)
    bit_buffer.WriteExponentialGolomb(slice_group_map_type);
    switch (slice_group_map_type) {
      case 0:
        for (int i = 0; i < num_slice_groups; ++i) {
          // run_length_minus1[iGroup]: ue(v)
          bit_buffer.WriteExponentialGolomb(kIgnored);
        }
        break;
      case 2:
        for (int i = 0; i < num_slice_groups; ++i) {
          // top_left[iGroup]: ue(v)
          bit_buffer.WriteExponentialGolomb(kIgnored);
          // bottom_right[iGroup]: ue(v)
          bit_buffer.WriteExponentialGolomb(kIgnored);
        }
        break;
      case 3:
      case 4:
      case 5:
        // slice_group_change_direction_flag: u(1)
        bit_buffer.WriteBits(kIgnored, 1);
        // slice_group_change_rate_minus1: ue(v)
        bit_buffer.WriteExponentialGolomb(kIgnored);
        break;
      case 6: {
        // pic_size_in_map_units_minus1: ue(v)
        bit_buffer.WriteExponentialGolomb(pic_size_in_map_units - 1);

        uint32_t slice_group_id_bits = 0;
        // If num_slice_groups is not a power of two an additional bit is
        // required to account for the ceil() of log2() below.
        // NOTE(review): the loop below counts every significant bit of
        // num_slice_groups, so the result is one more than
        // ceil(log2(num_slice_groups)) (e.g. 2 groups give 2 bits). The parser
        // under test must read the same width - confirm both against the spec.
        if ((num_slice_groups & (num_slice_groups - 1)) != 0)
          ++slice_group_id_bits;
        // This consumes the by-value parameter; it is 0 after the loop.
        while (num_slice_groups > 0) {
          num_slice_groups >>= 1;
          ++slice_group_id_bits;
        }

        for (int i = 0; i < pic_size_in_map_units; ++i) {
          // slice_group_id[i]: u(v)
          // Represented by ceil(log2(num_slice_groups_minus1 + 1)) bits.
          bit_buffer.WriteBits(kIgnored, slice_group_id_bits);
        }
        break;
      }
      default:
        RTC_DCHECK_NOTREACHED();
    }
  }

  // num_ref_idx_l0_default_active_minus1: ue(v)
  bit_buffer.WriteExponentialGolomb(kIgnored);
  // num_ref_idx_l1_default_active_minus1: ue(v)
  bit_buffer.WriteExponentialGolomb(kIgnored);
  // weighted_pred_flag: u(1)
  bit_buffer.WriteBits(pps.weighted_pred_flag ? 1 : 0, 1);
  // weighted_bipred_idc: u(2)
  bit_buffer.WriteBits(pps.weighted_bipred_idc, 2);

  // pic_init_qp_minus26: se(v)
  bit_buffer.WriteSignedExponentialGolomb(pps.pic_init_qp_minus26);
  // pic_init_qs_minus26: se(v)
  bit_buffer.WriteExponentialGolomb(kIgnored);
  // chroma_qp_index_offset: se(v)
  bit_buffer.WriteExponentialGolomb(kIgnored);
  // deblocking_filter_control_present_flag: u(1)
  // constrained_intra_pred_flag: u(1)
  bit_buffer.WriteBits(kIgnored, 2);
  // redundant_pic_cnt_present_flag: u(1)
  bit_buffer.WriteBits(pps.redundant_pic_cnt_present_flag, 1);

  // Pad with zero bits up to the next byte boundary so that a whole number of
  // bytes is handed to WriteRbsp().
  size_t byte_offset;
  size_t bit_offset;
  bit_buffer.GetCurrentOffset(&byte_offset, &bit_offset);
  if (bit_offset > 0) {
    bit_buffer.WriteBits(0, 8 - bit_offset);
    bit_buffer.GetCurrentOffset(&byte_offset, &bit_offset);
  }

  H264::WriteRbsp(data, byte_offset, out_buffer);
}
+ continue; + } else if (map_type == 6) { + int max_pic_size = 1 << slice_group_bits; + for (int pic_size = 1; pic_size < max_pic_size; ++pic_size) + VerifyParsing(generated_pps_, map_type, slice_group, pic_size); + } else { + VerifyParsing(generated_pps_, map_type, slice_group, 0); + } + } + } + } + + void VerifyParsing(const PpsParser::PpsState& pps, + int slice_group_map_type, + int num_slice_groups, + int pic_size_in_map_units) { + buffer_.Clear(); + WritePps(pps, slice_group_map_type, num_slice_groups, pic_size_in_map_units, + &buffer_); + parsed_pps_ = PpsParser::ParsePps(buffer_.data(), buffer_.size()); + ASSERT_TRUE(parsed_pps_); + EXPECT_EQ(pps.bottom_field_pic_order_in_frame_present_flag, + parsed_pps_->bottom_field_pic_order_in_frame_present_flag); + EXPECT_EQ(pps.weighted_pred_flag, parsed_pps_->weighted_pred_flag); + EXPECT_EQ(pps.weighted_bipred_idc, parsed_pps_->weighted_bipred_idc); + EXPECT_EQ(pps.entropy_coding_mode_flag, + parsed_pps_->entropy_coding_mode_flag); + EXPECT_EQ(pps.redundant_pic_cnt_present_flag, + parsed_pps_->redundant_pic_cnt_present_flag); + EXPECT_EQ(pps.pic_init_qp_minus26, parsed_pps_->pic_init_qp_minus26); + EXPECT_EQ(pps.id, parsed_pps_->id); + EXPECT_EQ(pps.sps_id, parsed_pps_->sps_id); + } + + PpsParser::PpsState generated_pps_; + rtc::Buffer buffer_; + absl::optional<PpsParser::PpsState> parsed_pps_; +}; + +TEST_F(PpsParserTest, ZeroPps) { + RunTest(); +} + +TEST_F(PpsParserTest, MaxPps) { + generated_pps_.bottom_field_pic_order_in_frame_present_flag = true; + generated_pps_.pic_init_qp_minus26 = 25; + generated_pps_.redundant_pic_cnt_present_flag = 1; // 1 bit value. + generated_pps_.weighted_bipred_idc = (1 << 2) - 1; // 2 bit value. 
+ generated_pps_.weighted_pred_flag = true; + generated_pps_.entropy_coding_mode_flag = true; + generated_pps_.id = 2; + generated_pps_.sps_id = 1; + RunTest(); + + generated_pps_.pic_init_qp_minus26 = -25; + RunTest(); +} + +TEST_F(PpsParserTest, PpsIdFromSlice) { + absl::optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice( + kH264BitstreamChunk, sizeof(kH264BitstreamChunk)); + ASSERT_TRUE(pps_id); + EXPECT_EQ(2u, *pps_id); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h264/sps_parser.cc b/third_party/libwebrtc/common_video/h264/sps_parser.cc new file mode 100644 index 0000000000..cfb0f24ff2 --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/sps_parser.cc @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h264/sps_parser.h" + +#include <cstdint> +#include <vector> + +#include "common_video/h264/h264_common.h" +#include "rtc_base/bitstream_reader.h" + +namespace { +constexpr int kScalingDeltaMin = -128; +constexpr int kScaldingDeltaMax = 127; +} // namespace + +namespace webrtc { + +SpsParser::SpsState::SpsState() = default; +SpsParser::SpsState::SpsState(const SpsState&) = default; +SpsParser::SpsState::~SpsState() = default; + +// General note: this is based off the 02/2014 version of the H.264 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.264 + +// Unpack RBSP and parse SPS state from the supplied buffer. 
+absl::optional<SpsParser::SpsState> SpsParser::ParseSps(const uint8_t* data, + size_t length) { + std::vector<uint8_t> unpacked_buffer = H264::ParseRbsp(data, length); + BitstreamReader reader(unpacked_buffer); + return ParseSpsUpToVui(reader); +} + +absl::optional<SpsParser::SpsState> SpsParser::ParseSpsUpToVui( + BitstreamReader& reader) { + // Now, we need to use a bitstream reader to parse through the actual AVC SPS + // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the + // H.264 standard for a complete description. + // Since we only care about resolution, we ignore the majority of fields, but + // we still have to actively parse through a lot of the data, since many of + // the fields have variable size. + // We're particularly interested in: + // chroma_format_idc -> affects crop units + // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16). + // frame_crop_*_offset -> crop information + + SpsState sps; + + // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is + // 0. It defaults to 1, when not specified. + uint32_t chroma_format_idc = 1; + + // profile_idc: u(8). We need it to determine if we need to read/skip chroma + // formats. + uint8_t profile_idc = reader.Read<uint8_t>(); + // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits + // 1 bit each for the flags + 2 bits + 8 bits for level_idc = 16 bits. + reader.ConsumeBits(16); + // seq_parameter_set_id: ue(v) + sps.id = reader.ReadExponentialGolomb(); + sps.separate_colour_plane_flag = 0; + // See if profile_idc has chroma format information. 
+ if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || + profile_idc == 244 || profile_idc == 44 || profile_idc == 83 || + profile_idc == 86 || profile_idc == 118 || profile_idc == 128 || + profile_idc == 138 || profile_idc == 139 || profile_idc == 134) { + // chroma_format_idc: ue(v) + chroma_format_idc = reader.ReadExponentialGolomb(); + if (chroma_format_idc == 3) { + // separate_colour_plane_flag: u(1) + sps.separate_colour_plane_flag = reader.ReadBit(); + } + // bit_depth_luma_minus8: ue(v) + reader.ReadExponentialGolomb(); + // bit_depth_chroma_minus8: ue(v) + reader.ReadExponentialGolomb(); + // qpprime_y_zero_transform_bypass_flag: u(1) + reader.ConsumeBits(1); + // seq_scaling_matrix_present_flag: u(1) + if (reader.Read<bool>()) { + // Process the scaling lists just enough to be able to properly + // skip over them, so we can still read the resolution on streams + // where this is included. + int scaling_list_count = (chroma_format_idc == 3 ? 12 : 8); + for (int i = 0; i < scaling_list_count; ++i) { + // seq_scaling_list_present_flag[i] : u(1) + if (reader.Read<bool>()) { + int last_scale = 8; + int next_scale = 8; + int size_of_scaling_list = i < 6 ? 16 : 64; + for (int j = 0; j < size_of_scaling_list; j++) { + if (next_scale != 0) { + // delta_scale: se(v) + int delta_scale = reader.ReadSignedExponentialGolomb(); + if (!reader.Ok() || delta_scale < kScalingDeltaMin || + delta_scale > kScaldingDeltaMax) { + return absl::nullopt; + } + next_scale = (last_scale + delta_scale + 256) % 256; + } + if (next_scale != 0) + last_scale = next_scale; + } + } + } + } + } + // log2_max_frame_num and log2_max_pic_order_cnt_lsb are used with + // BitstreamReader::ReadBits, which can read at most 64 bits at a time. We + // also have to avoid overflow when adding 4 to the on-wire golomb value, + // e.g., for evil input data, ReadExponentialGolomb might return 0xfffc. 
+ const uint32_t kMaxLog2Minus4 = 32 - 4; + + // log2_max_frame_num_minus4: ue(v) + uint32_t log2_max_frame_num_minus4 = reader.ReadExponentialGolomb(); + if (!reader.Ok() || log2_max_frame_num_minus4 > kMaxLog2Minus4) { + return absl::nullopt; + } + sps.log2_max_frame_num = log2_max_frame_num_minus4 + 4; + + // pic_order_cnt_type: ue(v) + sps.pic_order_cnt_type = reader.ReadExponentialGolomb(); + if (sps.pic_order_cnt_type == 0) { + // log2_max_pic_order_cnt_lsb_minus4: ue(v) + uint32_t log2_max_pic_order_cnt_lsb_minus4 = reader.ReadExponentialGolomb(); + if (!reader.Ok() || log2_max_pic_order_cnt_lsb_minus4 > kMaxLog2Minus4) { + return absl::nullopt; + } + sps.log2_max_pic_order_cnt_lsb = log2_max_pic_order_cnt_lsb_minus4 + 4; + } else if (sps.pic_order_cnt_type == 1) { + // delta_pic_order_always_zero_flag: u(1) + sps.delta_pic_order_always_zero_flag = reader.ReadBit(); + // offset_for_non_ref_pic: se(v) + reader.ReadExponentialGolomb(); + // offset_for_top_to_bottom_field: se(v) + reader.ReadExponentialGolomb(); + // num_ref_frames_in_pic_order_cnt_cycle: ue(v) + uint32_t num_ref_frames_in_pic_order_cnt_cycle = + reader.ReadExponentialGolomb(); + for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) { + // offset_for_ref_frame[i]: se(v) + reader.ReadExponentialGolomb(); + if (!reader.Ok()) { + return absl::nullopt; + } + } + } + // max_num_ref_frames: ue(v) + sps.max_num_ref_frames = reader.ReadExponentialGolomb(); + // gaps_in_frame_num_value_allowed_flag: u(1) + reader.ConsumeBits(1); + // + // IMPORTANT ONES! Now we're getting to resolution. First we read the pic + // width/height in macroblocks (16x16), which gives us the base resolution, + // and then we continue on until we hit the frame crop offsets, which are used + // to signify resolutions that aren't multiples of 16. 
+ // + // pic_width_in_mbs_minus1: ue(v) + sps.width = 16 * (reader.ReadExponentialGolomb() + 1); + // pic_height_in_map_units_minus1: ue(v) + uint32_t pic_height_in_map_units_minus1 = reader.ReadExponentialGolomb(); + // frame_mbs_only_flag: u(1) + sps.frame_mbs_only_flag = reader.ReadBit(); + if (!sps.frame_mbs_only_flag) { + // mb_adaptive_frame_field_flag: u(1) + reader.ConsumeBits(1); + } + sps.height = + 16 * (2 - sps.frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1); + // direct_8x8_inference_flag: u(1) + reader.ConsumeBits(1); + // + // MORE IMPORTANT ONES! Now we're at the frame crop information. + // + uint32_t frame_crop_left_offset = 0; + uint32_t frame_crop_right_offset = 0; + uint32_t frame_crop_top_offset = 0; + uint32_t frame_crop_bottom_offset = 0; + // frame_cropping_flag: u(1) + if (reader.Read<bool>()) { + // frame_crop_{left, right, top, bottom}_offset: ue(v) + frame_crop_left_offset = reader.ReadExponentialGolomb(); + frame_crop_right_offset = reader.ReadExponentialGolomb(); + frame_crop_top_offset = reader.ReadExponentialGolomb(); + frame_crop_bottom_offset = reader.ReadExponentialGolomb(); + } + // vui_parameters_present_flag: u(1) + sps.vui_params_present = reader.ReadBit(); + + // Far enough! We don't use the rest of the SPS. + if (!reader.Ok()) { + return absl::nullopt; + } + + // Figure out the crop units in pixels. That's based on the chroma format's + // sampling, which is indicated by chroma_format_idc. + if (sps.separate_colour_plane_flag || chroma_format_idc == 0) { + frame_crop_bottom_offset *= (2 - sps.frame_mbs_only_flag); + frame_crop_top_offset *= (2 - sps.frame_mbs_only_flag); + } else if (!sps.separate_colour_plane_flag && chroma_format_idc > 0) { + // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2). + if (chroma_format_idc == 1 || chroma_format_idc == 2) { + frame_crop_left_offset *= 2; + frame_crop_right_offset *= 2; + } + // Height multipliers for format 1 (4:2:0). 
+ if (chroma_format_idc == 1) { + frame_crop_top_offset *= 2; + frame_crop_bottom_offset *= 2; + } + } + // Subtract the crop for each dimension. + sps.width -= (frame_crop_left_offset + frame_crop_right_offset); + sps.height -= (frame_crop_top_offset + frame_crop_bottom_offset); + + return sps; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h264/sps_parser.h b/third_party/libwebrtc/common_video/h264/sps_parser.h new file mode 100644 index 0000000000..da328b48b0 --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/sps_parser.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H264_SPS_PARSER_H_ +#define COMMON_VIDEO_H264_SPS_PARSER_H_ + +#include "absl/types/optional.h" +#include "rtc_base/bitstream_reader.h" + +namespace webrtc { + +// A class for parsing out sequence parameter set (SPS) data from an H264 NALU. +class SpsParser { + public: + // The parsed state of the SPS. Only some select values are stored. + // Add more as they are actually needed. + struct SpsState { + SpsState(); + SpsState(const SpsState&); + ~SpsState(); + + uint32_t width = 0; + uint32_t height = 0; + uint32_t delta_pic_order_always_zero_flag = 0; + uint32_t separate_colour_plane_flag = 0; + uint32_t frame_mbs_only_flag = 0; + uint32_t log2_max_frame_num = 4; // Smallest valid value. + uint32_t log2_max_pic_order_cnt_lsb = 4; // Smallest valid value. + uint32_t pic_order_cnt_type = 0; + uint32_t max_num_ref_frames = 0; + uint32_t vui_params_present = 0; + uint32_t id = 0; + }; + + // Unpack RBSP and parse SPS state from the supplied buffer. 
+ static absl::optional<SpsState> ParseSps(const uint8_t* data, size_t length); + + protected: + // Parse the SPS state, up till the VUI part, for a buffer where RBSP + // decoding has already been performed. + static absl::optional<SpsState> ParseSpsUpToVui(BitstreamReader& reader); +}; + +} // namespace webrtc +#endif // COMMON_VIDEO_H264_SPS_PARSER_H_ diff --git a/third_party/libwebrtc/common_video/h264/sps_parser_unittest.cc b/third_party/libwebrtc/common_video/h264/sps_parser_unittest.cc new file mode 100644 index 0000000000..9e210c65d8 --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/sps_parser_unittest.cc @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h264/sps_parser.h" + +#include "common_video/h264/h264_common.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/buffer.h" +#include "test/gtest.h" + +namespace webrtc { + +// Example SPS can be generated with ffmpeg. Here's an example set of commands, +// runnable on OS X: +// 1) Generate a video, from the camera: +// ffmpeg -f avfoundation -i "0" -video_size 640x360 camera.mov +// +// 2) Scale the video to the desired size: +// ffmpeg -i camera.mov -vf scale=640x360 scaled.mov +// +// 3) Get just the H.264 bitstream in AnnexB: +// ffmpeg -i scaled.mov -vcodec copy -vbsf h264_mp4toannexb -an out.h264 +// +// 4) Open out.h264 and find the SPS, generally everything between the first +// two start codes (0 0 0 1 or 0 0 1). The first byte should be 0x67, +// which should be stripped out before being passed to the parser. 
+ +static const size_t kSpsBufferMaxSize = 256; + +// Generates a fake SPS with basically everything empty but the width/height. +// Pass in a buffer of at least kSpsBufferMaxSize. +// The fake SPS that this generates also always has at least one emulation byte +// at offset 2, since the first two bytes are always 0, and has a 0x3 as the +// level_idc, to make sure the parser doesn't eat all 0x3 bytes. +void GenerateFakeSps(uint16_t width, + uint16_t height, + int id, + uint32_t log2_max_frame_num_minus4, + uint32_t log2_max_pic_order_cnt_lsb_minus4, + rtc::Buffer* out_buffer) { + uint8_t rbsp[kSpsBufferMaxSize] = {0}; + rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize); + // Profile byte. + writer.WriteUInt8(0); + // Constraint sets and reserved zero bits. + writer.WriteUInt8(0); + // level_idc. + writer.WriteUInt8(0x3u); + // seq_paramter_set_id. + writer.WriteExponentialGolomb(id); + // Profile is not special, so we skip all the chroma format settings. + + // Now some bit magic. + // log2_max_frame_num_minus4: ue(v). + writer.WriteExponentialGolomb(log2_max_frame_num_minus4); + // pic_order_cnt_type: ue(v). 0 is the type we want. + writer.WriteExponentialGolomb(0); + // log2_max_pic_order_cnt_lsb_minus4: ue(v). 0 is fine. + writer.WriteExponentialGolomb(log2_max_pic_order_cnt_lsb_minus4); + // max_num_ref_frames: ue(v). 0 is fine. + writer.WriteExponentialGolomb(0); + // gaps_in_frame_num_value_allowed_flag: u(1). + writer.WriteBits(0, 1); + // Next are width/height. First, calculate the mbs/map_units versions. + uint16_t width_in_mbs_minus1 = (width + 15) / 16 - 1; + + // For the height, we're going to define frame_mbs_only_flag, so we need to + // divide by 2. See the parser for the full calculation. + uint16_t height_in_map_units_minus1 = ((height + 15) / 16 - 1) / 2; + // Write each as ue(v). + writer.WriteExponentialGolomb(width_in_mbs_minus1); + writer.WriteExponentialGolomb(height_in_map_units_minus1); + // frame_mbs_only_flag: u(1). Needs to be false. 
+ writer.WriteBits(0, 1); + // mb_adaptive_frame_field_flag: u(1). + writer.WriteBits(0, 1); + // direct_8x8_inferene_flag: u(1). + writer.WriteBits(0, 1); + // frame_cropping_flag: u(1). 1, so we can supply crop. + writer.WriteBits(1, 1); + // Now we write the left/right/top/bottom crop. For simplicity, we'll put all + // the crop at the left/top. + // We picked a 4:2:0 format, so the crops are 1/2 the pixel crop values. + // Left/right. + writer.WriteExponentialGolomb(((16 - (width % 16)) % 16) / 2); + writer.WriteExponentialGolomb(0); + // Top/bottom. + writer.WriteExponentialGolomb(((16 - (height % 16)) % 16) / 2); + writer.WriteExponentialGolomb(0); + + // vui_parameters_present_flag: u(1) + writer.WriteBits(0, 1); + + // Get the number of bytes written (including the last partial byte). + size_t byte_count, bit_offset; + writer.GetCurrentOffset(&byte_count, &bit_offset); + if (bit_offset > 0) { + byte_count++; + } + + out_buffer->Clear(); + H264::WriteRbsp(rbsp, byte_count, out_buffer); +} + +TEST(H264SpsParserTest, TestSampleSPSHdLandscape) { + // SPS for a 1280x720 camera capture from ffmpeg on osx. Contains + // emulation bytes but no cropping. + const uint8_t buffer[] = {0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05, + 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00, + 0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60}; + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer, arraysize(buffer)); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(1280u, sps->width); + EXPECT_EQ(720u, sps->height); +} + +TEST(H264SpsParserTest, TestSampleSPSVgaLandscape) { + // SPS for a 640x360 camera capture from ffmpeg on osx. Contains emulation + // bytes and cropping (360 isn't divisible by 16). 
+ const uint8_t buffer[] = {0x7A, 0x00, 0x1E, 0xBC, 0xD9, 0x40, 0xA0, 0x2F, + 0xF8, 0x98, 0x40, 0x00, 0x00, 0x03, 0x01, 0x80, + 0x00, 0x00, 0x56, 0x83, 0xC5, 0x8B, 0x65, 0x80}; + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer, arraysize(buffer)); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(640u, sps->width); + EXPECT_EQ(360u, sps->height); +} + +TEST(H264SpsParserTest, TestSampleSPSWeirdResolution) { + // SPS for a 200x400 camera capture from ffmpeg on osx. Horizontal and + // veritcal crop (neither dimension is divisible by 16). + const uint8_t buffer[] = {0x7A, 0x00, 0x0D, 0xBC, 0xD9, 0x43, 0x43, 0x3E, + 0x5E, 0x10, 0x00, 0x00, 0x03, 0x00, 0x60, 0x00, + 0x00, 0x15, 0xA0, 0xF1, 0x42, 0x99, 0x60}; + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer, arraysize(buffer)); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(200u, sps->width); + EXPECT_EQ(400u, sps->height); +} + +TEST(H264SpsParserTest, TestSyntheticSPSQvgaLandscape) { + rtc::Buffer buffer; + GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer); + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->id); +} + +TEST(H264SpsParserTest, TestSyntheticSPSWeirdResolution) { + rtc::Buffer buffer; + GenerateFakeSps(156u, 122u, 2, 0, 0, &buffer); + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(156u, sps->width); + EXPECT_EQ(122u, sps->height); + EXPECT_EQ(2u, sps->id); +} + +TEST(H264SpsParserTest, TestSampleSPSWithScalingLists) { + // SPS from a 1920x1080 video. Contains scaling lists (and vertical cropping). 
+ const uint8_t buffer[] = {0x64, 0x00, 0x2a, 0xad, 0x84, 0x01, 0x0c, 0x20, + 0x08, 0x61, 0x00, 0x43, 0x08, 0x02, 0x18, 0x40, + 0x10, 0xc2, 0x00, 0x84, 0x3b, 0x50, 0x3c, 0x01, + 0x13, 0xf2, 0xcd, 0xc0, 0x40, 0x40, 0x50, 0x00, + 0x00, 0x00, 0x10, 0x00, 0x00, 0x01, 0xe8, 0x40}; + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer, arraysize(buffer)); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(1920u, sps->width); + EXPECT_EQ(1080u, sps->height); +} + +TEST(H264SpsParserTest, TestLog2MaxFrameNumMinus4) { + rtc::Buffer buffer; + GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer); + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->id); + EXPECT_EQ(4u, sps->log2_max_frame_num); + + GenerateFakeSps(320u, 180u, 1, 28, 0, &buffer); + sps = SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->id); + EXPECT_EQ(32u, sps->log2_max_frame_num); + + GenerateFakeSps(320u, 180u, 1, 29, 0, &buffer); + EXPECT_FALSE(SpsParser::ParseSps(buffer.data(), buffer.size())); +} + +TEST(H264SpsParserTest, TestLog2MaxPicOrderCntMinus4) { + rtc::Buffer buffer; + GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer); + absl::optional<SpsParser::SpsState> sps = + SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->id); + EXPECT_EQ(4u, sps->log2_max_pic_order_cnt_lsb); + + GenerateFakeSps(320u, 180u, 1, 0, 28, &buffer); + EXPECT_TRUE(static_cast<bool>( + sps = SpsParser::ParseSps(buffer.data(), buffer.size()))); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->id); + EXPECT_EQ(32u, sps->log2_max_pic_order_cnt_lsb); + + GenerateFakeSps(320u, 180u, 1, 0, 29, &buffer); + 
EXPECT_FALSE(SpsParser::ParseSps(buffer.data(), buffer.size())); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h264/sps_vui_rewriter.cc b/third_party/libwebrtc/common_video/h264/sps_vui_rewriter.cc new file mode 100644 index 0000000000..117e92a1e5 --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/sps_vui_rewriter.cc @@ -0,0 +1,611 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#include "common_video/h264/sps_vui_rewriter.h" + +#include <string.h> + +#include <algorithm> +#include <cstdint> +#include <vector> + +#include "api/video/color_space.h" +#include "common_video/h264/h264_common.h" +#include "common_video/h264/sps_parser.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/bitstream_reader.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +namespace { + +// The maximum expected growth from adding a VUI to the SPS. It's actually +// closer to 24 or so, but better safe than sorry. 
+const size_t kMaxVuiSpsIncrease = 64; + +const char* kSpsValidHistogramName = "WebRTC.Video.H264.SpsValid"; +enum SpsValidEvent { + kReceivedSpsVuiOk = 1, + kReceivedSpsRewritten = 2, + kReceivedSpsParseFailure = 3, + kSentSpsPocOk = 4, + kSentSpsVuiOk = 5, + kSentSpsRewritten = 6, + kSentSpsParseFailure = 7, + kSpsRewrittenMax = 8 +}; + +#define RETURN_FALSE_ON_FAIL(x) \ + do { \ + if (!(x)) { \ + RTC_LOG_F(LS_ERROR) << " (line:" << __LINE__ << ") FAILED: " #x; \ + return false; \ + } \ + } while (0) + +uint8_t CopyUInt8(BitstreamReader& source, rtc::BitBufferWriter& destination) { + uint8_t tmp = source.Read<uint8_t>(); + if (!destination.WriteUInt8(tmp)) { + source.Invalidate(); + } + return tmp; +} + +uint32_t CopyExpGolomb(BitstreamReader& source, + rtc::BitBufferWriter& destination) { + uint32_t tmp = source.ReadExponentialGolomb(); + if (!destination.WriteExponentialGolomb(tmp)) { + source.Invalidate(); + } + return tmp; +} + +uint32_t CopyBits(int bits, + BitstreamReader& source, + rtc::BitBufferWriter& destination) { + RTC_DCHECK_GT(bits, 0); + RTC_DCHECK_LE(bits, 32); + uint64_t tmp = source.ReadBits(bits); + if (!destination.WriteBits(tmp, bits)) { + source.Invalidate(); + } + return tmp; +} + +bool CopyAndRewriteVui(const SpsParser::SpsState& sps, + BitstreamReader& source, + rtc::BitBufferWriter& destination, + const webrtc::ColorSpace* color_space, + SpsVuiRewriter::ParseResult& out_vui_rewritten); + +void CopyHrdParameters(BitstreamReader& source, + rtc::BitBufferWriter& destination); +bool AddBitstreamRestriction(rtc::BitBufferWriter* destination, + uint32_t max_num_ref_frames); +bool IsDefaultColorSpace(const ColorSpace& color_space); +bool AddVideoSignalTypeInfo(rtc::BitBufferWriter& destination, + const ColorSpace& color_space); +bool CopyOrRewriteVideoSignalTypeInfo( + BitstreamReader& source, + rtc::BitBufferWriter& destination, + const ColorSpace* color_space, + SpsVuiRewriter::ParseResult& out_vui_rewritten); +bool 
CopyRemainingBits(BitstreamReader& source, + rtc::BitBufferWriter& destination); +} // namespace + +void SpsVuiRewriter::UpdateStats(ParseResult result, Direction direction) { + switch (result) { + case SpsVuiRewriter::ParseResult::kVuiRewritten: + RTC_HISTOGRAM_ENUMERATION( + kSpsValidHistogramName, + direction == SpsVuiRewriter::Direction::kIncoming + ? SpsValidEvent::kReceivedSpsRewritten + : SpsValidEvent::kSentSpsRewritten, + SpsValidEvent::kSpsRewrittenMax); + break; + case SpsVuiRewriter::ParseResult::kVuiOk: + RTC_HISTOGRAM_ENUMERATION( + kSpsValidHistogramName, + direction == SpsVuiRewriter::Direction::kIncoming + ? SpsValidEvent::kReceivedSpsVuiOk + : SpsValidEvent::kSentSpsVuiOk, + SpsValidEvent::kSpsRewrittenMax); + break; + case SpsVuiRewriter::ParseResult::kFailure: + RTC_HISTOGRAM_ENUMERATION( + kSpsValidHistogramName, + direction == SpsVuiRewriter::Direction::kIncoming + ? SpsValidEvent::kReceivedSpsParseFailure + : SpsValidEvent::kSentSpsParseFailure, + SpsValidEvent::kSpsRewrittenMax); + break; + } +} + +SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps( + const uint8_t* buffer, + size_t length, + absl::optional<SpsParser::SpsState>* sps, + const webrtc::ColorSpace* color_space, + rtc::Buffer* destination) { + // Create temporary RBSP decoded buffer of the payload (exlcuding the + // leading nalu type header byte (the SpsParser uses only the payload). + std::vector<uint8_t> rbsp_buffer = H264::ParseRbsp(buffer, length); + BitstreamReader source_buffer(rbsp_buffer); + absl::optional<SpsParser::SpsState> sps_state = + SpsParser::ParseSpsUpToVui(source_buffer); + if (!sps_state) + return ParseResult::kFailure; + + *sps = sps_state; + + // We're going to completely muck up alignment, so we need a BitBufferWriter + // to write with. + rtc::Buffer out_buffer(length + kMaxVuiSpsIncrease); + rtc::BitBufferWriter sps_writer(out_buffer.data(), out_buffer.size()); + + // Check how far the SpsParser has read, and copy that data in bulk. 
+ RTC_DCHECK(source_buffer.Ok()); + size_t total_bit_offset = + rbsp_buffer.size() * 8 - source_buffer.RemainingBitCount(); + size_t byte_offset = total_bit_offset / 8; + size_t bit_offset = total_bit_offset % 8; + memcpy(out_buffer.data(), rbsp_buffer.data(), + byte_offset + (bit_offset > 0 ? 1 : 0)); // OK to copy the last bits. + + // SpsParser will have read the vui_params_present flag, which we want to + // modify, so back off a bit; + if (bit_offset == 0) { + --byte_offset; + bit_offset = 7; + } else { + --bit_offset; + } + sps_writer.Seek(byte_offset, bit_offset); + + ParseResult vui_updated; + if (!CopyAndRewriteVui(*sps_state, source_buffer, sps_writer, color_space, + vui_updated)) { + RTC_LOG(LS_ERROR) << "Failed to parse/copy SPS VUI."; + return ParseResult::kFailure; + } + + if (vui_updated == ParseResult::kVuiOk) { + // No update necessary after all, just return. + return vui_updated; + } + + if (!CopyRemainingBits(source_buffer, sps_writer)) { + RTC_LOG(LS_ERROR) << "Failed to parse/copy SPS VUI."; + return ParseResult::kFailure; + } + + // Pad up to next byte with zero bits. 
+ sps_writer.GetCurrentOffset(&byte_offset, &bit_offset); + if (bit_offset > 0) { + sps_writer.WriteBits(0, 8 - bit_offset); + ++byte_offset; + bit_offset = 0; + } + + RTC_DCHECK(byte_offset <= length + kMaxVuiSpsIncrease); + RTC_CHECK(destination != nullptr); + + out_buffer.SetSize(byte_offset); + + // Write updates SPS to destination with added RBSP + H264::WriteRbsp(out_buffer.data(), out_buffer.size(), destination); + + return ParseResult::kVuiRewritten; +} + +SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps( + const uint8_t* buffer, + size_t length, + absl::optional<SpsParser::SpsState>* sps, + const webrtc::ColorSpace* color_space, + rtc::Buffer* destination, + Direction direction) { + ParseResult result = + ParseAndRewriteSps(buffer, length, sps, color_space, destination); + UpdateStats(result, direction); + return result; +} + +rtc::Buffer SpsVuiRewriter::ParseOutgoingBitstreamAndRewrite( + rtc::ArrayView<const uint8_t> buffer, + const webrtc::ColorSpace* color_space) { + std::vector<H264::NaluIndex> nalus = + H264::FindNaluIndices(buffer.data(), buffer.size()); + + // Allocate some extra space for potentially adding a missing VUI. + rtc::Buffer output_buffer(/*size=*/0, /*capacity=*/buffer.size() + + nalus.size() * kMaxVuiSpsIncrease); + + for (const H264::NaluIndex& nalu : nalus) { + // Copy NAL unit start code. + const uint8_t* start_code_ptr = buffer.data() + nalu.start_offset; + const size_t start_code_length = + nalu.payload_start_offset - nalu.start_offset; + const uint8_t* nalu_ptr = buffer.data() + nalu.payload_start_offset; + const size_t nalu_length = nalu.payload_size; + + if (H264::ParseNaluType(nalu_ptr[0]) == H264::NaluType::kSps) { + // Check if stream uses picture order count type 0, and if so rewrite it + // to enable faster decoding. Streams in that format incur additional + // delay because it allows decode order to differ from render order. 
+ // The mechanism used is to rewrite (edit or add) the SPS's VUI to contain + // restrictions on the maximum number of reordered pictures. This reduces + // latency significantly, though it still adds about a frame of latency to + // decoding. + // Note that we do this rewriting both here (send side, in order to + // protect legacy receive clients) in RtpDepacketizerH264::ParseSingleNalu + // (receive side, in orderer to protect us from unknown or legacy send + // clients). + absl::optional<SpsParser::SpsState> sps; + rtc::Buffer output_nalu; + + // Add the type header to the output buffer first, so that the rewriter + // can append modified payload on top of that. + output_nalu.AppendData(nalu_ptr[0]); + + ParseResult result = ParseAndRewriteSps( + nalu_ptr + H264::kNaluTypeSize, nalu_length - H264::kNaluTypeSize, + &sps, color_space, &output_nalu, Direction::kOutgoing); + if (result == ParseResult::kVuiRewritten) { + output_buffer.AppendData(start_code_ptr, start_code_length); + output_buffer.AppendData(output_nalu.data(), output_nalu.size()); + continue; + } + } else if (H264::ParseNaluType(nalu_ptr[0]) == H264::NaluType::kAud) { + // Skip the access unit delimiter copy. + continue; + } + + // vui wasn't rewritten and it is not aud, copy the nal unit as is. + output_buffer.AppendData(start_code_ptr, start_code_length); + output_buffer.AppendData(nalu_ptr, nalu_length); + } + return output_buffer; +} + +namespace { +bool CopyAndRewriteVui(const SpsParser::SpsState& sps, + BitstreamReader& source, + rtc::BitBufferWriter& destination, + const webrtc::ColorSpace* color_space, + SpsVuiRewriter::ParseResult& out_vui_rewritten) { + out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiOk; + + // + // vui_parameters_present_flag: u(1) + // + RETURN_FALSE_ON_FAIL(destination.WriteBits(1, 1)); + + // ********* IMPORTANT! 
********** + // Now we're at the VUI, so we want to (1) add it if it isn't present, and + // (2) rewrite frame reordering values so no reordering is allowed. + if (!sps.vui_params_present) { + // Write a simple VUI with the parameters we want and 0 for all other flags. + + // aspect_ratio_info_present_flag, overscan_info_present_flag. Both u(1). + RETURN_FALSE_ON_FAIL(destination.WriteBits(0, 2)); + + uint32_t video_signal_type_present_flag = + (color_space && !IsDefaultColorSpace(*color_space)) ? 1 : 0; + RETURN_FALSE_ON_FAIL( + destination.WriteBits(video_signal_type_present_flag, 1)); + if (video_signal_type_present_flag) { + RETURN_FALSE_ON_FAIL(AddVideoSignalTypeInfo(destination, *color_space)); + } + // chroma_loc_info_present_flag, timing_info_present_flag, + // nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag, + // pic_struct_present_flag, All u(1) + RETURN_FALSE_ON_FAIL(destination.WriteBits(0, 5)); + // bitstream_restriction_flag: u(1) + RETURN_FALSE_ON_FAIL(destination.WriteBits(1, 1)); + RETURN_FALSE_ON_FAIL( + AddBitstreamRestriction(&destination, sps.max_num_ref_frames)); + + out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiRewritten; + } else { + // Parse out the full VUI. + // aspect_ratio_info_present_flag: u(1) + uint32_t aspect_ratio_info_present_flag = CopyBits(1, source, destination); + if (aspect_ratio_info_present_flag) { + // aspect_ratio_idc: u(8) + uint8_t aspect_ratio_idc = CopyUInt8(source, destination); + if (aspect_ratio_idc == 255u) { // Extended_SAR + // sar_width/sar_height: u(16) each. 
+ CopyBits(32, source, destination); + } + } + // overscan_info_present_flag: u(1) + uint32_t overscan_info_present_flag = CopyBits(1, source, destination); + if (overscan_info_present_flag) { + // overscan_appropriate_flag: u(1) + CopyBits(1, source, destination); + } + + CopyOrRewriteVideoSignalTypeInfo(source, destination, color_space, + out_vui_rewritten); + + // chroma_loc_info_present_flag: u(1) + uint32_t chroma_loc_info_present_flag = CopyBits(1, source, destination); + if (chroma_loc_info_present_flag == 1) { + // chroma_sample_loc_type_(top|bottom)_field: ue(v) each. + CopyExpGolomb(source, destination); + CopyExpGolomb(source, destination); + } + // timing_info_present_flag: u(1) + uint32_t timing_info_present_flag = CopyBits(1, source, destination); + if (timing_info_present_flag == 1) { + // num_units_in_tick, time_scale: u(32) each + CopyBits(32, source, destination); + CopyBits(32, source, destination); + // fixed_frame_rate_flag: u(1) + CopyBits(1, source, destination); + } + // nal_hrd_parameters_present_flag: u(1) + uint32_t nal_hrd_parameters_present_flag = CopyBits(1, source, destination); + if (nal_hrd_parameters_present_flag == 1) { + CopyHrdParameters(source, destination); + } + // vcl_hrd_parameters_present_flag: u(1) + uint32_t vcl_hrd_parameters_present_flag = CopyBits(1, source, destination); + if (vcl_hrd_parameters_present_flag == 1) { + CopyHrdParameters(source, destination); + } + if (nal_hrd_parameters_present_flag == 1 || + vcl_hrd_parameters_present_flag == 1) { + // low_delay_hrd_flag: u(1) + CopyBits(1, source, destination); + } + // pic_struct_present_flag: u(1) + CopyBits(1, source, destination); + + // bitstream_restriction_flag: u(1) + uint32_t bitstream_restriction_flag = source.ReadBit(); + RETURN_FALSE_ON_FAIL(destination.WriteBits(1, 1)); + if (bitstream_restriction_flag == 0) { + // We're adding one from scratch. 
+ RETURN_FALSE_ON_FAIL( + AddBitstreamRestriction(&destination, sps.max_num_ref_frames)); + out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiRewritten; + } else { + // We're replacing. + // motion_vectors_over_pic_boundaries_flag: u(1) + CopyBits(1, source, destination); + // max_bytes_per_pic_denom: ue(v) + CopyExpGolomb(source, destination); + // max_bits_per_mb_denom: ue(v) + CopyExpGolomb(source, destination); + // log2_max_mv_length_horizontal: ue(v) + CopyExpGolomb(source, destination); + // log2_max_mv_length_vertical: ue(v) + CopyExpGolomb(source, destination); + // ********* IMPORTANT! ********** + // The next two are the ones we need to set to low numbers: + // max_num_reorder_frames: ue(v) + // max_dec_frame_buffering: ue(v) + // However, if they are already set to no greater than the numbers we + // want, then we don't need to be rewriting. + uint32_t max_num_reorder_frames = source.ReadExponentialGolomb(); + uint32_t max_dec_frame_buffering = source.ReadExponentialGolomb(); + RETURN_FALSE_ON_FAIL(destination.WriteExponentialGolomb(0)); + RETURN_FALSE_ON_FAIL( + destination.WriteExponentialGolomb(sps.max_num_ref_frames)); + if (max_num_reorder_frames != 0 || + max_dec_frame_buffering > sps.max_num_ref_frames) { + out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiRewritten; + } + } + } + return source.Ok(); +} + +// Copies a VUI HRD parameters segment. 
+void CopyHrdParameters(BitstreamReader& source, + rtc::BitBufferWriter& destination) { + // cbp_cnt_minus1: ue(v) + uint32_t cbp_cnt_minus1 = CopyExpGolomb(source, destination); + // bit_rate_scale and cbp_size_scale: u(4) each + CopyBits(8, source, destination); + for (size_t i = 0; source.Ok() && i <= cbp_cnt_minus1; ++i) { + // bit_rate_value_minus1 and cbp_size_value_minus1: ue(v) each + CopyExpGolomb(source, destination); + CopyExpGolomb(source, destination); + // cbr_flag: u(1) + CopyBits(1, source, destination); + } + // initial_cbp_removal_delay_length_minus1: u(5) + // cbp_removal_delay_length_minus1: u(5) + // dbp_output_delay_length_minus1: u(5) + // time_offset_length: u(5) + CopyBits(5 * 4, source, destination); +} + +// These functions are similar to webrtc::H264SpsParser::Parse, and based on the +// same version of the H.264 standard. You can find it here: +// http://www.itu.int/rec/T-REC-H.264 + +// Adds a bitstream restriction VUI segment. +bool AddBitstreamRestriction(rtc::BitBufferWriter* destination, + uint32_t max_num_ref_frames) { + // motion_vectors_over_pic_boundaries_flag: u(1) + // Default is 1 when not present. + RETURN_FALSE_ON_FAIL(destination->WriteBits(1, 1)); + // max_bytes_per_pic_denom: ue(v) + // Default is 2 when not present. + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(2)); + // max_bits_per_mb_denom: ue(v) + // Default is 1 when not present. + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(1)); + // log2_max_mv_length_horizontal: ue(v) + // log2_max_mv_length_vertical: ue(v) + // Both default to 16 when not present. + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(16)); + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(16)); + + // ********* IMPORTANT! 
********** + // max_num_reorder_frames: ue(v) + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(0)); + // max_dec_frame_buffering: ue(v) + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(max_num_ref_frames)); + return true; +} + +bool IsDefaultColorSpace(const ColorSpace& color_space) { + return color_space.range() != ColorSpace::RangeID::kFull && + color_space.primaries() == ColorSpace::PrimaryID::kUnspecified && + color_space.transfer() == ColorSpace::TransferID::kUnspecified && + color_space.matrix() == ColorSpace::MatrixID::kUnspecified; +} + +bool AddVideoSignalTypeInfo(rtc::BitBufferWriter& destination, + const ColorSpace& color_space) { + // video_format: u(3). + RETURN_FALSE_ON_FAIL(destination.WriteBits(5, 3)); // 5 = Unspecified + // video_full_range_flag: u(1) + RETURN_FALSE_ON_FAIL(destination.WriteBits( + color_space.range() == ColorSpace::RangeID::kFull ? 1 : 0, 1)); + // colour_description_present_flag: u(1) + RETURN_FALSE_ON_FAIL(destination.WriteBits(1, 1)); + // colour_primaries: u(8) + RETURN_FALSE_ON_FAIL( + destination.WriteUInt8(static_cast<uint8_t>(color_space.primaries()))); + // transfer_characteristics: u(8) + RETURN_FALSE_ON_FAIL( + destination.WriteUInt8(static_cast<uint8_t>(color_space.transfer()))); + // matrix_coefficients: u(8) + RETURN_FALSE_ON_FAIL( + destination.WriteUInt8(static_cast<uint8_t>(color_space.matrix()))); + return true; +} + +bool CopyOrRewriteVideoSignalTypeInfo( + BitstreamReader& source, + rtc::BitBufferWriter& destination, + const ColorSpace* color_space, + SpsVuiRewriter::ParseResult& out_vui_rewritten) { + // Read. 
+ uint32_t video_format = 5; // H264 default: unspecified + uint32_t video_full_range_flag = 0; // H264 default: limited + uint32_t colour_description_present_flag = 0; + uint8_t colour_primaries = 3; // H264 default: unspecified + uint8_t transfer_characteristics = 3; // H264 default: unspecified + uint8_t matrix_coefficients = 3; // H264 default: unspecified + uint32_t video_signal_type_present_flag = source.ReadBit(); + if (video_signal_type_present_flag) { + video_format = source.ReadBits(3); + video_full_range_flag = source.ReadBit(); + colour_description_present_flag = source.ReadBit(); + if (colour_description_present_flag) { + colour_primaries = source.Read<uint8_t>(); + transfer_characteristics = source.Read<uint8_t>(); + matrix_coefficients = source.Read<uint8_t>(); + } + } + RETURN_FALSE_ON_FAIL(source.Ok()); + + // Update. + uint32_t video_signal_type_present_flag_override = + video_signal_type_present_flag; + uint32_t video_format_override = video_format; + uint32_t video_full_range_flag_override = video_full_range_flag; + uint32_t colour_description_present_flag_override = + colour_description_present_flag; + uint8_t colour_primaries_override = colour_primaries; + uint8_t transfer_characteristics_override = transfer_characteristics; + uint8_t matrix_coefficients_override = matrix_coefficients; + if (color_space) { + if (IsDefaultColorSpace(*color_space)) { + video_signal_type_present_flag_override = 0; + } else { + video_signal_type_present_flag_override = 1; + video_format_override = 5; // unspecified + + if (color_space->range() == ColorSpace::RangeID::kFull) { + video_full_range_flag_override = 1; + } else { + // ColorSpace::RangeID::kInvalid and kDerived are treated as limited. 
+ video_full_range_flag_override = 0; + } + + colour_description_present_flag_override = + color_space->primaries() != ColorSpace::PrimaryID::kUnspecified || + color_space->transfer() != ColorSpace::TransferID::kUnspecified || + color_space->matrix() != ColorSpace::MatrixID::kUnspecified; + colour_primaries_override = + static_cast<uint8_t>(color_space->primaries()); + transfer_characteristics_override = + static_cast<uint8_t>(color_space->transfer()); + matrix_coefficients_override = + static_cast<uint8_t>(color_space->matrix()); + } + } + + // Write. + RETURN_FALSE_ON_FAIL( + destination.WriteBits(video_signal_type_present_flag_override, 1)); + if (video_signal_type_present_flag_override) { + RETURN_FALSE_ON_FAIL(destination.WriteBits(video_format_override, 3)); + RETURN_FALSE_ON_FAIL( + destination.WriteBits(video_full_range_flag_override, 1)); + RETURN_FALSE_ON_FAIL( + destination.WriteBits(colour_description_present_flag_override, 1)); + if (colour_description_present_flag_override) { + RETURN_FALSE_ON_FAIL(destination.WriteUInt8(colour_primaries_override)); + RETURN_FALSE_ON_FAIL( + destination.WriteUInt8(transfer_characteristics_override)); + RETURN_FALSE_ON_FAIL( + destination.WriteUInt8(matrix_coefficients_override)); + } + } + + if (video_signal_type_present_flag_override != + video_signal_type_present_flag || + video_format_override != video_format || + video_full_range_flag_override != video_full_range_flag || + colour_description_present_flag_override != + colour_description_present_flag || + colour_primaries_override != colour_primaries || + transfer_characteristics_override != transfer_characteristics || + matrix_coefficients_override != matrix_coefficients) { + out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiRewritten; + } + + return true; +} + +bool CopyRemainingBits(BitstreamReader& source, + rtc::BitBufferWriter& destination) { + // Try to get at least the destination aligned. 
+ if (source.RemainingBitCount() > 0 && source.RemainingBitCount() % 8 != 0) { + size_t misaligned_bits = source.RemainingBitCount() % 8; + CopyBits(misaligned_bits, source, destination); + } + while (source.RemainingBitCount() > 0) { + int count = std::min(32, source.RemainingBitCount()); + CopyBits(count, source, destination); + } + // TODO(noahric): The last byte could be all zeroes now, which we should just + // strip. + return source.Ok(); +} + +} // namespace + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h264/sps_vui_rewriter.h b/third_party/libwebrtc/common_video/h264/sps_vui_rewriter.h new file mode 100644 index 0000000000..ef80d5b60e --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/sps_vui_rewriter.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#ifndef COMMON_VIDEO_H264_SPS_VUI_REWRITER_H_ +#define COMMON_VIDEO_H264_SPS_VUI_REWRITER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "absl/types/optional.h" +#include "api/video/color_space.h" +#include "common_video/h264/sps_parser.h" +#include "rtc_base/buffer.h" + +namespace webrtc { + +// A class that can parse an SPS+VUI and if necessary creates a copy with +// updated parameters. +// The rewriter disables frame buffering. This should force decoders to deliver +// decoded frame immediately and, thus, reduce latency. +// The rewriter updates video signal type parameters if external parameters are +// provided. 
+class SpsVuiRewriter : private SpsParser { + public: + enum class ParseResult { kFailure, kVuiOk, kVuiRewritten }; + enum class Direction { kIncoming, kOutgoing }; + + // Parses an SPS block and if necessary copies it and rewrites the VUI. + // Returns kFailure on failure, kParseOk if parsing succeeded and no update + // was necessary and kParsedAndModified if an updated copy of buffer was + // written to destination. destination may be populated with some data even if + // no rewrite was necessary, but the end offset should remain unchanged. + // Unless parsing fails, the sps parameter will be populated with the parsed + // SPS state. This function assumes that any previous headers + // (NALU start, type, Stap-A, etc) have already been parsed and that RBSP + // decoding has been performed. + static ParseResult ParseAndRewriteSps( + const uint8_t* buffer, + size_t length, + absl::optional<SpsParser::SpsState>* sps, + const ColorSpace* color_space, + rtc::Buffer* destination, + Direction Direction); + + // Parses NAL units from `buffer`, strips AUD blocks and rewrites VUI in SPS + // blocks if necessary. + static rtc::Buffer ParseOutgoingBitstreamAndRewrite( + rtc::ArrayView<const uint8_t> buffer, + const ColorSpace* color_space); + + private: + static ParseResult ParseAndRewriteSps( + const uint8_t* buffer, + size_t length, + absl::optional<SpsParser::SpsState>* sps, + const ColorSpace* color_space, + rtc::Buffer* destination); + + static void UpdateStats(ParseResult result, Direction direction); +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_H264_SPS_VUI_REWRITER_H_ diff --git a/third_party/libwebrtc/common_video/h264/sps_vui_rewriter_unittest.cc b/third_party/libwebrtc/common_video/h264/sps_vui_rewriter_unittest.cc new file mode 100644 index 0000000000..2907949e6c --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/sps_vui_rewriter_unittest.cc @@ -0,0 +1,463 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h264/sps_vui_rewriter.h" + +#include <cstdint> +#include <vector> + +#include "api/video/color_space.h" +#include "common_video/h264/h264_common.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/buffer.h" +#include "rtc_base/logging.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { +enum SpsMode { + kNoRewriteRequired_VuiOptimal, + kRewriteRequired_NoVui, + kRewriteRequired_NoBitstreamRestriction, + kRewriteRequired_VuiSuboptimal, +}; + +static const size_t kSpsBufferMaxSize = 256; +static const size_t kWidth = 640; +static const size_t kHeight = 480; + +static const uint8_t kStartSequence[] = {0x00, 0x00, 0x00, 0x01}; +static const uint8_t kAud[] = {H264::NaluType::kAud, 0x09, 0x10}; +static const uint8_t kSpsNaluType[] = {H264::NaluType::kSps}; +static const uint8_t kIdr1[] = {H264::NaluType::kIdr, 0xFF, 0x00, 0x00, 0x04}; +static const uint8_t kIdr2[] = {H264::NaluType::kIdr, 0xFF, 0x00, 0x11}; + +struct VuiHeader { + uint32_t vui_parameters_present_flag; + uint32_t bitstream_restriction_flag; + uint32_t max_num_reorder_frames; + uint32_t max_dec_frame_buffering; + uint32_t video_signal_type_present_flag; + uint32_t video_full_range_flag; + uint32_t colour_description_present_flag; + uint8_t colour_primaries; + uint8_t transfer_characteristics; + uint8_t matrix_coefficients; +}; + +static const VuiHeader kVuiNotPresent = { + /* vui_parameters_present_flag= */ 0, + /* bitstream_restriction_flag= */ 0, + /* max_num_reorder_frames= */ 0, + /* max_dec_frame_buffering= */ 0, + /* video_signal_type_present_flag= */ 0, + /* video_full_range_flag= */ 0, + /* 
colour_description_present_flag= */ 0, + /* colour_primaries= */ 0, + /* transfer_characteristics= */ 0, + /* matrix_coefficients= */ 0}; + +static const VuiHeader kVuiNoBitstreamRestriction = { + /* vui_parameters_present_flag= */ 1, + /* bitstream_restriction_flag= */ 0, + /* max_num_reorder_frames= */ 0, + /* max_dec_frame_buffering= */ 0, + /* video_signal_type_present_flag= */ 0, + /* video_full_range_flag= */ 0, + /* colour_description_present_flag= */ 0, + /* colour_primaries= */ 0, + /* transfer_characteristics= */ 0, + /* matrix_coefficients= */ 0}; + +static const VuiHeader kVuiNoFrameBuffering = { + /* vui_parameters_present_flag= */ 1, + /* bitstream_restriction_flag= */ 1, + /* max_num_reorder_frames= */ 0, + /* max_dec_frame_buffering= */ 1, + /* video_signal_type_present_flag= */ 0, + /* video_full_range_flag= */ 0, + /* colour_description_present_flag= */ 0, + /* colour_primaries= */ 0, + /* transfer_characteristics= */ 0, + /* matrix_coefficients= */ 0}; + +static const VuiHeader kVuiFrameBuffering = { + /* vui_parameters_present_flag= */ 1, + /* bitstream_restriction_flag= */ 1, + /* max_num_reorder_frames= */ 3, + /* max_dec_frame_buffering= */ 3, + /* video_signal_type_present_flag= */ 0, + /* video_full_range_flag= */ 0, + /* colour_description_present_flag= */ 0, + /* colour_primaries= */ 0, + /* transfer_characteristics= */ 0, + /* matrix_coefficients= */ 0}; + +static const VuiHeader kVuiNoVideoSignalType = { + /* vui_parameters_present_flag= */ 1, + /* bitstream_restriction_flag= */ 1, + /* max_num_reorder_frames= */ 0, + /* max_dec_frame_buffering= */ 1, + /* video_signal_type_present_flag= */ 0, + /* video_full_range_flag= */ 0, + /* colour_description_present_flag= */ 0, + /* colour_primaries= */ 0, + /* transfer_characteristics= */ 0, + /* matrix_coefficients= */ 0}; + +static const VuiHeader kVuiLimitedRangeNoColourDescription = { + /* vui_parameters_present_flag= */ 1, + /* bitstream_restriction_flag= */ 1, + /* 
max_num_reorder_frames= */ 0, + /* max_dec_frame_buffering= */ 1, + /* video_signal_type_present_flag= */ 1, + /* video_full_range_flag= */ 0, + /* colour_description_present_flag= */ 0, + /* colour_primaries= */ 0, + /* transfer_characteristics= */ 0, + /* matrix_coefficients= */ 0}; + +static const VuiHeader kVuiFullRangeNoColourDescription = { + /* vui_parameters_present_flag= */ 1, + /* bitstream_restriction_flag= */ 1, + /* max_num_reorder_frames= */ 0, + /* max_dec_frame_buffering= */ 1, + /* video_signal_type_present_flag= */ 1, + /* video_full_range_flag= */ 1, + /* colour_description_present_flag= */ 0, + /* colour_primaries= */ 0, + /* transfer_characteristics= */ 0, + /* matrix_coefficients= */ 0}; + +static const VuiHeader kVuiLimitedRangeBt709Color = { + /* vui_parameters_present_flag= */ 1, + /* bitstream_restriction_flag= */ 1, + /* max_num_reorder_frames= */ 0, + /* max_dec_frame_buffering= */ 1, + /* video_signal_type_present_flag= */ 1, + /* video_full_range_flag= */ 0, + /* colour_description_present_flag= */ 1, + /* colour_primaries= */ 1, + /* transfer_characteristics= */ 1, + /* matrix_coefficients= */ 1}; + +static const webrtc::ColorSpace kColorSpaceH264Default( + ColorSpace::PrimaryID::kUnspecified, + ColorSpace::TransferID::kUnspecified, + ColorSpace::MatrixID::kUnspecified, + ColorSpace::RangeID::kLimited); + +static const webrtc::ColorSpace kColorSpacePrimariesBt709( + ColorSpace::PrimaryID::kBT709, + ColorSpace::TransferID::kUnspecified, + ColorSpace::MatrixID::kUnspecified, + ColorSpace::RangeID::kLimited); + +static const webrtc::ColorSpace kColorSpaceTransferBt709( + ColorSpace::PrimaryID::kUnspecified, + ColorSpace::TransferID::kBT709, + ColorSpace::MatrixID::kUnspecified, + ColorSpace::RangeID::kLimited); + +static const webrtc::ColorSpace kColorSpaceMatrixBt709( + ColorSpace::PrimaryID::kUnspecified, + ColorSpace::TransferID::kUnspecified, + ColorSpace::MatrixID::kBT709, + ColorSpace::RangeID::kLimited); + +static const 
webrtc::ColorSpace kColorSpaceFullRange( + ColorSpace::PrimaryID::kBT709, + ColorSpace::TransferID::kUnspecified, + ColorSpace::MatrixID::kUnspecified, + ColorSpace::RangeID::kFull); + +static const webrtc::ColorSpace kColorSpaceBt709LimitedRange( + ColorSpace::PrimaryID::kBT709, + ColorSpace::TransferID::kBT709, + ColorSpace::MatrixID::kBT709, + ColorSpace::RangeID::kLimited); +} // namespace + +// Generates a fake SPS with basically everything empty and with characteristics +// based off SpsMode. +// Pass in a buffer of at least kSpsBufferMaxSize. +// The fake SPS that this generates also always has at least one emulation byte +// at offset 2, since the first two bytes are always 0, and has a 0x3 as the +// level_idc, to make sure the parser doesn't eat all 0x3 bytes. +void GenerateFakeSps(const VuiHeader& vui, rtc::Buffer* out_buffer) { + uint8_t rbsp[kSpsBufferMaxSize] = {0}; + rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize); + // Profile byte. + writer.WriteUInt8(0); + // Constraint sets and reserved zero bits. + writer.WriteUInt8(0); + // level_idc. + writer.WriteUInt8(3); + // seq_paramter_set_id. + writer.WriteExponentialGolomb(0); + // Profile is not special, so we skip all the chroma format settings. + + // Now some bit magic. + // log2_max_frame_num_minus4: ue(v). 0 is fine. + writer.WriteExponentialGolomb(0); + // pic_order_cnt_type: ue(v). + writer.WriteExponentialGolomb(0); + // log2_max_pic_order_cnt_lsb_minus4: ue(v). 0 is fine. + writer.WriteExponentialGolomb(0); + + // max_num_ref_frames: ue(v). Use 1, to make optimal/suboptimal more obvious. + writer.WriteExponentialGolomb(1); + // gaps_in_frame_num_value_allowed_flag: u(1). + writer.WriteBits(0, 1); + // Next are width/height. First, calculate the mbs/map_units versions. + uint16_t width_in_mbs_minus1 = (kWidth + 15) / 16 - 1; + + // For the height, we're going to define frame_mbs_only_flag, so we need to + // divide by 2. See the parser for the full calculation. 
+ uint16_t height_in_map_units_minus1 = ((kHeight + 15) / 16 - 1) / 2; + // Write each as ue(v). + writer.WriteExponentialGolomb(width_in_mbs_minus1); + writer.WriteExponentialGolomb(height_in_map_units_minus1); + // frame_mbs_only_flag: u(1). Needs to be false. + writer.WriteBits(0, 1); + // mb_adaptive_frame_field_flag: u(1). + writer.WriteBits(0, 1); + // direct_8x8_inferene_flag: u(1). + writer.WriteBits(0, 1); + // frame_cropping_flag: u(1). 1, so we can supply crop. + writer.WriteBits(1, 1); + // Now we write the left/right/top/bottom crop. For simplicity, we'll put all + // the crop at the left/top. + // We picked a 4:2:0 format, so the crops are 1/2 the pixel crop values. + // Left/right. + writer.WriteExponentialGolomb(((16 - (kWidth % 16)) % 16) / 2); + writer.WriteExponentialGolomb(0); + // Top/bottom. + writer.WriteExponentialGolomb(((16 - (kHeight % 16)) % 16) / 2); + writer.WriteExponentialGolomb(0); + + // Finally! The VUI. + // vui_parameters_present_flag: u(1) + writer.WriteBits(vui.vui_parameters_present_flag, 1); + if (vui.vui_parameters_present_flag) { + // aspect_ratio_info_present_flag, overscan_info_present_flag. Both u(1). + writer.WriteBits(0, 2); + + writer.WriteBits(vui.video_signal_type_present_flag, 1); + if (vui.video_signal_type_present_flag) { + // video_format: u(3). 5 = Unspecified + writer.WriteBits(5, 3); + writer.WriteBits(vui.video_full_range_flag, 1); + writer.WriteBits(vui.colour_description_present_flag, 1); + if (vui.colour_description_present_flag) { + writer.WriteUInt8(vui.colour_primaries); + writer.WriteUInt8(vui.transfer_characteristics); + writer.WriteUInt8(vui.matrix_coefficients); + } + } + + // chroma_loc_info_present_flag, timing_info_present_flag, + // nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag, + // pic_struct_present_flag, All u(1) + writer.WriteBits(0, 5); + + writer.WriteBits(vui.bitstream_restriction_flag, 1); + if (vui.bitstream_restriction_flag) { + // Write some defaults. 
Shouldn't matter for parsing, though. + // motion_vectors_over_pic_boundaries_flag: u(1) + writer.WriteBits(1, 1); + // max_bytes_per_pic_denom: ue(v) + writer.WriteExponentialGolomb(2); + // max_bits_per_mb_denom: ue(v) + writer.WriteExponentialGolomb(1); + // log2_max_mv_length_horizontal: ue(v) + // log2_max_mv_length_vertical: ue(v) + writer.WriteExponentialGolomb(16); + writer.WriteExponentialGolomb(16); + + // Next are the limits we care about. + writer.WriteExponentialGolomb(vui.max_num_reorder_frames); + writer.WriteExponentialGolomb(vui.max_dec_frame_buffering); + } + } + + // Get the number of bytes written (including the last partial byte). + size_t byte_count, bit_offset; + writer.GetCurrentOffset(&byte_count, &bit_offset); + if (bit_offset > 0) { + byte_count++; + } + + H264::WriteRbsp(rbsp, byte_count, out_buffer); +} + +void TestSps(const VuiHeader& vui, + const ColorSpace* color_space, + SpsVuiRewriter::ParseResult expected_parse_result) { + rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); + rtc::Buffer original_sps; + GenerateFakeSps(vui, &original_sps); + + absl::optional<SpsParser::SpsState> sps; + rtc::Buffer rewritten_sps; + SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( + original_sps.data(), original_sps.size(), &sps, color_space, + &rewritten_sps, SpsVuiRewriter::Direction::kIncoming); + EXPECT_EQ(expected_parse_result, result); + ASSERT_TRUE(sps); + EXPECT_EQ(sps->width, kWidth); + EXPECT_EQ(sps->height, kHeight); + if (vui.vui_parameters_present_flag) { + EXPECT_EQ(sps->vui_params_present, 1u); + } + + if (result == SpsVuiRewriter::ParseResult::kVuiRewritten) { + // Ensure that added/rewritten SPS is parsable. 
+ rtc::Buffer tmp; + result = SpsVuiRewriter::ParseAndRewriteSps( + rewritten_sps.data(), rewritten_sps.size(), &sps, nullptr, &tmp, + SpsVuiRewriter::Direction::kIncoming); + EXPECT_EQ(SpsVuiRewriter::ParseResult::kVuiOk, result); + ASSERT_TRUE(sps); + EXPECT_EQ(sps->width, kWidth); + EXPECT_EQ(sps->height, kHeight); + EXPECT_EQ(sps->vui_params_present, 1u); + } +} + +class SpsVuiRewriterTest : public ::testing::Test, + public ::testing::WithParamInterface< + ::testing::tuple<VuiHeader, + const ColorSpace*, + SpsVuiRewriter::ParseResult>> { +}; + +TEST_P(SpsVuiRewriterTest, RewriteVui) { + VuiHeader vui = ::testing::get<0>(GetParam()); + const ColorSpace* color_space = ::testing::get<1>(GetParam()); + SpsVuiRewriter::ParseResult expected_parse_result = + ::testing::get<2>(GetParam()); + TestSps(vui, color_space, expected_parse_result); +} + +INSTANTIATE_TEST_SUITE_P( + All, + SpsVuiRewriterTest, + ::testing::Values( + std::make_tuple(kVuiNoFrameBuffering, + nullptr, + SpsVuiRewriter::ParseResult::kVuiOk), + std::make_tuple(kVuiNoVideoSignalType, + &kColorSpaceH264Default, + SpsVuiRewriter::ParseResult::kVuiOk), + std::make_tuple(kVuiLimitedRangeBt709Color, + &kColorSpaceBt709LimitedRange, + SpsVuiRewriter::ParseResult::kVuiOk), + std::make_tuple(kVuiNotPresent, + nullptr, + SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiNoBitstreamRestriction, + nullptr, + SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiFrameBuffering, + nullptr, + SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiLimitedRangeNoColourDescription, + &kColorSpaceFullRange, + SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiNoVideoSignalType, + &kColorSpacePrimariesBt709, + SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiNoVideoSignalType, + &kColorSpaceTransferBt709, + SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiNoVideoSignalType, + &kColorSpaceMatrixBt709, + 
SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiFullRangeNoColourDescription, + &kColorSpaceH264Default, + SpsVuiRewriter::ParseResult::kVuiRewritten), + std::make_tuple(kVuiLimitedRangeBt709Color, + &kColorSpaceH264Default, + SpsVuiRewriter::ParseResult::kVuiRewritten))); + +TEST(SpsVuiRewriterOutgoingVuiTest, ParseOutgoingBitstreamOptimalVui) { + rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); + + rtc::Buffer optimal_sps; + GenerateFakeSps(kVuiNoFrameBuffering, &optimal_sps); + + rtc::Buffer buffer; + buffer.AppendData(kStartSequence); + buffer.AppendData(optimal_sps); + buffer.AppendData(kStartSequence); + buffer.AppendData(kIdr1); + + EXPECT_THAT(SpsVuiRewriter::ParseOutgoingBitstreamAndRewrite(buffer, nullptr), + ::testing::ElementsAreArray(buffer)); +} + +TEST(SpsVuiRewriterOutgoingVuiTest, ParseOutgoingBitstreamNoVui) { + rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); + + rtc::Buffer sps; + GenerateFakeSps(kVuiNotPresent, &sps); + + rtc::Buffer buffer; + buffer.AppendData(kStartSequence); + buffer.AppendData(kIdr1); + buffer.AppendData(kStartSequence); + buffer.AppendData(kSpsNaluType); + buffer.AppendData(sps); + buffer.AppendData(kStartSequence); + buffer.AppendData(kIdr2); + + rtc::Buffer optimal_sps; + GenerateFakeSps(kVuiNoFrameBuffering, &optimal_sps); + + rtc::Buffer expected_buffer; + expected_buffer.AppendData(kStartSequence); + expected_buffer.AppendData(kIdr1); + expected_buffer.AppendData(kStartSequence); + expected_buffer.AppendData(kSpsNaluType); + expected_buffer.AppendData(optimal_sps); + expected_buffer.AppendData(kStartSequence); + expected_buffer.AppendData(kIdr2); + + EXPECT_THAT(SpsVuiRewriter::ParseOutgoingBitstreamAndRewrite(buffer, nullptr), + ::testing::ElementsAreArray(expected_buffer)); +} + +TEST(SpsVuiRewriterOutgoingAudTest, ParseOutgoingBitstreamWithAud) { + rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); + + rtc::Buffer optimal_sps; + GenerateFakeSps(kVuiNoFrameBuffering, &optimal_sps); + + rtc::Buffer buffer; + 
buffer.AppendData(kStartSequence); + buffer.AppendData(kAud); + buffer.AppendData(kStartSequence); + buffer.AppendData(optimal_sps); + buffer.AppendData(kStartSequence); + buffer.AppendData(kIdr1); + + rtc::Buffer expected_buffer; + expected_buffer.AppendData(kStartSequence); + expected_buffer.AppendData(optimal_sps); + expected_buffer.AppendData(kStartSequence); + expected_buffer.AppendData(kIdr1); + + EXPECT_THAT(SpsVuiRewriter::ParseOutgoingBitstreamAndRewrite(buffer, nullptr), + ::testing::ElementsAreArray(expected_buffer)); +} +} // namespace webrtc |