diff options
Diffstat (limited to 'third_party/libwebrtc/common_video/h264/sps_parser.cc')
-rw-r--r-- | third_party/libwebrtc/common_video/h264/sps_parser.cc | 227 |
1 files changed, 227 insertions, 0 deletions
diff --git a/third_party/libwebrtc/common_video/h264/sps_parser.cc b/third_party/libwebrtc/common_video/h264/sps_parser.cc new file mode 100644 index 0000000000..cfb0f24ff2 --- /dev/null +++ b/third_party/libwebrtc/common_video/h264/sps_parser.cc @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h264/sps_parser.h" + +#include <cstdint> +#include <vector> + +#include "common_video/h264/h264_common.h" +#include "rtc_base/bitstream_reader.h" + +namespace { +constexpr int kScalingDeltaMin = -128; +constexpr int kScaldingDeltaMax = 127; +} // namespace + +namespace webrtc { + +SpsParser::SpsState::SpsState() = default; +SpsParser::SpsState::SpsState(const SpsState&) = default; +SpsParser::SpsState::~SpsState() = default; + +// General note: this is based off the 02/2014 version of the H.264 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.264 + +// Unpack RBSP and parse SPS state from the supplied buffer. +absl::optional<SpsParser::SpsState> SpsParser::ParseSps(const uint8_t* data, + size_t length) { + std::vector<uint8_t> unpacked_buffer = H264::ParseRbsp(data, length); + BitstreamReader reader(unpacked_buffer); + return ParseSpsUpToVui(reader); +} + +absl::optional<SpsParser::SpsState> SpsParser::ParseSpsUpToVui( + BitstreamReader& reader) { + // Now, we need to use a bitstream reader to parse through the actual AVC SPS + // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the + // H.264 standard for a complete description. + // Since we only care about resolution, we ignore the majority of fields, but + // we still have to actively parse through a lot of the data, since many of + // the fields have variable size. + // We're particularly interested in: + // chroma_format_idc -> affects crop units + // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16). + // frame_crop_*_offset -> crop information + + SpsState sps; + + // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is + // 0. It defaults to 1, when not specified. + uint32_t chroma_format_idc = 1; + + // profile_idc: u(8). We need it to determine if we need to read/skip chroma + // formats. + uint8_t profile_idc = reader.Read<uint8_t>(); + // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits + // 1 bit each for the flags + 2 bits + 8 bits for level_idc = 16 bits. + reader.ConsumeBits(16); + // seq_parameter_set_id: ue(v) + sps.id = reader.ReadExponentialGolomb(); + sps.separate_colour_plane_flag = 0; + // See if profile_idc has chroma format information. + if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || + profile_idc == 244 || profile_idc == 44 || profile_idc == 83 || + profile_idc == 86 || profile_idc == 118 || profile_idc == 128 || + profile_idc == 138 || profile_idc == 139 || profile_idc == 134) { + // chroma_format_idc: ue(v) + chroma_format_idc = reader.ReadExponentialGolomb(); + if (chroma_format_idc == 3) { + // separate_colour_plane_flag: u(1) + sps.separate_colour_plane_flag = reader.ReadBit(); + } + // bit_depth_luma_minus8: ue(v) + reader.ReadExponentialGolomb(); + // bit_depth_chroma_minus8: ue(v) + reader.ReadExponentialGolomb(); + // qpprime_y_zero_transform_bypass_flag: u(1) + reader.ConsumeBits(1); + // seq_scaling_matrix_present_flag: u(1) + if (reader.Read<bool>()) { + // Process the scaling lists just enough to be able to properly + // skip over them, so we can still read the resolution on streams + // where this is included. + int scaling_list_count = (chroma_format_idc == 3 ? 12 : 8); + for (int i = 0; i < scaling_list_count; ++i) { + // seq_scaling_list_present_flag[i] : u(1) + if (reader.Read<bool>()) { + int last_scale = 8; + int next_scale = 8; + int size_of_scaling_list = i < 6 ? 16 : 64; + for (int j = 0; j < size_of_scaling_list; j++) { + if (next_scale != 0) { + // delta_scale: se(v) + int delta_scale = reader.ReadSignedExponentialGolomb(); + if (!reader.Ok() || delta_scale < kScalingDeltaMin || + delta_scale > kScaldingDeltaMax) { + return absl::nullopt; + } + next_scale = (last_scale + delta_scale + 256) % 256; + } + if (next_scale != 0) + last_scale = next_scale; + } + } + } + } + } + // log2_max_frame_num and log2_max_pic_order_cnt_lsb are used with + // BitstreamReader::ReadBits, which can read at most 64 bits at a time. We + // also have to avoid overflow when adding 4 to the on-wire golomb value, + // e.g., for evil input data, ReadExponentialGolomb might return 0xfffc. + const uint32_t kMaxLog2Minus4 = 32 - 4; + + // log2_max_frame_num_minus4: ue(v) + uint32_t log2_max_frame_num_minus4 = reader.ReadExponentialGolomb(); + if (!reader.Ok() || log2_max_frame_num_minus4 > kMaxLog2Minus4) { + return absl::nullopt; + } + sps.log2_max_frame_num = log2_max_frame_num_minus4 + 4; + + // pic_order_cnt_type: ue(v) + sps.pic_order_cnt_type = reader.ReadExponentialGolomb(); + if (sps.pic_order_cnt_type == 0) { + // log2_max_pic_order_cnt_lsb_minus4: ue(v) + uint32_t log2_max_pic_order_cnt_lsb_minus4 = reader.ReadExponentialGolomb(); + if (!reader.Ok() || log2_max_pic_order_cnt_lsb_minus4 > kMaxLog2Minus4) { + return absl::nullopt; + } + sps.log2_max_pic_order_cnt_lsb = log2_max_pic_order_cnt_lsb_minus4 + 4; + } else if (sps.pic_order_cnt_type == 1) { + // delta_pic_order_always_zero_flag: u(1) + sps.delta_pic_order_always_zero_flag = reader.ReadBit(); + // offset_for_non_ref_pic: se(v) + reader.ReadExponentialGolomb(); + // offset_for_top_to_bottom_field: se(v) + reader.ReadExponentialGolomb(); + // num_ref_frames_in_pic_order_cnt_cycle: ue(v) + uint32_t num_ref_frames_in_pic_order_cnt_cycle = + reader.ReadExponentialGolomb(); + for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) { + // offset_for_ref_frame[i]: se(v) + reader.ReadExponentialGolomb(); + if (!reader.Ok()) { + return absl::nullopt; + } + } + } + // max_num_ref_frames: ue(v) + sps.max_num_ref_frames = reader.ReadExponentialGolomb(); + // gaps_in_frame_num_value_allowed_flag: u(1) + reader.ConsumeBits(1); + // + // IMPORTANT ONES! Now we're getting to resolution. First we read the pic + // width/height in macroblocks (16x16), which gives us the base resolution, + // and then we continue on until we hit the frame crop offsets, which are used + // to signify resolutions that aren't multiples of 16. + // + // pic_width_in_mbs_minus1: ue(v) + sps.width = 16 * (reader.ReadExponentialGolomb() + 1); + // pic_height_in_map_units_minus1: ue(v) + uint32_t pic_height_in_map_units_minus1 = reader.ReadExponentialGolomb(); + // frame_mbs_only_flag: u(1) + sps.frame_mbs_only_flag = reader.ReadBit(); + if (!sps.frame_mbs_only_flag) { + // mb_adaptive_frame_field_flag: u(1) + reader.ConsumeBits(1); + } + sps.height = + 16 * (2 - sps.frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1); + // direct_8x8_inference_flag: u(1) + reader.ConsumeBits(1); + // + // MORE IMPORTANT ONES! Now we're at the frame crop information. + // + uint32_t frame_crop_left_offset = 0; + uint32_t frame_crop_right_offset = 0; + uint32_t frame_crop_top_offset = 0; + uint32_t frame_crop_bottom_offset = 0; + // frame_cropping_flag: u(1) + if (reader.Read<bool>()) { + // frame_crop_{left, right, top, bottom}_offset: ue(v) + frame_crop_left_offset = reader.ReadExponentialGolomb(); + frame_crop_right_offset = reader.ReadExponentialGolomb(); + frame_crop_top_offset = reader.ReadExponentialGolomb(); + frame_crop_bottom_offset = reader.ReadExponentialGolomb(); + } + // vui_parameters_present_flag: u(1) + sps.vui_params_present = reader.ReadBit(); + + // Far enough! We don't use the rest of the SPS. + if (!reader.Ok()) { + return absl::nullopt; + } + + // Figure out the crop units in pixels. That's based on the chroma format's + // sampling, which is indicated by chroma_format_idc. + if (sps.separate_colour_plane_flag || chroma_format_idc == 0) { + frame_crop_bottom_offset *= (2 - sps.frame_mbs_only_flag); + frame_crop_top_offset *= (2 - sps.frame_mbs_only_flag); + } else if (!sps.separate_colour_plane_flag && chroma_format_idc > 0) { + // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2). + if (chroma_format_idc == 1 || chroma_format_idc == 2) { + frame_crop_left_offset *= 2; + frame_crop_right_offset *= 2; + } + // Height multipliers for format 1 (4:2:0). + if (chroma_format_idc == 1) { + frame_crop_top_offset *= 2; + frame_crop_bottom_offset *= 2; + } + } + // Subtract the crop for each dimension. + sps.width -= (frame_crop_left_offset + frame_crop_right_offset); + sps.height -= (frame_crop_top_offset + frame_crop_bottom_offset); + + return sps; +} + +} // namespace webrtc |