/*
 *  Copyright (c) 2023 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "common_video/h265/h265_bitstream_parser.h"

#include <stdlib.h>

#include <cmath>
#include <cstdint>
#include <limits>
#include <vector>

#include "common_video/h265/h265_common.h"
#include "rtc_base/bit_buffer.h"
#include "rtc_base/bitstream_reader.h"
#include "rtc_base/logging.h"

// The macros below validate a just-parsed syntax element. They all expect a
// BitstreamReader named `slice_reader` to be in scope at the expansion site:
// the check fails if the reader is no longer Ok() or if `val` lies outside the
// inclusive range [min, max]. On failure a warning is logged and the enclosing
// function returns; the variants differ only in what is returned.

// Returns kInvalidStream on failure.
#define IN_RANGE_OR_RETURN(val, min, max)                                     \
  do {                                                                        \
    if (!slice_reader.Ok() || (val) < (min) || (val) > (max)) {               \
      RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \
                             " to be"                                         \
                          << " in range [" << (min) << ":" << (max) << "]"    \
                          << " found " << (val) << " instead";                \
      return kInvalidStream;                                                  \
    }                                                                         \
  } while (0)

// Returns absl::nullopt on failure.
#define IN_RANGE_OR_RETURN_NULL(val, min, max)                                \
  do {                                                                        \
    if (!slice_reader.Ok() || (val) < (min) || (val) > (max)) {               \
      RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \
                             " to be"                                         \
                          << " in range [" << (min) << ":" << (max) << "]"    \
                          << " found " << (val) << " instead";                \
      return absl::nullopt;                                                   \
    }                                                                         \
  } while (0)

// Returns (void) on failure.
#define IN_RANGE_OR_RETURN_VOID(val, min, max)                                \
  do {                                                                        \
    if (!slice_reader.Ok() || (val) < (min) || (val) > (max)) {               \
      RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \
                             " to be"                                         \
                          << " in range [" << (min) << ":" << (max) << "]"    \
                          << " found " << (val) << " instead";                \
      return;                                                                 \
    }                                                                         \
  } while (0)

// Returns kInvalidStream if the reader is no longer Ok() or `a` is false.
#define TRUE_OR_RETURN(a)                                                \
  do {                                                                   \
    if (!slice_reader.Ok() || !(a)) {                                    \
      RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " \
                          << #a;                                         \
      return kInvalidStream;                                             \
    }                                                                    \
  } while (0)

namespace {

constexpr int kMaxAbsQpDeltaValue = 51;
constexpr int kMinQpValue = 0;
constexpr int kMaxQpValue = 51;
constexpr int kMaxRefIdxActive = 15;

}  // namespace

namespace webrtc {
H265BitstreamParser::H265BitstreamParser() = default; H265BitstreamParser::~H265BitstreamParser() = default; // General note: this is based off the 08/2021 version of the H.265 standard, // section 7.3.6.1. You can find it on this page: // http://www.itu.int/rec/T-REC-H.265 H265BitstreamParser::Result H265BitstreamParser::ParseNonParameterSetNalu( const uint8_t* source, size_t source_length, uint8_t nalu_type) { last_slice_qp_delta_ = absl::nullopt; last_slice_pps_id_ = absl::nullopt; const std::vector slice_rbsp = H265::ParseRbsp(source, source_length); if (slice_rbsp.size() < H265::kNaluHeaderSize) return kInvalidStream; BitstreamReader slice_reader(slice_rbsp); slice_reader.ConsumeBits(H265::kNaluHeaderSize * 8); // first_slice_segment_in_pic_flag: u(1) bool first_slice_segment_in_pic_flag = slice_reader.Read(); bool irap_pic = (H265::NaluType::kBlaWLp <= nalu_type && nalu_type <= H265::NaluType::kRsvIrapVcl23); if (irap_pic) { // no_output_of_prior_pics_flag: u(1) slice_reader.ConsumeBits(1); } // slice_pic_parameter_set_id: ue(v) uint32_t pps_id = slice_reader.ReadExponentialGolomb(); IN_RANGE_OR_RETURN(pps_id, 0, 63); const H265PpsParser::PpsState* pps = GetPPS(pps_id); TRUE_OR_RETURN(pps); const H265SpsParser::SpsState* sps = GetSPS(pps->sps_id); TRUE_OR_RETURN(sps); bool dependent_slice_segment_flag = 0; if (!first_slice_segment_in_pic_flag) { if (pps->dependent_slice_segments_enabled_flag) { // dependent_slice_segment_flag: u(1) dependent_slice_segment_flag = slice_reader.Read(); } // slice_segment_address: u(v) int32_t log2_ctb_size_y = sps->log2_min_luma_coding_block_size_minus3 + 3 + sps->log2_diff_max_min_luma_coding_block_size; uint32_t ctb_size_y = 1 << log2_ctb_size_y; uint32_t pic_width_in_ctbs_y = sps->pic_width_in_luma_samples / ctb_size_y; if (sps->pic_width_in_luma_samples % ctb_size_y) pic_width_in_ctbs_y++; uint32_t pic_height_in_ctbs_y = sps->pic_height_in_luma_samples / ctb_size_y; if (sps->pic_height_in_luma_samples % ctb_size_y) 
pic_height_in_ctbs_y++; uint32_t slice_segment_address_bits = H265::Log2Ceiling(pic_height_in_ctbs_y * pic_width_in_ctbs_y); TRUE_OR_RETURN(slice_segment_address_bits != std::numeric_limits::max()); slice_reader.ConsumeBits(slice_segment_address_bits); } if (dependent_slice_segment_flag == 0) { for (uint32_t i = 0; i < pps->num_extra_slice_header_bits; i++) { // slice_reserved_flag: u(1) slice_reader.ConsumeBits(1); } // slice_type: ue(v) uint32_t slice_type = slice_reader.ReadExponentialGolomb(); IN_RANGE_OR_RETURN(slice_type, 0, 2); if (pps->output_flag_present_flag) { // pic_output_flag: u(1) slice_reader.ConsumeBits(1); } if (sps->separate_colour_plane_flag) { // colour_plane_id: u(2) slice_reader.ConsumeBits(2); } uint32_t num_long_term_sps = 0; uint32_t num_long_term_pics = 0; std::vector used_by_curr_pic_lt_flag; bool short_term_ref_pic_set_sps_flag = false; uint32_t short_term_ref_pic_set_idx = 0; H265SpsParser::ShortTermRefPicSet short_term_ref_pic_set; bool slice_temporal_mvp_enabled_flag = 0; if (nalu_type != H265::NaluType::kIdrWRadl && nalu_type != H265::NaluType::kIdrNLp) { // slice_pic_order_cnt_lsb: u(v) uint32_t slice_pic_order_cnt_lsb_bits = sps->log2_max_pic_order_cnt_lsb_minus4 + 4; slice_reader.ConsumeBits(slice_pic_order_cnt_lsb_bits); // short_term_ref_pic_set_sps_flag: u(1) short_term_ref_pic_set_sps_flag = slice_reader.Read(); if (!short_term_ref_pic_set_sps_flag) { absl::optional ref_pic_set = H265SpsParser::ParseShortTermRefPicSet( sps->num_short_term_ref_pic_sets, sps->num_short_term_ref_pic_sets, sps->short_term_ref_pic_set, sps->sps_max_dec_pic_buffering_minus1 [sps->sps_max_sub_layers_minus1], slice_reader); TRUE_OR_RETURN(ref_pic_set); short_term_ref_pic_set = *ref_pic_set; } else if (sps->num_short_term_ref_pic_sets > 1) { // short_term_ref_pic_set_idx: u(v) uint32_t short_term_ref_pic_set_idx_bits = H265::Log2Ceiling(sps->num_short_term_ref_pic_sets); if ((1 << short_term_ref_pic_set_idx_bits) < sps->num_short_term_ref_pic_sets) { 
short_term_ref_pic_set_idx_bits++; } if (short_term_ref_pic_set_idx_bits > 0) { short_term_ref_pic_set_idx = slice_reader.ReadBits(short_term_ref_pic_set_idx_bits); IN_RANGE_OR_RETURN(short_term_ref_pic_set_idx, 0, sps->num_short_term_ref_pic_sets - 1); } } if (sps->long_term_ref_pics_present_flag) { if (sps->num_long_term_ref_pics_sps > 0) { // num_long_term_sps: ue(v) num_long_term_sps = slice_reader.ReadExponentialGolomb(); IN_RANGE_OR_RETURN(num_long_term_sps, 0, sps->num_long_term_ref_pics_sps); } // num_long_term_pics: ue(v) num_long_term_pics = slice_reader.ReadExponentialGolomb(); IN_RANGE_OR_RETURN(num_long_term_pics, 0, kMaxLongTermRefPicSets - num_long_term_sps); used_by_curr_pic_lt_flag.resize(num_long_term_sps + num_long_term_pics, 0); for (uint32_t i = 0; i < num_long_term_sps + num_long_term_pics; i++) { if (i < num_long_term_sps) { uint32_t lt_idx_sps = 0; if (sps->num_long_term_ref_pics_sps > 1) { // lt_idx_sps: u(v) uint32_t lt_idx_sps_bits = H265::Log2Ceiling(sps->num_long_term_ref_pics_sps); lt_idx_sps = slice_reader.ReadBits(lt_idx_sps_bits); IN_RANGE_OR_RETURN(lt_idx_sps, 0, sps->num_long_term_ref_pics_sps - 1); } used_by_curr_pic_lt_flag[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps]; } else { // poc_lsb_lt: u(v) uint32_t poc_lsb_lt_bits = sps->log2_max_pic_order_cnt_lsb_minus4 + 4; slice_reader.ConsumeBits(poc_lsb_lt_bits); // used_by_curr_pic_lt_flag: u(1) used_by_curr_pic_lt_flag[i] = slice_reader.Read(); } // delta_poc_msb_present_flag: u(1) bool delta_poc_msb_present_flag = slice_reader.Read(); if (delta_poc_msb_present_flag) { // delta_poc_msb_cycle_lt: ue(v) int delta_poc_msb_cycle_lt = slice_reader.ReadExponentialGolomb(); IN_RANGE_OR_RETURN( delta_poc_msb_cycle_lt, 0, std::pow(2, 32 - sps->log2_max_pic_order_cnt_lsb_minus4 - 4)); } } } if (sps->sps_temporal_mvp_enabled_flag) { // slice_temporal_mvp_enabled_flag: u(1) slice_temporal_mvp_enabled_flag = slice_reader.Read(); } } if (sps->sample_adaptive_offset_enabled_flag) { // 
slice_sao_luma_flag: u(1) slice_reader.ConsumeBits(1); uint32_t chroma_array_type = sps->separate_colour_plane_flag == 0 ? sps->chroma_format_idc : 0; if (chroma_array_type != 0) { // slice_sao_chroma_flag: u(1) slice_reader.ConsumeBits(1); } } if (slice_type == H265::SliceType::kP || slice_type == H265::SliceType::kB) { // num_ref_idx_active_override_flag: u(1) bool num_ref_idx_active_override_flag = slice_reader.Read(); uint32_t num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1; uint32_t num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1; if (num_ref_idx_active_override_flag) { // num_ref_idx_l0_active_minus1: ue(v) num_ref_idx_l0_active_minus1 = slice_reader.ReadExponentialGolomb(); IN_RANGE_OR_RETURN(num_ref_idx_l0_active_minus1, 0, kMaxRefIdxActive - 1); if (slice_type == H265::SliceType::kB) { // num_ref_idx_l1_active_minus1: ue(v) num_ref_idx_l1_active_minus1 = slice_reader.ReadExponentialGolomb(); IN_RANGE_OR_RETURN(num_ref_idx_l1_active_minus1, 0, kMaxRefIdxActive - 1); } } uint32_t num_pic_total_curr = 0; uint32_t curr_sps_idx = 0; if (short_term_ref_pic_set_sps_flag) { curr_sps_idx = short_term_ref_pic_set_idx; } else { curr_sps_idx = sps->num_short_term_ref_pic_sets; } if (sps->short_term_ref_pic_set.size() <= curr_sps_idx) { TRUE_OR_RETURN(!(curr_sps_idx != 0 || short_term_ref_pic_set_sps_flag)); } const H265SpsParser::ShortTermRefPicSet* ref_pic_set; if (curr_sps_idx < sps->short_term_ref_pic_set.size()) { ref_pic_set = &(sps->short_term_ref_pic_set[curr_sps_idx]); } else { ref_pic_set = &short_term_ref_pic_set; } // Equation 7-57 IN_RANGE_OR_RETURN(ref_pic_set->num_negative_pics, 0, kMaxShortTermRefPicSets); IN_RANGE_OR_RETURN(ref_pic_set->num_positive_pics, 0, kMaxShortTermRefPicSets); for (uint32_t i = 0; i < ref_pic_set->num_negative_pics; i++) { if (ref_pic_set->used_by_curr_pic_s0[i]) { num_pic_total_curr++; } } for (uint32_t i = 0; i < ref_pic_set->num_positive_pics; i++) { if 
(ref_pic_set->used_by_curr_pic_s1[i]) { num_pic_total_curr++; } } for (uint32_t i = 0; i < num_long_term_sps + num_long_term_pics; i++) { if (used_by_curr_pic_lt_flag[i]) { num_pic_total_curr++; } } if (pps->lists_modification_present_flag && num_pic_total_curr > 1) { // ref_pic_lists_modification() uint32_t list_entry_bits = H265::Log2Ceiling(num_pic_total_curr); if ((1 << list_entry_bits) < num_pic_total_curr) { list_entry_bits++; } // ref_pic_list_modification_flag_l0: u(1) bool ref_pic_list_modification_flag_l0 = slice_reader.Read(); if (ref_pic_list_modification_flag_l0) { for (uint32_t i = 0; i < num_ref_idx_l0_active_minus1; i++) { // list_entry_l0: u(v) slice_reader.ConsumeBits(list_entry_bits); } } if (slice_type == H265::SliceType::kB) { // ref_pic_list_modification_flag_l1: u(1) bool ref_pic_list_modification_flag_l1 = slice_reader.Read(); if (ref_pic_list_modification_flag_l1) { for (uint32_t i = 0; i < num_ref_idx_l1_active_minus1; i++) { // list_entry_l1: u(v) slice_reader.ConsumeBits(list_entry_bits); } } } } if (slice_type == H265::SliceType::kB) { // mvd_l1_zero_flag: u(1) slice_reader.ConsumeBits(1); } if (pps->cabac_init_present_flag) { // cabac_init_flag: u(1) slice_reader.ConsumeBits(1); } if (slice_temporal_mvp_enabled_flag) { bool collocated_from_l0_flag = false; if (slice_type == H265::SliceType::kB) { // collocated_from_l0_flag: u(1) collocated_from_l0_flag = slice_reader.Read(); } if ((collocated_from_l0_flag && num_ref_idx_l0_active_minus1 > 0) || (!collocated_from_l0_flag && num_ref_idx_l1_active_minus1 > 0)) { // collocated_ref_idx: ue(v) uint32_t collocated_ref_idx = slice_reader.ReadExponentialGolomb(); if ((slice_type == H265::SliceType::kP || slice_type == H265::SliceType::kB) && collocated_from_l0_flag) { IN_RANGE_OR_RETURN(collocated_ref_idx, 0, num_ref_idx_l0_active_minus1); } if (slice_type == H265::SliceType::kB && !collocated_from_l0_flag) { IN_RANGE_OR_RETURN(collocated_ref_idx, 0, num_ref_idx_l1_active_minus1); } } } if 
(!slice_reader.Ok() || ((pps->weighted_pred_flag && slice_type == H265::SliceType::kP) || (pps->weighted_bipred_flag && slice_type == H265::SliceType::kB))) { // pred_weight_table() RTC_LOG(LS_ERROR) << "Streams with pred_weight_table unsupported."; return kUnsupportedStream; } // five_minus_max_num_merge_cand: ue(v) uint32_t five_minus_max_num_merge_cand = slice_reader.ReadExponentialGolomb(); IN_RANGE_OR_RETURN(5 - five_minus_max_num_merge_cand, 1, 5); } } // slice_qp_delta: se(v) int32_t last_slice_qp_delta = slice_reader.ReadSignedExponentialGolomb(); if (!slice_reader.Ok() || (abs(last_slice_qp_delta) > kMaxAbsQpDeltaValue)) { // Something has gone wrong, and the parsed value is invalid. RTC_LOG(LS_ERROR) << "Parsed QP value out of range."; return kInvalidStream; } // 7-54 in H265 spec. IN_RANGE_OR_RETURN(26 + pps->init_qp_minus26 + last_slice_qp_delta, -pps->qp_bd_offset_y, 51); last_slice_qp_delta_ = last_slice_qp_delta; last_slice_pps_id_ = pps_id; if (!slice_reader.Ok()) { return kInvalidStream; } return kOk; } const H265PpsParser::PpsState* H265BitstreamParser::GetPPS(uint32_t id) const { auto it = pps_.find(id); if (it == pps_.end()) { RTC_LOG(LS_WARNING) << "Requested a nonexistent PPS id " << id; return nullptr; } return &it->second; } const H265SpsParser::SpsState* H265BitstreamParser::GetSPS(uint32_t id) const { auto it = sps_.find(id); if (it == sps_.end()) { RTC_LOG(LS_WARNING) << "Requested a nonexistent SPS id " << id; return nullptr; } return &it->second; } void H265BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) { H265::NaluType nalu_type = H265::ParseNaluType(slice[0]); switch (nalu_type) { case H265::NaluType::kVps: { absl::optional vps_state; if (length >= H265::kNaluHeaderSize) { vps_state = H265VpsParser::ParseVps(slice + H265::kNaluHeaderSize, length - H265::kNaluHeaderSize); } if (!vps_state) { RTC_LOG(LS_WARNING) << "Unable to parse VPS from H265 bitstream."; } else { vps_[vps_state->id] = *vps_state; } break; } case 
H265::NaluType::kSps: { absl::optional sps_state; if (length >= H265::kNaluHeaderSize) { sps_state = H265SpsParser::ParseSps(slice + H265::kNaluHeaderSize, length - H265::kNaluHeaderSize); } if (!sps_state) { RTC_LOG(LS_WARNING) << "Unable to parse SPS from H265 bitstream."; } else { sps_[sps_state->sps_id] = *sps_state; } break; } case H265::NaluType::kPps: { absl::optional pps_state; if (length >= H265::kNaluHeaderSize) { std::vector unpacked_buffer = H265::ParseRbsp( slice + H265::kNaluHeaderSize, length - H265::kNaluHeaderSize); BitstreamReader slice_reader(unpacked_buffer); // pic_parameter_set_id: ue(v) uint32_t pps_id = slice_reader.ReadExponentialGolomb(); IN_RANGE_OR_RETURN_VOID(pps_id, 0, 63); // seq_parameter_set_id: ue(v) uint32_t sps_id = slice_reader.ReadExponentialGolomb(); IN_RANGE_OR_RETURN_VOID(sps_id, 0, 15); const H265SpsParser::SpsState* sps = GetSPS(sps_id); pps_state = H265PpsParser::ParsePps( slice + H265::kNaluHeaderSize, length - H265::kNaluHeaderSize, sps); } if (!pps_state) { RTC_LOG(LS_WARNING) << "Unable to parse PPS from H265 bitstream."; } else { pps_[pps_state->pps_id] = *pps_state; } break; } case H265::NaluType::kAud: case H265::NaluType::kPrefixSei: case H265::NaluType::kSuffixSei: case H265::NaluType::kAp: case H265::NaluType::kFu: break; default: Result res = ParseNonParameterSetNalu(slice, length, nalu_type); if (res != kOk) { RTC_LOG(LS_INFO) << "Failed to parse bitstream. 
Error: " << res; } break; } } absl::optional H265BitstreamParser::ParsePpsIdFromSliceSegmentLayerRbsp(const uint8_t* data, size_t length, uint8_t nalu_type) { std::vector unpacked_buffer = H265::ParseRbsp(data, length); BitstreamReader slice_reader(unpacked_buffer); // first_slice_segment_in_pic_flag: u(1) slice_reader.ConsumeBits(1); if (!slice_reader.Ok()) { return absl::nullopt; } if (nalu_type >= H265::NaluType::kBlaWLp && nalu_type <= H265::NaluType::kRsvIrapVcl23) { // no_output_of_prior_pics_flag: u(1) slice_reader.ConsumeBits(1); } // slice_pic_parameter_set_id: ue(v) uint32_t slice_pic_parameter_set_id = slice_reader.ReadExponentialGolomb(); IN_RANGE_OR_RETURN_NULL(slice_pic_parameter_set_id, 0, 63); if (!slice_reader.Ok()) { return absl::nullopt; } return slice_pic_parameter_set_id; } void H265BitstreamParser::ParseBitstream( rtc::ArrayView bitstream) { std::vector nalu_indices = H265::FindNaluIndices(bitstream.data(), bitstream.size()); for (const H265::NaluIndex& index : nalu_indices) ParseSlice(&bitstream[index.payload_start_offset], index.payload_size); } absl::optional H265BitstreamParser::GetLastSliceQp() const { if (!last_slice_qp_delta_ || !last_slice_pps_id_) { return absl::nullopt; } uint32_t pps_id = 0; const H265PpsParser::PpsState* pps = GetPPS(pps_id); if (!pps) return absl::nullopt; const int parsed_qp = 26 + pps->init_qp_minus26 + *last_slice_qp_delta_; if (parsed_qp < kMinQpValue || parsed_qp > kMaxQpValue) { RTC_LOG(LS_ERROR) << "Parsed invalid QP from bitstream."; return absl::nullopt; } return parsed_qp; } } // namespace webrtc