Diffstat (limited to 'third_party/libwebrtc/common_video/h265')
16 files changed, 3070 insertions, 0 deletions
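For orientation, the sketch below shows how the parser introduced by this change is meant to be driven, mirroring the included h265_bitstream_parser_unittest.cc: feed an Annex-B buffer to ParseBitstream() (VPS/SPS/PPS NALUs are cached so later slice headers can be parsed statefully), then query GetLastSliceQp(). This is illustrative only, not part of the change; the helper name and the `annex_b_frame` argument are placeholders for any encoded H.265 access unit.

#include "absl/types/optional.h"
#include "api/array_view.h"
#include "common_video/h265/h265_bitstream_parser.h"
#include "rtc_base/logging.h"

// Hypothetical helper: logs the QP of the last slice found in an Annex-B
// encoded H.265 buffer, using the parser added by this change.
void LogLastSliceQp(rtc::ArrayView<const uint8_t> annex_b_frame) {
  webrtc::H265BitstreamParser parser;
  // Splits the buffer into NALUs; VPS/SPS/PPS are stored, slice headers parsed.
  parser.ParseBitstream(annex_b_frame);
  // Empty if no slice/PPS has been parsed or the reconstructed QP is
  // outside [0, 51].
  absl::optional<int> qp = parser.GetLastSliceQp();
  if (qp.has_value()) {
    RTC_LOG(LS_INFO) << "Last slice QP: " << *qp;
  }
}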
diff --git a/third_party/libwebrtc/common_video/h265/h265_bitstream_parser.cc b/third_party/libwebrtc/common_video/h265/h265_bitstream_parser.cc new file mode 100644 index 0000000000..1093add102 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_bitstream_parser.cc @@ -0,0 +1,543 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "common_video/h265/h265_bitstream_parser.h" + +#include <stdlib.h> + +#include <cstdint> +#include <vector> + +#include "common_video/h265/h265_common.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/bitstream_reader.h" +#include "rtc_base/logging.h" + +#define IN_RANGE_OR_RETURN(val, min, max) \ + do { \ + if (!slice_reader.Ok() || (val) < (min) || (val) > (max)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \ + " to be" \ + << " in range [" << (min) << ":" << (max) << "]" \ + << " found " << (val) << " instead"; \ + return kInvalidStream; \ + } \ + } while (0) + +#define IN_RANGE_OR_RETURN_NULL(val, min, max) \ + do { \ + if (!slice_reader.Ok() || (val) < (min) || (val) > (max)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \ + " to be" \ + << " in range [" << (min) << ":" << (max) << "]" \ + << " found " << (val) << " instead"; \ + return absl::nullopt; \ + } \ + } while (0) + +#define IN_RANGE_OR_RETURN_VOID(val, min, max) \ + do { \ + if (!slice_reader.Ok() || (val) < (min) || (val) > (max)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \ + " to be" \ + << " in range [" << (min) << ":" << (max) << "]" \ + << " found " << (val) << " instead"; \ + return; \ + } \ + } while (0) + +#define TRUE_OR_RETURN(a) \ + do { \ + if (!slice_reader.Ok() || !(a)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " \ + << #a; \ + return kInvalidStream; \ + } \ + } while (0) + +namespace { + +constexpr int kMaxAbsQpDeltaValue = 51; +constexpr int kMinQpValue = 0; +constexpr int kMaxQpValue = 51; +constexpr int kMaxRefIdxActive = 15; + +} // namespace + +namespace webrtc { + +H265BitstreamParser::H265BitstreamParser() = default; +H265BitstreamParser::~H265BitstreamParser() = default; + +// General note: this is based off the 08/2021 version of the H.265 standard, +// section 7.3.6.1. 
You can find it on this page: +// http://www.itu.int/rec/T-REC-H.265 +H265BitstreamParser::Result H265BitstreamParser::ParseNonParameterSetNalu( + const uint8_t* source, + size_t source_length, + uint8_t nalu_type) { + last_slice_qp_delta_ = absl::nullopt; + last_slice_pps_id_ = absl::nullopt; + const std::vector<uint8_t> slice_rbsp = + H265::ParseRbsp(source, source_length); + if (slice_rbsp.size() < H265::kNaluHeaderSize) + return kInvalidStream; + + BitstreamReader slice_reader(slice_rbsp); + slice_reader.ConsumeBits(H265::kNaluHeaderSize * 8); + + // first_slice_segment_in_pic_flag: u(1) + bool first_slice_segment_in_pic_flag = slice_reader.Read<bool>(); + bool irap_pic = (H265::NaluType::kBlaWLp <= nalu_type && + nalu_type <= H265::NaluType::kRsvIrapVcl23); + if (irap_pic) { + // no_output_of_prior_pics_flag: u(1) + slice_reader.ConsumeBits(1); + } + // slice_pic_parameter_set_id: ue(v) + uint32_t pps_id = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN(pps_id, 0, 63); + const H265PpsParser::PpsState* pps = GetPPS(pps_id); + TRUE_OR_RETURN(pps); + const H265SpsParser::SpsState* sps = GetSPS(pps->sps_id); + TRUE_OR_RETURN(sps); + bool dependent_slice_segment_flag = 0; + if (!first_slice_segment_in_pic_flag) { + if (pps->dependent_slice_segments_enabled_flag) { + // dependent_slice_segment_flag: u(1) + dependent_slice_segment_flag = slice_reader.Read<bool>(); + } + + // slice_segment_address: u(v) + int32_t log2_ctb_size_y = sps->log2_min_luma_coding_block_size_minus3 + 3 + + sps->log2_diff_max_min_luma_coding_block_size; + uint32_t ctb_size_y = 1 << log2_ctb_size_y; + uint32_t pic_width_in_ctbs_y = sps->pic_width_in_luma_samples / ctb_size_y; + if (sps->pic_width_in_luma_samples % ctb_size_y) + pic_width_in_ctbs_y++; + + uint32_t pic_height_in_ctbs_y = + sps->pic_height_in_luma_samples / ctb_size_y; + if (sps->pic_height_in_luma_samples % ctb_size_y) + pic_height_in_ctbs_y++; + + uint32_t slice_segment_address_bits = + H265::Log2Ceiling(pic_height_in_ctbs_y * pic_width_in_ctbs_y); + slice_reader.ConsumeBits(slice_segment_address_bits); + } + + if (dependent_slice_segment_flag == 0) { + for (uint32_t i = 0; i < pps->num_extra_slice_header_bits; i++) { + // slice_reserved_flag: u(1) + slice_reader.ConsumeBits(1); + } + // slice_type: ue(v) + uint32_t slice_type = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN(slice_type, 0, 2); + if (pps->output_flag_present_flag) { + // pic_output_flag: u(1) + slice_reader.ConsumeBits(1); + } + if (sps->separate_colour_plane_flag) { + // colour_plane_id: u(2) + slice_reader.ConsumeBits(2); + } + uint32_t num_long_term_sps = 0; + uint32_t num_long_term_pics = 0; + std::vector<bool> used_by_curr_pic_lt_flag; + bool short_term_ref_pic_set_sps_flag = false; + uint32_t short_term_ref_pic_set_idx = 0; + H265SpsParser::ShortTermRefPicSet short_term_ref_pic_set; + bool slice_temporal_mvp_enabled_flag = 0; + if (nalu_type != H265::NaluType::kIdrWRadl && + nalu_type != H265::NaluType::kIdrNLp) { + // slice_pic_order_cnt_lsb: u(v) + uint32_t slice_pic_order_cnt_lsb_bits = + sps->log2_max_pic_order_cnt_lsb_minus4 + 4; + slice_reader.ConsumeBits(slice_pic_order_cnt_lsb_bits); + // short_term_ref_pic_set_sps_flag: u(1) + short_term_ref_pic_set_sps_flag = slice_reader.Read<bool>(); + if (!short_term_ref_pic_set_sps_flag) { + absl::optional<H265SpsParser::ShortTermRefPicSet> ref_pic_set = + H265SpsParser::ParseShortTermRefPicSet( + sps->num_short_term_ref_pic_sets, + sps->num_short_term_ref_pic_sets, sps->short_term_ref_pic_set, + 
sps->sps_max_dec_pic_buffering_minus1 + [sps->sps_max_sub_layers_minus1], + slice_reader); + TRUE_OR_RETURN(ref_pic_set); + short_term_ref_pic_set = *ref_pic_set; + + } else if (sps->num_short_term_ref_pic_sets > 1) { + // short_term_ref_pic_set_idx: u(v) + uint32_t short_term_ref_pic_set_idx_bits = + H265::Log2Ceiling(sps->num_short_term_ref_pic_sets); + if ((1 << short_term_ref_pic_set_idx_bits) < + sps->num_short_term_ref_pic_sets) { + short_term_ref_pic_set_idx_bits++; + } + if (short_term_ref_pic_set_idx_bits > 0) { + short_term_ref_pic_set_idx = + slice_reader.ReadBits(short_term_ref_pic_set_idx_bits); + IN_RANGE_OR_RETURN(short_term_ref_pic_set_idx, 0, + sps->num_short_term_ref_pic_sets - 1); + } + } + if (sps->long_term_ref_pics_present_flag) { + if (sps->num_long_term_ref_pics_sps > 0) { + // num_long_term_sps: ue(v) + num_long_term_sps = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN(num_long_term_sps, 0, + sps->num_long_term_ref_pics_sps); + } + // num_long_term_pics: ue(v) + num_long_term_pics = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN(num_long_term_pics, 0, + kMaxLongTermRefPicSets - num_long_term_sps); + used_by_curr_pic_lt_flag.resize(num_long_term_sps + num_long_term_pics, + 0); + for (uint32_t i = 0; i < num_long_term_sps + num_long_term_pics; i++) { + if (i < num_long_term_sps) { + uint32_t lt_idx_sps = 0; + if (sps->num_long_term_ref_pics_sps > 1) { + // lt_idx_sps: u(v) + uint32_t lt_idx_sps_bits = + H265::Log2Ceiling(sps->num_long_term_ref_pics_sps); + lt_idx_sps = slice_reader.ReadBits(lt_idx_sps_bits); + IN_RANGE_OR_RETURN(lt_idx_sps, 0, + sps->num_long_term_ref_pics_sps - 1); + } + used_by_curr_pic_lt_flag[i] = + sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps]; + } else { + // poc_lsb_lt: u(v) + uint32_t poc_lsb_lt_bits = + sps->log2_max_pic_order_cnt_lsb_minus4 + 4; + slice_reader.ConsumeBits(poc_lsb_lt_bits); + // used_by_curr_pic_lt_flag: u(1) + used_by_curr_pic_lt_flag[i] = slice_reader.Read<bool>(); + } + // delta_poc_msb_present_flag: u(1) + bool delta_poc_msb_present_flag = slice_reader.Read<bool>(); + if (delta_poc_msb_present_flag) { + // delta_poc_msb_cycle_lt: ue(v) + int delta_poc_msb_cycle_lt = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN( + delta_poc_msb_cycle_lt, 0, + std::pow(2, 32 - sps->log2_max_pic_order_cnt_lsb_minus4 - 4)); + } + } + } + if (sps->sps_temporal_mvp_enabled_flag) { + // slice_temporal_mvp_enabled_flag: u(1) + slice_temporal_mvp_enabled_flag = slice_reader.Read<bool>(); + } + } + + if (sps->sample_adaptive_offset_enabled_flag) { + // slice_sao_luma_flag: u(1) + slice_reader.ConsumeBits(1); + uint32_t chroma_array_type = + sps->separate_colour_plane_flag == 0 ? 
sps->chroma_format_idc : 0; + if (chroma_array_type != 0) { + // slice_sao_chroma_flag: u(1) + slice_reader.ConsumeBits(1); + } + } + + if (slice_type == H265::SliceType::kP || + slice_type == H265::SliceType::kB) { + // num_ref_idx_active_override_flag: u(1) + bool num_ref_idx_active_override_flag = slice_reader.Read<bool>(); + uint32_t num_ref_idx_l0_active_minus1 = + pps->num_ref_idx_l0_default_active_minus1; + uint32_t num_ref_idx_l1_active_minus1 = + pps->num_ref_idx_l1_default_active_minus1; + if (num_ref_idx_active_override_flag) { + // num_ref_idx_l0_active_minus1: ue(v) + num_ref_idx_l0_active_minus1 = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN(num_ref_idx_l0_active_minus1, 0, + kMaxRefIdxActive - 1); + if (slice_type == H265::SliceType::kB) { + // num_ref_idx_l1_active_minus1: ue(v) + num_ref_idx_l1_active_minus1 = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN(num_ref_idx_l1_active_minus1, 0, + kMaxRefIdxActive - 1); + } + } + + uint32_t num_pic_total_curr = 0; + uint32_t curr_sps_idx = 0; + if (short_term_ref_pic_set_sps_flag) { + curr_sps_idx = short_term_ref_pic_set_idx; + } else { + curr_sps_idx = sps->num_short_term_ref_pic_sets; + } + if (sps->short_term_ref_pic_set.size() <= curr_sps_idx) { + TRUE_OR_RETURN(!(curr_sps_idx != 0 || short_term_ref_pic_set_sps_flag)); + } + const H265SpsParser::ShortTermRefPicSet* ref_pic_set; + if (curr_sps_idx < sps->short_term_ref_pic_set.size()) { + ref_pic_set = &(sps->short_term_ref_pic_set[curr_sps_idx]); + } else { + ref_pic_set = &short_term_ref_pic_set; + } + + // Equation 7-57 + IN_RANGE_OR_RETURN(ref_pic_set->num_negative_pics, 0, + kMaxShortTermRefPicSets); + IN_RANGE_OR_RETURN(ref_pic_set->num_positive_pics, 0, + kMaxShortTermRefPicSets); + for (uint32_t i = 0; i < ref_pic_set->num_negative_pics; i++) { + if (ref_pic_set->used_by_curr_pic_s0[i]) { + num_pic_total_curr++; + } + } + for (uint32_t i = 0; i < ref_pic_set->num_positive_pics; i++) { + if (ref_pic_set->used_by_curr_pic_s1[i]) { + num_pic_total_curr++; + } + } + for (uint32_t i = 0; i < num_long_term_sps + num_long_term_pics; i++) { + if (used_by_curr_pic_lt_flag[i]) { + num_pic_total_curr++; + } + } + + if (pps->lists_modification_present_flag && num_pic_total_curr > 1) { + // ref_pic_lists_modification() + uint32_t list_entry_bits = H265::Log2Ceiling(num_pic_total_curr); + if ((1 << list_entry_bits) < num_pic_total_curr) { + list_entry_bits++; + } + // ref_pic_list_modification_flag_l0: u(1) + bool ref_pic_list_modification_flag_l0 = slice_reader.Read<bool>(); + if (ref_pic_list_modification_flag_l0) { + for (uint32_t i = 0; i < num_ref_idx_l0_active_minus1; i++) { + // list_entry_l0: u(v) + slice_reader.ConsumeBits(list_entry_bits); + } + } + if (slice_type == H265::SliceType::kB) { + // ref_pic_list_modification_flag_l1: u(1) + bool ref_pic_list_modification_flag_l1 = slice_reader.Read<bool>(); + if (ref_pic_list_modification_flag_l1) { + for (uint32_t i = 0; i < num_ref_idx_l1_active_minus1; i++) { + // list_entry_l1: u(v) + slice_reader.ConsumeBits(list_entry_bits); + } + } + } + } + if (slice_type == H265::SliceType::kB) { + // mvd_l1_zero_flag: u(1) + slice_reader.ConsumeBits(1); + } + if (pps->cabac_init_present_flag) { + // cabac_init_flag: u(1) + slice_reader.ConsumeBits(1); + } + if (slice_temporal_mvp_enabled_flag) { + bool collocated_from_l0_flag = false; + if (slice_type == H265::SliceType::kB) { + // collocated_from_l0_flag: u(1) + collocated_from_l0_flag = slice_reader.Read<bool>(); + } + if ((collocated_from_l0_flag && 
num_ref_idx_l0_active_minus1 > 0) || + (!collocated_from_l0_flag && num_ref_idx_l1_active_minus1 > 0)) { + // collocated_ref_idx: ue(v) + uint32_t collocated_ref_idx = slice_reader.ReadExponentialGolomb(); + if ((slice_type == H265::SliceType::kP || + slice_type == H265::SliceType::kB) && + collocated_from_l0_flag) { + IN_RANGE_OR_RETURN(collocated_ref_idx, 0, + num_ref_idx_l0_active_minus1); + } + if (slice_type == H265::SliceType::kB && !collocated_from_l0_flag) { + IN_RANGE_OR_RETURN(collocated_ref_idx, 0, + num_ref_idx_l1_active_minus1); + } + } + } + if (!slice_reader.Ok() || + ((pps->weighted_pred_flag && slice_type == H265::SliceType::kP) || + (pps->weighted_bipred_flag && slice_type == H265::SliceType::kB))) { + // pred_weight_table() + RTC_LOG(LS_ERROR) << "Streams with pred_weight_table unsupported."; + return kUnsupportedStream; + } + // five_minus_max_num_merge_cand: ue(v) + uint32_t five_minus_max_num_merge_cand = + slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN(5 - five_minus_max_num_merge_cand, 1, 5); + } + } + + // slice_qp_delta: se(v) + int32_t last_slice_qp_delta = slice_reader.ReadSignedExponentialGolomb(); + if (!slice_reader.Ok() || (abs(last_slice_qp_delta) > kMaxAbsQpDeltaValue)) { + // Something has gone wrong, and the parsed value is invalid. + RTC_LOG(LS_ERROR) << "Parsed QP value out of range."; + return kInvalidStream; + } + // 7-54 in H265 spec. + IN_RANGE_OR_RETURN(26 + pps->init_qp_minus26 + last_slice_qp_delta, + -pps->qp_bd_offset_y, 51); + + last_slice_qp_delta_ = last_slice_qp_delta; + last_slice_pps_id_ = pps_id; + if (!slice_reader.Ok()) { + return kInvalidStream; + } + + return kOk; +} + +const H265PpsParser::PpsState* H265BitstreamParser::GetPPS(uint32_t id) const { + auto it = pps_.find(id); + if (it == pps_.end()) { + RTC_LOG(LS_WARNING) << "Requested a nonexistent PPS id " << id; + return nullptr; + } + + return &it->second; +} + +const H265SpsParser::SpsState* H265BitstreamParser::GetSPS(uint32_t id) const { + auto it = sps_.find(id); + if (it == sps_.end()) { + RTC_LOG(LS_WARNING) << "Requested a nonexistent SPS id " << id; + return nullptr; + } + + return &it->second; +} + +void H265BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) { + H265::NaluType nalu_type = H265::ParseNaluType(slice[0]); + switch (nalu_type) { + case H265::NaluType::kVps: { + absl::optional<H265VpsParser::VpsState> vps_state; + if (length >= H265::kNaluHeaderSize) { + vps_state = H265VpsParser::ParseVps(slice + H265::kNaluHeaderSize, + length - H265::kNaluHeaderSize); + } + + if (!vps_state) { + RTC_LOG(LS_WARNING) << "Unable to parse VPS from H265 bitstream."; + } else { + vps_[vps_state->id] = *vps_state; + } + break; + } + case H265::NaluType::kSps: { + absl::optional<H265SpsParser::SpsState> sps_state; + if (length >= H265::kNaluHeaderSize) { + sps_state = H265SpsParser::ParseSps(slice + H265::kNaluHeaderSize, + length - H265::kNaluHeaderSize); + } + if (!sps_state) { + RTC_LOG(LS_WARNING) << "Unable to parse SPS from H265 bitstream."; + } else { + sps_[sps_state->sps_id] = *sps_state; + } + break; + } + case H265::NaluType::kPps: { + absl::optional<H265PpsParser::PpsState> pps_state; + if (length >= H265::kNaluHeaderSize) { + std::vector<uint8_t> unpacked_buffer = H265::ParseRbsp( + slice + H265::kNaluHeaderSize, length - H265::kNaluHeaderSize); + BitstreamReader slice_reader(unpacked_buffer); + // pic_parameter_set_id: ue(v) + uint32_t pps_id = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_VOID(pps_id, 0, 63); + // 
seq_parameter_set_id: ue(v) + uint32_t sps_id = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_VOID(sps_id, 0, 15); + const H265SpsParser::SpsState* sps = GetSPS(sps_id); + pps_state = H265PpsParser::ParsePps( + slice + H265::kNaluHeaderSize, length - H265::kNaluHeaderSize, sps); + } + if (!pps_state) { + RTC_LOG(LS_WARNING) << "Unable to parse PPS from H265 bitstream."; + } else { + pps_[pps_state->pps_id] = *pps_state; + } + break; + } + case H265::NaluType::kAud: + case H265::NaluType::kPrefixSei: + case H265::NaluType::kSuffixSei: + case H265::NaluType::kAP: + case H265::NaluType::kFU: + break; + default: + Result res = ParseNonParameterSetNalu(slice, length, nalu_type); + if (res != kOk) { + RTC_LOG(LS_INFO) << "Failed to parse bitstream. Error: " << res; + } + break; + } +} + +absl::optional<uint32_t> +H265BitstreamParser::ParsePpsIdFromSliceSegmentLayerRbsp(const uint8_t* data, + size_t length, + uint8_t nalu_type) { + std::vector<uint8_t> unpacked_buffer = H265::ParseRbsp(data, length); + BitstreamReader slice_reader(unpacked_buffer); + + // first_slice_segment_in_pic_flag: u(1) + slice_reader.ConsumeBits(1); + if (!slice_reader.Ok()) { + return absl::nullopt; + } + + if (nalu_type >= H265::NaluType::kBlaWLp && + nalu_type <= H265::NaluType::kRsvIrapVcl23) { + // no_output_of_prior_pics_flag: u(1) + slice_reader.ConsumeBits(1); + } + + // slice_pic_parameter_set_id: ue(v) + uint32_t slice_pic_parameter_set_id = slice_reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(slice_pic_parameter_set_id, 0, 63); + if (!slice_reader.Ok()) { + return absl::nullopt; + } + + return slice_pic_parameter_set_id; +} + +void H265BitstreamParser::ParseBitstream( + rtc::ArrayView<const uint8_t> bitstream) { + std::vector<H265::NaluIndex> nalu_indices = + H265::FindNaluIndices(bitstream.data(), bitstream.size()); + for (const H265::NaluIndex& index : nalu_indices) + ParseSlice(&bitstream[index.payload_start_offset], index.payload_size); +} + +absl::optional<int> H265BitstreamParser::GetLastSliceQp() const { + if (!last_slice_qp_delta_ || !last_slice_pps_id_) { + return absl::nullopt; + } + uint32_t pps_id = 0; + const H265PpsParser::PpsState* pps = GetPPS(pps_id); + if (!pps) + return absl::nullopt; + const int parsed_qp = 26 + pps->init_qp_minus26 + *last_slice_qp_delta_; + if (parsed_qp < kMinQpValue || parsed_qp > kMaxQpValue) { + RTC_LOG(LS_ERROR) << "Parsed invalid QP from bitstream."; + return absl::nullopt; + } + return parsed_qp; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_bitstream_parser.h b/third_party/libwebrtc/common_video/h265/h265_bitstream_parser.h new file mode 100644 index 0000000000..3c0883c7a1 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_bitstream_parser.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef COMMON_VIDEO_H265_H265_BITSTREAM_PARSER_H_ +#define COMMON_VIDEO_H265_H265_BITSTREAM_PARSER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <vector> + +#include "absl/types/optional.h" +#include "api/video_codecs/bitstream_parser.h" +#include "common_video/h265/h265_pps_parser.h" +#include "common_video/h265/h265_sps_parser.h" +#include "common_video/h265/h265_vps_parser.h" +#include "rtc_base/containers/flat_map.h" + +namespace webrtc { + +// Stateful H265 bitstream parser (due to VPS/SPS/PPS). Used to parse out QP +// values from the bitstream. +class H265BitstreamParser : public BitstreamParser { + public: + H265BitstreamParser(); + ~H265BitstreamParser() override; + + // New interface. + void ParseBitstream(rtc::ArrayView<const uint8_t> bitstream) override; + absl::optional<int> GetLastSliceQp() const override; + + static absl::optional<uint32_t> ParsePpsIdFromSliceSegmentLayerRbsp( + const uint8_t* data, + size_t length, + uint8_t nalu_type); + + protected: + enum Result { + kOk, + kInvalidStream, + kUnsupportedStream, + }; + void ParseSlice(const uint8_t* slice, size_t length); + Result ParseNonParameterSetNalu(const uint8_t* source, + size_t source_length, + uint8_t nalu_type); + + const H265PpsParser::PpsState* GetPPS(uint32_t id) const; + const H265SpsParser::SpsState* GetSPS(uint32_t id) const; + + // VPS/SPS/PPS state, updated when parsing new VPS/SPS/PPS, used to parse + // slices. + flat_map<uint32_t, H265VpsParser::VpsState> vps_; + flat_map<uint32_t, H265SpsParser::SpsState> sps_; + flat_map<uint32_t, H265PpsParser::PpsState> pps_; + + // Last parsed slice QP. + absl::optional<int32_t> last_slice_qp_delta_; + absl::optional<uint32_t> last_slice_pps_id_; +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_H265_H265_BITSTREAM_PARSER_H_ diff --git a/third_party/libwebrtc/common_video/h265/h265_bitstream_parser_unittest.cc b/third_party/libwebrtc/common_video/h265/h265_bitstream_parser_unittest.cc new file mode 100644 index 0000000000..7ca979433a --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_bitstream_parser_unittest.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h265/h265_bitstream_parser.h" + +#include "common_video/h265/h265_common.h" +#include "test/gtest.h" + +namespace webrtc { + +// VPS/SPS/PPS part of below chunk. +const uint8_t kH265VpsSpsPps[] = { + 0x00, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0x95, 0x98, 0x09, 0x00, 0x00, 0x00, 0x01, 0x42, 0x01, 0x01, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0xb0, 0x03, 0xc0, 0x80, 0x10, 0xe5, 0x96, 0x56, 0x69, 0x24, 0xca, 0xe0, + 0x10, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x03, 0x01, 0xe0, 0x80, + 0x00, 0x00, 0x00, 0x01, 0x44, 0x01, 0xc1, 0x72, 0xb4, 0x62, 0x40}; + +// Contains enough of the image slice to contain slice QP. 
+const uint8_t kH265BitstreamChunk[] = { + 0x00, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0x95, 0x98, 0x09, 0x00, 0x00, 0x00, 0x01, 0x42, 0x01, 0x01, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0xb0, 0x03, 0xc0, 0x80, 0x10, 0xe5, 0x96, 0x56, 0x69, 0x24, 0xca, 0xe0, + 0x10, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x03, 0x01, 0xe0, 0x80, + 0x00, 0x00, 0x00, 0x01, 0x44, 0x01, 0xc1, 0x72, 0xb4, 0x62, 0x40, 0x00, + 0x00, 0x01, 0x26, 0x01, 0xaf, 0x08, 0x42, 0x23, 0x10, 0x5d, 0x2b, 0x51, + 0xf9, 0x7a, 0x55, 0x15, 0x0d, 0x10, 0x40, 0xe8, 0x10, 0x05, 0x30, 0x95, + 0x09, 0x9a, 0xa5, 0xb6, 0x6a, 0x66, 0x6d, 0xde, 0xe0, 0xf9, +}; + +// Contains enough of the image slice to contain slice QP. +const uint8_t kH265BitstreamNextImageSliceChunk[] = { + 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0xe0, 0x24, 0xbf, 0x82, 0x05, + 0x21, 0x12, 0x22, 0xa3, 0x29, 0xb4, 0x21, 0x91, 0xa1, 0xaa, 0x40, +}; + +// Contains enough of the image slice to contain slice QP. +const uint8_t kH265SliceChunk[] = { + 0xa4, 0x04, 0x55, 0xa2, 0x6d, 0xce, 0xc0, 0xc3, 0xed, 0x0b, 0xac, 0xbc, + 0x00, 0xc4, 0x44, 0x2e, 0xf7, 0x55, 0xfd, 0x05, 0x86, 0x92, 0x19, 0xdf, + 0x58, 0xec, 0x38, 0x36, 0xb7, 0x7c, 0x00, 0x15, 0x33, 0x78, 0x03, 0x67, + 0x26, 0x0f, 0x7b, 0x30, 0x1c, 0xd7, 0xd4, 0x3a, 0xec, 0xad, 0xef, 0x73, +}; + +// Contains short term ref pic set slice to verify Log2Ceiling path. +const uint8_t kH265SliceStrChunk[] = { + 0x00, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x01, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, + 0x00, 0x99, 0x94, 0x90, 0x24, 0x00, 0x00, 0x00, 0x01, 0x42, 0x01, 0x01, + 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x99, 0xa0, 0x01, 0x40, 0x20, 0x06, 0x41, 0xfe, 0x59, + 0x49, 0x26, 0x4d, 0x86, 0x16, 0x22, 0xaa, 0x4c, 0x4c, 0x32, 0xfb, 0x3e, + 0xbc, 0xdf, 0x96, 0x7d, 0x78, 0x51, 0x18, 0x9c, 0xbb, 0x20, 0x00, 0x00, + 0x00, 0x01, 0x44, 0x01, 0xc1, 0xa5, 0x58, 0x11, 0x20, 0x00, 0x00, 0x01, + 0x02, 0x01, 0xe1, 0x18, 0xfe, 0x47, 0x60, 0xd2, 0x74, 0xd6, 0x9f, 0xfc, + 0xbe, 0x6b, 0x15, 0x48, 0x59, 0x1f, 0xf7, 0xc1, 0x7c, 0xe2, 0xe8, 0x10, +}; + +// Contains enough of the image slice to contain invalid slice QP -52. +const uint8_t kH265BitstreamInvalidQPChunk[] = { + 0x00, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0x95, 0x98, 0x09, 0x00, 0x00, 0x00, 0x01, 0x42, 0x01, 0x01, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0xb0, 0x03, 0xc0, 0x80, 0x10, 0xe5, 0x96, 0x56, 0x69, 0x24, 0xca, 0xe0, + 0x10, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x03, 0x01, 0xe0, 0x80, + 0x00, 0x00, 0x00, 0x01, 0x44, 0x01, 0xc1, 0x72, 0xb4, 0x62, 0x40, 0x00, + 0x00, 0x01, 0x26, 0x01, 0xaf, 0x03, 0x4c, +}; + +// Contains enough of the image slice to contain invalid slice QP 52. 
+const uint8_t kH265BitstreamInvalidQPChunk52[] = { + 0x00, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0x95, 0x98, 0x09, 0x00, 0x00, 0x00, 0x01, 0x42, 0x01, 0x01, 0x04, 0x08, + 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0xb0, 0x03, 0xc0, 0x80, 0x10, 0xe5, 0x96, 0x56, 0x69, 0x24, 0xca, 0xe0, + 0x10, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x03, 0x01, 0xe0, 0x80, + 0x00, 0x00, 0x00, 0x01, 0x44, 0x01, 0xc1, 0x72, 0xb4, 0x62, 0x40, 0x00, + 0x00, 0x01, 0x26, 0x01, 0xaf, 0x03, 0x44, +}; + +TEST(H265BitstreamParserTest, ReportsNoQpWithoutParsedSlices) { + H265BitstreamParser h265_parser; + EXPECT_FALSE(h265_parser.GetLastSliceQp().has_value()); +} + +TEST(H265BitstreamParserTest, ReportsNoQpWithOnlyParsedPpsAndSpsSlices) { + H265BitstreamParser h265_parser; + h265_parser.ParseBitstream(kH265VpsSpsPps); + EXPECT_FALSE(h265_parser.GetLastSliceQp().has_value()); +} + +TEST(H265BitstreamParserTest, ReportsLastSliceQpForImageSlices) { + H265BitstreamParser h265_parser; + h265_parser.ParseBitstream(kH265BitstreamChunk); + absl::optional<int> qp = h265_parser.GetLastSliceQp(); + ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(34, *qp); + + // Parse an additional image slice. + h265_parser.ParseBitstream(kH265BitstreamNextImageSliceChunk); + qp = h265_parser.GetLastSliceQp(); + ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(36, *qp); +} + +TEST(H265BitstreamParserTest, ReportsLastSliceQpFromShortTermReferenceSlices) { + H265BitstreamParser h265_parser; + h265_parser.ParseBitstream(kH265SliceStrChunk); + absl::optional<int> qp = h265_parser.GetLastSliceQp(); + ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(33, *qp); +} + +TEST(H265BitstreamParserTest, PpsIdFromSlice) { + H265BitstreamParser h265_parser; + absl::optional<uint32_t> pps_id = + h265_parser.ParsePpsIdFromSliceSegmentLayerRbsp( + kH265SliceChunk, sizeof(kH265SliceChunk), H265::NaluType::kTrailR); + ASSERT_TRUE(pps_id); + EXPECT_EQ(1u, *pps_id); +} + +TEST(H265BitstreamParserTest, ReportsLastSliceQpInvalidQPSlices) { + H265BitstreamParser h265_parser; + h265_parser.ParseBitstream(kH265BitstreamInvalidQPChunk); + absl::optional<int> qp = h265_parser.GetLastSliceQp(); + ASSERT_FALSE(qp.has_value()); + + h265_parser.ParseBitstream(kH265BitstreamInvalidQPChunk52); + qp = h265_parser.GetLastSliceQp(); + ASSERT_FALSE(qp.has_value()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_common.cc b/third_party/libwebrtc/common_video/h265/h265_common.cc new file mode 100644 index 0000000000..70864495bc --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_common.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "common_video/h265/h265_common.h" + +#include "common_video/h264/h264_common.h" + +namespace webrtc { +namespace H265 { + +constexpr uint8_t kNaluTypeMask = 0x7E; + +std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer, + size_t buffer_size) { + std::vector<H264::NaluIndex> indices = + H264::FindNaluIndices(buffer, buffer_size); + std::vector<NaluIndex> results; + for (auto& index : indices) { + results.push_back( + {index.start_offset, index.payload_start_offset, index.payload_size}); + } + return results; +} + +NaluType ParseNaluType(uint8_t data) { + return static_cast<NaluType>((data & kNaluTypeMask) >> 1); +} + +std::vector<uint8_t> ParseRbsp(const uint8_t* data, size_t length) { + return H264::ParseRbsp(data, length); +} + +void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination) { + H264::WriteRbsp(bytes, length, destination); +} + +uint32_t Log2Ceiling(uint32_t value) { + // When n == 0, we want the function to return -1. + // When n == 0, (n - 1) will underflow to 0xFFFFFFFF, which is + // why the statement below starts with (n ? 32 : -1). + return (value ? 32 : -1) - WebRtcVideo_CountLeadingZeros32(value - 1); +} + +} // namespace H265 +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_common.h b/third_party/libwebrtc/common_video/h265/h265_common.h new file mode 100644 index 0000000000..fcb97815ff --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_common.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H265_H265_COMMON_H_ +#define COMMON_VIDEO_H265_H265_COMMON_H_ + +#include <memory> +#include <vector> + +#include "common_video/h265/h265_inline.h" +#include "rtc_base/buffer.h" + +namespace webrtc { + +namespace H265 { +// The size of a full NALU start sequence {0 0 0 1}, used for the first NALU +// of an access unit, and for SPS and PPS blocks. +constexpr size_t kNaluLongStartSequenceSize = 4; + +// The size of a shortened NALU start sequence {0 0 1}, that may be used if +// not the first NALU of an access unit or an SPS or PPS block. +constexpr size_t kNaluShortStartSequenceSize = 3; + +// The size of the NALU header byte (2). +constexpr size_t kNaluHeaderSize = 2; + +// Type description of 0-40 is defined in Table7-1 of the H.265 spec +// Type desciption of 48-49 is defined in section 4.4.2 and 4.4.3 of RFC7798 +enum NaluType : uint8_t { + kTrailN = 0, + kTrailR = 1, + kTsaN = 2, + kTsaR = 3, + kStsaN = 4, + kStsaR = 5, + kRadlN = 6, + kRadlR = 7, + kBlaWLp = 16, + kBlaWRadl = 17, + kBlaNLp = 18, + kIdrWRadl = 19, + kIdrNLp = 20, + kCra = 21, + kRsvIrapVcl23 = 23, + kVps = 32, + kSps = 33, + kPps = 34, + kAud = 35, + kPrefixSei = 39, + kSuffixSei = 40, + kAP = 48, + kFU = 49 +}; + +// Slice type definition. See table 7-7 of the H265 spec +enum SliceType : uint8_t { kB = 0, kP = 1, kI = 2 }; + +struct NaluIndex { + // Start index of NALU, including start sequence. + size_t start_offset = 0; + // Start index of NALU payload, typically type header. + size_t payload_start_offset = 0; + // Length of NALU payload, in bytes, counting from payload_start_offset. 
+ size_t payload_size = 0; +}; + +// Returns a vector of the NALU indices in the given buffer. +std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer, + size_t buffer_size); + +// Get the NAL type from the header byte immediately following start sequence. +NaluType ParseNaluType(uint8_t data); + +// Methods for parsing and writing RBSP. See section 7.4.2 of the H265 spec. +// +// The following sequences are illegal, and need to be escaped when encoding: +// 00 00 00 -> 00 00 03 00 +// 00 00 01 -> 00 00 03 01 +// 00 00 02 -> 00 00 03 02 +// And things in the source that look like the emulation byte pattern (00 00 03) +// need to have an extra emulation byte added, so it's removed when decoding: +// 00 00 03 -> 00 00 03 03 +// +// Decoding is simply a matter of finding any 00 00 03 sequence and removing +// the 03 emulation byte. + +// Parse the given data and remove any emulation byte escaping. +std::vector<uint8_t> ParseRbsp(const uint8_t* data, size_t length); + +// Write the given data to the destination buffer, inserting and emulation +// bytes in order to escape any data the could be interpreted as a start +// sequence. +void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination); + +uint32_t Log2Ceiling(uint32_t value); + +} // namespace H265 +} // namespace webrtc + +#endif // COMMON_VIDEO_H265_H265_COMMON_H_ diff --git a/third_party/libwebrtc/common_video/h265/h265_inline.cc b/third_party/libwebrtc/common_video/h265/h265_inline.cc new file mode 100644 index 0000000000..3943a7a41e --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_inline.cc @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h265/h265_inline.h" + +#include <stdint.h> + +// Table used by WebRtcVideo_CountLeadingZeros32_NotBuiltin. For each uint32_t n +// that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at +// index (n * 0x8c0b2891) >> 26 in this table gives the number of zero bits in +// n. +const int8_t kWebRtcVideo_CountLeadingZeros32_Table[64] = { + 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24, + 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9, + -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12, +}; diff --git a/third_party/libwebrtc/common_video/h265/h265_inline.h b/third_party/libwebrtc/common_video/h265/h265_inline.h new file mode 100644 index 0000000000..85421a6706 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_inline.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This header file includes the inline functions in H265 parser. 
+ +#ifndef COMMON_VIDEO_H265_H265_INLINE_H_ +#define COMMON_VIDEO_H265_H265_INLINE_H_ + +#include <stdint.h> + +#include "rtc_base/compile_assert_c.h" + +extern const int8_t kWebRtcVideo_CountLeadingZeros32_Table[64]; + +static __inline int WebRtcVideo_CountLeadingZeros32_NotBuiltin(uint32_t n) { + // Normalize n by rounding up to the nearest number that is a sequence of 0 + // bits followed by a sequence of 1 bits. This number has the same number of + // leading zeros as the original n. There are exactly 33 such values. + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + + // Multiply the modified n with a constant selected (by exhaustive search) + // such that each of the 33 possible values of n give a product whose 6 most + // significant bits are unique. Then look up the answer in the table. + return kWebRtcVideo_CountLeadingZeros32_Table[(n * 0x8c0b2891) >> 26]; +} + +// Returns the number of leading zero bits in the argument. +static __inline int WebRtcVideo_CountLeadingZeros32(uint32_t n) { +#ifdef __GNUC__ + RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t)); + return n == 0 ? 32 : __builtin_clz(n); +#else + return WebRtcVideo_CountLeadingZeros32_NotBuiltin(n); +#endif +} +#endif // COMMON_VIDEO_H265_H265_INLINE_H_ diff --git a/third_party/libwebrtc/common_video/h265/h265_pps_parser.cc b/third_party/libwebrtc/common_video/h265/h265_pps_parser.cc new file mode 100644 index 0000000000..1cc9abd794 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_pps_parser.cc @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h265/h265_pps_parser.h" + +#include <memory> +#include <vector> + +#include "absl/types/optional.h" +#include "common_video/h265/h265_common.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/bitstream_reader.h" +#include "rtc_base/logging.h" + +#define IN_RANGE_OR_RETURN_NULL(val, min, max) \ + do { \ + if (!reader.Ok() || (val) < (min) || (val) > (max)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \ + " to be" \ + << " in range [" << (min) << ":" << (max) << "]" \ + << " found " << (val) << " instead"; \ + return absl::nullopt; \ + } \ + } while (0) + +#define IN_RANGE_OR_RETURN_FALSE(val, min, max) \ + do { \ + if (!reader.Ok() || (val) < (min) || (val) > (max)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \ + " to be" \ + << " in range [" << (min) << ":" << (max) << "]" \ + << " found " << (val) << " instead"; \ + return false; \ + } \ + } while (0) + +#define TRUE_OR_RETURN(a) \ + do { \ + if (!reader.Ok() || !(a)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " \ + << #a; \ + return absl::nullopt; \ + } \ + } while (0) + +namespace { +constexpr int kMaxNumTileColumnWidth = 19; +constexpr int kMaxNumTileRowHeight = 21; +constexpr int kMaxRefIdxActive = 15; +} // namespace + +namespace webrtc { + +// General note: this is based off the 08/2021 version of the H.265 standard. 
+// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.265 + +absl::optional<H265PpsParser::PpsState> H265PpsParser::ParsePps( + const uint8_t* data, + size_t length, + const H265SpsParser::SpsState* sps) { + // First, parse out rbsp, which is basically the source buffer minus emulation + // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in + // section 7.3.1.1 of the H.265 standard. + return ParseInternal(H265::ParseRbsp(data, length), sps); +} + +bool H265PpsParser::ParsePpsIds(const uint8_t* data, + size_t length, + uint32_t* pps_id, + uint32_t* sps_id) { + RTC_DCHECK(pps_id); + RTC_DCHECK(sps_id); + // First, parse out rbsp, which is basically the source buffer minus emulation + // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in + // section 7.3.1.1 of the H.265 standard. + std::vector<uint8_t> unpacked_buffer = H265::ParseRbsp(data, length); + BitstreamReader reader(unpacked_buffer); + *pps_id = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_FALSE(*pps_id, 0, 63); + *sps_id = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_FALSE(*sps_id, 0, 15); + return reader.Ok(); +} + +absl::optional<H265PpsParser::PpsState> H265PpsParser::ParseInternal( + rtc::ArrayView<const uint8_t> buffer, + const H265SpsParser::SpsState* sps) { + BitstreamReader reader(buffer); + PpsState pps; + + if (!sps) { + return absl::nullopt; + } + + if (!ParsePpsIdsInternal(reader, pps.pps_id, pps.sps_id)) { + return absl::nullopt; + } + + // dependent_slice_segments_enabled_flag: u(1) + pps.dependent_slice_segments_enabled_flag = reader.Read<bool>(); + // output_flag_present_flag: u(1) + pps.output_flag_present_flag = reader.Read<bool>(); + // num_extra_slice_header_bits: u(3) + pps.num_extra_slice_header_bits = reader.ReadBits(3); + IN_RANGE_OR_RETURN_NULL(pps.num_extra_slice_header_bits, 0, 2); + // sign_data_hiding_enabled_flag: u(1) + reader.ConsumeBits(1); + // cabac_init_present_flag: u(1) + pps.cabac_init_present_flag = reader.Read<bool>(); + // num_ref_idx_l0_default_active_minus1: ue(v) + pps.num_ref_idx_l0_default_active_minus1 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(pps.num_ref_idx_l0_default_active_minus1, 0, + kMaxRefIdxActive - 1); + // num_ref_idx_l1_default_active_minus1: ue(v) + pps.num_ref_idx_l1_default_active_minus1 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(pps.num_ref_idx_l1_default_active_minus1, 0, + kMaxRefIdxActive - 1); + // init_qp_minus26: se(v) + pps.init_qp_minus26 = reader.ReadSignedExponentialGolomb(); + pps.qp_bd_offset_y = 6 * sps->bit_depth_luma_minus8; + // Sanity-check parsed value + IN_RANGE_OR_RETURN_NULL(pps.init_qp_minus26, -(26 + pps.qp_bd_offset_y), 25); + // constrained_intra_pred_flag: u(1)log2_min_pcm_luma_coding_block_size_minus3 + reader.ConsumeBits(1); + // transform_skip_enabled_flag: u(1) + reader.ConsumeBits(1); + // cu_qp_delta_enabled_flag: u(1) + bool cu_qp_delta_enabled_flag = reader.Read<bool>(); + if (cu_qp_delta_enabled_flag) { + // diff_cu_qp_delta_depth: ue(v) + uint32_t diff_cu_qp_delta_depth = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(diff_cu_qp_delta_depth, 0, + sps->log2_diff_max_min_luma_coding_block_size); + } + // pps_cb_qp_offset: se(v) + int32_t pps_cb_qp_offset = reader.ReadSignedExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(pps_cb_qp_offset, -12, 12); + // pps_cr_qp_offset: se(v) + int32_t pps_cr_qp_offset = reader.ReadSignedExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(pps_cr_qp_offset, -12, 12); + // 
pps_slice_chroma_qp_offsets_present_flag: u(1) + reader.ConsumeBits(1); + // weighted_pred_flag: u(1) + pps.weighted_pred_flag = reader.Read<bool>(); + // weighted_bipred_flag: u(1) + pps.weighted_bipred_flag = reader.Read<bool>(); + // transquant_bypass_enabled_flag: u(1) + reader.ConsumeBits(1); + // tiles_enabled_flag: u(1) + bool tiles_enabled_flag = reader.Read<bool>(); + // entropy_coding_sync_enabled_flag: u(1) + reader.ConsumeBits(1); + if (tiles_enabled_flag) { + // num_tile_columns_minus1: ue(v) + uint32_t num_tile_columns_minus1 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(num_tile_columns_minus1, 0, + sps->pic_width_in_ctbs_y - 1); + TRUE_OR_RETURN(num_tile_columns_minus1 < kMaxNumTileColumnWidth); + // num_tile_rows_minus1: ue(v) + uint32_t num_tile_rows_minus1 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(num_tile_rows_minus1, 0, + sps->pic_height_in_ctbs_y - 1); + TRUE_OR_RETURN((num_tile_columns_minus1 != 0) || + (num_tile_rows_minus1 != 0)); + TRUE_OR_RETURN(num_tile_rows_minus1 < kMaxNumTileRowHeight); + // uniform_spacing_flag: u(1) + bool uniform_spacing_flag = reader.Read<bool>(); + if (!uniform_spacing_flag) { + int column_width_minus1[kMaxNumTileColumnWidth]; + column_width_minus1[num_tile_columns_minus1] = + sps->pic_width_in_ctbs_y - 1; + for (uint32_t i = 0; i < num_tile_columns_minus1; i++) { + // column_width_minus1: ue(v) + column_width_minus1[i] = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL( + column_width_minus1[i], 0, + column_width_minus1[num_tile_columns_minus1] - 1); + column_width_minus1[num_tile_columns_minus1] -= + column_width_minus1[i] + 1; + } + int row_height_minus1[kMaxNumTileRowHeight]; + row_height_minus1[num_tile_rows_minus1] = sps->pic_height_in_ctbs_y - 1; + for (uint32_t i = 0; i < num_tile_rows_minus1; i++) { + // row_height_minus1: ue(v) + row_height_minus1[i] = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(row_height_minus1[i], 0, + row_height_minus1[num_tile_rows_minus1] - 1); + row_height_minus1[num_tile_rows_minus1] -= row_height_minus1[i] + 1; + } + // loop_filter_across_tiles_enabled_flag: u(1) + reader.ConsumeBits(1); + } + } + // pps_loop_filter_across_slices_enabled_flag: u(1) + reader.ConsumeBits(1); + // deblocking_filter_control_present_flag: u(1) + bool deblocking_filter_control_present_flag = reader.Read<bool>(); + if (deblocking_filter_control_present_flag) { + // deblocking_filter_override_enabled_flag: u(1) + reader.ConsumeBits(1); + // pps_deblocking_filter_disabled_flag: u(1) + bool pps_deblocking_filter_disabled_flag = reader.Read<bool>(); + if (!pps_deblocking_filter_disabled_flag) { + // pps_beta_offset_div2: se(v) + int pps_beta_offset_div2 = reader.ReadSignedExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(pps_beta_offset_div2, -6, 6); + // pps_tc_offset_div2: se(v) + int pps_tc_offset_div2 = reader.ReadSignedExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(pps_tc_offset_div2, -6, 6); + } + } + // pps_scaling_list_data_present_flag: u(1) + bool pps_scaling_list_data_present_flag = 0; + pps_scaling_list_data_present_flag = reader.Read<bool>(); + if (pps_scaling_list_data_present_flag) { + // scaling_list_data() + if (!H265SpsParser::ParseScalingListData(reader)) { + return absl::nullopt; + } + } + // lists_modification_present_flag: u(1) + pps.lists_modification_present_flag = reader.Read<bool>(); + + if (!reader.Ok()) { + return absl::nullopt; + } + + return pps; +} + +bool H265PpsParser::ParsePpsIdsInternal(BitstreamReader& reader, + uint32_t& pps_id, + uint32_t& 
sps_id) { + // pic_parameter_set_id: ue(v) + pps_id = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_FALSE(pps_id, 0, 63); + // seq_parameter_set_id: ue(v) + sps_id = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_FALSE(sps_id, 0, 15); + return true; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_pps_parser.h b/third_party/libwebrtc/common_video/h265/h265_pps_parser.h new file mode 100644 index 0000000000..625869d8d5 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_pps_parser.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H265_H265_PPS_PARSER_H_ +#define COMMON_VIDEO_H265_H265_PPS_PARSER_H_ + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "common_video/h265/h265_sps_parser.h" +#include "rtc_base/bitstream_reader.h" + +namespace webrtc { + +// A class for parsing out picture parameter set (PPS) data from a H265 NALU. +class H265PpsParser { + public: + // The parsed state of the PPS. Only some select values are stored. + // Add more as they are actually needed. + struct PpsState { + PpsState() = default; + + bool dependent_slice_segments_enabled_flag = false; + bool cabac_init_present_flag = false; + bool output_flag_present_flag = false; + uint32_t num_extra_slice_header_bits = 0; + uint32_t num_ref_idx_l0_default_active_minus1 = 0; + uint32_t num_ref_idx_l1_default_active_minus1 = 0; + int init_qp_minus26 = 0; + bool weighted_pred_flag = false; + bool weighted_bipred_flag = false; + bool lists_modification_present_flag = false; + uint32_t pps_id = 0; + uint32_t sps_id = 0; + int qp_bd_offset_y = 0; + }; + + // Unpack RBSP and parse PPS state from the supplied buffer. + static absl::optional<PpsState> ParsePps(const uint8_t* data, + size_t length, + const H265SpsParser::SpsState* sps); + + static bool ParsePpsIds(const uint8_t* data, + size_t length, + uint32_t* pps_id, + uint32_t* sps_id); + + protected: + // Parse the PPS state, for a bit buffer where RBSP decoding has already been + // performed. + static absl::optional<PpsState> ParseInternal( + rtc::ArrayView<const uint8_t> buffer, + const H265SpsParser::SpsState* sps); + static bool ParsePpsIdsInternal(BitstreamReader& reader, + uint32_t& pps_id, + uint32_t& sps_id); +}; + +} // namespace webrtc + +#endif // COMMON_VIDEO_H265_H265_PPS_PARSER_H_ diff --git a/third_party/libwebrtc/common_video/h265/h265_pps_parser_unittest.cc b/third_party/libwebrtc/common_video/h265/h265_pps_parser_unittest.cc new file mode 100644 index 0000000000..d91fc1a55c --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_pps_parser_unittest.cc @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "common_video/h265/h265_pps_parser.h" + +#include <algorithm> + +#include "common_video/h265/h265_common.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/buffer.h" +#include "rtc_base/checks.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +constexpr size_t kPpsBufferMaxSize = 256; +constexpr uint32_t kIgnored = 0; +} // namespace + +void WritePps(const H265PpsParser::PpsState& pps, + bool cu_qp_delta_enabled_flag, + bool tiles_enabled_flag, + bool uniform_spacing_flag, + bool deblocking_filter_control_present_flag, + bool pps_deblocking_filter_disabled_flag, + bool pps_scaling_list_data_present_flag, + bool scaling_list_pred_mode_flag, + rtc::Buffer* out_buffer) { + uint8_t data[kPpsBufferMaxSize] = {0}; + rtc::BitBufferWriter bit_buffer(data, kPpsBufferMaxSize); + + // pic_parameter_set_id: ue(v) + bit_buffer.WriteExponentialGolomb(pps.pps_id); + // seq_parameter_set_id: ue(v) + bit_buffer.WriteExponentialGolomb(pps.sps_id); + // dependent_slice_segments_enabled_flag: u(1) + bit_buffer.WriteBits(pps.dependent_slice_segments_enabled_flag, 1); + // output_flag_present_flag: u(1) + bit_buffer.WriteBits(pps.output_flag_present_flag, 1); + // num_extra_slice_header_bits: u(3) + bit_buffer.WriteBits(pps.num_extra_slice_header_bits, 3); + // sign_data_hiding_enabled_flag: u(1) + bit_buffer.WriteBits(1, 1); + // cabac_init_present_flag: u(1) + bit_buffer.WriteBits(pps.cabac_init_present_flag, 1); + // num_ref_idx_l0_default_active_minus1: ue(v) + bit_buffer.WriteExponentialGolomb(pps.num_ref_idx_l0_default_active_minus1); + // num_ref_idx_l1_default_active_minus1: ue(v) + bit_buffer.WriteExponentialGolomb(pps.num_ref_idx_l1_default_active_minus1); + // init_qp_minus26: se(v) + bit_buffer.WriteSignedExponentialGolomb(pps.init_qp_minus26); + // constrained_intra_pred_flag: u(1) + bit_buffer.WriteBits(0, 1); + // transform_skip_enabled_flag: u(1) + bit_buffer.WriteBits(0, 1); + // cu_qp_delta_enabled_flag: u(1) + bit_buffer.WriteBits(cu_qp_delta_enabled_flag, 1); + if (cu_qp_delta_enabled_flag) { + // diff_cu_qp_delta_depth: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + } + // pps_cb_qp_offset: se(v) + bit_buffer.WriteSignedExponentialGolomb(kIgnored); + // pps_cr_qp_offset: se(v) + bit_buffer.WriteSignedExponentialGolomb(kIgnored); + // pps_slice_chroma_qp_offsets_present_flag: u(1) + bit_buffer.WriteBits(0, 1); + // weighted_pred_flag: u(1) + bit_buffer.WriteBits(pps.weighted_pred_flag, 1); + // weighted_bipred_flag: u(1) + bit_buffer.WriteBits(pps.weighted_bipred_flag, 1); + // transquant_bypass_enabled_flag: u(1) + bit_buffer.WriteBits(0, 1); + // tiles_enabled_flag: u(1) + bit_buffer.WriteBits(tiles_enabled_flag, 1); + // entropy_coding_sync_enabled_flag: u(1) + bit_buffer.WriteBits(1, 1); + if (tiles_enabled_flag) { + // num_tile_columns_minus1: ue(v) + bit_buffer.WriteExponentialGolomb(6); + // num_tile_rows_minus1: ue(v) + bit_buffer.WriteExponentialGolomb(1); + // uniform_spacing_flag: u(1) + bit_buffer.WriteBits(0, 1); + if (!uniform_spacing_flag) { + for (uint32_t i = 0; i < 6; i++) { + // column_width_minus1: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + } + for (uint32_t i = 0; i < 1; i++) { + // row_height_minus1: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + } + // loop_filter_across_tiles_enabled_flag: u(1) + bit_buffer.WriteBits(0, 1); + } + } + // pps_loop_filter_across_slices_enabled_flag: u(1) + bit_buffer.WriteBits(1, 1); + // deblocking_filter_control_present_flag: u(1) + 
bit_buffer.WriteBits(deblocking_filter_control_present_flag, 1); + if (deblocking_filter_control_present_flag) { + // deblocking_filter_override_enabled_flag: u(1) + bit_buffer.WriteBits(0, 1); + // pps_deblocking_filter_disabled_flag: u(1) + bit_buffer.WriteBits(pps_deblocking_filter_disabled_flag, 1); + if (!pps_deblocking_filter_disabled_flag) { + // pps_beta_offset_div2: se(v) + bit_buffer.WriteSignedExponentialGolomb(kIgnored); + // pps_tc_offset_div2: se(v) + bit_buffer.WriteSignedExponentialGolomb(kIgnored); + } + } + // pps_scaling_list_data_present_flag: u(1) + bit_buffer.WriteBits(pps_scaling_list_data_present_flag, 1); + if (pps_scaling_list_data_present_flag) { + for (int size_id = 0; size_id < 4; size_id++) { + for (int matrix_id = 0; matrix_id < 6; + matrix_id += (size_id == 3) ? 3 : 1) { + // scaling_list_pred_mode_flag: u(1) + bit_buffer.WriteBits(scaling_list_pred_mode_flag, 1); + if (!scaling_list_pred_mode_flag) { + // scaling_list_pred_matrix_id_delta: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + } else { + uint32_t coef_num = std::min(64, 1 << (4 + (size_id << 1))); + if (size_id > 1) { + // scaling_list_dc_coef_minus8: se(v) + bit_buffer.WriteSignedExponentialGolomb(kIgnored); + } + for (uint32_t i = 0; i < coef_num; i++) { + // scaling_list_delta_coef: se(v) + bit_buffer.WriteSignedExponentialGolomb(kIgnored); + } + } + } + } + } + // lists_modification_present_flag: u(1) + bit_buffer.WriteBits(pps.lists_modification_present_flag, 1); + // log2_parallel_merge_level_minus2: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + // slice_segment_header_extension_present_flag: u(1) + bit_buffer.WriteBits(0, 1); + + size_t byte_offset; + size_t bit_offset; + bit_buffer.GetCurrentOffset(&byte_offset, &bit_offset); + if (bit_offset > 0) { + bit_buffer.WriteBits(0, 8 - bit_offset); + bit_buffer.GetCurrentOffset(&byte_offset, &bit_offset); + } + + H265::WriteRbsp(data, byte_offset, out_buffer); +} + +class H265PpsParserTest : public ::testing::Test { + public: + H265PpsParserTest() {} + ~H265PpsParserTest() override {} + + void RunTest() { + VerifyParsing(generated_pps_, false, false, false, false, false, false, + false); + // Enable flags to cover more path + VerifyParsing(generated_pps_, true, true, false, true, true, true, false); + } + + void VerifyParsing(const H265PpsParser::PpsState& pps, + bool cu_qp_delta_enabled_flag, + bool tiles_enabled_flag, + bool uniform_spacing_flag, + bool deblocking_filter_control_present_flag, + bool pps_deblocking_filter_disabled_flag, + bool pps_scaling_list_data_present_flag, + bool scaling_list_pred_mode_flag) { + buffer_.Clear(); + WritePps(pps, cu_qp_delta_enabled_flag, tiles_enabled_flag, + uniform_spacing_flag, deblocking_filter_control_present_flag, + pps_deblocking_filter_disabled_flag, + pps_scaling_list_data_present_flag, scaling_list_pred_mode_flag, + &buffer_); + const uint8_t sps_buffer[] = { + 0x01, 0x04, 0x08, 0x00, 0x00, 0x03, 0x00, 0x9d, 0x08, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x5d, 0xb0, 0x02, 0x80, 0x80, 0x2d, + 0x16, 0x59, 0x59, 0xa4, 0x93, 0x2b, 0x80, 0x40, 0x00, 0x00, + 0x03, 0x00, 0x40, 0x00, 0x00, 0x07, 0x82}; + H265SpsParser::SpsState parsed_sps = + H265SpsParser::ParseSps(sps_buffer, arraysize(sps_buffer)).value(); + parsed_pps_ = + H265PpsParser::ParsePps(buffer_.data(), buffer_.size(), &parsed_sps); + ASSERT_TRUE(parsed_pps_); + EXPECT_EQ(pps.dependent_slice_segments_enabled_flag, + parsed_pps_->dependent_slice_segments_enabled_flag); + EXPECT_EQ(pps.cabac_init_present_flag, + 
parsed_pps_->cabac_init_present_flag); + EXPECT_EQ(pps.output_flag_present_flag, + parsed_pps_->output_flag_present_flag); + EXPECT_EQ(pps.num_extra_slice_header_bits, + parsed_pps_->num_extra_slice_header_bits); + EXPECT_EQ(pps.num_ref_idx_l0_default_active_minus1, + parsed_pps_->num_ref_idx_l0_default_active_minus1); + EXPECT_EQ(pps.num_ref_idx_l1_default_active_minus1, + parsed_pps_->num_ref_idx_l1_default_active_minus1); + EXPECT_EQ(pps.init_qp_minus26, parsed_pps_->init_qp_minus26); + EXPECT_EQ(pps.weighted_pred_flag, parsed_pps_->weighted_pred_flag); + EXPECT_EQ(pps.weighted_bipred_flag, parsed_pps_->weighted_bipred_flag); + EXPECT_EQ(pps.lists_modification_present_flag, + parsed_pps_->lists_modification_present_flag); + EXPECT_EQ(pps.pps_id, parsed_pps_->pps_id); + EXPECT_EQ(pps.sps_id, parsed_pps_->sps_id); + } + + H265PpsParser::PpsState generated_pps_; + rtc::Buffer buffer_; + absl::optional<H265PpsParser::PpsState> parsed_pps_; + absl::optional<H265SpsParser::SpsState> parsed_sps_; +}; + +TEST_F(H265PpsParserTest, ZeroPps) { + RunTest(); +} + +TEST_F(H265PpsParserTest, MaxPps) { + generated_pps_.dependent_slice_segments_enabled_flag = true; + generated_pps_.init_qp_minus26 = 25; + generated_pps_.num_extra_slice_header_bits = 1; // 1 bit value. + generated_pps_.weighted_bipred_flag = true; + generated_pps_.weighted_pred_flag = true; + generated_pps_.cabac_init_present_flag = true; + generated_pps_.pps_id = 2; + generated_pps_.sps_id = 1; + RunTest(); + + generated_pps_.init_qp_minus26 = -25; + RunTest(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_sps_parser.cc b/third_party/libwebrtc/common_video/h265/h265_sps_parser.cc new file mode 100644 index 0000000000..a2da4b9b7b --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_sps_parser.cc @@ -0,0 +1,686 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "common_video/h265/h265_sps_parser.h" + +#include <algorithm> +#include <memory> +#include <vector> + +#include "common_video/h265/h265_common.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/logging.h" + +#define IN_RANGE_OR_RETURN_NULL(val, min, max) \ + do { \ + if (!reader.Ok() || (val) < (min) || (val) > (max)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \ + " to be" \ + << " in range [" << (min) << ":" << (max) << "]" \ + << " found " << (val) << " instead"; \ + return absl::nullopt; \ + } \ + } while (0) + +#define IN_RANGE_OR_RETURN_FALSE(val, min, max) \ + do { \ + if (!reader.Ok() || (val) < (min) || (val) > (max)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " #val \ + " to be" \ + << " in range [" << (min) << ":" << (max) << "]" \ + << " found " << (val) << " instead"; \ + return false; \ + } \ + } while (0) + +#define TRUE_OR_RETURN(a) \ + do { \ + if (!reader.Ok() || !(a)) { \ + RTC_LOG(LS_WARNING) << "Error in stream: invalid value, expected " \ + << #a; \ + return absl::nullopt; \ + } \ + } while (0) + +namespace { +using OptionalSps = absl::optional<webrtc::H265SpsParser::SpsState>; +using OptionalShortTermRefPicSet = + absl::optional<webrtc::H265SpsParser::ShortTermRefPicSet>; +using OptionalProfileTierLevel = + absl::optional<webrtc::H265SpsParser::ProfileTierLevel>; + +constexpr int kMaxNumSizeIds = 4; +constexpr int kMaxNumMatrixIds = 6; +constexpr int kMaxNumCoefs = 64; +} // namespace + +namespace webrtc { + +H265SpsParser::ShortTermRefPicSet::ShortTermRefPicSet() = default; + +H265SpsParser::ProfileTierLevel::ProfileTierLevel() = default; + +int H265SpsParser::GetMaxLumaPs(int general_level_idc) { + // From Table A.8 - General tier and level limits. + // |general_level_idc| is 30x the actual level. + if (general_level_idc <= 30) // level 1 + return 36864; + if (general_level_idc <= 60) // level 2 + return 122880; + if (general_level_idc <= 63) // level 2.1 + return 245760; + if (general_level_idc <= 90) // level 3 + return 552960; + if (general_level_idc <= 93) // level 3.1 + return 983040; + if (general_level_idc <= 123) // level 4, 4.1 + return 2228224; + if (general_level_idc <= 156) // level 5, 5.1, 5.2 + return 8912896; + // level 6, 6.1, 6.2 - beyond that there's no actual limit. + return 35651584; +} + +size_t H265SpsParser::GetDpbMaxPicBuf(int general_profile_idc) { + // From A.4.2 - Profile-specific level limits for the video profiles. + // If sps_curr_pic_ref_enabled_flag is required to be zero, than this is 6 + // otherwise it is 7. + return (general_profile_idc >= kProfileIdcMain && + general_profile_idc <= kProfileIdcHighThroughput) + ? 6 + : 7; +} + +// General note: this is based off the 08/2021 version of the H.265 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.265 + +// Unpack RBSP and parse SPS state from the supplied buffer. +absl::optional<H265SpsParser::SpsState> H265SpsParser::ParseSps( + const uint8_t* data, + size_t length) { + RTC_DCHECK(data); + return ParseSpsInternal(H265::ParseRbsp(data, length)); +} + +bool H265SpsParser::ParseScalingListData(BitstreamReader& reader) { + int32_t scaling_list_dc_coef_minus8[kMaxNumSizeIds][kMaxNumMatrixIds] = {}; + for (int size_id = 0; size_id < kMaxNumSizeIds; size_id++) { + for (int matrix_id = 0; matrix_id < kMaxNumMatrixIds; + matrix_id += (size_id == 3) ? 
3 : 1) { + // scaling_list_pred_mode_flag: u(1) + bool scaling_list_pred_mode_flag = reader.Read<bool>(); + if (!scaling_list_pred_mode_flag) { + // scaling_list_pred_matrix_id_delta: ue(v) + int scaling_list_pred_matrix_id_delta = reader.ReadExponentialGolomb(); + if (size_id <= 2) { + IN_RANGE_OR_RETURN_FALSE(scaling_list_pred_matrix_id_delta, 0, + matrix_id); + } else { // size_id == 3 + IN_RANGE_OR_RETURN_FALSE(scaling_list_pred_matrix_id_delta, 0, + matrix_id / 3); + } + } else { + uint32_t coef_num = std::min(kMaxNumCoefs, 1 << (4 + (size_id << 1))); + if (size_id > 1) { + // scaling_list_dc_coef_minus8: se(v) + scaling_list_dc_coef_minus8[size_id - 2][matrix_id] = + reader.ReadSignedExponentialGolomb(); + IN_RANGE_OR_RETURN_FALSE( + scaling_list_dc_coef_minus8[size_id - 2][matrix_id], -7, 247); + } + for (uint32_t i = 0; i < coef_num; i++) { + // scaling_list_delta_coef: se(v) + int32_t scaling_list_delta_coef = + reader.ReadSignedExponentialGolomb(); + IN_RANGE_OR_RETURN_FALSE(scaling_list_delta_coef, -128, 127); + } + } + } + } + return reader.Ok(); +} + +absl::optional<H265SpsParser::ShortTermRefPicSet> +H265SpsParser::ParseShortTermRefPicSet( + uint32_t st_rps_idx, + uint32_t num_short_term_ref_pic_sets, + const std::vector<H265SpsParser::ShortTermRefPicSet>& + short_term_ref_pic_set, + uint32_t sps_max_dec_pic_buffering_minus1, + BitstreamReader& reader) { + H265SpsParser::ShortTermRefPicSet st_ref_pic_set; + + bool inter_ref_pic_set_prediction_flag = false; + if (st_rps_idx != 0) { + // inter_ref_pic_set_prediction_flag: u(1) + inter_ref_pic_set_prediction_flag = reader.Read<bool>(); + } + + if (inter_ref_pic_set_prediction_flag) { + uint32_t delta_idx_minus1 = 0; + if (st_rps_idx == num_short_term_ref_pic_sets) { + // delta_idx_minus1: ue(v) + delta_idx_minus1 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(delta_idx_minus1, 0, st_rps_idx - 1); + } + // delta_rps_sign: u(1) + int delta_rps_sign = reader.ReadBits(1); + // abs_delta_rps_minus1: ue(v) + int abs_delta_rps_minus1 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(abs_delta_rps_minus1, 0, 0x7FFF); + int delta_rps = (1 - 2 * delta_rps_sign) * (abs_delta_rps_minus1 + 1); + uint32_t ref_rps_idx = st_rps_idx - (delta_idx_minus1 + 1); + uint32_t num_delta_pocs = + short_term_ref_pic_set[ref_rps_idx].num_delta_pocs; + IN_RANGE_OR_RETURN_NULL(num_delta_pocs, 0, kMaxShortTermRefPicSets); + const ShortTermRefPicSet& ref_set = short_term_ref_pic_set[ref_rps_idx]; + bool used_by_curr_pic_flag[kMaxShortTermRefPicSets] = {}; + bool use_delta_flag[kMaxShortTermRefPicSets] = {}; + // 7.4.8 - use_delta_flag defaults to 1 if not present. + std::fill_n(use_delta_flag, kMaxShortTermRefPicSets, true); + + for (uint32_t j = 0; j <= num_delta_pocs; j++) { + // used_by_curr_pic_flag: u(1) + used_by_curr_pic_flag[j] = reader.Read<bool>(); + if (!used_by_curr_pic_flag[j]) { + // use_delta_flag: u(1) + use_delta_flag[j] = reader.Read<bool>(); + } + } + + // Calculate delta_poc_s{0,1}, used_by_curr_pic_s{0,1}, num_negative_pics + // and num_positive_pics. 
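+    // Each delta POC in the reference set is shifted by delta_rps and, when
+    // its use_delta_flag is set, sorted by the sign of the result into the
+    // derived S0 (negative) / S1 (positive) lists, as per equations 7-61 and
+    // 7-62 below.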
+ // Equation 7-61 + int i = 0; + IN_RANGE_OR_RETURN_NULL( + ref_set.num_negative_pics + ref_set.num_positive_pics, 0, + kMaxShortTermRefPicSets); + for (int j = ref_set.num_positive_pics - 1; j >= 0; --j) { + int d_poc = ref_set.delta_poc_s1[j] + delta_rps; + if (d_poc < 0 && use_delta_flag[ref_set.num_negative_pics + j]) { + st_ref_pic_set.delta_poc_s0[i] = d_poc; + st_ref_pic_set.used_by_curr_pic_s0[i++] = + used_by_curr_pic_flag[ref_set.num_negative_pics + j]; + } + } + if (delta_rps < 0 && use_delta_flag[ref_set.num_delta_pocs]) { + st_ref_pic_set.delta_poc_s0[i] = delta_rps; + st_ref_pic_set.used_by_curr_pic_s0[i++] = + used_by_curr_pic_flag[ref_set.num_delta_pocs]; + } + for (uint32_t j = 0; j < ref_set.num_negative_pics; ++j) { + int d_poc = ref_set.delta_poc_s0[j] + delta_rps; + if (d_poc < 0 && use_delta_flag[j]) { + st_ref_pic_set.delta_poc_s0[i] = d_poc; + st_ref_pic_set.used_by_curr_pic_s0[i++] = used_by_curr_pic_flag[j]; + } + } + st_ref_pic_set.num_negative_pics = i; + // Equation 7-62 + i = 0; + for (int j = ref_set.num_negative_pics - 1; j >= 0; --j) { + int d_poc = ref_set.delta_poc_s0[j] + delta_rps; + if (d_poc > 0 && use_delta_flag[j]) { + st_ref_pic_set.delta_poc_s1[i] = d_poc; + st_ref_pic_set.used_by_curr_pic_s1[i++] = used_by_curr_pic_flag[j]; + } + } + if (delta_rps > 0 && use_delta_flag[ref_set.num_delta_pocs]) { + st_ref_pic_set.delta_poc_s1[i] = delta_rps; + st_ref_pic_set.used_by_curr_pic_s1[i++] = + used_by_curr_pic_flag[ref_set.num_delta_pocs]; + } + for (uint32_t j = 0; j < ref_set.num_positive_pics; ++j) { + int d_poc = ref_set.delta_poc_s1[j] + delta_rps; + if (d_poc > 0 && use_delta_flag[ref_set.num_negative_pics + j]) { + st_ref_pic_set.delta_poc_s1[i] = d_poc; + st_ref_pic_set.used_by_curr_pic_s1[i++] = + used_by_curr_pic_flag[ref_set.num_negative_pics + j]; + } + } + st_ref_pic_set.num_positive_pics = i; + IN_RANGE_OR_RETURN_NULL(st_ref_pic_set.num_negative_pics, 0, + sps_max_dec_pic_buffering_minus1); + IN_RANGE_OR_RETURN_NULL( + st_ref_pic_set.num_positive_pics, 0, + sps_max_dec_pic_buffering_minus1 - st_ref_pic_set.num_negative_pics); + + } else { + // num_negative_pics: ue(v) + st_ref_pic_set.num_negative_pics = reader.ReadExponentialGolomb(); + // num_positive_pics: ue(v) + st_ref_pic_set.num_positive_pics = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(st_ref_pic_set.num_negative_pics, 0, + sps_max_dec_pic_buffering_minus1); + IN_RANGE_OR_RETURN_NULL( + st_ref_pic_set.num_positive_pics, 0, + sps_max_dec_pic_buffering_minus1 - st_ref_pic_set.num_negative_pics); + + for (uint32_t i = 0; i < st_ref_pic_set.num_negative_pics; i++) { + // delta_poc_s0_minus1: ue(v) + int delta_poc_s0_minus1 = 0; + delta_poc_s0_minus1 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(delta_poc_s0_minus1, 0, 0x7FFF); + if (i == 0) { + st_ref_pic_set.delta_poc_s0[i] = -(delta_poc_s0_minus1 + 1); + } else { + st_ref_pic_set.delta_poc_s0[i] = + st_ref_pic_set.delta_poc_s0[i - 1] - (delta_poc_s0_minus1 + 1); + } + // used_by_curr_pic_s0_flag: u(1) + st_ref_pic_set.used_by_curr_pic_s0[i] = reader.Read<bool>(); + } + + for (uint32_t i = 0; i < st_ref_pic_set.num_positive_pics; i++) { + // delta_poc_s1_minus1: ue(v) + int delta_poc_s1_minus1 = 0; + delta_poc_s1_minus1 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(delta_poc_s1_minus1, 0, 0x7FFF); + if (i == 0) { + st_ref_pic_set.delta_poc_s1[i] = delta_poc_s1_minus1 + 1; + } else { + st_ref_pic_set.delta_poc_s1[i] = + st_ref_pic_set.delta_poc_s1[i - 1] + delta_poc_s1_minus1 + 1; + } + // 
used_by_curr_pic_s1_flag: u(1) + st_ref_pic_set.used_by_curr_pic_s1[i] = reader.Read<bool>(); + } + } + + st_ref_pic_set.num_delta_pocs = + st_ref_pic_set.num_negative_pics + st_ref_pic_set.num_positive_pics; + + if (!reader.Ok()) { + return absl::nullopt; + } + + return OptionalShortTermRefPicSet(st_ref_pic_set); +} + +absl::optional<H265SpsParser::ProfileTierLevel> +H265SpsParser::ParseProfileTierLevel(bool profile_present, + int max_num_sub_layers_minus1, + BitstreamReader& reader) { + H265SpsParser::ProfileTierLevel pf_tier_level; + // 7.4.4 + if (profile_present) { + int general_profile_space; + general_profile_space = reader.ReadBits(2); + TRUE_OR_RETURN(general_profile_space == 0); + // general_tier_flag or reserved 0: u(1) + reader.ConsumeBits(1); + pf_tier_level.general_profile_idc = reader.ReadBits(5); + IN_RANGE_OR_RETURN_NULL(pf_tier_level.general_profile_idc, 0, 11); + uint16_t general_profile_compatibility_flag_high16 = reader.ReadBits(16); + uint16_t general_profile_compatibility_flag_low16 = reader.ReadBits(16); + pf_tier_level.general_profile_compatibility_flags = + (general_profile_compatibility_flag_high16 << 16) + + general_profile_compatibility_flag_low16; + pf_tier_level.general_progressive_source_flag = reader.ReadBits(1); + pf_tier_level.general_interlaced_source_flag = reader.ReadBits(1); + if (!reader.Ok() || (!pf_tier_level.general_progressive_source_flag && + pf_tier_level.general_interlaced_source_flag)) { + RTC_LOG(LS_WARNING) << "Interlaced streams not supported"; + return absl::nullopt; + } + pf_tier_level.general_non_packed_constraint_flag = reader.ReadBits(1); + pf_tier_level.general_frame_only_constraint_flag = reader.ReadBits(1); + // general_reserved_zero_7bits + reader.ConsumeBits(7); + pf_tier_level.general_one_picture_only_constraint_flag = reader.ReadBits(1); + // general_reserved_zero_35bits + reader.ConsumeBits(35); + // general_inbld_flag + reader.ConsumeBits(1); + } + pf_tier_level.general_level_idc = reader.ReadBits(8); + bool sub_layer_profile_present_flag[8] = {}; + bool sub_layer_level_present_flag[8] = {}; + for (int i = 0; i < max_num_sub_layers_minus1; ++i) { + sub_layer_profile_present_flag[i] = reader.ReadBits(1); + sub_layer_level_present_flag[i] = reader.ReadBits(1); + } + if (max_num_sub_layers_minus1 > 0) { + for (int i = max_num_sub_layers_minus1; i < 8; i++) { + reader.ConsumeBits(2); + } + } + for (int i = 0; i < max_num_sub_layers_minus1; i++) { + if (sub_layer_profile_present_flag[i]) { + // sub_layer_profile_space + reader.ConsumeBits(2); + // sub_layer_tier_flag + reader.ConsumeBits(1); + // sub_layer_profile_idc + reader.ConsumeBits(5); + // sub_layer_profile_compatibility_flag + reader.ConsumeBits(32); + // sub_layer_{progressive,interlaced}_source_flag + reader.ConsumeBits(2); + // Ignore sub_layer_non_packed_constraint_flag and + // sub_layer_frame_only_constraint_flag. + reader.ConsumeBits(2); + // Skip the compatibility flags, they are always 43 bits. + reader.ConsumeBits(43); + // sub_layer_inbld_flag + reader.ConsumeBits(1); + } + if (sub_layer_level_present_flag[i]) { + // sub_layer_level_idc + reader.ConsumeBits(8); + } + } + + if (!reader.Ok()) { + return absl::nullopt; + } + + return OptionalProfileTierLevel(pf_tier_level); +} + +absl::optional<H265SpsParser::SpsState> H265SpsParser::ParseSpsInternal( + rtc::ArrayView<const uint8_t> buffer) { + BitstreamReader reader(buffer); + + // Now, we need to use a bit buffer to parse through the actual H265 SPS + // format. 
See Section 7.3.2.2.1 ("General sequence parameter set data + // syntax") of the H.265 standard for a complete description. + // Since we only care about resolution, we ignore the majority of fields, but + // we still have to actively parse through a lot of the data, since many of + // the fields have variable size. + // We're particularly interested in: + // chroma_format_idc -> affects crop units + // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16). + // frame_crop_*_offset -> crop information + SpsState sps; + + // sps_video_parameter_set_id: u(4) + uint32_t sps_video_parameter_set_id = 0; + sps_video_parameter_set_id = reader.ReadBits(4); + IN_RANGE_OR_RETURN_NULL(sps_video_parameter_set_id, 0, 15); + + // sps_max_sub_layers_minus1: u(3) + uint32_t sps_max_sub_layers_minus1 = 0; + sps_max_sub_layers_minus1 = reader.ReadBits(3); + IN_RANGE_OR_RETURN_NULL(sps_max_sub_layers_minus1, 0, kMaxSubLayers - 1); + sps.sps_max_sub_layers_minus1 = sps_max_sub_layers_minus1; + // sps_temporal_id_nesting_flag: u(1) + reader.ConsumeBits(1); + // profile_tier_level(1, sps_max_sub_layers_minus1). + OptionalProfileTierLevel profile_tier_level = + ParseProfileTierLevel(true, sps.sps_max_sub_layers_minus1, reader); + if (!profile_tier_level) { + return absl::nullopt; + } + // sps_seq_parameter_set_id: ue(v) + sps.sps_id = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.sps_id, 0, 15); + // chrome_format_idc: ue(v) + sps.chroma_format_idc = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.chroma_format_idc, 0, 3); + if (sps.chroma_format_idc == 3) { + // seperate_colour_plane_flag: u(1) + sps.separate_colour_plane_flag = reader.Read<bool>(); + } + uint32_t pic_width_in_luma_samples = 0; + uint32_t pic_height_in_luma_samples = 0; + // pic_width_in_luma_samples: ue(v) + pic_width_in_luma_samples = reader.ReadExponentialGolomb(); + TRUE_OR_RETURN(pic_width_in_luma_samples != 0); + // pic_height_in_luma_samples: ue(v) + pic_height_in_luma_samples = reader.ReadExponentialGolomb(); + TRUE_OR_RETURN(pic_height_in_luma_samples != 0); + + // Equation A-2: Calculate max_dpb_size. + uint32_t max_luma_ps = GetMaxLumaPs(profile_tier_level->general_level_idc); + uint32_t max_dpb_size = 0; + uint32_t pic_size_in_samples_y = pic_height_in_luma_samples; + pic_size_in_samples_y *= pic_width_in_luma_samples; + size_t max_dpb_pic_buf = + GetDpbMaxPicBuf(profile_tier_level->general_profile_idc); + if (pic_size_in_samples_y <= (max_luma_ps >> 2)) + max_dpb_size = std::min(4 * max_dpb_pic_buf, size_t{16}); + else if (pic_size_in_samples_y <= (max_luma_ps >> 1)) + max_dpb_size = std::min(2 * max_dpb_pic_buf, size_t{16}); + else if (pic_size_in_samples_y <= ((3 * max_luma_ps) >> 2)) + max_dpb_size = std::min((4 * max_dpb_pic_buf) / 3, size_t{16}); + else + max_dpb_size = max_dpb_pic_buf; + + // conformance_window_flag: u(1) + bool conformance_window_flag = reader.Read<bool>(); + + uint32_t conf_win_left_offset = 0; + uint32_t conf_win_right_offset = 0; + uint32_t conf_win_top_offset = 0; + uint32_t conf_win_bottom_offset = 0; + int sub_width_c = + ((1 == sps.chroma_format_idc) || (2 == sps.chroma_format_idc)) && + (0 == sps.separate_colour_plane_flag) + ? 2 + : 1; + int sub_height_c = + (1 == sps.chroma_format_idc) && (0 == sps.separate_colour_plane_flag) ? 
2 + : 1; + if (conformance_window_flag) { + // conf_win_left_offset: ue(v) + conf_win_left_offset = reader.ReadExponentialGolomb(); + // conf_win_right_offset: ue(v) + conf_win_right_offset = reader.ReadExponentialGolomb(); + // conf_win_top_offset: ue(v) + conf_win_top_offset = reader.ReadExponentialGolomb(); + // conf_win_bottom_offset: ue(v) + conf_win_bottom_offset = reader.ReadExponentialGolomb(); + uint32_t width_crop = conf_win_left_offset; + width_crop += conf_win_right_offset; + width_crop *= sub_width_c; + TRUE_OR_RETURN(width_crop < pic_width_in_luma_samples); + uint32_t height_crop = conf_win_top_offset; + height_crop += conf_win_bottom_offset; + height_crop *= sub_height_c; + TRUE_OR_RETURN(height_crop < pic_height_in_luma_samples); + } + + // bit_depth_luma_minus8: ue(v) + sps.bit_depth_luma_minus8 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.bit_depth_luma_minus8, 0, 8); + // bit_depth_chroma_minus8: ue(v) + uint32_t bit_depth_chroma_minus8 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(bit_depth_chroma_minus8, 0, 8); + // log2_max_pic_order_cnt_lsb_minus4: ue(v) + sps.log2_max_pic_order_cnt_lsb_minus4 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.log2_max_pic_order_cnt_lsb_minus4, 0, 12); + uint32_t sps_sub_layer_ordering_info_present_flag = 0; + // sps_sub_layer_ordering_info_present_flag: u(1) + sps_sub_layer_ordering_info_present_flag = reader.Read<bool>(); + uint32_t sps_max_num_reorder_pics[kMaxSubLayers] = {}; + for (uint32_t i = (sps_sub_layer_ordering_info_present_flag != 0) + ? 0 + : sps_max_sub_layers_minus1; + i <= sps_max_sub_layers_minus1; i++) { + // sps_max_dec_pic_buffering_minus1: ue(v) + sps.sps_max_dec_pic_buffering_minus1[i] = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.sps_max_dec_pic_buffering_minus1[i], 0, + max_dpb_size - 1); + // sps_max_num_reorder_pics: ue(v) + sps_max_num_reorder_pics[i] = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps_max_num_reorder_pics[i], 0, + sps.sps_max_dec_pic_buffering_minus1[i]); + if (i > 0) { + TRUE_OR_RETURN(sps.sps_max_dec_pic_buffering_minus1[i] >= + sps.sps_max_dec_pic_buffering_minus1[i - 1]); + TRUE_OR_RETURN(sps_max_num_reorder_pics[i] >= + sps_max_num_reorder_pics[i - 1]); + } + // sps_max_latency_increase_plus1: ue(v) + reader.ReadExponentialGolomb(); + } + if (!sps_sub_layer_ordering_info_present_flag) { + // Fill in the default values for the other sublayers. 
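+    // Per 7.4.3.2.1, when sps_sub_layer_ordering_info_present_flag is 0 the
+    // values signalled for the highest sub-layer apply to all sub-layers.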
+ for (uint32_t i = 0; i < sps_max_sub_layers_minus1; ++i) { + sps.sps_max_dec_pic_buffering_minus1[i] = + sps.sps_max_dec_pic_buffering_minus1[sps_max_sub_layers_minus1]; + } + } + // log2_min_luma_coding_block_size_minus3: ue(v) + sps.log2_min_luma_coding_block_size_minus3 = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.log2_min_luma_coding_block_size_minus3, 0, 27); + // log2_diff_max_min_luma_coding_block_size: ue(v) + sps.log2_diff_max_min_luma_coding_block_size = reader.ReadExponentialGolomb(); + int min_cb_log2_size_y = sps.log2_min_luma_coding_block_size_minus3 + 3; + int ctb_log2_size_y = min_cb_log2_size_y; + ctb_log2_size_y += sps.log2_diff_max_min_luma_coding_block_size; + IN_RANGE_OR_RETURN_NULL(ctb_log2_size_y, 0, 30); + int min_cb_size_y = 1 << min_cb_log2_size_y; + int ctb_size_y = 1 << ctb_log2_size_y; + sps.pic_width_in_ctbs_y = + std::ceil(static_cast<float>(pic_width_in_luma_samples) / ctb_size_y); + sps.pic_height_in_ctbs_y = + std::ceil(static_cast<float>(pic_height_in_luma_samples) / ctb_size_y); + TRUE_OR_RETURN(pic_width_in_luma_samples % min_cb_size_y == 0); + TRUE_OR_RETURN(pic_height_in_luma_samples % min_cb_size_y == 0); + // log2_min_luma_transform_block_size_minus2: ue(v) + int log2_min_luma_transform_block_size_minus2 = + reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(log2_min_luma_transform_block_size_minus2, 0, + min_cb_log2_size_y - 3); + int min_tb_log2_size_y = log2_min_luma_transform_block_size_minus2 + 2; + // log2_diff_max_min_luma_transform_block_size: ue(v) + int log2_diff_max_min_luma_transform_block_size = + reader.ReadExponentialGolomb(); + TRUE_OR_RETURN(log2_diff_max_min_luma_transform_block_size <= + std::min(ctb_log2_size_y, 5) - min_tb_log2_size_y); + // max_transform_hierarchy_depth_inter: ue(v) + int max_transform_hierarchy_depth_inter = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(max_transform_hierarchy_depth_inter, 0, + ctb_log2_size_y - min_tb_log2_size_y); + // max_transform_hierarchy_depth_intra: ue(v) + int max_transform_hierarchy_depth_intra = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(max_transform_hierarchy_depth_intra, 0, + ctb_log2_size_y - min_tb_log2_size_y); + // scaling_list_enabled_flag: u(1) + bool scaling_list_enabled_flag = reader.Read<bool>(); + if (scaling_list_enabled_flag) { + // sps_scaling_list_data_present_flag: u(1) + bool sps_scaling_list_data_present_flag = reader.Read<bool>(); + if (sps_scaling_list_data_present_flag) { + // scaling_list_data() + if (!ParseScalingListData(reader)) { + return absl::nullopt; + } + } + } + + // amp_enabled_flag: u(1) + reader.ConsumeBits(1); + // sample_adaptive_offset_enabled_flag: u(1) + sps.sample_adaptive_offset_enabled_flag = reader.Read<bool>(); + // pcm_enabled_flag: u(1) + bool pcm_enabled_flag = reader.Read<bool>(); + if (pcm_enabled_flag) { + // pcm_sample_bit_depth_luma_minus1: u(4) + reader.ConsumeBits(4); + // pcm_sample_bit_depth_chroma_minus1: u(4) + reader.ConsumeBits(4); + // log2_min_pcm_luma_coding_block_size_minus3: ue(v) + int log2_min_pcm_luma_coding_block_size_minus3 = + reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(log2_min_pcm_luma_coding_block_size_minus3, 0, 2); + int log2_min_ipcm_cb_size_y = + log2_min_pcm_luma_coding_block_size_minus3 + 3; + IN_RANGE_OR_RETURN_NULL(log2_min_ipcm_cb_size_y, + std::min(min_cb_log2_size_y, 5), + std::min(ctb_log2_size_y, 5)); + // log2_diff_max_min_pcm_luma_coding_block_size: ue(v) + int log2_diff_max_min_pcm_luma_coding_block_size = + 
reader.ReadExponentialGolomb(); + TRUE_OR_RETURN(log2_diff_max_min_pcm_luma_coding_block_size <= + std::min(ctb_log2_size_y, 5) - log2_min_ipcm_cb_size_y); + // pcm_loop_filter_disabled_flag: u(1) + reader.ConsumeBits(1); + } + + // num_short_term_ref_pic_sets: ue(v) + sps.num_short_term_ref_pic_sets = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.num_short_term_ref_pic_sets, 0, + kMaxShortTermRefPicSets); + sps.short_term_ref_pic_set.resize(sps.num_short_term_ref_pic_sets); + for (uint32_t st_rps_idx = 0; st_rps_idx < sps.num_short_term_ref_pic_sets; + st_rps_idx++) { + uint32_t sps_max_dec_pic_buffering_minus1 = + sps.sps_max_dec_pic_buffering_minus1[sps.sps_max_sub_layers_minus1]; + // st_ref_pic_set() + OptionalShortTermRefPicSet ref_pic_set = ParseShortTermRefPicSet( + st_rps_idx, sps.num_short_term_ref_pic_sets, sps.short_term_ref_pic_set, + sps_max_dec_pic_buffering_minus1, reader); + if (ref_pic_set) { + sps.short_term_ref_pic_set[st_rps_idx] = *ref_pic_set; + } else { + return absl::nullopt; + } + } + + // long_term_ref_pics_present_flag: u(1) + sps.long_term_ref_pics_present_flag = reader.Read<bool>(); + if (sps.long_term_ref_pics_present_flag) { + // num_long_term_ref_pics_sps: ue(v) + sps.num_long_term_ref_pics_sps = reader.ReadExponentialGolomb(); + IN_RANGE_OR_RETURN_NULL(sps.num_long_term_ref_pics_sps, 0, + kMaxLongTermRefPicSets); + sps.used_by_curr_pic_lt_sps_flag.resize(sps.num_long_term_ref_pics_sps, 0); + for (uint32_t i = 0; i < sps.num_long_term_ref_pics_sps; i++) { + // lt_ref_pic_poc_lsb_sps: u(v) + uint32_t lt_ref_pic_poc_lsb_sps_bits = + sps.log2_max_pic_order_cnt_lsb_minus4 + 4; + reader.ConsumeBits(lt_ref_pic_poc_lsb_sps_bits); + // used_by_curr_pic_lt_sps_flag: u(1) + sps.used_by_curr_pic_lt_sps_flag[i] = reader.Read<bool>(); + } + } + + // sps_temporal_mvp_enabled_flag: u(1) + sps.sps_temporal_mvp_enabled_flag = reader.Read<bool>(); + + // Far enough! We don't use the rest of the SPS. + + sps.vps_id = sps_video_parameter_set_id; + + sps.pic_width_in_luma_samples = pic_width_in_luma_samples; + sps.pic_height_in_luma_samples = pic_height_in_luma_samples; + + // Start with the resolution determined by the pic_width/pic_height fields. + sps.width = pic_width_in_luma_samples; + sps.height = pic_height_in_luma_samples; + + if (conformance_window_flag) { + int sub_width_c = + ((1 == sps.chroma_format_idc) || (2 == sps.chroma_format_idc)) && + (0 == sps.separate_colour_plane_flag) + ? 2 + : 1; + int sub_height_c = + (1 == sps.chroma_format_idc) && (0 == sps.separate_colour_plane_flag) + ? 2 + : 1; + // the offset includes the pixel within conformance window. so don't need to + // +1 as per spec + sps.width -= sub_width_c * (conf_win_right_offset + conf_win_left_offset); + sps.height -= sub_height_c * (conf_win_top_offset + conf_win_bottom_offset); + } + + if (!reader.Ok()) { + return absl::nullopt; + } + + return OptionalSps(sps); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_sps_parser.h b/third_party/libwebrtc/common_video/h265/h265_sps_parser.h new file mode 100644 index 0000000000..854c0f29eb --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_sps_parser.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_VIDEO_H265_H265_SPS_PARSER_H_ +#define COMMON_VIDEO_H265_H265_SPS_PARSER_H_ + +#include <vector> + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "rtc_base/bitstream_reader.h" + +namespace webrtc { + +// For explanations of each struct and its members, see H.265 specification +// at http://www.itu.int/rec/T-REC-H.265. +enum { + kMaxLongTermRefPicSets = 32, // 7.4.3.2.1 + kMaxShortTermRefPicSets = 64, // 7.4.3.2.1 + kMaxSubLayers = 7, // 7.4.3.1 & 7.4.3.2.1 [v|s]ps_max_sub_layers_minus1 + 1 +}; + +enum H265ProfileIdc { + kProfileIdcMain = 1, + kProfileIdcMain10 = 2, + kProfileIdcMainStill = 3, + kProfileIdcRangeExtensions = 4, + kProfileIdcHighThroughput = 5, + kProfileIdcMultiviewMain = 6, + kProfileIdcScalableMain = 7, + kProfileIdc3dMain = 8, + kProfileIdcScreenContentCoding = 9, + kProfileIdcScalableRangeExtensions = 10, + kProfileIdcHighThroughputScreenContentCoding = 11, +}; + +// A class for parsing out sequence parameter set (SPS) data from an H265 NALU. +class H265SpsParser { + public: + struct ProfileTierLevel { + ProfileTierLevel(); + // Syntax elements. + int general_profile_idc = 0; + int general_level_idc = 0; // 30x the actual level. + uint32_t general_profile_compatibility_flags = 0; + bool general_progressive_source_flag = false; + bool general_interlaced_source_flag = false; + bool general_non_packed_constraint_flag = false; + bool general_frame_only_constraint_flag = false; + bool general_one_picture_only_constraint_flag = false; + }; + + struct ShortTermRefPicSet { + ShortTermRefPicSet(); + + // Syntax elements. + uint32_t num_negative_pics = 0; + uint32_t num_positive_pics = 0; + uint32_t delta_poc_s0[kMaxShortTermRefPicSets] = {}; + uint32_t used_by_curr_pic_s0[kMaxShortTermRefPicSets] = {}; + uint32_t delta_poc_s1[kMaxShortTermRefPicSets] = {}; + uint32_t used_by_curr_pic_s1[kMaxShortTermRefPicSets] = {}; + + // Calculated fields. + uint32_t num_delta_pocs = 0; + }; + + // The parsed state of the SPS. Only some select values are stored. + // Add more as they are actually needed. + struct SpsState { + SpsState() = default; + + uint32_t sps_max_sub_layers_minus1 = 0; + uint32_t chroma_format_idc = 0; + uint32_t separate_colour_plane_flag = 0; + uint32_t pic_width_in_luma_samples = 0; + uint32_t pic_height_in_luma_samples = 0; + uint32_t log2_max_pic_order_cnt_lsb_minus4 = 0; + uint32_t sps_max_dec_pic_buffering_minus1[kMaxSubLayers] = {}; + uint32_t log2_min_luma_coding_block_size_minus3 = 0; + uint32_t log2_diff_max_min_luma_coding_block_size = 0; + uint32_t sample_adaptive_offset_enabled_flag = 0; + uint32_t num_short_term_ref_pic_sets = 0; + std::vector<H265SpsParser::ShortTermRefPicSet> short_term_ref_pic_set; + uint32_t long_term_ref_pics_present_flag = 0; + uint32_t num_long_term_ref_pics_sps = 0; + std::vector<uint32_t> used_by_curr_pic_lt_sps_flag; + uint32_t sps_temporal_mvp_enabled_flag = 0; + uint32_t width = 0; + uint32_t height = 0; + uint32_t sps_id = 0; + uint32_t vps_id = 0; + uint32_t pic_width_in_ctbs_y = 0; + uint32_t pic_height_in_ctbs_y = 0; + uint32_t bit_depth_luma_minus8 = 0; + }; + + // Unpack RBSP and parse SPS state from the supplied buffer. 
+ static absl::optional<SpsState> ParseSps(const uint8_t* data, size_t length); + + static bool ParseScalingListData(BitstreamReader& reader); + + static absl::optional<ShortTermRefPicSet> ParseShortTermRefPicSet( + uint32_t st_rps_idx, + uint32_t num_short_term_ref_pic_sets, + const std::vector<ShortTermRefPicSet>& ref_pic_sets, + uint32_t sps_max_dec_pic_buffering_minus1, + BitstreamReader& reader); + + static absl::optional<H265SpsParser::ProfileTierLevel> ParseProfileTierLevel( + bool profile_present, + int max_num_sub_layers_minus1, + BitstreamReader& reader); + + protected: + // Parse the SPS state, for a bit buffer where RBSP decoding has already been + // performed. + static absl::optional<SpsState> ParseSpsInternal( + rtc::ArrayView<const uint8_t> buffer); + static bool ParseProfileTierLevel(BitstreamReader& reader, + uint32_t sps_max_sub_layers_minus1); + + // From Table A.8 - General tier and level limits. + static int GetMaxLumaPs(int general_level_idc); + // From A.4.2 - Profile-specific level limits for the video profiles. + static size_t GetDpbMaxPicBuf(int general_profile_idc); +}; + +} // namespace webrtc +#endif // COMMON_VIDEO_H265_H265_SPS_PARSER_H_ diff --git a/third_party/libwebrtc/common_video/h265/h265_sps_parser_unittest.cc b/third_party/libwebrtc/common_video/h265/h265_sps_parser_unittest.cc new file mode 100644 index 0000000000..26af4b1170 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_sps_parser_unittest.cc @@ -0,0 +1,546 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h265/h265_sps_parser.h" + +#include "common_video/h265/h265_common.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/buffer.h" +#include "test/gtest.h" + +namespace webrtc { + +static constexpr size_t kSpsBufferMaxSize = 256; + +// Generates a fake SPS with basically everything empty but the width/height, +// max_num_sublayer_minus1 and num_short_term_ref_pic_sets. +// Pass in a buffer of at least kSpsBufferMaxSize. +// The fake SPS that this generates also always has at least one emulation byte +// at offset 2, since the first two bytes are always 0, and has a 0x3 as the +// level_idc, to make sure the parser doesn't eat all 0x3 bytes. +// num_short_term_ref_pic_sets is set to 11 followed with 11 +// short_term_ref_pic_set data in this fake sps. 
+void WriteSps(uint16_t width, + uint16_t height, + int id, + uint32_t max_num_sublayer_minus1, + bool sub_layer_ordering_info_present_flag, + bool long_term_ref_pics_present_flag, + rtc::Buffer* out_buffer) { + uint8_t rbsp[kSpsBufferMaxSize] = {0}; + rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize); + // sps_video_parameter_set_id + writer.WriteBits(0, 4); + // sps_max_sub_layers_minus1 + writer.WriteBits(max_num_sublayer_minus1, 3); + // sps_temporal_id_nesting_flag + writer.WriteBits(1, 1); + // profile_tier_level(profilePresentFlag=1, maxNumSublayersMinus1=0) + // profile-space=0, tier=0, profile-idc=1 + writer.WriteBits(0, 2); + writer.WriteBits(0, 1); + writer.WriteBits(1, 5); + // general_prfile_compatibility_flag[32] + writer.WriteBits(0, 32); + // general_progressive_source_flag + writer.WriteBits(1, 1); + // general_interlace_source_flag + writer.WriteBits(0, 1); + // general_non_packed_constraint_flag + writer.WriteBits(0, 1); + // general_frame_only_constraint_flag + writer.WriteBits(1, 1); + // general_reserved_zero_7bits + writer.WriteBits(0, 7); + // general_one_picture_only_flag + writer.WriteBits(0, 1); + // general_reserved_zero_35bits + writer.WriteBits(0, 35); + // general_inbld_flag + writer.WriteBits(0, 1); + // general_level_idc + writer.WriteBits(93, 8); + // if max_sub_layers_minus1 >=1, read the sublayer profile information + std::vector<uint32_t> sub_layer_profile_present_flags; + std::vector<uint32_t> sub_layer_level_present_flags; + for (uint32_t i = 0; i < max_num_sublayer_minus1; i++) { + // sublayer_profile_present_flag and sublayer_level_presnet_flag: u(2) + writer.WriteBits(1, 1); + writer.WriteBits(1, 1); + sub_layer_profile_present_flags.push_back(1); + sub_layer_level_present_flags.push_back(1); + } + if (max_num_sublayer_minus1 > 0) { + for (uint32_t j = max_num_sublayer_minus1; j < 8; j++) { + // reserved 2 bits: u(2) + writer.WriteBits(0, 2); + } + } + for (uint32_t k = 0; k < max_num_sublayer_minus1; k++) { + if (sub_layer_profile_present_flags[k]) { // + // sub_layer profile_space/tier_flag/profile_idc. ignored. u(8) + writer.WriteBits(0, 8); + // profile_compatability_flag: u(32) + writer.WriteBits(0, 32); + // sub_layer progressive_source_flag/interlaced_source_flag/ + // non_packed_constraint_flag/frame_only_constraint_flag: u(4) + writer.WriteBits(0, 4); + // following 43-bits are profile_idc specific. We simply read/skip it. + // u(43) + writer.WriteBits(0, 43); + // 1-bit profile_idc specific inbld flag. We simply read/skip it. 
u(1) + writer.WriteBits(0, 1); + } + if (sub_layer_level_present_flags[k]) { + // sub_layer_level_idc: u(8) + writer.WriteBits(0, 8); + } + } + + // seq_parameter_set_id + writer.WriteExponentialGolomb(id); + // chroma_format_idc + writer.WriteExponentialGolomb(2); + if (width % 8 != 0 || height % 8 != 0) { + int width_delta = 8 - width % 8; + int height_delta = 8 - height % 8; + if (width_delta != 8) { + // pic_width_in_luma_samples + writer.WriteExponentialGolomb(width + width_delta); + } else { + writer.WriteExponentialGolomb(width); + } + if (height_delta != 8) { + // pic_height_in_luma_samples + writer.WriteExponentialGolomb(height + height_delta); + } else { + writer.WriteExponentialGolomb(height); + } + // conformance_window_flag + writer.WriteBits(1, 1); + // conf_win_left_offset + writer.WriteExponentialGolomb((width % 8) / 2); + // conf_win_right_offset + writer.WriteExponentialGolomb(0); + // conf_win_top_offset + writer.WriteExponentialGolomb(height_delta); + // conf_win_bottom_offset + writer.WriteExponentialGolomb(0); + } else { + // pic_width_in_luma_samples + writer.WriteExponentialGolomb(width); + // pic_height_in_luma_samples + writer.WriteExponentialGolomb(height); + // conformance_window_flag + writer.WriteBits(0, 1); + } + // bit_depth_luma_minus8 + writer.WriteExponentialGolomb(0); + // bit_depth_chroma_minus8 + writer.WriteExponentialGolomb(0); + // log2_max_pic_order_cnt_lsb_minus4 + writer.WriteExponentialGolomb(4); + // sps_sub_layer_ordering_info_present_flag + writer.WriteBits(sub_layer_ordering_info_present_flag, 1); + for (uint32_t i = (sub_layer_ordering_info_present_flag != 0) + ? 0 + : max_num_sublayer_minus1; + i <= max_num_sublayer_minus1; i++) { + // sps_max_dec_pic_buffering_minus1: ue(v) + writer.WriteExponentialGolomb(4); + // sps_max_num_reorder_pics: ue(v) + writer.WriteExponentialGolomb(3); + // sps_max_latency_increase_plus1: ue(v) + writer.WriteExponentialGolomb(0); + } + // log2_min_luma_coding_block_size_minus3 + writer.WriteExponentialGolomb(0); + // log2_diff_max_min_luma_coding_block_size + writer.WriteExponentialGolomb(3); + // log2_min_luma_transform_block_size_minus2 + writer.WriteExponentialGolomb(0); + // log2_diff_max_min_luma_transform_block_size + writer.WriteExponentialGolomb(3); + // max_transform_hierarchy_depth_inter + writer.WriteExponentialGolomb(0); + // max_transform_hierarchy_depth_intra + writer.WriteExponentialGolomb(0); + // scaling_list_enabled_flag + writer.WriteBits(0, 1); + // apm_enabled_flag + writer.WriteBits(0, 1); + // sample_adaptive_offset_enabled_flag + writer.WriteBits(1, 1); + // pcm_enabled_flag + writer.WriteBits(0, 1); + // num_short_term_ref_pic_sets + writer.WriteExponentialGolomb(11); + // short_term_ref_pic_set[0] + // num_negative_pics + writer.WriteExponentialGolomb(4); + // num_positive_pics + writer.WriteExponentialGolomb(0); + // delta_poc_s0_minus1 + writer.WriteExponentialGolomb(7); + // used_by_curr_pic_s0_flag + writer.WriteBits(1, 1); + for (int i = 0; i < 2; i++) { + // delta_poc_s0_minus1 + writer.WriteExponentialGolomb(1); + // used_by_curr_pic_s0_flag + writer.WriteBits(1, 1); + } + // delta_poc_s0_minus1 + writer.WriteExponentialGolomb(3); + // used_by_curr_pic_s0_flag + writer.WriteBits(1, 1); + // short_term_ref_pic_set[1] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(0, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(3); + for (int i = 0; i < 2; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + 
for (int i = 0; i < 2; i++) { + // used_by_curr_pic_flag + writer.WriteBits(0, 1); + // use_delta_flag + writer.WriteBits(0, 1); + } + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + // short_term_ref_pic_set[2] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(0, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(1); + for (int i = 0; i < 4; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // short_term_ref_pic_set[3] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(0, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(0); + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + // used_by_curr_pic_flag + writer.WriteBits(0, 1); + // use_delta_flag + writer.WriteBits(0, 1); + for (int i = 0; i < 3; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // short_term_ref_pic_set[4] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(1, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(1); + for (int i = 0; i < 4; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // used_by_curr_pic_flag + writer.WriteBits(0, 1); + // use_delta_flag + writer.WriteBits(0, 1); + // short_term_ref_pic_set[5] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(1, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(2); + for (int i = 0; i < 4; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // used_by_curr_pic_flag + writer.WriteBits(0, 1); + // use_delta_flag + writer.WriteBits(0, 1); + // short_term_ref_pic_set[6] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(0, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(0); + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + // used_by_curr_pic_flag + writer.WriteBits(0, 1); + // use_delta_flag + writer.WriteBits(0, 1); + for (int i = 0; i < 3; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // short_term_ref_pic_set[7] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(1, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(1); + for (int i = 0; i < 4; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // used_by_curr_pic_flag + writer.WriteBits(0, 1); + // use_delta_flag + writer.WriteBits(0, 1); + // short_term_ref_pic_set[8] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(0, 1); + // num_negative_pics + writer.WriteExponentialGolomb(1); + // num_positive_pics + writer.WriteExponentialGolomb(0); + // delta_poc_s0_minus1 + writer.WriteExponentialGolomb(7); + // used_by_curr_pic_s0_flag + writer.WriteBits(1, 1); + // short_term_ref_pic_set[9] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(0, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(3); + for (int i = 0; i < 2; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // short_term_ref_pic_set[10] + // inter_ref_pic_set_prediction_flag + writer.WriteBits(1, 1); + // delta_rps_sign + writer.WriteBits(0, 1); + // abs_delta_rps_minus1 + writer.WriteExponentialGolomb(1); + for (int i = 0; i < 3; i++) { + // used_by_curr_pic_flag + writer.WriteBits(1, 1); + } + // long_term_ref_pics_present_flag + writer.WriteBits(long_term_ref_pics_present_flag, 
1); + if (long_term_ref_pics_present_flag) { + // num_long_term_ref_pics_sps + writer.WriteExponentialGolomb(1); + // lt_ref_pic_poc_lsb_sps + writer.WriteExponentialGolomb(1); + // used_by_curr_pic_lt_sps_flag + writer.WriteBits(1, 8); + } + // sps_temproal_mvp_enabled_flag + writer.WriteBits(1, 1); + + // Get the number of bytes written (including the last partial byte). + size_t byte_count, bit_offset; + writer.GetCurrentOffset(&byte_count, &bit_offset); + if (bit_offset > 0) { + byte_count++; + } + + out_buffer->Clear(); + H265::WriteRbsp(rbsp, byte_count, out_buffer); +} + +class H265SpsParserTest : public ::testing::Test { + public: + H265SpsParserTest() {} + ~H265SpsParserTest() override {} +}; + +TEST_F(H265SpsParserTest, TestSampleSPSHdLandscape) { + // SPS for a 1280x720 camera capture from ffmpeg on linux. Contains + // emulation bytes but no cropping. This buffer is generated + // with following command: + // 1) ffmpeg -i /dev/video0 -r 30 -c:v libx265 -s 1280x720 camera.h265 + // + // 2) Open camera.h265 and find the SPS, generally everything between the + // second and third start codes (0 0 0 1 or 0 0 1). The first two bytes should + // be 0x42 and 0x01, which should be stripped out before being passed to the + // parser. + const uint8_t buffer[] = {0x01, 0x04, 0x08, 0x00, 0x00, 0x03, 0x00, 0x9d, + 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x5d, 0xb0, + 0x02, 0x80, 0x80, 0x2d, 0x16, 0x59, 0x59, 0xa4, + 0x93, 0x2b, 0x80, 0x40, 0x00, 0x00, 0x03, 0x00, + 0x40, 0x00, 0x00, 0x07, 0x82}; + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer, arraysize(buffer)); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(1280u, sps->width); + EXPECT_EQ(720u, sps->height); +} + +TEST_F(H265SpsParserTest, TestSampleSPSVerticalCropLandscape) { + // SPS for a 640x260 camera captureH265SpsParser::ParseSps(buffer.data(), + // buffer.size()) from ffmpeg on Linux,. Contains emulation bytes and vertical + // cropping (crop from 640x264). The buffer is generated + // with following command: + // 1) Generate a video, from the camera: + // ffmpeg -i /dev/video0 -r 30 -c:v libx265 -s 640x264 camera.h265 + // + // 2) Crop the video to expected size(for example, 640x260 which will crop + // from 640x264): + // ffmpeg -i camera.h265 -filter:v crop=640:260:200:200 -c:v libx265 + // cropped.h265 + // + // 3) Open cropped.h265 and find the SPS, generally everything between the + // second and third start codes (0 0 0 1 or 0 0 1). The first two bytes should + // be 0x42 and 0x01, which should be stripped out before being passed to the + // parser. + const uint8_t buffer[] = {0x01, 0x04, 0x08, 0x00, 0x00, 0x03, 0x00, 0x9d, + 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x3f, 0xb0, + 0x05, 0x02, 0x01, 0x09, 0xf2, 0xe5, 0x95, 0x9a, + 0x49, 0x32, 0xb8, 0x04, 0x00, 0x00, 0x03, 0x00, + 0x04, 0x00, 0x00, 0x03, 0x00, 0x78, 0x20}; + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer, arraysize(buffer)); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(640u, sps->width); + EXPECT_EQ(260u, sps->height); +} + +TEST_F(H265SpsParserTest, TestSampleSPSHorizontalAndVerticalCrop) { + // SPS for a 260x260 camera capture from ffmpeg on Linux. Contains emulation + // bytes. Horizontal and veritcal crop (Crop from 264x264). 
The buffer is + // generated with following command: + // 1) Generate a video, from the camera: + // ffmpeg -i /dev/video0 -r 30 -c:v libx265 -s 264x264 camera.h265 + // + // 2) Crop the video to expected size(for example, 260x260 which will crop + // from 264x264): + // ffmpeg -i camera.h265 -filter:v crop=260:260:200:200 -c:v libx265 + // cropped.h265 + // + // 3) Open cropped.h265 and find the SPS, generally everything between the + // second and third start codes (0 0 0 1 or 0 0 1). The first two bytes should + // be 0x42 and 0x01, which should be stripped out before being passed to the + // parser. + const uint8_t buffer[] = {0x01, 0x04, 0x08, 0x00, 0x00, 0x03, 0x00, 0x9d, + 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x3c, 0xb0, + 0x08, 0x48, 0x04, 0x27, 0x72, 0xe5, 0x95, 0x9a, + 0x49, 0x32, 0xb8, 0x04, 0x00, 0x00, 0x03, 0x00, + 0x04, 0x00, 0x00, 0x03, 0x00, 0x78, 0x20}; + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer, arraysize(buffer)); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(260u, sps->width); + EXPECT_EQ(260u, sps->height); +} + +TEST_F(H265SpsParserTest, TestSyntheticSPSQvgaLandscape) { + rtc::Buffer buffer; + WriteSps(320u, 180u, 1, 0, 1, 0, &buffer); + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->sps_id); +} + +TEST_F(H265SpsParserTest, TestSyntheticSPSWeirdResolution) { + rtc::Buffer buffer; + WriteSps(156u, 122u, 2, 0, 1, 0, &buffer); + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(156u, sps->width); + EXPECT_EQ(122u, sps->height); + EXPECT_EQ(2u, sps->sps_id); +} + +TEST_F(H265SpsParserTest, TestLog2MaxSubLayersMinus1) { + rtc::Buffer buffer; + WriteSps(320u, 180u, 1, 0, 1, 0, &buffer); + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->sps_id); + EXPECT_EQ(0u, sps->sps_max_sub_layers_minus1); + + WriteSps(320u, 180u, 1, 6, 1, 0, &buffer); + absl::optional<H265SpsParser::SpsState> sps1 = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps1.has_value()); + EXPECT_EQ(320u, sps1->width); + EXPECT_EQ(180u, sps1->height); + EXPECT_EQ(1u, sps1->sps_id); + EXPECT_EQ(6u, sps1->sps_max_sub_layers_minus1); + + WriteSps(320u, 180u, 1, 7, 1, 0, &buffer); + absl::optional<H265SpsParser::SpsState> result = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + EXPECT_FALSE(result.has_value()); +} + +TEST_F(H265SpsParserTest, TestSubLayerOrderingInfoPresentFlag) { + rtc::Buffer buffer; + WriteSps(320u, 180u, 1, 6, 1, 0, &buffer); + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->sps_id); + EXPECT_EQ(6u, sps->sps_max_sub_layers_minus1); + + WriteSps(320u, 180u, 1, 6, 1, 0, &buffer); + absl::optional<H265SpsParser::SpsState> sps1 = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps1.has_value()); + EXPECT_EQ(320u, sps1->width); + EXPECT_EQ(180u, sps1->height); + EXPECT_EQ(1u, sps1->sps_id); + EXPECT_EQ(6u, sps1->sps_max_sub_layers_minus1); +} + +TEST_F(H265SpsParserTest, TestLongTermRefPicsPresentFlag) { + 
rtc::Buffer buffer; + WriteSps(320u, 180u, 1, 0, 1, 0, &buffer); + absl::optional<H265SpsParser::SpsState> sps = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps.has_value()); + EXPECT_EQ(320u, sps->width); + EXPECT_EQ(180u, sps->height); + EXPECT_EQ(1u, sps->sps_id); + EXPECT_EQ(0u, sps->long_term_ref_pics_present_flag); + + WriteSps(320u, 180u, 1, 6, 1, 1, &buffer); + absl::optional<H265SpsParser::SpsState> sps1 = + H265SpsParser::ParseSps(buffer.data(), buffer.size()); + ASSERT_TRUE(sps1.has_value()); + EXPECT_EQ(320u, sps1->width); + EXPECT_EQ(180u, sps1->height); + EXPECT_EQ(1u, sps1->sps_id); + EXPECT_EQ(1u, sps1->long_term_ref_pics_present_flag); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_vps_parser.cc b/third_party/libwebrtc/common_video/h265/h265_vps_parser.cc new file mode 100644 index 0000000000..16b967dad4 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_vps_parser.cc @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h265/h265_vps_parser.h" + +#include "common_video/h265/h265_common.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/bitstream_reader.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +H265VpsParser::VpsState::VpsState() = default; + +// General note: this is based off the 08/2021 version of the H.265 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.265 + +// Unpack RBSP and parse VPS state from the supplied buffer. +absl::optional<H265VpsParser::VpsState> H265VpsParser::ParseVps( + const uint8_t* data, + size_t length) { + RTC_DCHECK(data); + return ParseInternal(H265::ParseRbsp(data, length)); +} + +absl::optional<H265VpsParser::VpsState> H265VpsParser::ParseInternal( + rtc::ArrayView<const uint8_t> buffer) { + BitstreamReader reader(buffer); + + // Now, we need to use a bit buffer to parse through the actual H265 VPS + // format. See Section 7.3.2.1 ("Video parameter set RBSP syntax") of the + // H.265 standard for a complete description. + VpsState vps; + + // vps_video_parameter_set_id: u(4) + vps.id = reader.ReadBits(4); + + if (!reader.Ok()) { + return absl::nullopt; + } + + return vps; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/common_video/h265/h265_vps_parser.h b/third_party/libwebrtc/common_video/h265/h265_vps_parser.h new file mode 100644 index 0000000000..e391d47401 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_vps_parser.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
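Editor's note: the short sketch below is not part of the patch; it only illustrates how the SPS and VPS parsers added above might be driven by a caller. Following the unit tests, the payload passed in is assumed to start after the two-byte NALU header, and the helper functions and their names are illustrative, not part of the WebRTC API.

#include <cstddef>
#include <cstdint>
#include <utility>

#include "absl/types/optional.h"
#include "common_video/h265/h265_sps_parser.h"
#include "common_video/h265/h265_vps_parser.h"

namespace {

// Hypothetical helper: coded resolution from an SPS NALU payload (NALU header
// already stripped). SpsState::width/height already include the
// conformance-window crop computed by the parser.
absl::optional<std::pair<uint32_t, uint32_t>> SpsResolution(
    const uint8_t* payload, size_t size) {
  absl::optional<webrtc::H265SpsParser::SpsState> sps =
      webrtc::H265SpsParser::ParseSps(payload, size);
  if (!sps) {
    return absl::nullopt;
  }
  return std::make_pair(sps->width, sps->height);
}

// Hypothetical helper: id of a VPS NALU payload (NALU header already
// stripped).
absl::optional<uint32_t> VpsId(const uint8_t* payload, size_t size) {
  absl::optional<webrtc::H265VpsParser::VpsState> vps =
      webrtc::H265VpsParser::ParseVps(payload, size);
  if (!vps) {
    return absl::nullopt;
  }
  return vps->id;
}

}  // namespace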
+ */ + +#ifndef COMMON_VIDEO_H265_H265_VPS_PARSER_H_ +#define COMMON_VIDEO_H265_H265_VPS_PARSER_H_ + +#include "absl/types/optional.h" +#include "api/array_view.h" + +namespace webrtc { + +// A class for parsing out video parameter set (VPS) data from an H265 NALU. +class H265VpsParser { + public: + // The parsed state of the VPS. Only some select values are stored. + // Add more as they are actually needed. + struct VpsState { + VpsState(); + + uint32_t id = 0; + }; + + // Unpack RBSP and parse VPS state from the supplied buffer. + static absl::optional<VpsState> ParseVps(const uint8_t* data, size_t length); + + protected: + // Parse the VPS state, for a bit buffer where RBSP decoding has already been + // performed. + static absl::optional<VpsState> ParseInternal( + rtc::ArrayView<const uint8_t> buffer); +}; + +} // namespace webrtc +#endif // COMMON_VIDEO_H265_H265_VPS_PARSER_H_ diff --git a/third_party/libwebrtc/common_video/h265/h265_vps_parser_unittest.cc b/third_party/libwebrtc/common_video/h265/h265_vps_parser_unittest.cc new file mode 100644 index 0000000000..24e8a77154 --- /dev/null +++ b/third_party/libwebrtc/common_video/h265/h265_vps_parser_unittest.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_video/h265/h265_vps_parser.h" + +#include "common_video/h265/h265_common.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/bit_buffer.h" +#include "rtc_base/buffer.h" +#include "test/gtest.h" + +namespace webrtc { + +// Example VPS can be generated with ffmpeg. Here's an example set of commands, +// runnable on Linux: +// 1) Generate a video, from the camera: +// ffmpeg -i /dev/video0 -r 30 -c:v libx265 -s 1280x720 camera.h265 +// +// 2) Open camera.h265 and find the VPS, generally everything between the first +// and second start codes (0 0 0 1 or 0 0 1). The first two bytes should be 0x40 +// and 0x01, which should be stripped out before being passed to the parser. + +class H265VpsParserTest : public ::testing::Test { + public: + H265VpsParserTest() {} + ~H265VpsParserTest() override {} + + absl::optional<H265VpsParser::VpsState> vps_; +}; + +TEST_F(H265VpsParserTest, TestSampleVPSId) { + // VPS id 1 + const uint8_t buffer[] = { + 0x1c, 0x01, 0xff, 0xff, 0x04, 0x08, 0x00, 0x00, 0x03, 0x00, 0x9d, + 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, 0x95, 0x98, 0x09, + }; + EXPECT_TRUE(static_cast<bool>( + vps_ = H265VpsParser::ParseVps(buffer, arraysize(buffer)))); + EXPECT_EQ(1u, vps_->id); +} + +} // namespace webrtc |