summaryrefslogtreecommitdiffstats
path: root/dom/media/platforms/agnostic/bytestreams/H264.h
diff options
context:
space:
mode:
Diffstat (limited to 'dom/media/platforms/agnostic/bytestreams/H264.h')
-rw-r--r--dom/media/platforms/agnostic/bytestreams/H264.h525
1 files changed, 525 insertions, 0 deletions
diff --git a/dom/media/platforms/agnostic/bytestreams/H264.h b/dom/media/platforms/agnostic/bytestreams/H264.h
new file mode 100644
index 0000000000..37c0e5bc14
--- /dev/null
+++ b/dom/media/platforms/agnostic/bytestreams/H264.h
@@ -0,0 +1,525 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MP4_DEMUXER_H264_H_
+#define MP4_DEMUXER_H264_H_
+
+#include "DecoderData.h"
+#include "mozilla/gfx/Types.h"
+
+namespace mozilla {
+class BitReader;
+
+// Spec 7.4.2.1
+#define MAX_SPS_COUNT 32
+#define MAX_PPS_COUNT 256
+
+// NAL unit types
+enum NAL_TYPES {
+ H264_NAL_SLICE = 1,
+ H264_NAL_DPA = 2,
+ H264_NAL_DPB = 3,
+ H264_NAL_DPC = 4,
+ H264_NAL_IDR_SLICE = 5,
+ H264_NAL_SEI = 6,
+ H264_NAL_SPS = 7,
+ H264_NAL_PPS = 8,
+ H264_NAL_AUD = 9,
+ H264_NAL_END_SEQUENCE = 10,
+ H264_NAL_END_STREAM = 11,
+ H264_NAL_FILLER_DATA = 12,
+ H264_NAL_SPS_EXT = 13,
+ H264_NAL_PREFIX = 14,
+ H264_NAL_AUXILIARY_SLICE = 19,
+ H264_NAL_SLICE_EXT = 20,
+ H264_NAL_SLICE_EXT_DVC = 21,
+};
+
+// According to ITU-T Rec H.264 (2017/04) Table 7.6.
+enum SLICE_TYPES {
+ P_SLICE = 0,
+ B_SLICE = 1,
+ I_SLICE = 2,
+ SP_SLICE = 3,
+ SI_SLICE = 4,
+};
+
+struct SPSData {
+ bool operator==(const SPSData& aOther) const;
+ bool operator!=(const SPSData& aOther) const;
+
+ gfx::YUVColorSpace ColorSpace() const;
+ gfx::ColorDepth ColorDepth() const;
+
+ bool valid;
+
+ /* Decoded Members */
+ /*
+ pic_width is the decoded width according to:
+ pic_width = ((pic_width_in_mbs_minus1 + 1) * 16)
+ - (frame_crop_left_offset + frame_crop_right_offset) * 2
+ */
+ uint32_t pic_width;
+ /*
+ pic_height is the decoded height according to:
+ pic_height = (2 - frame_mbs_only_flag) * ((pic_height_in_map_units_minus1 +
+ 1) * 16)
+ - (frame_crop_top_offset + frame_crop_bottom_offset) * 2
+ */
+ uint32_t pic_height;
+
+ bool interlaced;
+
+ /*
+ Displayed size.
+ display_width and display_height are adjusted according to the display
+ sample aspect ratio.
+ */
+ uint32_t display_width;
+ uint32_t display_height;
+
+ float sample_ratio;
+
+ uint32_t crop_left;
+ uint32_t crop_right;
+ uint32_t crop_top;
+ uint32_t crop_bottom;
+
+ /*
+ H264 decoding parameters according to ITU-T H.264 (T-REC-H.264-201402-I/en)
+ http://www.itu.int/rec/T-REC-H.264-201402-I/en
+ */
+
+ bool constraint_set0_flag;
+ bool constraint_set1_flag;
+ bool constraint_set2_flag;
+ bool constraint_set3_flag;
+ bool constraint_set4_flag;
+ bool constraint_set5_flag;
+
+ /*
+ profile_idc and level_idc indicate the profile and level to which the coded
+ video sequence conforms when the SVC sequence parameter set is the active
+ SVC sequence parameter set.
+ */
+ uint8_t profile_idc;
+ uint8_t level_idc;
+
+ /*
+ seq_parameter_set_id identifies the sequence parameter set that is referred
+ to by the picture parameter set. The value of seq_parameter_set_id shall be
+ in the range of 0 to 31, inclusive.
+ */
+ uint8_t seq_parameter_set_id;
+
+ /*
+ chroma_format_idc specifies the chroma sampling relative to the luma
+ sampling as specified in clause 6.2. The value of chroma_format_idc shall be
+ in the range of 0 to 3, inclusive. When chroma_format_idc is not present,
+ it shall be inferred to be equal to 1 (4:2:0 chroma format).
+ When profile_idc is equal to 183, chroma_format_idc shall be equal to 0
+ (4:0:0 chroma format).
+ */
+ uint8_t chroma_format_idc;
+
+ /*
+ bit_depth_luma_minus8 specifies the bit depth of the samples of the luma
+ array and the value of the luma quantisation parameter range offset
+ QpBdOffset Y , as specified by
+ BitDepth Y = 8 + bit_depth_luma_minus8 (7-3)
+ QpBdOffset Y = 6 * bit_depth_luma_minus8 (7-4)
+ When bit_depth_luma_minus8 is not present, it shall be inferred to be equal
+ to 0. bit_depth_luma_minus8 shall be in the range of 0 to 6, inclusive.
+ */
+ uint8_t bit_depth_luma_minus8;
+
+ /*
+ bit_depth_chroma_minus8 specifies the bit depth of the samples of the chroma
+ arrays and the value of the chroma quantisation parameter range offset
+ QpBdOffset C , as specified by
+ BitDepth C = 8 + bit_depth_chroma_minus8 (7-5)
+ QpBdOffset C = 6 * bit_depth_chroma_minus8 (7-6)
+ When bit_depth_chroma_minus8 is not present, it shall be inferred to be
+ equal to 0. bit_depth_chroma_minus8 shall be in the range of 0 to 6,
+ inclusive.
+ */
+ uint8_t bit_depth_chroma_minus8;
+
+ /*
+ separate_colour_plane_flag equal to 1 specifies that the three colour
+ components of the 4:4:4 chroma format are coded separately.
+ separate_colour_plane_flag equal to 0 specifies that the colour components
+ are not coded separately. When separate_colour_plane_flag is not present,
+ it shall be inferred to be equal to 0. When separate_colour_plane_flag is
+ equal to 1, the primary coded picture consists of three separate components,
+ each of which consists of coded samples of one colour plane (Y, Cb or Cr)
+ that each use the monochrome coding syntax. In this case, each colour plane
+ is associated with a specific colour_plane_id value.
+ */
+ bool separate_colour_plane_flag;
+
+ /*
+ seq_scaling_matrix_present_flag equal to 1 specifies that the flags
+ seq_scaling_list_present_flag[ i ] for i = 0..7 or
+ i = 0..11 are present. seq_scaling_matrix_present_flag equal to 0 specifies
+ that these flags are not present and the sequence-level scaling list
+ specified by Flat_4x4_16 shall be inferred for i = 0..5 and the
+ sequence-level scaling list specified by Flat_8x8_16 shall be inferred for
+ i = 6..11. When seq_scaling_matrix_present_flag is not present, it shall be
+ inferred to be equal to 0.
+ */
+ bool seq_scaling_matrix_present_flag;
+
+ /*
+ log2_max_frame_num_minus4 specifies the value of the variable
+ MaxFrameNum that is used in frame_num related derivations as
+ follows:
+
+ MaxFrameNum = 2( log2_max_frame_num_minus4 + 4 ). The value of
+ log2_max_frame_num_minus4 shall be in the range of 0 to 12, inclusive.
+ */
+ uint8_t log2_max_frame_num;
+
+ /*
+ pic_order_cnt_type specifies the method to decode picture order
+ count (as specified in subclause 8.2.1). The value of
+ pic_order_cnt_type shall be in the range of 0 to 2, inclusive.
+ */
+ uint8_t pic_order_cnt_type;
+
+ /*
+ log2_max_pic_order_cnt_lsb_minus4 specifies the value of the
+ variable MaxPicOrderCntLsb that is used in the decoding
+ process for picture order count as specified in subclause
+ 8.2.1 as follows:
+
+ MaxPicOrderCntLsb = 2( log2_max_pic_order_cnt_lsb_minus4 + 4 )
+
+ The value of log2_max_pic_order_cnt_lsb_minus4 shall be in
+ the range of 0 to 12, inclusive.
+ */
+ uint8_t log2_max_pic_order_cnt_lsb;
+
+ /*
+ delta_pic_order_always_zero_flag equal to 1 specifies that
+ delta_pic_order_cnt[ 0 ] and delta_pic_order_cnt[ 1 ] are
+ not present in the slice headers of the sequence and shall
+ be inferred to be equal to 0.
+ */
+ bool delta_pic_order_always_zero_flag;
+
+ /*
+ offset_for_non_ref_pic is used to calculate the picture
+ order count of a non-reference picture as specified in
+ 8.2.1. The value of offset_for_non_ref_pic shall be in the
+ range of -231 to 231 - 1, inclusive.
+ */
+ int8_t offset_for_non_ref_pic;
+
+ /*
+ offset_for_top_to_bottom_field is used to calculate the
+ picture order count of a bottom field as specified in
+ subclause 8.2.1. The value of offset_for_top_to_bottom_field
+ shall be in the range of -231 to 231 - 1, inclusive.
+ */
+ int8_t offset_for_top_to_bottom_field;
+
+ /*
+ max_num_ref_frames specifies the maximum number of short-term and
+ long-term reference frames, complementary reference field pairs,
+ and non-paired reference fields that may be used by the decoding
+ process for inter prediction of any picture in the
+ sequence. max_num_ref_frames also determines the size of the sliding
+ window operation as specified in subclause 8.2.5.3. The value of
+ max_num_ref_frames shall be in the range of 0 to MaxDpbFrames (as
+ specified in subclause A.3.1 or A.3.2), inclusive.
+ */
+ uint32_t max_num_ref_frames;
+
+ /*
+ gaps_in_frame_num_value_allowed_flag specifies the allowed
+ values of frame_num as specified in subclause 7.4.3 and the
+ decoding process in case of an inferred gap between values of
+ frame_num as specified in subclause 8.2.5.2.
+ */
+ bool gaps_in_frame_num_allowed_flag;
+
+ /*
+ pic_width_in_mbs_minus1 plus 1 specifies the width of each
+ decoded picture in units of macroblocks. 16 macroblocks in a row
+ */
+ uint32_t pic_width_in_mbs;
+
+ /*
+ pic_height_in_map_units_minus1 plus 1 specifies the height in
+ slice group map units of a decoded frame or field. 16
+ macroblocks in each column.
+ */
+ uint32_t pic_height_in_map_units;
+
+ /*
+ frame_mbs_only_flag equal to 0 specifies that coded pictures of
+ the coded video sequence may either be coded fields or coded
+ frames. frame_mbs_only_flag equal to 1 specifies that every
+ coded picture of the coded video sequence is a coded frame
+ containing only frame macroblocks.
+ */
+ bool frame_mbs_only_flag;
+
+ /*
+ mb_adaptive_frame_field_flag equal to 0 specifies no
+ switching between frame and field macroblocks within a
+ picture. mb_adaptive_frame_field_flag equal to 1 specifies
+ the possible use of switching between frame and field
+ macroblocks within frames. When mb_adaptive_frame_field_flag
+ is not present, it shall be inferred to be equal to 0.
+ */
+ bool mb_adaptive_frame_field_flag;
+
+ /*
+ direct_8x8_inference_flag specifies the method used in the derivation
+ process for luma motion vectors for B_Skip, B_Direct_16x16 and B_Direct_8x8
+ as specified in clause 8.4.1.2. When frame_mbs_only_flag is equal to 0,
+ direct_8x8_inference_flag shall be equal to 1.
+ */
+ bool direct_8x8_inference_flag;
+
+ /*
+ frame_cropping_flag equal to 1 specifies that the frame cropping
+ offset parameters follow next in the sequence parameter
+ set. frame_cropping_flag equal to 0 specifies that the frame
+ cropping offset parameters are not present.
+ */
+ bool frame_cropping_flag;
+ uint32_t frame_crop_left_offset;
+ uint32_t frame_crop_right_offset;
+ uint32_t frame_crop_top_offset;
+ uint32_t frame_crop_bottom_offset;
+
+ // VUI Parameters
+
+ /*
+ vui_parameters_present_flag equal to 1 specifies that the
+ vui_parameters( ) syntax structure as specified in Annex E is
+ present. vui_parameters_present_flag equal to 0 specifies that
+ the vui_parameters( ) syntax structure as specified in Annex E
+ is not present.
+ */
+ bool vui_parameters_present_flag;
+
+ /*
+ aspect_ratio_info_present_flag equal to 1 specifies that
+ aspect_ratio_idc is present. aspect_ratio_info_present_flag
+ equal to 0 specifies that aspect_ratio_idc is not present.
+ */
+ bool aspect_ratio_info_present_flag;
+
+ /*
+ aspect_ratio_idc specifies the value of the sample aspect
+ ratio of the luma samples. Table E-1 shows the meaning of
+ the code. When aspect_ratio_idc indicates Extended_SAR, the
+ sample aspect ratio is represented by sar_width and
+ sar_height. When the aspect_ratio_idc syntax element is not
+ present, aspect_ratio_idc value shall be inferred to be
+ equal to 0.
+ */
+ uint8_t aspect_ratio_idc;
+ uint32_t sar_width;
+ uint32_t sar_height;
+
+ /*
+ video_signal_type_present_flag equal to 1 specifies that video_format,
+ video_full_range_flag and colour_description_present_flag are present.
+ video_signal_type_present_flag equal to 0, specify that video_format,
+ video_full_range_flag and colour_description_present_flag are not present.
+ */
+ bool video_signal_type_present_flag;
+
+ /*
+ overscan_info_present_flag equal to1 specifies that the
+ overscan_appropriate_flag is present. When overscan_info_present_flag is
+ equal to 0 or is not present, the preferred display method for the video
+ signal is unspecified (Unspecified).
+ */
+ bool overscan_info_present_flag;
+ /*
+ overscan_appropriate_flag equal to 1 indicates that the cropped decoded
+ pictures output are suitable for display using overscan.
+ overscan_appropriate_flag equal to 0 indicates that the cropped decoded
+ pictures output contain visually important information in the entire region
+ out to the edges of the cropping rectangle of the picture
+ */
+ bool overscan_appropriate_flag;
+
+ /*
+ video_format indicates the representation of the pictures as specified in
+ Table E-2, before being coded in accordance with this
+ Recommendation | International Standard. When the video_format syntax
+ element is not present, video_format value shall be inferred to be equal
+ to 5. (Unspecified video format)
+ */
+ uint8_t video_format;
+
+ /*
+ video_full_range_flag indicates the black level and range of the luma and
+ chroma signals as derived from E′Y, E′PB, and E′PR or E′R, E′G, and E′B
+ real-valued component signals.
+ When the video_full_range_flag syntax element is not present, the value of
+ video_full_range_flag shall be inferred to be equal to 0.
+ */
+ bool video_full_range_flag;
+
+ /*
+ colour_description_present_flag equal to1 specifies that colour_primaries,
+ transfer_characteristics and matrix_coefficients are present.
+ colour_description_present_flag equal to 0 specifies that colour_primaries,
+ transfer_characteristics and matrix_coefficients are not present.
+ */
+ bool colour_description_present_flag;
+
+ /*
+ colour_primaries indicates the chromaticity coordinates of the source
+ primaries as specified in Table E-3 in terms of the CIE 1931 definition of
+ x and y as specified by ISO 11664-1.
+ When the colour_primaries syntax element is not present, the value of
+ colour_primaries shall be inferred to be equal to 2 (the chromaticity is
+ unspecified or is determined by the application).
+ */
+ uint8_t colour_primaries;
+
+ /*
+ transfer_characteristics indicates the opto-electronic transfer
+ characteristic of the source picture as specified in Table E-4 as a function
+ of a linear optical intensity input Lc with a nominal real-valued range of 0
+ to 1.
+ When the transfer_characteristics syntax element is not present, the value
+ of transfer_characteristics shall be inferred to be equal to 2
+ (the transfer characteristics are unspecified or are determined by the
+ application).
+ */
+ uint8_t transfer_characteristics;
+
+ uint8_t matrix_coefficients;
+ bool chroma_loc_info_present_flag;
+ /*
+ The value of chroma_sample_loc_type_top_field and
+ chroma_sample_loc_type_bottom_field shall be in the range of 0 to 5,
+ inclusive
+ */
+ uint8_t chroma_sample_loc_type_top_field;
+ uint8_t chroma_sample_loc_type_bottom_field;
+
+ bool scaling_matrix_present;
+ uint8_t scaling_matrix4x4[6][16];
+ uint8_t scaling_matrix8x8[6][64];
+
+ SPSData();
+};
+
+struct SEIRecoveryData {
+ /*
+ recovery_frame_cnt specifies the recovery point of output pictures in output
+ order. All decoded pictures in output order are indicated to be correct or
+ approximately correct in content starting at the output order position of
+ the reference picture having the frame_num equal to the frame_num of the VCL
+ NAL units for the current access unit incremented by recovery_frame_cnt in
+ modulo MaxFrameNum arithmetic. recovery_frame_cnt shall be in the range of 0
+ to MaxFrameNum − 1, inclusive.
+ */
+ uint32_t recovery_frame_cnt = 0;
+ /*
+ exact_match_flag indicates whether decoded pictures at and subsequent to the
+ specified recovery point in output order derived by starting the decoding
+ process at the access unit associated with the recovery point SEI message
+ shall be an exact match to the pictures that would be produced by starting
+ the decoding process at the location of a previous IDR access unit in the
+ NAL unit stream. The value 0 indicates that the match need not be exact and
+ the value 1 indicates that the match shall be exact.
+ */
+ bool exact_match_flag = false;
+ /*
+ broken_link_flag indicates the presence or absence of a broken link in the
+ NAL unit stream at the location of the recovery point SEI message */
+ bool broken_link_flag = false;
+ /*
+ changing_slice_group_idc equal to 0 indicates that decoded pictures are
+ correct or approximately correct in content at and subsequent to the
+ recovery point in output order when all macroblocks of the primary coded
+ pictures are decoded within the changing slice group period
+ */
+ uint8_t changing_slice_group_idc = 0;
+};
+
+class H264 {
+ public:
+ /* Check if out of band extradata contains a SPS NAL */
+ static bool HasSPS(const mozilla::MediaByteBuffer* aExtraData);
+ // Extract SPS and PPS NALs from aSample by looking into each NALs.
+ // aSample must be in AVCC format.
+ static already_AddRefed<mozilla::MediaByteBuffer> ExtractExtraData(
+ const mozilla::MediaRawData* aSample);
+ // Return true if both extradata are equal.
+ static bool CompareExtraData(const mozilla::MediaByteBuffer* aExtraData1,
+ const mozilla::MediaByteBuffer* aExtraData2);
+
+ // Ensure that SPS data makes sense, Return true if SPS data was, and false
+ // otherwise. If false, then content will be adjusted accordingly.
+ static bool EnsureSPSIsSane(SPSData& aSPS);
+
+ static bool DecodeSPSFromExtraData(const mozilla::MediaByteBuffer* aExtraData,
+ SPSData& aDest);
+ /* Decode SPS NAL RBSP and fill SPSData structure */
+ static bool DecodeSPS(const mozilla::MediaByteBuffer* aSPS, SPSData& aDest);
+
+ // If the given aExtraData is valid, return the aExtraData.max_num_ref_frames
+ // clamped to be in the range of [4, 16]; otherwise return 4.
+ static uint32_t ComputeMaxRefFrames(
+ const mozilla::MediaByteBuffer* aExtraData);
+
+ enum class FrameType {
+ I_FRAME,
+ OTHER,
+ INVALID,
+ };
+
+ // Returns the frame type. Returns I_FRAME if the sample is an IDR
+ // (Instantaneous Decoding Refresh) Picture.
+ static FrameType GetFrameType(const mozilla::MediaRawData* aSample);
+ // Create a dummy extradata, useful to create a decoder and test the
+ // capabilities of the decoder.
+ static already_AddRefed<mozilla::MediaByteBuffer> CreateExtraData(
+ uint8_t aProfile, uint8_t aConstraints, uint8_t aLevel,
+ const gfx::IntSize& aSize);
+ static void WriteExtraData(mozilla::MediaByteBuffer* aDestExtraData,
+ const uint8_t aProfile, const uint8_t aConstraints,
+ const uint8_t aLevel,
+ const Span<const uint8_t> aSPS,
+ const Span<const uint8_t> aPPS);
+
+ private:
+ friend class SPSNAL;
+ /* Extract RAW BYTE SEQUENCE PAYLOAD from NAL content.
+ Returns nullptr if invalid content.
+ This is compliant to ITU H.264 7.3.1 Syntax in tabular form NAL unit syntax
+ */
+ static already_AddRefed<mozilla::MediaByteBuffer> DecodeNALUnit(
+ const uint8_t* aNAL, size_t aLength);
+ static already_AddRefed<mozilla::MediaByteBuffer> EncodeNALUnit(
+ const uint8_t* aNAL, size_t aLength);
+ static bool vui_parameters(mozilla::BitReader& aBr, SPSData& aDest);
+ // Read HRD parameters, all data is ignored.
+ static void hrd_parameters(mozilla::BitReader& aBr);
+ static uint8_t NumSPS(const mozilla::MediaByteBuffer* aExtraData);
+ // Decode SEI payload and return true if the SEI NAL indicates a recovery
+ // point.
+ static bool DecodeRecoverySEI(const mozilla::MediaByteBuffer* aSEI,
+ SEIRecoveryData& aDest);
+ // Decode NAL Slice payload and return true if its slice type is I slice or SI
+ // slice.
+ static bool DecodeISlice(const mozilla::MediaByteBuffer* aSlice);
+};
+
+} // namespace mozilla
+
+#endif // MP4_DEMUXER_H264_H_