/* * Copyright 2016 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * NOTES: * All the following structs and classes definded base on the spec of H264, * you can see H.264 specification at http://www.itu.int/rec/T-REC-H.264. * And in this header file, there are two categories of code-styles for variables. * Some of them looks like "NalUnit::nal_ref_idc" which separated by underline, * that is to say, these variables defined in H264 spec and assigned by reading bits * from video bits stream directly, on my purpose, it is helpful for you to find where * they are in spec quickly. Some of variables defined use CamelCase because these * variables assigned by other variables. */ #ifndef h264parser_h #define h264parser_h #include "nalReader.h" #include "VideoCommonDefs.h" #include #include namespace YamiParser { namespace H264 { #define MAX_SPS_ID 31 #define MAX_PPS_ID 255 #define MAX_IDR_PIC_ID 65535 //get the coding type of the slice according to Table 7-6 #define IS_P_SLICE(slice_type) ((slice_type) % 5 == 0) #define IS_B_SLICE(slice_type) ((slice_type) % 5 == 1) #define IS_I_SLICE(slice_type) ((slice_type) % 5 == 2) #define IS_SP_SLICE(slice_type) ((slice_type) % 5 == 3) #define IS_SI_SLICE(slice_type) ((slice_type) % 5 == 4) enum SliceGroupMapType { SLICE_GROUP_INTERLEAVED, SLICE_GROUP_DISPERSED_MAPPING, SLIEC_GROUP_FOREGROUND_LEFTOVER, //3, 4, 5 specify changing slice groups. when num_slice_groups_numus //is not equal to 1, slice_group_map_type shall not be equal to 3, 4, or 5 SLICE_GROUP_CHANGING3, SLICE_GROUP_CHANGING4, SLICE_GROUP_CHANGING5, SLICE_GROUP_ASSIGNMENT }; enum Profile { PROFILE_CAVLC_444_INTRA = 44, //A.2.11 PROFILE_BASELINE = 66, //A.2.1 PROFILE_MAIN = 77, //A.2.2 PROFILE_SCALABLE_BASELINE = 83, //G.10.1.1 PROFILE_SCALABLE_HIGH = 86, //G.10.1.2 PROFILE_EXTENDED = 88, //A.2.3 PROFILE_HIGH = 100, //A.2.4 PROFILE_HIGH_10 = 110, //A.2.5 PROFILE_MULTIVIEW_HIGH = 118, //H.10.1.1 PROFILE_HIGH_422 = 122, //A.2.6 PROFILE_STEREO_HIGH = 128, //H.10.1.2 PROFILE_MULTIVIEW_DEPTH_HIGH = 138, //I.10.1.1 PROFILE_HIGH_444 = 244, //A.2.7 }; //according 8.5.6 static const uint8_t zigzag_scans_4x4[16] = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; static const uint8_t zigzag_scans_8x8[64] = { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 }; #define transform_coefficients_for_frame_macroblocks(dest, src, len, mode) \ { \ if ((dest) != (src)) { \ for (uint32_t l = 0; l < (len); l++) \ (dest)[zigzag_scans_##mode[l]] = (src)[l]; \ } \ } //according to Table 7-1 enum NalUnitType { NAL_UNSPECIFIED, //unspecified NAL_SLICE_NONIDR, //coded slice of a non-IDR picture NAL_SLICE_DPA, //coded slice data partiiton A NAL_SLICE_DPB, //coded slice data partition B NAL_SLICE_DPC, //coded slice data partition C NAL_SLICE_IDR, //coded slice of an IDR picture NAL_SEI, //supplemental enhancement information (SEI) NAL_SPS, //sequence parameter set NAL_PPS, //picture parameter set NAL_AU_DELIMITER, //access unit delimiter NAL_SEQ_END, //end of sequence NAL_STREAM_END, //end of stream NAL_FILLER_DATA, //filler data NAL_SPS_EXT, //sequence parameter set extension NAL_PREFIX_UNIT, //prefix NAL unit NAL_SUBSET_SPS, //subset sequence parameter set //16 -18 reserved NAL_SLICE_AUX = 19, //coded slice of an auxiliary coded picture without partitioning NAL_SLICE_EXT, //coded slice extension NAL_SLICE_EXT_DEPV //coded slice extension for depth view components //22 & 23 reserved, 24 - 31 unspecified }; struct NaluHeadMvcExt { bool non_idr_flag; uint8_t priority_id; uint16_t view_id; uint8_t temporal_id; bool anchor_pic_flag; bool inter_view_flag; }; struct NaluHeadSvcExt { bool idr_flag; uint8_t priority_id; bool no_inter_layer_pred_flag; uint8_t dependency_id; uint8_t quality_id; uint8_t temporal_id; bool use_ref_base_pic_flag; bool discardable_flag; bool output_flag; uint8_t reserved_three_2bits; }; class Parser; class NalUnit { public: //the min size of a valid nal unit enum { NAL_UNIT_SEQUENCE_SIZE = 4 }; /* nal should be a complete nal unit buffer without start code or length bytes */ bool parseNalUnit(const uint8_t* nal, size_t size); public: const uint8_t* m_data; uint32_t m_size; uint16_t nal_ref_idc; uint16_t nal_unit_type; //calc value, used by other syntax structs bool m_idrPicFlag; uint8_t m_nalUnitHeaderBytes; NaluHeadMvcExt m_mvc; NaluHeadSvcExt m_svc; private: bool parseSvcExtension(BitReader& br); bool parseMvcExtension(BitReader& br); }; struct HRDParameters { uint8_t cpb_cnt_minus1; uint8_t bit_rate_scale; uint8_t cpb_size_scale; uint32_t bit_rate_value_minus1[32]; uint32_t cpb_size_value_minus1[32]; bool cbr_flag[32]; uint8_t initial_cpb_removal_delay_length_minus1; uint8_t cpb_removal_delay_length_minus1; uint8_t dpb_output_delay_length_minus1; uint8_t time_offset_length; }; struct VUIParameters { bool aspect_ratio_info_present_flag; uint8_t aspect_ratio_idc; uint16_t sar_width; uint16_t sar_height; bool overscan_info_present_flag; bool overscan_appropriate_flag; bool video_signal_type_present_flag; uint8_t video_format; bool video_full_range_flag; bool colour_description_present_flag; uint8_t colour_primaries; uint8_t transfer_characteristics; uint8_t matrix_coefficients; bool chroma_loc_info_present_flag; uint8_t chroma_sample_loc_type_top_field; uint8_t chroma_sample_loc_type_bottom_field; bool timing_info_present_flag; uint32_t num_units_in_tick; uint32_t time_scale; bool fixed_frame_rate_flag; bool nal_hrd_parameters_present_flag; HRDParameters nal_hrd_parameters; bool vcl_hrd_parameters_present_flag; HRDParameters vcl_hrd_parameters; bool low_delay_hrd_flag; bool pic_struct_present_flag; bool bitstream_restriction_flag; bool motion_vectors_over_pic_boundaries_flag; uint32_t max_bytes_per_pic_denom; uint32_t max_bits_per_mb_denom; uint32_t log2_max_mv_length_horizontal; uint32_t log2_max_mv_length_vertical; uint32_t max_num_reorder_frames; uint32_t max_dec_frame_buffering; }; struct SPS { uint8_t profile_idc; bool constraint_set0_flag; bool constraint_set1_flag; bool constraint_set2_flag; bool constraint_set3_flag; bool constraint_set4_flag; bool constraint_set5_flag; uint8_t level_idc; uint32_t sps_id; //seq_parameter_set_id uint8_t chroma_format_idc; bool separate_colour_plane_flag; uint8_t bit_depth_luma_minus8; uint8_t bit_depth_chroma_minus8; bool qpprime_y_zero_transform_bypass_flag; bool seq_scaling_matrix_present_flag; bool seq_scaling_list_present_flag[12]; uint8_t scaling_lists_4x4[6][16]; uint8_t scaling_lists_8x8[6][64]; uint8_t log2_max_frame_num_minus4; uint8_t pic_order_cnt_type; uint8_t log2_max_pic_order_cnt_lsb_minus4; bool delta_pic_order_always_zero_flag; int32_t offset_for_non_ref_pic; int32_t offset_for_top_to_bottom_field; uint8_t num_ref_frames_in_pic_order_cnt_cycle; int32_t offset_for_ref_frame[255]; uint32_t num_ref_frames; bool gaps_in_frame_num_value_allowed_flag; uint32_t pic_width_in_mbs_minus1; uint32_t pic_height_in_map_units_minus1; bool frame_mbs_only_flag; bool mb_adaptive_frame_field_flag; bool direct_8x8_inference_flag; bool frame_cropping_flag; uint32_t frame_crop_left_offset; uint32_t frame_crop_right_offset; uint32_t frame_crop_top_offset; uint32_t frame_crop_bottom_offset; bool vui_parameters_present_flag; VUIParameters m_vui; //Because these variables calced from other variables instead of //reading from bits stream, so using different style and spec do like this //used to calc slice`s maxPicNum uint32_t m_maxFrameNum; uint8_t m_chromaArrayType; int32_t m_width; int32_t m_height; int32_t m_cropX; int32_t m_cropY; int32_t m_cropRectWidth; int32_t m_cropRectHeight; }; struct PPS { PPS(); ~PPS(); uint32_t pps_id; uint32_t sps_id; bool entropy_coding_mode_flag; bool pic_order_present_flag; uint32_t num_slice_groups_minus1; uint8_t slice_group_map_type; uint32_t run_length_minus1[8]; uint32_t top_left[8]; uint32_t bottom_right[8]; bool slice_group_change_direction_flag; uint32_t slice_group_change_rate_minus1; uint32_t pic_size_in_map_units_minus1; uint8_t* slice_group_id; uint8_t num_ref_idx_l0_active_minus1; uint8_t num_ref_idx_l1_active_minus1; bool weighted_pred_flag; uint8_t weighted_bipred_idc; int8_t pic_init_qp_minus26; int8_t pic_init_qs_minus26; int8_t chroma_qp_index_offset; bool deblocking_filter_control_present_flag; bool constrained_intra_pred_flag; bool redundant_pic_cnt_present_flag; bool transform_8x8_mode_flag; bool pic_scaling_list_present_flag[12]; uint8_t scaling_lists_4x4[6][16]; uint8_t scaling_lists_8x8[6][64]; int8_t second_chroma_qp_index_offset; SharedPtr m_sps; //Because these variables is non pod type, it can not use memset and use offsetof to weed out it }; struct RefPicListModification { uint8_t modification_of_pic_nums_idc; uint32_t abs_diff_pic_num_minus1; uint32_t long_term_pic_num; uint32_t abs_diff_view_idx_minus1; }; struct PredWeightTable { uint8_t luma_log2_weight_denom; uint8_t chroma_log2_weight_denom; bool luma_weight_l0_flag; //32 is the max of num_ref_idx_l0_active_minus1 int16_t luma_weight_l0[32]; int8_t luma_offset_l0[32]; bool chroma_weight_l0_flag; int16_t chroma_weight_l0[32][2]; int8_t chroma_offset_l0[32][2]; bool luma_weight_l1_flag; int16_t luma_weight_l1[32]; int8_t luma_offset_l1[32]; bool chroma_weight_l1_flag; int16_t chroma_weight_l1[32][2]; int8_t chroma_offset_l1[32][2]; }; struct RefPicMarking { uint8_t memory_management_control_operation; uint32_t difference_of_pic_nums_minus1; uint32_t long_term_pic_num; uint32_t long_term_frame_idx; uint32_t max_long_term_frame_idx_plus1; }; struct DecRefPicMarking { bool no_output_of_prior_pics_flag; bool long_term_reference_flag; bool adaptive_ref_pic_marking_mode_flag; RefPicMarking ref_pic_marking[10]; uint8_t n_ref_pic_marking; }; class SliceHeader { public: SliceHeader(); bool parseHeader(Parser* nalparser, NalUnit* nalu); private: bool refPicListModification(NalReader& nr, RefPicListModification* pm0, RefPicListModification* pm1, bool is_mvc); bool predWeightTable(NalReader& nr, uint8_t chroma_array_type); bool decRefPicMarking(NalUnit* nalu, NalReader& nr); public: uint32_t first_mb_in_slice; uint32_t slice_type; uint8_t colour_plane_id; uint16_t frame_num; bool field_pic_flag; bool bottom_field_flag; uint32_t idr_pic_id; uint16_t pic_order_cnt_lsb; int32_t delta_pic_order_cnt_bottom; int32_t delta_pic_order_cnt[2]; uint8_t redundant_pic_cnt; bool direct_spatial_mv_pred_flag; bool num_ref_idx_active_override_flag; uint8_t num_ref_idx_l0_active_minus1; uint8_t num_ref_idx_l1_active_minus1; bool ref_pic_list_modification_flag_l0; uint8_t n_ref_pic_list_modification_l0; RefPicListModification ref_pic_list_modification_l0[32]; bool ref_pic_list_modification_flag_l1; uint8_t n_ref_pic_list_modification_l1; RefPicListModification ref_pic_list_modification_l1[32]; PredWeightTable pred_weight_table; DecRefPicMarking dec_ref_pic_marking; uint8_t cabac_init_idc; int8_t slice_qp_delta; bool sp_for_switch_flag; int8_t slice_qs_delta; uint8_t disable_deblocking_filter_idc; int8_t slice_alpha_c0_offset_div2; int8_t slice_beta_offset_div2; uint16_t slice_group_change_cycle; //the allowned max value of abs_diff_pic_num_minus1 uint32_t m_maxPicNum; //the size of the slice header in bits uint32_t m_headerSize; //the number of emulation prevention bytes uint32_t m_emulationPreventionBytes; SharedPtr m_pps; //Because these variables is non pod type, it can not use memset and use offsetof to weed out it }; class Parser { public: enum { MAX_CPB_CNT_MINUS1 = 31, MAX_CHROMA_FORMAT_IDC = 3, SCALING_LIST_DEFAULT_VALUE = 16 }; typedef std::map > SpsMap; typedef std::map > PpsMap; bool parseSps(SharedPtr& sps, const NalUnit* nalu); bool parsePps(SharedPtr& pps, const NalUnit* nalu); inline SharedPtr searchPps(uint8_t id) const; inline SharedPtr searchSps(uint8_t id) const; private: bool hrdParameters(HRDParameters* hrd, NalReader& nr); bool vuiParameters(SharedPtr& sps, NalReader& nr); static const uint8_t EXTENDED_SAR; SpsMap m_spsMap; PpsMap m_ppsMap; }; } } #endif