/*
* Copyright 2016 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* NOTES:
* All of the following structs and classes are defined based on the H.264 spec;
* you can find the H.264 specification at http://www.itu.int/rec/T-REC-H.264.
* This header file uses two naming styles for variables.
* Names separated by underscores, such as "NalUnit::nal_ref_idc", are variables
* that are defined in the H.264 spec and are assigned by reading bits directly
* from the video bitstream; keeping the spec names is intended to help you find
* them in the spec quickly. Names written in camelCase are variables that are
* assigned from other variables instead of being read from the bitstream.
*/
#ifndef h264parser_h
#define h264parser_h
#include "nalReader.h"
#include "VideoCommonDefs.h"
#include <map>
#include <string.h>
namespace YamiParser {
namespace H264 {
//upper bounds for the parameter-set and IDR picture identifiers
//(presumably the spec limits of seq_parameter_set_id, pic_parameter_set_id
//and idr_pic_id -- confirm against H.264 clauses 7.4.2.1.1, 7.4.2.2 and 7.4.3)
//NOTE: these are preprocessor macros, so the enclosing namespaces do not
//scope them
#define MAX_SPS_ID 31
#define MAX_PPS_ID 255
#define MAX_IDR_PIC_ID 65535
//Slice coding type predicates, following Table 7-6.
//The argument is reduced modulo 5 before the comparison, so the values
//5..9 classify exactly like 0..4.
#define IS_P_SLICE(type) (((type) % 5) == 0)
#define IS_B_SLICE(type) (((type) % 5) == 1)
#define IS_I_SLICE(type) (((type) % 5) == 2)
#define IS_SP_SLICE(type) (((type) % 5) == 3)
#define IS_SI_SLICE(type) (((type) % 5) == 4)
//Values of slice_group_map_type (stored in PPS::slice_group_map_type).
//Enumerators are numbered 0..6 in declaration order.
enum SliceGroupMapType {
    SLICE_GROUP_INTERLEAVED,
    SLICE_GROUP_DISPERSED_MAPPING,
    //correctly spelled name for map type 2
    SLICE_GROUP_FOREGROUND_LEFTOVER,
    //misspelled legacy name with the same value, kept so that existing
    //users of the old spelling keep compiling
    SLIEC_GROUP_FOREGROUND_LEFTOVER = SLICE_GROUP_FOREGROUND_LEFTOVER,
    //3, 4, 5 specify changing slice groups. when num_slice_groups_minus1
    //is not equal to 1, slice_group_map_type shall not be equal to 3, 4, or 5
    SLICE_GROUP_CHANGING3,
    SLICE_GROUP_CHANGING4,
    SLICE_GROUP_CHANGING5,
    SLICE_GROUP_ASSIGNMENT
};
//profile_idc values named by this parser. Each comment gives the profile
//and the H.264 clause that defines it.
enum Profile {
    PROFILE_CAVLC_444_INTRA      = 44,  //CAVLC 4:4:4 Intra, A.2.11
    PROFILE_BASELINE             = 66,  //Baseline, A.2.1
    PROFILE_MAIN                 = 77,  //Main, A.2.2
    PROFILE_SCALABLE_BASELINE    = 83,  //Scalable Baseline, G.10.1.1
    PROFILE_SCALABLE_HIGH        = 86,  //Scalable High, G.10.1.2
    PROFILE_EXTENDED             = 88,  //Extended, A.2.3
    PROFILE_HIGH                 = 100, //High, A.2.4
    PROFILE_HIGH_10              = 110, //High 10, A.2.5
    PROFILE_MULTIVIEW_HIGH       = 118, //Multiview High, H.10.1.1
    PROFILE_HIGH_422             = 122, //High 4:2:2, A.2.6
    PROFILE_STEREO_HIGH          = 128, //Stereo High, H.10.1.2
    PROFILE_MULTIVIEW_DEPTH_HIGH = 138, //Multiview Depth High, I.10.1.1
    PROFILE_HIGH_444             = 244  //High 4:4:4, A.2.7
};
//Zigzag scan table for 4x4 blocks, according to 8.5.6.
//Entry k is the destination index that
//transform_coefficients_for_frame_macroblocks writes source element k to.
//The entries are laid out one anti-diagonal of the 4x4 block per line.
static const uint8_t zigzag_scans_4x4[16] = {
    0,
    1, 4,
    8, 5, 2,
    3, 6, 9, 12,
    13, 10, 7,
    11, 14,
    15
};
//Zigzag scan table for 8x8 blocks.
//Entry k is the destination index that
//transform_coefficients_for_frame_macroblocks writes source element k to.
//The entries are laid out one anti-diagonal of the 8x8 block per line.
static const uint8_t zigzag_scans_8x8[64] = {
    0,
    1, 8,
    16, 9, 2,
    3, 10, 17, 24,
    32, 25, 18, 11, 4,
    5, 12, 19, 26, 33, 40,
    48, 41, 34, 27, 20, 13, 6,
    7, 14, 21, 28, 35, 42, 49, 56,
    57, 50, 43, 36, 29, 22, 15,
    23, 30, 37, 44, 51, 58,
    59, 52, 45, 38, 31,
    39, 46, 53, 60,
    61, 54, 47,
    55, 62,
    63
};
//Scatters the first len elements of src into dest: element k of src is
//stored at dest[zigzag_scans_<mode>[k]]. mode is pasted onto
//"zigzag_scans_" to select the table (4x4 or 8x8 in this header).
//Nothing is written when dest and src compare equal.
//NOTE: the expansion is a bare { } block, so an invocation followed by ';'
//cannot be placed between an unbraced "if" and its "else". The arguments
//are evaluated more than once.
#define transform_coefficients_for_frame_macroblocks(dest, src, len, mode) \
    { \
        if ((src) != (dest)) { \
            for (uint32_t zzScanPos_ = 0; zzScanPos_ < (len); ++zzScanPos_) \
                (dest)[zigzag_scans_##mode[zzScanPos_]] = (src)[zzScanPos_]; \
        } \
    }
//nal_unit_type codes, according to Table 7-1
//(every enumerator is written with its explicit value)
enum NalUnitType {
    NAL_UNSPECIFIED = 0,     //unspecified
    NAL_SLICE_NONIDR = 1,    //coded slice of a non-IDR picture
    NAL_SLICE_DPA = 2,       //coded slice data partition A
    NAL_SLICE_DPB = 3,       //coded slice data partition B
    NAL_SLICE_DPC = 4,       //coded slice data partition C
    NAL_SLICE_IDR = 5,       //coded slice of an IDR picture
    NAL_SEI = 6,             //supplemental enhancement information (SEI)
    NAL_SPS = 7,             //sequence parameter set
    NAL_PPS = 8,             //picture parameter set
    NAL_AU_DELIMITER = 9,    //access unit delimiter
    NAL_SEQ_END = 10,        //end of sequence
    NAL_STREAM_END = 11,     //end of stream
    NAL_FILLER_DATA = 12,    //filler data
    NAL_SPS_EXT = 13,        //sequence parameter set extension
    NAL_PREFIX_UNIT = 14,    //prefix NAL unit
    NAL_SUBSET_SPS = 15,     //subset sequence parameter set
    //16 - 18 reserved
    NAL_SLICE_AUX = 19,      //coded slice of an auxiliary coded picture without partitioning
    NAL_SLICE_EXT = 20,      //coded slice extension
    NAL_SLICE_EXT_DEPV = 21  //coded slice extension for depth view components
    //22 & 23 reserved, 24 - 31 unspecified
};
//Fields of the MVC extension of the NAL unit header.
//The names follow the H.264 spec syntax elements (see the NOTES at the top
//of this file); NalUnit stores one instance as m_mvc.
struct NaluHeadMvcExt {
    bool non_idr_flag;
    uint8_t priority_id;
    uint16_t view_id;
    uint8_t temporal_id;
    bool anchor_pic_flag;
    bool inter_view_flag;
};
//Fields of the SVC extension of the NAL unit header.
//The names follow the H.264 spec syntax elements (see the NOTES at the top
//of this file); NalUnit stores one instance as m_svc.
struct NaluHeadSvcExt {
    bool idr_flag;
    uint8_t priority_id;
    bool no_inter_layer_pred_flag;
    uint8_t dependency_id;
    uint8_t quality_id;
    uint8_t temporal_id;
    bool use_ref_base_pic_flag;
    bool discardable_flag;
    bool output_flag;
    uint8_t reserved_three_2bits;
};
class Parser;
//One NAL unit: the buffer it lives in plus the fields of its header.
//All data members are public and there is no constructor, so a fresh
//object holds indeterminate values until it is filled in.
class NalUnit {
public:
    //the min size of a valid nal unit
    enum {
        NAL_UNIT_SEQUENCE_SIZE = 4
    };

    /* nal should be a complete nal unit buffer without start code or length bytes */
    bool parseNalUnit(const uint8_t* nal, size_t size);

    //buffer pointer and byte count of the unit
    //(presumably the arguments of parseNalUnit -- confirm in the .cpp;
    //note that m_size is 32 bits wide while the size parameter is a size_t)
    const uint8_t* m_data;
    uint32_t m_size;

    //header syntax elements
    uint16_t nal_ref_idc;
    uint16_t nal_unit_type;

    //calculated values, used by other syntax structs
    bool m_idrPicFlag;
    uint8_t m_nalUnitHeaderBytes;

    //header extensions for MVC and SVC NAL units
    NaluHeadMvcExt m_mvc;
    NaluHeadSvcExt m_svc;

private:
    //helpers for the header extensions; defined outside this header
    bool parseSvcExtension(BitReader& br);
    bool parseMvcExtension(BitReader& br);
};
//HRD (hypothetical reference decoder) parameters.
//VUIParameters holds two instances: nal_hrd_parameters and
//vcl_hrd_parameters. The per-CPB arrays have 32 entries each, which matches
//Parser::MAX_CPB_CNT_MINUS1 (31) + 1.
struct HRDParameters {
    uint8_t cpb_cnt_minus1;
    uint8_t bit_rate_scale;
    uint8_t cpb_size_scale;

    //per-CPB values
    uint32_t bit_rate_value_minus1[32];
    uint32_t cpb_size_value_minus1[32];
    bool cbr_flag[32];

    uint8_t initial_cpb_removal_delay_length_minus1;
    uint8_t cpb_removal_delay_length_minus1;
    uint8_t dpb_output_delay_length_minus1;
    uint8_t time_offset_length;
};
//VUI (video usability information) parameters; SPS stores one instance as
//m_vui. Members are grouped below by the *_present_flag that precedes them.
struct VUIParameters {
    //aspect ratio
    bool aspect_ratio_info_present_flag;
    uint8_t aspect_ratio_idc;
    uint16_t sar_width;
    uint16_t sar_height;

    //overscan
    bool overscan_info_present_flag;
    bool overscan_appropriate_flag;

    //video signal type and colour description
    bool video_signal_type_present_flag;
    uint8_t video_format;
    bool video_full_range_flag;
    bool colour_description_present_flag;
    uint8_t colour_primaries;
    uint8_t transfer_characteristics;
    uint8_t matrix_coefficients;

    //chroma sample location
    bool chroma_loc_info_present_flag;
    uint8_t chroma_sample_loc_type_top_field;
    uint8_t chroma_sample_loc_type_bottom_field;

    //timing
    bool timing_info_present_flag;
    uint32_t num_units_in_tick;
    uint32_t time_scale;
    bool fixed_frame_rate_flag;

    //HRD parameters, one set for NAL and one for VCL
    bool nal_hrd_parameters_present_flag;
    HRDParameters nal_hrd_parameters;
    bool vcl_hrd_parameters_present_flag;
    HRDParameters vcl_hrd_parameters;
    bool low_delay_hrd_flag;

    bool pic_struct_present_flag;

    //bitstream restrictions
    bool bitstream_restriction_flag;
    bool motion_vectors_over_pic_boundaries_flag;
    uint32_t max_bytes_per_pic_denom;
    uint32_t max_bits_per_mb_denom;
    uint32_t log2_max_mv_length_horizontal;
    uint32_t log2_max_mv_length_vertical;
    uint32_t max_num_reorder_frames;
    uint32_t max_dec_frame_buffering;
};
//Sequence parameter set.
//Underscore-separated members are spec syntax elements; the m_ members at
//the end are calculated from them (see the NOTES at the top of this file).
struct SPS {
    //profile, constraint flags and level
    uint8_t profile_idc;
    bool constraint_set0_flag;
    bool constraint_set1_flag;
    bool constraint_set2_flag;
    bool constraint_set3_flag;
    bool constraint_set4_flag;
    bool constraint_set5_flag;
    uint8_t level_idc;
    uint32_t sps_id; //seq_parameter_set_id

    //chroma format, bit depth and scaling lists
    uint8_t chroma_format_idc;
    bool separate_colour_plane_flag;
    uint8_t bit_depth_luma_minus8;
    uint8_t bit_depth_chroma_minus8;
    bool qpprime_y_zero_transform_bypass_flag;
    bool seq_scaling_matrix_present_flag;
    bool seq_scaling_list_present_flag[12];
    uint8_t scaling_lists_4x4[6][16];
    uint8_t scaling_lists_8x8[6][64];

    //frame numbering and picture order count
    uint8_t log2_max_frame_num_minus4;
    uint8_t pic_order_cnt_type;
    uint8_t log2_max_pic_order_cnt_lsb_minus4;
    bool delta_pic_order_always_zero_flag;
    int32_t offset_for_non_ref_pic;
    int32_t offset_for_top_to_bottom_field;
    uint8_t num_ref_frames_in_pic_order_cnt_cycle;
    int32_t offset_for_ref_frame[255];

    uint32_t num_ref_frames;
    bool gaps_in_frame_num_value_allowed_flag;

    //picture size and frame/field coding
    uint32_t pic_width_in_mbs_minus1;
    uint32_t pic_height_in_map_units_minus1;
    bool frame_mbs_only_flag;
    bool mb_adaptive_frame_field_flag;
    bool direct_8x8_inference_flag;

    //frame cropping
    bool frame_cropping_flag;
    uint32_t frame_crop_left_offset;
    uint32_t frame_crop_right_offset;
    uint32_t frame_crop_top_offset;
    uint32_t frame_crop_bottom_offset;

    //VUI
    bool vui_parameters_present_flag;
    VUIParameters m_vui;

    //The members below are calculated from other variables instead of being
    //read from the bitstream, so they use the camelCase naming style.

    //used to calculate the slice's maxPicNum
    uint32_t m_maxFrameNum;
    uint8_t m_chromaArrayType;
    int32_t m_width;
    int32_t m_height;
    int32_t m_cropX;
    int32_t m_cropY;
    int32_t m_cropRectWidth;
    int32_t m_cropRectHeight;
};
//Picture parameter set.
//The constructor and destructor are declared here and defined elsewhere.
struct PPS {
    PPS();
    ~PPS();

    uint32_t pps_id;
    uint32_t sps_id;
    bool entropy_coding_mode_flag;
    bool pic_order_present_flag;

    //slice groups
    uint32_t num_slice_groups_minus1;
    uint8_t slice_group_map_type;
    uint32_t run_length_minus1[8];
    uint32_t top_left[8];
    uint32_t bottom_right[8];
    bool slice_group_change_direction_flag;
    uint32_t slice_group_change_rate_minus1;
    uint32_t pic_size_in_map_units_minus1;
    //NOTE(review): raw pointer; who allocates and frees it is not visible
    //in this header. If ~PPS() releases it, copying a PPS needs care --
    //confirm in the .cpp.
    uint8_t* slice_group_id;

    uint8_t num_ref_idx_l0_active_minus1;
    uint8_t num_ref_idx_l1_active_minus1;
    bool weighted_pred_flag;
    uint8_t weighted_bipred_idc;
    int8_t pic_init_qp_minus26;
    int8_t pic_init_qs_minus26;
    int8_t chroma_qp_index_offset;
    bool deblocking_filter_control_present_flag;
    bool constrained_intra_pred_flag;
    bool redundant_pic_cnt_present_flag;

    //8x8 transform and scaling lists
    bool transform_8x8_mode_flag;
    bool pic_scaling_list_present_flag[12];
    uint8_t scaling_lists_4x4[6][16];
    uint8_t scaling_lists_8x8[6][64];
    int8_t second_chroma_qp_index_offset;

    //m_sps is a non-POD member, so it must not be overwritten by memset;
    //offsetof is used to leave it out of the cleared range. It is the last
    //data member -- presumably so that everything before it can be cleared
    //in one call; confirm in the .cpp before adding members after it.
    SharedPtr<SPS> m_sps;
};
//One entry of a reference picture list modification.
//SliceHeader keeps up to 32 of these for each of list 0 and list 1.
struct RefPicListModification {
    uint8_t modification_of_pic_nums_idc;
    uint32_t abs_diff_pic_num_minus1;
    uint32_t long_term_pic_num;
    uint32_t abs_diff_view_idx_minus1;
};
//Prediction weight table of a slice header.
//Every per-reference array has 32 entries: one for each reference index,
//since num_ref_idx_l0_active_minus1 / num_ref_idx_l1_active_minus1 are at
//most 31. The chroma arrays carry two values per entry.
struct PredWeightTable {
    uint8_t luma_log2_weight_denom;
    uint8_t chroma_log2_weight_denom;

    //list 0
    bool luma_weight_l0_flag;
    int16_t luma_weight_l0[32];
    int8_t luma_offset_l0[32];
    bool chroma_weight_l0_flag;
    int16_t chroma_weight_l0[32][2];
    int8_t chroma_offset_l0[32][2];

    //list 1
    bool luma_weight_l1_flag;
    int16_t luma_weight_l1[32];
    int8_t luma_offset_l1[32];
    bool chroma_weight_l1_flag;
    int16_t chroma_weight_l1[32][2];
    int8_t chroma_offset_l1[32][2];
};
//One memory management control operation together with its operands.
//DecRefPicMarking stores an array of these.
struct RefPicMarking {
    uint8_t memory_management_control_operation;
    uint32_t difference_of_pic_nums_minus1;
    uint32_t long_term_pic_num;
    uint32_t long_term_frame_idx;
    uint32_t max_long_term_frame_idx_plus1;
};
//Decoded reference picture marking data of a slice header.
struct DecRefPicMarking {
    bool no_output_of_prior_pics_flag;
    bool long_term_reference_flag;
    bool adaptive_ref_pic_marking_mode_flag;

    //room for 10 marking operations
    RefPicMarking ref_pic_marking[10];
    //presumably the number of ref_pic_marking entries in use -- confirm
    //in the .cpp
    uint8_t n_ref_pic_marking;
};
//Slice header.
//parseHeader() is the public entry point; the private member functions
//take the NalReader and cover the sub-structures of the header. Everything
//is defined outside this header.
class SliceHeader {
public:
    SliceHeader();
    bool parseHeader(Parser* nalparser, NalUnit* nalu);

private:
    bool refPicListModification(NalReader& nr,
        RefPicListModification* pm0, RefPicListModification* pm1, bool is_mvc);
    bool predWeightTable(NalReader& nr, uint8_t chroma_array_type);
    bool decRefPicMarking(NalUnit* nalu, NalReader& nr);

public:
    uint32_t first_mb_in_slice;
    uint32_t slice_type;
    uint8_t colour_plane_id;
    uint16_t frame_num;
    bool field_pic_flag;
    bool bottom_field_flag;
    uint32_t idr_pic_id;

    //picture order count
    uint16_t pic_order_cnt_lsb;
    int32_t delta_pic_order_cnt_bottom;
    int32_t delta_pic_order_cnt[2];

    uint8_t redundant_pic_cnt;
    bool direct_spatial_mv_pred_flag;

    //active reference index counts
    bool num_ref_idx_active_override_flag;
    uint8_t num_ref_idx_l0_active_minus1;
    uint8_t num_ref_idx_l1_active_minus1;

    //reference picture list modifications for list 0 and list 1; the
    //n_ members are presumably the number of array entries in use --
    //confirm in the .cpp
    bool ref_pic_list_modification_flag_l0;
    uint8_t n_ref_pic_list_modification_l0;
    RefPicListModification ref_pic_list_modification_l0[32];
    bool ref_pic_list_modification_flag_l1;
    uint8_t n_ref_pic_list_modification_l1;
    RefPicListModification ref_pic_list_modification_l1[32];

    PredWeightTable pred_weight_table;
    DecRefPicMarking dec_ref_pic_marking;

    //entropy coding and quantization
    uint8_t cabac_init_idc;
    int8_t slice_qp_delta;
    bool sp_for_switch_flag;
    int8_t slice_qs_delta;

    //deblocking filter
    uint8_t disable_deblocking_filter_idc;
    int8_t slice_alpha_c0_offset_div2;
    int8_t slice_beta_offset_div2;

    uint16_t slice_group_change_cycle;

    //the allowed max value of abs_diff_pic_num_minus1
    uint32_t m_maxPicNum;
    //the size of the slice header in bits
    uint32_t m_headerSize;
    //the number of emulation prevention bytes
    uint32_t m_emulationPreventionBytes;

    //m_pps is a non-POD member, so it must not be overwritten by memset;
    //offsetof is used to leave it out of the cleared range. It is the last
    //data member -- presumably so that everything before it can be cleared
    //in one call; confirm in the .cpp before adding members after it.
    SharedPtr<PPS> m_pps;
};
//Parses SPS and PPS NAL units and keeps two id-keyed maps of parameter
//sets (m_spsMap, m_ppsMap). All member functions are defined outside this
//header.
class Parser {
public:
    enum {
        //HRDParameters sizes its per-CPB arrays as this value + 1
        MAX_CPB_CNT_MINUS1 = 31,
        MAX_CHROMA_FORMAT_IDC = 3,
        SCALING_LIST_DEFAULT_VALUE = 16
    };

    //parameter sets keyed by an 8-bit id
    typedef std::map<uint8_t, SharedPtr<SPS> > SpsMap;
    typedef std::map<uint8_t, SharedPtr<PPS> > PpsMap;

    //parse one parameter set from nalu; the result is returned through the
    //SharedPtr reference and the bool reports success
    //(presumably it is also stored in the matching map -- confirm in the .cpp)
    bool parseSps(SharedPtr<SPS>& sps, const NalUnit* nalu);
    bool parsePps(SharedPtr<PPS>& pps, const NalUnit* nalu);

    //look up a parameter set by id
    //NOTE(review): declared inline but not defined in this header; an
    //inline function must be defined in every translation unit that calls
    //it -- confirm where the definitions live
    inline SharedPtr<PPS> searchPps(uint8_t id) const;
    inline SharedPtr<SPS> searchSps(uint8_t id) const;

private:
    bool hrdParameters(HRDParameters* hrd, NalReader& nr);
    bool vuiParameters(SharedPtr<SPS>& sps, NalReader& nr);

    //defined outside this header
    static const uint8_t EXTENDED_SAR;

    SpsMap m_spsMap;
    PpsMap m_ppsMap;
};
}
}
#endif