/*
 * Copyright (C) 2013-2016 Intel Corporation. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "codecparsers/h265Parser.h"
#include "common/log.h"
#include "common/nalreader.h"

#include "vaapi/vaapiptrs.h"
#include "vaapi/vaapicontext.h"
#include "vaapi/vaapidisplay.h"
#include "vaapidecpicture.h"

#include <algorithm>

#include "vaapidecoder_h265.h"

namespace YamiMediaCodec{
typedef VaapiDecoderH265::PicturePtr PicturePtr;
using std::bind;
using std::placeholders::_1;
using std::ref;

using namespace YamiParser::H265;

bool isIdr(const NalUnit* const nalu)
{
    return nalu->nal_unit_type == NalUnit::IDR_W_RADL
            || nalu->nal_unit_type == NalUnit::IDR_N_LP;
}

bool isBla(const NalUnit* const nalu)
{
    return nalu->nal_unit_type == NalUnit::BLA_W_LP
            || nalu->nal_unit_type == NalUnit::BLA_W_RADL
            || nalu->nal_unit_type == NalUnit::BLA_N_LP;
}

bool isIrap(const NalUnit* const nalu)
{
    return nalu->nal_unit_type >=  NalUnit::BLA_W_LP
            && nalu->nal_unit_type <= NalUnit::RSV_IRAP_VCL23;
}

bool isRasl(const NalUnit* const nalu)
{
    return nalu->nal_unit_type == NalUnit::RASL_R
            || nalu->nal_unit_type == NalUnit::RASL_N;
}

bool isRadl(const NalUnit* const nalu)
{
    return nalu->nal_unit_type == NalUnit::RADL_R
            || nalu->nal_unit_type == NalUnit::RADL_N;
}

bool isCra(const NalUnit* const nalu)
{
    return nalu->nal_unit_type == NalUnit::CRA_NUT;
}

bool isSublayerNoRef(const NalUnit* const nalu)
{
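    //sub-layer non-reference NAL unit types (Table 7-1); the list must stay
    //sorted in ascending order because std::binary_search is used below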
    static const uint8_t noRef[] = {
        NalUnit::TRAIL_N,
        NalUnit::TSA_N,
        NalUnit::STSA_N,
        NalUnit::RADL_N,
        NalUnit::RASL_N,
        NalUnit::RSV_VCL_N10,
        NalUnit::RSV_VCL_N12,
        NalUnit::RSV_VCL_N14
    };
    static const uint8_t* end  = noRef + N_ELEMENTS(noRef);
    return std::binary_search(noRef, end, nalu->nal_unit_type);
}

class VaapiDecPictureH265 : public VaapiDecPicture
{
public:
    VaapiDecPictureH265(const ContextPtr& context, const SurfacePtr& surface, int64_t timeStamp):
        VaapiDecPicture(context, surface, timeStamp)
    {
    }
    VaapiDecPictureH265()
    {
    }
    int32_t     m_poc;
    uint16_t    m_pocLsb;
    bool        m_noRaslOutputFlag;
    bool        m_picOutputFlag;
    uint32_t    m_picLatencyCount;

    //not referenced by the current picture's RPS; such pictures lose their reference marking
    bool        m_isUnusedReference;
    //is this picture still used for reference?
    bool        m_isReference;

};

inline bool VaapiDecoderH265::DPB::PocLess::operator()(const PicturePtr& left, const PicturePtr& right) const
{
    return left->m_poc < right->m_poc;
}

VaapiDecoderH265::DPB::DPB(OutputCallback output):
    m_output(output),
    m_dummy(new VaapiDecPictureH265)
{
}

bool VaapiDecoderH265::DPB::initShortTermRef(RefSet& ref, int32_t currPoc,
            const int32_t* delta, const uint8_t* used,  uint8_t num)
{
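    //one direction (S0 or S1) of the short-term RPS, spec 8.3.2: each candidate
    //POC is currPoc + delta[i]; entries used by the current picture go into ref,
    //the others into the "follow" set m_stFoll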
    if (num > 16)
        return false;
    ref.clear();
    for (uint8_t i = 0; i < num; i++) {
        int32_t poc = currPoc + delta[i];
        VaapiDecPictureH265* pic = getPic(poc);
        if (!pic) {
            ERROR("can't find short ref %d for %d", poc, currPoc);
        } else {
            if (used[i])
                ref.push_back(pic);
            else
                m_stFoll.push_back(pic);
        }
    }
    return true;
}

bool VaapiDecoderH265::DPB::initShortTermRef(const PicturePtr& picture,
                                             const SliceHeader* const slice)
{
    const PPS *const pps = slice->pps.get();
    const SPS *const sps = pps->sps.get();

    const ShortTermRefPicSet* stRef;
    if (!slice->short_term_ref_pic_set_sps_flag)
        stRef = &slice->short_term_ref_pic_sets;
    else
        stRef = &sps->short_term_ref_pic_set[slice->short_term_ref_pic_set_idx];

    //m_stFoll is appended to by both calls below, so clear it only once here
    m_stFoll.clear();

    if (!initShortTermRef(m_stCurrBefore, picture->m_poc,
            stRef->DeltaPocS0, stRef->UsedByCurrPicS0, stRef->NumNegativePics))
        return false;
    if (!initShortTermRef(m_stCurrAfter, picture->m_poc,
            stRef->DeltaPocS1, stRef->UsedByCurrPicS1, stRef->NumPositivePics))
        return false;
    return true;
}

bool VaapiDecoderH265::DPB::initLongTermRef(const PicturePtr& picture, const SliceHeader *const slice)
{
    const PPS *const pps = slice->pps.get();
    const SPS *const sps = pps->sps.get();

    int32_t deltaPocMsbCycleLt[16];

    const int32_t maxPicOrderCntLsb = 1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + 4);
    uint16_t num = slice->num_long_term_sps + slice->num_long_term_pics;
    //(7-38)
    for (int i = 0; i < num; i++) {
        if( i == 0  || i == slice->num_long_term_sps)
            deltaPocMsbCycleLt[ i ] = slice->delta_poc_msb_cycle_lt[i];
        else
            deltaPocMsbCycleLt[ i ] = slice->delta_poc_msb_cycle_lt[ i ] + deltaPocMsbCycleLt[i-1];
    }
    //(8-5)
    for (int i = 0; i < num; i++) {
        int32_t poc;
        bool used;
        if (i < slice->num_long_term_sps) {
            poc = sps->lt_ref_pic_poc_lsb_sps[slice->lt_idx_sps[i]];
            used = sps->used_by_curr_pic_lt_sps_flag[slice->lt_idx_sps[i]];
        } else {
            poc = slice->poc_lsb_lt[i];
            used = slice->used_by_curr_pic_lt_flag[i];
        }
        if (slice->delta_poc_msb_present_flag[i]) {
            poc += picture->m_poc - deltaPocMsbCycleLt[i] * maxPicOrderCntLsb
                    - slice->slice_pic_order_cnt_lsb;
        }
        VaapiDecPictureH265* pic = getPic(poc, slice->delta_poc_msb_present_flag[i]);
        if (!pic) {
            ERROR("can't find long ref %d for %d", poc, picture->m_poc);
        } else {
            if (used)
                m_ltCurr.push_back(pic);
            else
                m_ltFoll.push_back(pic);
        }
    }
    return true;

}

void markUnusedReference(const PicturePtr& picture)
{
    picture->m_isUnusedReference = true;
}

void clearReference(const PicturePtr& picture)
{
    if (picture->m_isUnusedReference)
        picture->m_isReference = false;
}

inline bool isOutputNeeded(const PicturePtr& picture)
{
    return picture->m_picOutputFlag;
}

inline bool isReference(const PicturePtr& picture)
{
    return picture->m_isReference;
}

inline bool isUnusedPicture(const PicturePtr& picture)
{
    return !isReference(picture) && !isOutputNeeded(picture);
}

void VaapiDecoderH265::DPB::removeUnused()
{
    forEach(clearReference);

    /* Remove unused pictures from DPB */
    PictureList::iterator it;
    for (it = m_pictures.begin(); it != m_pictures.end();) {
        if (isUnusedPicture(*it))
            m_pictures.erase(it++);
        else
            ++it;
    }
}

void VaapiDecoderH265::DPB::clearRefSet()
{
    m_stCurrBefore.clear();
    m_stCurrAfter.clear();
    m_stFoll.clear();
    m_ltCurr.clear();
    m_ltFoll.clear();
}
/*8.3.2*/
bool VaapiDecoderH265::DPB::initReference(const PicturePtr& picture,
     const SliceHeader *const slice,  const NalUnit *const nalu, bool newStream)
{
    clearRefSet();
    if (isIdr(nalu))
        return true;
    if (!initShortTermRef(picture, slice))
        return false;
    if (!initLongTermRef(picture, slice))
        return false;
    return true;
}

bool matchPocLsb(const PicturePtr& picture, int32_t poc)
{
    return picture->m_pocLsb == poc;
}

VaapiDecPictureH265* VaapiDecoderH265::DPB::getPic(int32_t poc, bool hasMsb)
{
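    //with the POC MSB present the full POC is known and can be looked up directly
    //in the POC-ordered set via the dummy key; otherwise (a long-term reference
    //signalled without delta_poc_msb_present_flag) only the POC LSB is matched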
    PictureList::iterator it;
    if (hasMsb) {
        m_dummy->m_poc = poc;
        it = m_pictures.find(m_dummy);
    } else {
        it = find_if(m_pictures.begin(), m_pictures.end(), bind(matchPocLsb, _1, poc));
    }
    if (it != m_pictures.end()) {
        const PicturePtr& picture = *it;
        if (picture->m_isReference) {
            //referenced by the current picture, so keep it marked as used
            picture->m_isUnusedReference = false;
            return picture.get();
        }
    }
    return NULL;
}

void VaapiDecoderH265::DPB::forEach(ForEachFunction fn)
{
    std::for_each(m_pictures.begin(), m_pictures.end(), fn);
}

bool VaapiDecoderH265::DPB::checkReorderPics(const SPS* const sps)
{
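    //C.5.2.2: bumping is required while more pictures are marked as needed for
    //output than sps_max_num_reorder_pics of the highest sub-layer allows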
    uint32_t num = count_if(m_pictures.begin(), m_pictures.end(), isOutputNeeded);
    return num > sps->sps_max_num_reorder_pics[sps->sps_max_sub_layers_minus1];
}

bool checkPicLatencyCount(const PicturePtr& picture, uint32_t spsMaxLatencyPictures)
{
    return isOutputNeeded(picture) && (picture->m_picLatencyCount >= spsMaxLatencyPictures);
}

bool VaapiDecoderH265::DPB::checkLatency(const SPS* const sps)
{
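    //C.5.2.2: bumping is also required while any picture needing output has a
    //latency count of at least SpsMaxLatencyPictures, derived below; latency
    //tracking is disabled when sps_max_latency_increase_plus1 is 0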
    uint8_t highestTid = sps->sps_max_sub_layers_minus1;
    if (!sps->sps_max_latency_increase_plus1[highestTid])
        return false;
    uint16_t spsMaxLatencyPictures = sps->sps_max_num_reorder_pics[highestTid]
        + sps->sps_max_latency_increase_plus1[highestTid] - 1;
    return find_if(m_pictures.begin(),
                   m_pictures.end(),
                   bind(checkPicLatencyCount, _1, spsMaxLatencyPictures)
                   ) != m_pictures.end();

}

bool VaapiDecoderH265::DPB::checkDpbSize(const SPS* const sps)
{
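    //C.5.2.2: bumping is required while the DPB already holds
    //sps_max_dec_pic_buffering_minus1 + 1 or more pictures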
    uint8_t highestTid = sps->sps_max_sub_layers_minus1;
    return m_pictures.size() >= (size_t)(sps->sps_max_dec_pic_buffering_minus1[highestTid] + 1);
}

//C.5.2.2
bool VaapiDecoderH265::DPB::init(const PicturePtr& picture,
     const SliceHeader *const slice,  const NalUnit *const nalu, bool newStream)
{
    forEach(markUnusedReference);
    if (!initReference(picture, slice, nalu, newStream))
        return false;
    if (isIrap(nalu) && picture->m_noRaslOutputFlag && !newStream) {
        bool noOutputOfPriorPicsFlag;
        //TODO: C.5.2.2 item 1 also lets the decoder infer noOutputOfPriorPicsFlag = 1
        //when the picture size or DPB size changes; that case is not handled here
        if (isCra(nalu))
            noOutputOfPriorPicsFlag = true;
        else
            noOutputOfPriorPicsFlag = slice->no_output_of_prior_pics_flag;
        clearRefSet();
        if (!noOutputOfPriorPicsFlag) {
            removeUnused();
            bumpAll();
        }
        m_pictures.clear();
        return true;
    }
    removeUnused();
    const PPS* const pps = slice->pps.get();
    const SPS* const sps = pps->sps.get();
    while (checkReorderPics(sps) || checkLatency(sps) || checkDpbSize(sps)) {
        if (!bump())
            return false;
    }

    return true;
}

bool VaapiDecoderH265::DPB::output(const PicturePtr& picture)
{
    picture->m_picOutputFlag = false;

    return m_output(picture) == YAMI_SUCCESS;
}

bool VaapiDecoderH265::DPB::bump()
{
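    //C.5.2.4 "bumping": m_pictures is ordered by POC, so the first picture still
    //marked as needed for output is the one with the smallest POC; output it and
    //remove it from the DPB unless it is still used for reference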
    PictureList::iterator it =
        find_if(m_pictures.begin(),m_pictures.end(), isOutputNeeded);
    if (it == m_pictures.end())
        return false;
    bool success = output(*it);
    if (!isReference(*it))
        m_pictures.erase(it);
    return success;
}

void VaapiDecoderH265::DPB::bumpAll()
{
     while (bump())
        /* nothing */;
}

void addLatency(const PicturePtr& picture)
{
    if (picture->m_picOutputFlag)
        picture->m_picLatencyCount++;
}

bool VaapiDecoderH265::DPB::add(const PicturePtr& picture, const SliceHeader* const lastSlice)
{
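    //C.5.2.3: bump the latency count of pictures still waiting for output, store
    //the current picture marked as used for reference, then keep bumping while
    //the reorder or latency limits are exceeded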
    const PPS* const pps = lastSlice->pps.get();
    const SPS* const sps = pps->sps.get();

    forEach(addLatency);
    picture->m_picLatencyCount = 0;
    picture->m_isReference = true;
    m_pictures.insert(picture);
    while (checkReorderPics(sps) || checkLatency(sps))
        bump();
    return true;
}


void VaapiDecoderH265::DPB::flush()
{
    bumpAll();
    clearRefSet();
    m_pictures.clear();
}

VaapiDecoderH265::VaapiDecoderH265():
    m_prevPicOrderCntMsb(0),
    m_prevPicOrderCntLsb(0),
    m_nalLengthSize(0),
    m_newStream(true),
    m_endOfSequence(false),
    m_dpb(bind(&VaapiDecoderH265::outputPicture, this, _1))
{
    m_parser.reset(new Parser());
    m_prevSlice.reset(new SliceHeader());
}

VaapiDecoderH265::~VaapiDecoderH265()
{
    stop();
}

YamiStatus VaapiDecoderH265::start(VideoConfigBuffer* buffer)
{
    if (buffer->data && buffer->size > 0) {
        if (!decodeHevcRecordData(buffer->data, buffer->size)) {
            ERROR("decode record data failed");
            return YAMI_FAIL;
        }
    }

    return YAMI_SUCCESS;
}

YamiStatus VaapiDecoderH265::decodeParamSet(NalUnit* nalu)
{
    bool res = true;

    switch (nalu->nal_unit_type) {
    case NalUnit::VPS_NUT:
        res = m_parser->parseVps(nalu);
        break;
    case NalUnit::SPS_NUT:
        res = m_parser->parseSps(nalu);
        break;
    case NalUnit::PPS_NUT:
        res = m_parser->parsePps(nalu);
    }

    return res ? YAMI_SUCCESS : YAMI_DECODE_INVALID_DATA;
}

YamiStatus VaapiDecoderH265::outputPicture(const PicturePtr& picture)
{
    VaapiDecoderBase::PicturePtr base = std::static_pointer_cast<VaapiDecPicture>(picture);
    return VaapiDecoderBase::outputPicture(base);
}

YamiStatus VaapiDecoderH265::decodeCurrent()
{
    YamiStatus status = YAMI_SUCCESS;
    if (!m_current)
        return status;
    if (!m_current->decode()) {
        ERROR("decode %d failed", m_current->m_poc);
        //ignore it
        return status;
    }
    if (!m_dpb.add(m_current, m_prevSlice.get()))
        return YAMI_DECODE_INVALID_DATA;
    m_current.reset();
    m_newStream = false;
    return status;
}

#define FILL_SCALING_LIST(mxm) \
void fillScalingList##mxm(VAIQMatrixBufferHEVC* iqMatrix, const ScalingList* const scalingList) \
{ \
    for (size_t i = 0; i < N_ELEMENTS(iqMatrix->ScalingList##mxm); i++) { \
        for (size_t j = 0; j < N_ELEMENTS(UpperRightDiagonal##mxm); j++) { \
            iqMatrix->ScalingList##mxm[i][UpperRightDiagonal##mxm[j]] = scalingList->scalingList##mxm[i][j]; \
        } \
    } \
}

FILL_SCALING_LIST(4x4)
FILL_SCALING_LIST(8x8)
FILL_SCALING_LIST(16x16)
void fillScalingList32x32(VAIQMatrixBufferHEVC* iqMatrix, const ScalingList* const scalingList)
{
    for (size_t i = 0; i < N_ELEMENTS(iqMatrix->ScalingList32x32); i++) {
        for (size_t j = 0; j < N_ELEMENTS(UpperRightDiagonal32x32); j++) {
            // According to spec "7.3.4 Scaling list data syntax",
            // just use scalingList32x32[0] and scalingList32x32[3].
            iqMatrix->ScalingList32x32[i][UpperRightDiagonal32x32[j]] = scalingList->scalingList32x32[i * 3][j];
        }
    }
}

#define FILL_SCALING_LIST_DC(mxm)                                                                     \
    void fillScalingListDc##mxm(VAIQMatrixBufferHEVC* iqMatrix, const ScalingList* const scalingList) \
    {                                                                                                 \
        for (size_t i = 0; i < N_ELEMENTS(iqMatrix->ScalingListDC##mxm); i++) {                       \
            iqMatrix->ScalingListDC##mxm[i] = scalingList->scalingListDC##mxm[i];                     \
        }                                                                                             \
    }

FILL_SCALING_LIST_DC(16x16)
void fillScalingListDc32x32(VAIQMatrixBufferHEVC* iqMatrix, const ScalingList* const scalingList)
{
    for (size_t i = 0; i < N_ELEMENTS(iqMatrix->ScalingListDC32x32); i++) {
        // same layout as scalingList32x32: only entries 0 and 3 are present.
        iqMatrix->ScalingListDC32x32[i] = scalingList->scalingListDC32x32[i * 3];
    }
}

bool VaapiDecoderH265::fillIqMatrix(const PicturePtr& picture, const SliceHeader* const slice)
{
    const PPS* const pps = slice->pps.get();
    const SPS* const sps = pps->sps.get();
    const ScalingList* scalingList;
    if (pps->pps_scaling_list_data_present_flag) {
        scalingList = &pps->scaling_list;
    } else if(sps->scaling_list_enabled_flag) {
        if(sps->sps_scaling_list_data_present_flag) {
            scalingList = &sps->scaling_list;
        } else {
            scalingList = &pps->scaling_list;
        }
    } else {
        //default scaling list
        return true;
    }
    VAIQMatrixBufferHEVC* iqMatrix;
    if (!picture->editIqMatrix(iqMatrix))
        return false;
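    //the fillScalingList* helpers remap each list from the parser's up-right
    //diagonal scan order (7.3.4) into the element order libva expects, using
    //the UpperRightDiagonal tables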
    fillScalingList4x4(iqMatrix, scalingList);
    fillScalingList8x8(iqMatrix, scalingList);
    fillScalingList16x16(iqMatrix, scalingList);
    fillScalingList32x32(iqMatrix, scalingList);
    fillScalingListDc16x16(iqMatrix, scalingList);
    fillScalingListDc32x32(iqMatrix, scalingList);
    return true;
}

void VaapiDecoderH265::fillReference(VAPictureHEVC* refs, int32_t& n,
    const RefSet& refset, uint32_t flags)
{
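    //append one reference picture subset to ReferenceFrames and remember each
    //picture's index in m_pocToIndex so fillReferenceIndexForList() can
    //translate POCs into RefPicList entries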
    for (size_t i = 0; i < refset.size(); i++) {
        VAPictureHEVC* r = refs + n;
        const VaapiDecPictureH265* pic = refset[i];

        r->picture_id = refset[i]->getSurfaceID();
        r->pic_order_cnt = pic->m_poc;
        r->flags = flags;

        //record the ReferenceFrames index for later use in RefPicList
        m_pocToIndex[pic->m_poc] = n;

        n++;

    }
}

void VaapiDecoderH265::fillReference(VAPictureHEVC* refs, int32_t size)
{
    int32_t n = 0;
    //clear index map
    m_pocToIndex.clear();

    fillReference(refs, n, m_dpb.m_stCurrBefore, VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE);
    fillReference(refs, n, m_dpb.m_stCurrAfter, VA_PICTURE_HEVC_RPS_ST_CURR_AFTER);
    fillReference(refs, n, m_dpb.m_stFoll, 0);
    fillReference(refs, n, m_dpb.m_ltCurr, VA_PICTURE_HEVC_LONG_TERM_REFERENCE | VA_PICTURE_HEVC_RPS_LT_CURR);
    fillReference(refs, n, m_dpb.m_ltFoll, VA_PICTURE_HEVC_LONG_TERM_REFERENCE);
    for (int i = n; i < size; i++) {
        VAPictureHEVC* ref = refs + i;
        ref->picture_id = VA_INVALID_SURFACE;
        ref->pic_order_cnt = 0;
        ref->flags = VA_PICTURE_HEVC_INVALID;
    }
}

bool VaapiDecoderH265::fillPicture(const PicturePtr& picture, const SliceHeader* const slice)
{
    VAPictureParameterBufferHEVC* param;
    if (!picture->editPicture(param))
        return false;
    param->CurrPic.picture_id = picture->getSurfaceID();
    param->CurrPic.pic_order_cnt = picture->m_poc;
    fillReference(param->ReferenceFrames,  N_ELEMENTS(param->ReferenceFrames));

    const PPS* const pps = slice->pps.get();
    const SPS* const sps = pps->sps.get();
#define FILL(h, f) param->f = h->f
    FILL(sps, pic_width_in_luma_samples);
    FILL(sps, pic_height_in_luma_samples);
#define FILL_PIC(h, f) param->pic_fields.bits.f  = h->f
    FILL_PIC(sps, chroma_format_idc);
    FILL_PIC(sps, separate_colour_plane_flag);
    FILL_PIC(sps, pcm_enabled_flag);
    FILL_PIC(sps, scaling_list_enabled_flag);
    FILL_PIC(pps, transform_skip_enabled_flag);
    FILL_PIC(sps, amp_enabled_flag);
    FILL_PIC(sps, strong_intra_smoothing_enabled_flag);
    FILL_PIC(pps, sign_data_hiding_enabled_flag);
    FILL_PIC(pps, constrained_intra_pred_flag);
    FILL_PIC(pps, cu_qp_delta_enabled_flag);
    FILL_PIC(pps, weighted_pred_flag);
    FILL_PIC(pps, weighted_bipred_flag);
    FILL_PIC(pps, transquant_bypass_enabled_flag);
    FILL_PIC(pps, tiles_enabled_flag);
    FILL_PIC(pps, entropy_coding_sync_enabled_flag);
    param->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag
        = pps->pps_loop_filter_across_slices_enabled_flag;
    FILL_PIC(pps, loop_filter_across_tiles_enabled_flag);
    FILL_PIC(sps, pcm_loop_filter_disabled_flag);
    //how to fill this?
    //NoPicReorderingFlag
    //NoBiPredFlag

    param->sps_max_dec_pic_buffering_minus1 =
        sps->sps_max_dec_pic_buffering_minus1[0];
    FILL(sps, bit_depth_luma_minus8);
    FILL(sps, bit_depth_chroma_minus8);
    FILL(sps, pcm_sample_bit_depth_luma_minus1);
    FILL(sps, pcm_sample_bit_depth_chroma_minus1);
    FILL(sps, log2_min_luma_coding_block_size_minus3);
    FILL(sps, log2_diff_max_min_luma_coding_block_size);
    FILL(sps, log2_min_transform_block_size_minus2);
    FILL(sps, log2_diff_max_min_transform_block_size);
    FILL(sps, log2_min_pcm_luma_coding_block_size_minus3);
    FILL(sps, log2_diff_max_min_pcm_luma_coding_block_size);
    FILL(sps, max_transform_hierarchy_depth_intra);
    FILL(sps, max_transform_hierarchy_depth_inter);
    FILL(pps, init_qp_minus26);
    FILL(pps, diff_cu_qp_delta_depth);
    param->pps_cb_qp_offset = pps->pps_cb_qp_offset;
    param->pps_cr_qp_offset = pps->pps_cr_qp_offset;
    FILL(pps, log2_parallel_merge_level_minus2);
    FILL(pps, num_tile_columns_minus1);
    FILL(pps, num_tile_rows_minus1);
    for (int i = 0; i <= pps->num_tile_columns_minus1; i++) {
        param->column_width_minus1[i] = pps->column_width_minus1[i];
    }
    for (int i = 0; i <= pps->num_tile_rows_minus1; i++) {
        param->row_height_minus1[i] = pps->row_height_minus1[i];
    }


#define FILL_SLICE(h, f)    param->slice_parsing_fields.bits.f = h->f
#define FILL_SLICE_1(h, f) param->slice_parsing_fields.bits.h##_##f = h->f

    FILL_SLICE(pps, lists_modification_present_flag);
    FILL_SLICE(sps, long_term_ref_pics_present_flag);
    FILL_SLICE_1(sps, temporal_mvp_enabled_flag);

    FILL_SLICE(pps, cabac_init_present_flag);
    FILL_SLICE(pps, output_flag_present_flag);
    FILL_SLICE(pps, dependent_slice_segments_enabled_flag);
    FILL_SLICE_1(pps, slice_chroma_qp_offsets_present_flag);
    FILL_SLICE(sps, sample_adaptive_offset_enabled_flag);
    FILL_SLICE(pps, deblocking_filter_override_enabled_flag);
    param->slice_parsing_fields.bits.pps_disable_deblocking_filter_flag =
        pps->pps_deblocking_filter_disabled_flag;
    FILL_SLICE(pps, slice_segment_header_extension_present_flag);

    /* how to fill following fields
    RapPicFlag
    IdrPicFlag
    IntraPicFlag  */

    FILL(sps, log2_max_pic_order_cnt_lsb_minus4);
    FILL(sps, num_short_term_ref_pic_sets);
    param->num_long_term_ref_pic_sps = sps->num_long_term_ref_pics_sps;
    FILL(pps, num_ref_idx_l0_default_active_minus1);
    FILL(pps, num_ref_idx_l1_default_active_minus1);
    param->pps_beta_offset_div2 = pps->pps_beta_offset_div2;
    param->pps_tc_offset_div2 = pps->pps_tc_offset_div2;
    FILL(pps, num_extra_slice_header_bits);

    /* how to fill this
     st_rps_bits*/

#undef FILL
#undef FILL_PIC
#undef FILL_SLICE
#undef FILL_SLICE_1

    return true;
}

bool VaapiDecoderH265::getRefPicList(RefSet& refset, const RefSet& stCurr0, const RefSet& stCurr1,
    uint8_t numActive, bool modify, const uint32_t* modiList)
{
    if (numActive > 15) {
        ERROR("bug: reference picture count can't be larger than 15");
        return false;
    }
    const RefSet& ltCurr = m_dpb.m_ltCurr;
    uint8_t numPocTotalCurr = stCurr0.size() + stCurr1.size() + ltCurr.size();
    if (numActive && !numPocTotalCurr) {
        ERROR("active refs is %d, but numPocTotalCurr is %d", numActive, numPocTotalCurr);
        return false;
    }
    uint8_t numRpsCurrTempList = std::max(numPocTotalCurr, numActive);
    RefSet temp;
    temp.reserve(numRpsCurrTempList);
    uint32_t rIdx = 0;
    //(8-8) and (8-10)
    while (rIdx < numRpsCurrTempList) {
        for(uint32_t i = 0; i < stCurr0.size() && rIdx < numRpsCurrTempList; rIdx++, i++ )
            temp.push_back(stCurr0[i]);
        for(uint32_t i = 0; i < stCurr1.size() && rIdx < numRpsCurrTempList; rIdx++, i++ )
            temp.push_back(stCurr1[i]);
        for(uint32_t i = 0; i < ltCurr.size() && rIdx < numRpsCurrTempList; rIdx++, i++ )
            temp.push_back(ltCurr[i]);
    }
    refset.clear();
    refset.reserve(numActive);
    //(8-9) and (8-11)
    for( rIdx = 0; rIdx < numActive; rIdx++) {
        uint8_t idx = modify ? modiList[rIdx] : rIdx;
        if (idx < temp.size()) {
            refset.push_back(temp[idx]);
        } else {
            ERROR("can't get idx from temp ref, modify = %d, idx = %d, rIdx = %d", modify, idx, rIdx);
        }
    }
    return true;
}

uint8_t  VaapiDecoderH265::getIndex(int32_t poc)
{
    return m_pocToIndex[poc];
}

void VaapiDecoderH265::fillReferenceIndexForList(VASliceParameterBufferHEVC* sliceParam,
    const RefSet& refset, bool isList0)
{
    int n = isList0?0:1;
    uint32_t i;
    for (i = 0; i < refset.size(); i++) {
        sliceParam->RefPicList[n][i] = getIndex(refset[i]->m_poc);
    }
    for ( ; i < N_ELEMENTS(sliceParam->RefPicList[n]); i++) {
        sliceParam->RefPicList[n][i] = 0xFF;
    }

}

// 8.3.4
bool VaapiDecoderH265::fillReferenceIndex(VASliceParameterBufferHEVC* sliceParam, const SliceHeader* const slice)
{
    RefSet& before = m_dpb.m_stCurrBefore;
    RefSet& after = m_dpb.m_stCurrAfter;

    RefSet refset;
    if (!slice->isISlice()) {
        if (!getRefPicList(refset, before, after,
            slice->num_ref_idx_l0_active_minus1 + 1,
            slice->ref_pic_list_modification.ref_pic_list_modification_flag_l0,
            slice->ref_pic_list_modification.list_entry_l0)) {
            return false;
        }
    }
    fillReferenceIndexForList(sliceParam, refset, true);

    refset.clear();
    if (slice->isBSlice()) {
        if (!getRefPicList(refset, after, before,
            slice->num_ref_idx_l1_active_minus1 + 1,
            slice->ref_pic_list_modification.ref_pic_list_modification_flag_l1,
            slice->ref_pic_list_modification.list_entry_l1)) {
            return false;
        }
    }
    fillReferenceIndexForList(sliceParam, refset, false);

    sliceParam->num_ref_idx_l0_active_minus1 = slice->num_ref_idx_l0_active_minus1;
    sliceParam->num_ref_idx_l1_active_minus1 = slice->num_ref_idx_l1_active_minus1;

    return true;

}

inline int32_t clip3(int32_t x, int32_t y, int32_t z)
{
    if (z < x)
        return x;
    if (z > y)
        return y;
    return z;
}

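// The macro below copies the parsed luma/chroma weights through and derives the
// spec variable ChromaOffsetLX (7.4.7.3) from delta_chroma_offset_lX:
//   ChromaOffsetLX = Clip3(-128, 127,
//       128 + delta - ((128 * chromaWeight) >> ChromaLog2WeightDenom))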
#define FILL_WEIGHT_TABLE(n) \
void fillPredWeightTableL##n(VASliceParameterBufferHEVC* sliceParam, \
        const SliceHeader* slice, uint8_t chromaLog2WeightDenom) \
{ \
    const PredWeightTable& w = slice->pred_weight_table; \
    for (int i = 0; i <= sliceParam->num_ref_idx_l##n##_active_minus1; i++) { \
        if (w.luma_weight_l##n##_flag[i]) { \
                sliceParam->delta_luma_weight_l##n[i] = w.delta_luma_weight_l##n[i]; \
                sliceParam->luma_offset_l##n[i] = w.luma_offset_l##n[i];\
            } \
            if (w.chroma_weight_l##n##_flag[i]) { \
                for (int j = 0; j < 2; j++) { \
                    int8_t deltaWeight = w.delta_chroma_weight_l##n[i][j]; \
                    int32_t chromaWeight = (1 << chromaLog2WeightDenom) + deltaWeight; \
                    int16_t deltaOffset = w.delta_chroma_offset_l##n[i][j]; \
                    int32_t chromaOffset = \
                        128 + deltaOffset - ((128*chromaWeight)>>chromaLog2WeightDenom);\
\
                    sliceParam->delta_chroma_weight_l##n[i][j] = deltaWeight; \
                    sliceParam->ChromaOffsetL##n[i][j]= (int8_t)clip3(-128, 127, chromaOffset); \
            } \
        } \
    } \
}

FILL_WEIGHT_TABLE(0)
FILL_WEIGHT_TABLE(1)

bool VaapiDecoderH265::fillPredWeightTable(VASliceParameterBufferHEVC* sliceParam, const SliceHeader* const slice)
{
    const PPS* const pps = slice->pps.get();
    const SPS* const sps = pps->sps.get();
    const PredWeightTable& w = slice->pred_weight_table;
    if ((pps->weighted_pred_flag && slice->isPSlice()) ||
            (pps->weighted_bipred_flag && slice->isBSlice())) {
        uint8_t chromaLog2WeightDenom = w.luma_log2_weight_denom;
        sliceParam->luma_log2_weight_denom = w.luma_log2_weight_denom;
        if (sps->chroma_format_idc != 0) {
            sliceParam->delta_chroma_log2_weight_denom
                = w.delta_chroma_log2_weight_denom;
            chromaLog2WeightDenom
                += w.delta_chroma_log2_weight_denom;
        }
        fillPredWeightTableL0(sliceParam, slice, chromaLog2WeightDenom);
        if (pps->weighted_bipred_flag && slice->isBSlice())
            fillPredWeightTableL1(sliceParam, slice, chromaLog2WeightDenom);
    }
    return true;
}

bool VaapiDecoderH265::fillSlice(const PicturePtr& picture,
        const SliceHeader* const theSlice, const NalUnit* const nalu)
{
    const SliceHeader* slice = theSlice;
    VASliceParameterBufferHEVC* sliceParam;
    if (!picture->newSlice(sliceParam, nalu->m_data, nalu->m_size))
        return false;
    sliceParam->slice_data_byte_offset =
        slice->getSliceDataByteOffset();
    sliceParam->slice_segment_address = slice->slice_segment_address;

#define FILL_LONG(f) sliceParam->LongSliceFlags.fields.f = slice->f
#define FILL_LONG_SLICE(f) sliceParam->LongSliceFlags.fields.slice_##f = slice->f
    //how to fill this
    //LastSliceOfPic
    FILL_LONG(dependent_slice_segment_flag);

    //7.4.7.1: a dependent slice segment inherits the remaining header fields
    //from the preceding independent slice segment
    if (slice->dependent_slice_segment_flag) {
        slice = m_prevSlice.get();
    }

    if (!fillReferenceIndex(sliceParam, slice))
        return false;

    sliceParam->LongSliceFlags.fields.slice_type = slice->slice_type;
    sliceParam->LongSliceFlags.fields.color_plane_id = slice->colour_plane_id;
    FILL_LONG_SLICE(sao_luma_flag);
    FILL_LONG_SLICE(sao_chroma_flag);
    FILL_LONG(mvd_l1_zero_flag);
    FILL_LONG(cabac_init_flag);
    FILL_LONG_SLICE(temporal_mvp_enabled_flag);

    if (slice->deblocking_filter_override_flag)
        FILL_LONG_SLICE(deblocking_filter_disabled_flag);
    else
        sliceParam->LongSliceFlags.fields.slice_deblocking_filter_disabled_flag=
          slice->pps->pps_deblocking_filter_disabled_flag;
    FILL_LONG(collocated_from_l0_flag);
    FILL_LONG_SLICE(loop_filter_across_slices_enabled_flag);

#define FILL(f) sliceParam->f = slice->f
#define FILL_SLICE(f) sliceParam->slice_##f = slice->f
    FILL(collocated_ref_idx);
    /* the following fields are filled in fillReferenceIndex():
       num_ref_idx_l0_active_minus1
       num_ref_idx_l1_active_minus1 */

    FILL_SLICE(qp_delta);
    FILL_SLICE(cb_qp_offset);
    FILL_SLICE(cr_qp_offset);
    FILL_SLICE(beta_offset_div2);
    FILL_SLICE(tc_offset_div2);
    if (!fillPredWeightTable(sliceParam, slice))
        return false;
    FILL(five_minus_max_num_merge_cand);
    return true;
}

#define CHECK(v, expect)                                                      \
    do {                                                                      \
        if (v != expect) {                                                    \
            ERROR("the value of %s is %d, not equal to %d", #v, v, expect); \
            return VAProfileNone;                                             \
        }                                                                     \
    } while (0)

#define CHECK_RANGE(v, min, max)                                \
    do {                                                        \
        if (v < min || v > max) {                               \
            ERROR("%s is %d, not in [%d,%d]", #v, v, min, max); \
            return VAProfileNone;                               \
        }                                                       \
    } while (0)

VAProfile VaapiDecoderH265::getVaProfile(const SPS* const sps)
{
    if (sps->profile_tier_level.general_profile_idc == 0
        || sps->profile_tier_level.general_profile_compatibility_flag[0] == 1) {
        //general_profile_idc = 0 is not defined in the spec, but some streams have it;
        //treat it as Main profile
        CHECK(sps->chroma_format_idc, 1);
        CHECK(sps->bit_depth_luma_minus8, 0);
        CHECK(sps->bit_depth_chroma_minus8, 0);
        return VAProfileHEVCMain;
    }

    if (sps->profile_tier_level.general_profile_idc == 1
        || sps->profile_tier_level.general_profile_compatibility_flag[1] == 1) {
        //A.3.2, but we only check some important values
        CHECK(sps->chroma_format_idc, 1);
        CHECK(sps->bit_depth_luma_minus8, 0);
        CHECK(sps->bit_depth_chroma_minus8, 0);
        return VAProfileHEVCMain;
    }
    if (sps->profile_tier_level.general_profile_idc == 2
        || sps->profile_tier_level.general_profile_compatibility_flag[2] == 1) {
        //A.3.3, but we only check some important values
        CHECK(sps->chroma_format_idc, 1);
        CHECK_RANGE(sps->bit_depth_luma_minus8, 0, 2);
        CHECK_RANGE(sps->bit_depth_chroma_minus8, 0, 2);
        return VAProfileHEVCMain10;
    }
    ERROR("unsupported profile %d", sps->profile_tier_level.general_profile_idc);
    return VAProfileNone;
}
#undef CHECK
#undef CHECK_RANGE

YamiStatus VaapiDecoderH265::ensureContext(const SPS* const sps)
{
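    //decode surfaces use the full coded size while the display size honours the
    //conformance window; Main10 streams are output as P010, others as NV12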
    uint32_t surfaceNumber = sps->sps_max_dec_pic_buffering_minus1[0] + 1;
    uint32_t width = sps->conformance_window_flag ? sps->croppedWidth : sps->width;
    uint32_t height = sps->conformance_window_flag ? sps->croppedHeight : sps->height;
    uint32_t surfaceWidth = sps->width;
    uint32_t surfaceHeight = sps->height;
    VAProfile profile = getVaProfile(sps);
    uint32_t fourcc = (profile == VAProfileHEVCMain10) ? YAMI_FOURCC_P010 : YAMI_FOURCC_NV12;

    if (setFormat(width, height, surfaceWidth, surfaceHeight, surfaceNumber, fourcc)) {
        decodeCurrent();
        return YAMI_DECODE_FORMAT_CHANGE;
    }

    if (profile == VAProfileNone)
        return YAMI_UNSUPPORTED;
    return ensureProfile(profile);
}

/* 8.3.1 */
void VaapiDecoderH265::getPoc(const PicturePtr& picture,
        const SliceHeader* const slice,
        const NalUnit* const nalu)
{
    const PPS* const pps = slice->pps.get();
    const SPS* const sps = pps->sps.get();

    const uint16_t pocLsb = slice->slice_pic_order_cnt_lsb;
    const int32_t MaxPicOrderCntLsb = 1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + 4);
    int32_t picOrderCntMsb;
    if (isIrap(nalu) && picture->m_noRaslOutputFlag) {
        picOrderCntMsb = 0;
    } else {
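        //8.3.1: when slice_pic_order_cnt_lsb wraps relative to prevPicOrderCntLsb,
        //step picOrderCntMsb up or down by MaxPicOrderCntLsb accordingly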
        if((pocLsb < m_prevPicOrderCntLsb)
                && ((m_prevPicOrderCntLsb - pocLsb) >= (MaxPicOrderCntLsb / 2))) {
            picOrderCntMsb = m_prevPicOrderCntMsb + MaxPicOrderCntLsb;
        } else if((pocLsb > m_prevPicOrderCntLsb)
                && ((pocLsb - m_prevPicOrderCntLsb) > (MaxPicOrderCntLsb / 2))) {
            picOrderCntMsb = m_prevPicOrderCntMsb - MaxPicOrderCntLsb;
        } else {
            picOrderCntMsb =  m_prevPicOrderCntMsb;
        }
    }
    picture->m_poc = picOrderCntMsb + pocLsb;
    picture->m_pocLsb = pocLsb;

    uint8_t temporalID = nalu->nuh_temporal_id_plus1 - 1;
    //fixme:sub-layer non-reference picture.
    if (!temporalID && !isRasl(nalu) &&  !isRadl(nalu) && !isSublayerNoRef(nalu)) {
        m_prevPicOrderCntMsb = picOrderCntMsb;
        m_prevPicOrderCntLsb = pocLsb;
    }
}

SurfacePtr VaapiDecoderH265::createSurface(const SliceHeader* const slice)
{
    SurfacePtr s = VaapiDecoderBase::createSurface();
    if (!s)
        return s;
    SharedPtr<SPS>& sps = slice->pps->sps;

    if (sps->conformance_window_flag)
        s->setCrop(sps->croppedLeft, sps->croppedTop, sps->croppedWidth, sps->croppedHeight);
    else
        s->setCrop(0, 0, sps->width, sps->height);
    return s;
}

YamiStatus VaapiDecoderH265::createPicture(PicturePtr& picture, const SliceHeader* const slice,
    const NalUnit* const nalu)
{

    SurfacePtr surface = createSurface(slice);
    if (!surface)
        return YAMI_DECODE_NO_SURFACE;
    picture.reset(new VaapiDecPictureH265(m_context, surface, m_currentPTS));

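    //NoRaslOutputFlag is 1 for IDR and BLA pictures, for the first picture of a
    //new stream, and for the first picture after an end-of-sequence NAL unit;
    //RASL pictures associated with such an IRAP are neither output nor decoded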
    picture->m_noRaslOutputFlag = isIdr(nalu) || isBla(nalu) ||
                                  m_newStream || m_endOfSequence;
    m_noRaslOutputFlag = picture->m_noRaslOutputFlag;
    if (isIrap(nalu))
        m_associatedIrapNoRaslOutputFlag = picture->m_noRaslOutputFlag;
    picture->m_picOutputFlag
        = (isRasl(nalu) && m_associatedIrapNoRaslOutputFlag) ? false : slice->pic_output_flag;

    getPoc(picture, slice, nalu);

    return YAMI_SUCCESS;
}

YamiStatus VaapiDecoderH265::decodeSlice(NalUnit* nalu)
{
    SharedPtr<SliceHeader> currSlice(new SliceHeader());
    SliceHeader* slice = currSlice.get();
    YamiStatus status;

    if (!m_parser->parseSlice(nalu, slice))
        return YAMI_DECODE_INVALID_DATA;

    status = ensureContext(slice->pps->sps.get());
    if (status != YAMI_SUCCESS) {
        return status;
    }
    if (slice->first_slice_segment_in_pic_flag) {
        status = decodeCurrent();
        if (status != YAMI_SUCCESS)
            return status;
        status = createPicture(m_current, slice, nalu);
        if (status != YAMI_SUCCESS)
            return status;
        if (m_noRaslOutputFlag && isRasl(nalu))
            return YAMI_SUCCESS;
        if (!m_current || !m_dpb.init(m_current, slice, nalu, m_newStream))
            return YAMI_DECODE_INVALID_DATA;
        if (!fillPicture(m_current, slice) || !fillIqMatrix(m_current, slice))
            return YAMI_FAIL;
    }
    if (!m_current)
        return YAMI_FAIL;
    if (!fillSlice(m_current, slice, nalu))
        return YAMI_FAIL;
    if (!slice->dependent_slice_segment_flag)
        std::swap(currSlice, m_prevSlice);
    return status;

}

YamiStatus VaapiDecoderH265::decodeNalu(NalUnit* nalu)
{
    uint8_t type = nalu->nal_unit_type;
    YamiStatus status = YAMI_SUCCESS;

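    //types in [TRAIL_N, CRA_NUT] are the VCL slice NAL unit types defined by the spec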
    if (NalUnit::TRAIL_N <= type && type <= NalUnit::CRA_NUT) {
        status = decodeSlice(nalu);
    }
    else if (NalUnit::PREFIX_SEI_NUT == type
        || NalUnit::SUFFIX_SEI_NUT == type) {
        //In some bitstreams, SEI NAL units are inserted between slice NAL
        //units that belong to the same picture. If we finalized the current
        //picture on every SEI NAL unit, the slices following the SEI would
        //never be decoded, so SEI units are simply skipped here.
    }
    else {
        status = decodeCurrent();
        if (status != YAMI_SUCCESS)
            return status;
        switch (type) {
            case NalUnit::VPS_NUT:
            case NalUnit::SPS_NUT:
            case NalUnit::PPS_NUT:
                status = decodeParamSet(nalu);
                break;
            case NalUnit::EOB_NUT:
                m_newStream = true;
                break;
            case NalUnit::EOS_NUT:
                m_endOfSequence = true;
                break;
            case NalUnit::AUD_NUT:
            case NalUnit::FD_NUT:
            default:
                break;
        }
    }

    return status;
}

void VaapiDecoderH265::flush(bool discardOutput)
{
    decodeCurrent();
    m_dpb.flush();
    m_prevPicOrderCntMsb = 0;
    m_prevPicOrderCntLsb = 0;
    m_newStream = true;
    m_endOfSequence = false;
    m_prevSlice.reset(new SliceHeader());
    if (discardOutput)
        VaapiDecoderBase::flush();
}

void VaapiDecoderH265::flush(void)
{
    flush(true);
}

YamiStatus VaapiDecoderH265::decode(VideoDecodeBuffer* buffer)
{
    if (!buffer || !buffer->data) {
        flush(false);
        return YAMI_SUCCESS;
    }
    m_currentPTS = buffer->timeStamp;

    NalReader nr(buffer->data, buffer->size, m_nalLengthSize);
    const uint8_t* nal;
    int32_t size;
    YamiStatus lastError = YAMI_SUCCESS;
    YamiStatus status;
    while (nr.read(nal, size)) {
        NalUnit nalu;
        if (nalu.parseNaluHeader(nal, size)) {
            status = decodeNalu(&nalu);
            if (status != YAMI_SUCCESS) {
                //keep decoding when decodeNalu returns YAMI_DECODE_INVALID_DATA,
                //but report the error at the end of the function
                lastError = status;
                if (status != YAMI_DECODE_INVALID_DATA)
                    return status;
            }
        }
    }

    if (buffer->flag & VIDEO_DECODE_BUFFER_FLAG_FRAME_END) {
        //submit the pending picture to libva
        decodeCurrent();
    }
    return lastError;
}

bool VaapiDecoderH265::decodeHevcRecordData(uint8_t* buf, int32_t bufSize)
{
    if (buf == NULL || bufSize == 0) {
        ERROR("invalid record data");
        return false;
    }
    /*
     * Some hvcC data does not use buf[0] == 1 as the version flag, so
     * (buf[0] || buf[1] || buf[2] > 1) is used here to tell hvcC data
     * apart from Annex B data.
     */
    if (!(buf[0] || buf[1] || buf[2] > 1)) { //annexb format
        VideoDecodeBuffer buffer;
        memset(&buffer, 0, sizeof(buffer));
        buffer.data = buf;
        buffer.size = bufSize;
        //non-fatal errors are ignored here
        return (decode(&buffer) >= YAMI_SUCCESS);
    }
    if (bufSize < 24) {
        ERROR("invalid hvcC record data");
        return false;
    }
    NalUnit nalu;
    const uint8_t* nalBuf;
    int32_t i = 0, j, numNalu, nalBufSize;
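    /*
     * HEVCDecoderConfigurationRecord (ISO/IEC 14496-15): byte 21 carries
     * lengthSizeMinusOne in its two low bits and byte 22 is numOfArrays.
     * Each array starts with a completeness/NAL-type byte and a 16-bit
     * numNalus, followed by entries of a 16-bit length plus the NAL unit.
     */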
    nalBuf = &buf[21];
    int32_t nalLengthSize = (*nalBuf & 0x03) + 1;
    nalBuf++;
    numNalu = *nalBuf & 0x1f;
    nalBuf++;
    for (i = 0; i < numNalu; i++) {
        nalBuf++;
        int cnt = *(nalBuf + 1) & 0xf;
        nalBuf += 2;
        for (j = 0; j < cnt; j++) {
            int nalsize = *(nalBuf + 1) + 2;
            if (buf + bufSize - nalBuf < nalsize)
               return false;
            NalReader nr(nalBuf, nalsize, 2);
            /*the vps/sps/pps NAL length field is always 2 bytes in hvcC format*/
            if (!nr.read(nalBuf, nalBufSize))
                return false;
            if (!nalu.parseNaluHeader(nalBuf, nalBufSize))
                return false;
            if (decodeNalu(&nalu) != YAMI_SUCCESS)
                return false;
            nalBuf += nalBufSize;
        }
    }
    m_nalLengthSize = nalLengthSize;
    return true;
}

}