Blob Blame History Raw
/*
 * Copyright (C) 2016 Intel Corporation. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "oclpostprocess_osd.h"
#include "common/common_def.h"
#include "common/log.h"
#include "ocl/oclcontext.h"
#include "vpp/oclvppimage.h"

namespace YamiMediaCodec {

YamiStatus
OclPostProcessOsd::process(const SharedPtr<VideoFrame>& src,
    const SharedPtr<VideoFrame>& dst)
{
    YamiStatus status = ensureContext("osd");
    if (status != YAMI_SUCCESS)
        return status;

    if (src->fourcc != YAMI_FOURCC_RGBA || dst->fourcc != YAMI_FOURCC_NV12) {
        ERROR("only support RGBA OSD on NV12 video frame");
        return YAMI_INVALID_PARAM;
    }

    status = computeBlockLuma(dst);
    if (status != YAMI_SUCCESS)
        return status;

    cl_int clStatus;
    cl_mem clBuf = clCreateBuffer(m_context->m_context,
        CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
        m_osdLuma.size() * sizeof(float),
        NULL,
        &clStatus);
    SharedPtr<cl_mem> osdLuma(new cl_mem(clBuf), OclMemDeleter());
    if (!checkOclStatus(clStatus, "CreateBuffer")
        || !checkOclStatus(clEnqueueWriteBuffer(m_context->m_queue, *osdLuma, CL_TRUE, 0,
                                                    m_osdLuma.size() * sizeof(float),
                                                    m_osdLuma.data(), 0, NULL, NULL),
                               "EnqueueWriteBuffer")) {
        return YAMI_FAIL;
    }

    SharedPtr<OclVppCLImage> srcImagePtr, dstImagePtr;
    cl_image_format srcFormat, dstFormat;

    srcFormat.image_channel_order = CL_RGBA;
    srcFormat.image_channel_data_type = CL_UNORM_INT8;
    srcImagePtr = OclVppCLImage::create(m_display, src, m_context, srcFormat);
    if (!srcImagePtr) {
        ERROR("failed to create cl image from src frame");
        return YAMI_FAIL;
    }

    dstFormat.image_channel_order = CL_RG;
    dstFormat.image_channel_data_type = CL_UNORM_INT8;
    dstImagePtr = OclVppCLImage::create(m_display, dst, m_context, dstFormat);
    if (!dstImagePtr) {
        ERROR("failed to create cl image from dst frame");
        return YAMI_FAIL;
    }

    cl_mem bgImageMem[3];
    for (uint32_t n = 0; n < dstImagePtr->numPlanes(); n++) {
        bgImageMem[n] = dstImagePtr->plane(n);
    }

    uint32_t pixelSize = getPixelSize(dstFormat);
    VideoRect crop;
    crop.x = dst->crop.x / pixelSize;
    crop.y = dst->crop.y & ~1;
    crop.width = dst->crop.width / pixelSize;
    crop.height = dst->crop.height;
    if ((CL_SUCCESS != clSetKernelArg(m_kernelOsd, 0, sizeof(cl_mem), &dstImagePtr->plane(0)))
        || (CL_SUCCESS != clSetKernelArg(m_kernelOsd, 1, sizeof(cl_mem), &dstImagePtr->plane(1)))
        || (CL_SUCCESS != clSetKernelArg(m_kernelOsd, 2, sizeof(cl_mem), &bgImageMem[0]))
        || (CL_SUCCESS != clSetKernelArg(m_kernelOsd, 3, sizeof(cl_mem), &bgImageMem[1]))
        || (CL_SUCCESS != clSetKernelArg(m_kernelOsd, 4, sizeof(cl_mem), &srcImagePtr->plane(0)))
        || (CL_SUCCESS != clSetKernelArg(m_kernelOsd, 5, sizeof(uint32_t), &crop.x))
        || (CL_SUCCESS != clSetKernelArg(m_kernelOsd, 6, sizeof(uint32_t), &crop.y))
        || (CL_SUCCESS != clSetKernelArg(m_kernelOsd, 7, sizeof(uint32_t), &crop.width))
        || (CL_SUCCESS != clSetKernelArg(m_kernelOsd, 8, sizeof(uint32_t), &crop.height))
        || (CL_SUCCESS != clSetKernelArg(m_kernelOsd, 9, sizeof(cl_mem), osdLuma.get()))) {
        ERROR("clSetKernelArg failed");
        return YAMI_FAIL;
    }

    size_t globalWorkSize[2], localWorkSize[2];
    localWorkSize[0] = 8;
    localWorkSize[1] = 8;
    globalWorkSize[0] = ALIGN_POW2(dst->crop.width, localWorkSize[0] * pixelSize) / pixelSize;
    globalWorkSize[1] = ALIGN_POW2(dst->crop.height, localWorkSize[1] * 2) / 2;
    if (!checkOclStatus(clEnqueueNDRangeKernel(m_context->m_queue, m_kernelOsd, 2, NULL,
                            globalWorkSize, localWorkSize, 0, NULL, NULL),
            "EnqueueNDRangeKernel")) {
        return YAMI_FAIL;
    }

    return status;
}

YamiStatus OclPostProcessOsd::setParameters(VppParamType type, void* vppParam)
{
    YamiStatus status = YAMI_INVALID_PARAM;

    switch (type) {
    case VppParamTypeOsd: {
        VppParamOsd* osd = (VppParamOsd*)vppParam;
        if (osd->size == sizeof(VppParamOsd)) {
            m_threshold = osd->threshold;
            status = YAMI_SUCCESS;
        }
    } break;
    default:
        status = OclPostProcessBase::setParameters(type, vppParam);
        break;
    }
    return status;
}

YamiStatus OclPostProcessOsd::computeBlockLuma(const SharedPtr<VideoFrame> frame)
{
    uint32_t blockWidth = frame->crop.height;
    cl_image_format format;
    format.image_channel_order = CL_RGBA;
    format.image_channel_data_type = CL_UNSIGNED_INT8;
    uint32_t pixelSize = getPixelSize(format);

    if (m_blockCount < (int)(frame->crop.width / blockWidth)) {
        m_blockCount = frame->crop.width / blockWidth;
        m_osdLuma.resize(m_blockCount);
    }

    uint32_t padding = frame->crop.x % pixelSize;
    uint32_t alignedWidth = frame->crop.width + padding;
    if (m_lineBuf.size() < alignedWidth)
        m_lineBuf.resize(alignedWidth);

    cl_int clStatus;
    cl_mem clBuf = clCreateBuffer(m_context->m_context,
        CL_MEM_WRITE_ONLY | CL_MEM_HOST_READ_ONLY,
        m_lineBuf.size() * sizeof(uint32_t),
        NULL,
        &clStatus);
    if (!checkOclStatus(clStatus, "CreateBuffer"))
        return YAMI_FAIL;
    SharedPtr<cl_mem> lineBuf(new cl_mem(clBuf), OclMemDeleter());

    SharedPtr<OclVppCLImage> imagePtr;
    imagePtr = OclVppCLImage::create(m_display, frame, m_context, format);
    if (!imagePtr) {
        ERROR("failed to create cl image from src frame");
        return YAMI_FAIL;
    }

    VideoRect crop;
    crop.x = frame->crop.x / pixelSize;
    crop.y = frame->crop.y;
    crop.width = alignedWidth / pixelSize;
    crop.height = frame->crop.height;
    if ((CL_SUCCESS != clSetKernelArg(m_kernelReduceLuma, 0, sizeof(cl_mem), &imagePtr->plane(0)))
        || (CL_SUCCESS != clSetKernelArg(m_kernelReduceLuma, 1, sizeof(uint32_t), &crop.x))
        || (CL_SUCCESS != clSetKernelArg(m_kernelReduceLuma, 2, sizeof(uint32_t), &crop.y))
        || (CL_SUCCESS != clSetKernelArg(m_kernelReduceLuma, 3, sizeof(uint32_t), &crop.height))
        || (CL_SUCCESS != clSetKernelArg(m_kernelReduceLuma, 4, sizeof(cl_mem), lineBuf.get()))) {
        ERROR("clSetKernelArg failed");
        return YAMI_FAIL;
    }
    size_t localWorkSize = 16;
    size_t globalWorkSize = ALIGN_POW2(alignedWidth, pixelSize * localWorkSize) / pixelSize;
    if (!checkOclStatus(clEnqueueNDRangeKernel(m_context->m_queue, m_kernelReduceLuma, 1, NULL,
                            &globalWorkSize, &localWorkSize, 0, NULL, NULL),
            "EnqueueNDRangeKernel")) {
        return YAMI_FAIL;
    }
    if (!checkOclStatus(clEnqueueReadBuffer(m_context->m_queue,
                            *lineBuf,
                            CL_TRUE,
                            0,
                            m_lineBuf.size() * sizeof(uint32_t),
                            m_lineBuf.data(),
                            0, NULL, NULL),
            "EnqueueReadBuffer")) {
        return YAMI_FAIL;
    }

    uint32_t acc;
    int offset;
    uint32_t blockThreshold = m_threshold * blockWidth * frame->crop.height;
    for (int i = 0; i < m_blockCount; i++) {
        acc = 0;
        offset = i * blockWidth + padding;
        for (uint32_t j = 0; j < blockWidth; j++) {
            acc += m_lineBuf[offset + j];
        }
        if (acc <= blockThreshold)
            m_osdLuma[i] = 1.0;
        else
            m_osdLuma[i] = 0.0;
    }

    return YAMI_SUCCESS;
}

bool OclPostProcessOsd::prepareKernels()
{
    m_kernelOsd = prepareKernel("osd");
    m_kernelReduceLuma = prepareKernel("reduce_luma");

    return (m_kernelOsd != NULL)
        && (m_kernelReduceLuma != NULL);
}

}