|
Packit Service |
fa4841 |
/* FreeRDP: A Remote Desktop Protocol Client
 * Optimized alpha blending routines.
 * vi:ts=4 sw=4:
 *
 * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may obtain
 * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 *
 * Note: this code assumes the second operand is fully opaque,
 * i.e.
 *    newval = alpha1*val1 + (1-alpha1)*val2
 * rather than
 *    newval = alpha1*val1 + (1-alpha1)*alpha2*val2
 * The IPP gives other options.
 */
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
#ifdef HAVE_CONFIG_H
|
|
Packit Service |
fa4841 |
#include "config.h"
|
|
Packit Service |
fa4841 |
#endif
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
#include <freerdp/types.h>
|
|
Packit Service |
fa4841 |
#include <freerdp/primitives.h>
|
|
Packit Service |
fa4841 |
#include <winpr/sysinfo.h>
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
#ifdef WITH_SSE2
|
|
Packit Service |
fa4841 |
#include <emmintrin.h>
|
|
Packit Service |
fa4841 |
#include <pmmintrin.h>
|
|
Packit Service |
fa4841 |
#endif /* WITH_SSE2 */
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
#ifdef WITH_IPP
|
|
Packit Service |
fa4841 |
#include <ippi.h>
|
|
Packit Service |
fa4841 |
#endif /* WITH_IPP */
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
#include "prim_internal.h"
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
/* Table of generic primitives. It is assigned from primitives_get_generic()
 * in primitives_init_alphaComp_opt() and used by the SSE2 routine to process
 * the pixels it does not handle with vector instructions.
 */
static primitives_t* generic = NULL;
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
/* ------------------------------------------------------------------------- */
|
|
Packit Service |
fa4841 |
#ifdef WITH_SSE2
|
|
Packit Service |
fa4841 |
#if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS)
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
bb5c11 |
pstatus_t sse2_alphaComp_argb(
|
|
Packit Service |
bb5c11 |
const BYTE* pSrc1, UINT32 src1Step,
|
|
Packit Service |
bb5c11 |
const BYTE* pSrc2, UINT32 src2Step,
|
|
Packit Service |
bb5c11 |
BYTE* pDst, UINT32 dstStep,
|
|
Packit Service |
bb5c11 |
UINT32 width, UINT32 height)
|
|
Packit Service |
fa4841 |
{
|
|
Packit Service |
bb5c11 |
const UINT32* sptr1 = (const UINT32*) pSrc1;
|
|
Packit Service |
bb5c11 |
const UINT32* sptr2 = (const UINT32*) pSrc2;
|
|
Packit Service |
fa4841 |
UINT32* dptr;
|
|
Packit Service |
bb5c11 |
int linebytes, src1Jump, src2Jump, dstJump, y;
|
|
Packit Service |
fa4841 |
__m128i xmm0, xmm1;
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
bb5c11 |
if ((width <= 0) || (height <= 0)) return PRIMITIVES_SUCCESS;
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
bb5c11 |
if (width < 4) /* pointless if too small */
|
|
Packit Service |
fa4841 |
{
|
|
Packit Service |
bb5c11 |
return generic->alphaComp_argb(pSrc1, src1Step, pSrc2, src2Step,
|
|
Packit Service |
bb5c11 |
pDst, dstStep, width, height);
|
|
Packit Service |
fa4841 |
}
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
bb5c11 |
dptr = (UINT32*) pDst;
|
|
Packit Service |
fa4841 |
linebytes = width * sizeof(UINT32);
|
|
Packit Service |
fa4841 |
src1Jump = (src1Step - linebytes) / sizeof(UINT32);
|
|
Packit Service |
fa4841 |
src2Jump = (src2Step - linebytes) / sizeof(UINT32);
|
|
Packit Service |
bb5c11 |
dstJump = (dstStep - linebytes) / sizeof(UINT32);
|
|
Packit Service |
fa4841 |
xmm0 = _mm_set1_epi32(0);
|
|
Packit Service |
fa4841 |
xmm1 = _mm_set1_epi16(1);
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
for (y = 0; y < height; ++y)
|
|
Packit Service |
fa4841 |
{
|
|
Packit Service |
fa4841 |
int pixels = width;
|
|
Packit Service |
fa4841 |
int count;
|
|
Packit Service |
fa4841 |
/* Get to the 16-byte boundary now. */
|
|
Packit Service |
fa4841 |
int leadIn = 0;
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
bb5c11 |
switch ((ULONG_PTR) dptr & 0x0f)
|
|
Packit Service |
fa4841 |
{
|
|
Packit Service |
fa4841 |
case 0:
|
|
Packit Service |
fa4841 |
leadIn = 0;
|
|
Packit Service |
fa4841 |
break;
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
case 4:
|
|
Packit Service |
fa4841 |
leadIn = 3;
|
|
Packit Service |
fa4841 |
break;
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
case 8:
|
|
Packit Service |
fa4841 |
leadIn = 2;
|
|
Packit Service |
fa4841 |
break;
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
case 12:
|
|
Packit Service |
fa4841 |
leadIn = 1;
|
|
Packit Service |
fa4841 |
break;
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
default:
|
|
Packit Service |
fa4841 |
/* We'll never hit a 16-byte boundary, so do the whole
|
|
Packit Service |
fa4841 |
* thing the slow way.
|
|
Packit Service |
fa4841 |
*/
|
|
Packit Service |
fa4841 |
leadIn = width;
|
|
Packit Service |
fa4841 |
break;
|
|
Packit Service |
fa4841 |
}
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
if (leadIn)
|
|
Packit Service |
fa4841 |
{
|
|
Packit Service |
fa4841 |
pstatus_t status;
|
|
Packit Service |
bb5c11 |
status = generic->alphaComp_argb((const BYTE*) sptr1,
|
|
Packit Service |
bb5c11 |
src1Step, (const BYTE*) sptr2, src2Step,
|
|
Packit Service |
bb5c11 |
(BYTE*) dptr, dstStep, leadIn, 1);
|
|
Packit Service |
fa4841 |
if (status != PRIMITIVES_SUCCESS)
|
|
Packit Service |
fa4841 |
return status;
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
sptr1 += leadIn;
|
|
Packit Service |
fa4841 |
sptr2 += leadIn;
|
|
Packit Service |
bb5c11 |
dptr += leadIn;
|
|
Packit Service |
fa4841 |
pixels -= leadIn;
|
|
Packit Service |
fa4841 |
}
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
/* Use SSE registers to do 4 pixels at a time. */
|
|
Packit Service |
fa4841 |
count = pixels >> 2;
|
|
Packit Service |
fa4841 |
pixels -= count << 2;
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
while (count--)
|
|
Packit Service |
fa4841 |
{
|
|
Packit Service |
fa4841 |
__m128i xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
|
|
Packit Service |
fa4841 |
/* BdGdRdAdBcGcRcAcBbGbRbAbBaGaRaAa */
|
|
Packit Service |
fa4841 |
xmm2 = LOAD_SI128(sptr1);
|
|
Packit Service |
fa4841 |
sptr1 += 4;
|
|
Packit Service |
fa4841 |
/* BhGhRhAhBgGgRgAgBfGfRfAfBeGeReAe */
|
|
Packit Service |
fa4841 |
xmm3 = LOAD_SI128(sptr2);
|
|
Packit Service |
fa4841 |
sptr2 += 4;
|
|
Packit Service |
fa4841 |
/* 00Bb00Gb00Rb00Ab00Ba00Ga00Ra00Aa */
|
|
Packit Service |
fa4841 |
xmm4 = _mm_unpackhi_epi8(xmm2, xmm0);
|
|
Packit Service |
fa4841 |
/* 00Bf00Gf00Bf00Af00Be00Ge00Re00Ae */
|
|
Packit Service |
fa4841 |
xmm5 = _mm_unpackhi_epi8(xmm3, xmm0);
|
|
Packit Service |
fa4841 |
/* subtract */
|
|
Packit Service |
fa4841 |
xmm6 = _mm_subs_epi16(xmm4, xmm5);
|
|
Packit Service |
fa4841 |
/* 00Bb00Gb00Rb00Ab00Aa00Aa00Aa00Aa */
|
|
Packit Service |
fa4841 |
xmm4 = _mm_shufflelo_epi16(xmm4, 0xff);
|
|
Packit Service |
fa4841 |
/* 00Ab00Ab00Ab00Ab00Aa00Aa00Aa00Aa */
|
|
Packit Service |
fa4841 |
xmm4 = _mm_shufflehi_epi16(xmm4, 0xff);
|
|
Packit Service |
fa4841 |
/* Add one to alphas */
|
|
Packit Service |
fa4841 |
xmm4 = _mm_adds_epi16(xmm4, xmm1);
|
|
Packit Service |
fa4841 |
/* Multiply and take low word */
|
|
Packit Service |
fa4841 |
xmm4 = _mm_mullo_epi16(xmm4, xmm6);
|
|
Packit Service |
fa4841 |
/* Shift 8 right */
|
|
Packit Service |
fa4841 |
xmm4 = _mm_srai_epi16(xmm4, 8);
|
|
Packit Service |
fa4841 |
/* Add xmm5 */
|
|
Packit Service |
fa4841 |
xmm4 = _mm_adds_epi16(xmm4, xmm5);
|
|
Packit Service |
fa4841 |
/* 00Bj00Gj00Rj00Aj00Bi00Gi00Ri00Ai */
|
|
Packit Service |
fa4841 |
/* 00Bd00Gd00Rd00Ad00Bc00Gc00Rc00Ac */
|
|
Packit Service |
fa4841 |
xmm5 = _mm_unpacklo_epi8(xmm2, xmm0);
|
|
Packit Service |
fa4841 |
/* 00Bh00Gh00Rh00Ah00Bg00Gg00Rg00Ag */
|
|
Packit Service |
fa4841 |
xmm6 = _mm_unpacklo_epi8(xmm3, xmm0);
|
|
Packit Service |
fa4841 |
/* subtract */
|
|
Packit Service |
fa4841 |
xmm7 = _mm_subs_epi16(xmm5, xmm6);
|
|
Packit Service |
fa4841 |
/* 00Bd00Gd00Rd00Ad00Ac00Ac00Ac00Ac */
|
|
Packit Service |
fa4841 |
xmm5 = _mm_shufflelo_epi16(xmm5, 0xff);
|
|
Packit Service |
fa4841 |
/* 00Ad00Ad00Ad00Ad00Ac00Ac00Ac00Ac */
|
|
Packit Service |
fa4841 |
xmm5 = _mm_shufflehi_epi16(xmm5, 0xff);
|
|
Packit Service |
fa4841 |
/* Add one to alphas */
|
|
Packit Service |
fa4841 |
xmm5 = _mm_adds_epi16(xmm5, xmm1);
|
|
Packit Service |
fa4841 |
/* Multiply and take low word */
|
|
Packit Service |
fa4841 |
xmm5 = _mm_mullo_epi16(xmm5, xmm7);
|
|
Packit Service |
fa4841 |
/* Shift 8 right */
|
|
Packit Service |
fa4841 |
xmm5 = _mm_srai_epi16(xmm5, 8);
|
|
Packit Service |
fa4841 |
/* Add xmm6 */
|
|
Packit Service |
fa4841 |
xmm5 = _mm_adds_epi16(xmm5, xmm6);
|
|
Packit Service |
fa4841 |
/* 00Bl00Gl00Rl00Al00Bk00Gk00Rk0ABk */
|
|
Packit Service |
fa4841 |
/* Must mask off remainders or pack gets confused */
|
|
Packit Service |
fa4841 |
xmm3 = _mm_set1_epi16(0x00ffU);
|
|
Packit Service |
fa4841 |
xmm4 = _mm_and_si128(xmm4, xmm3);
|
|
Packit Service |
fa4841 |
xmm5 = _mm_and_si128(xmm5, xmm3);
|
|
Packit Service |
fa4841 |
/* BlGlRlAlBkGkRkAkBjGjRjAjBiGiRiAi */
|
|
Packit Service |
fa4841 |
xmm5 = _mm_packus_epi16(xmm5, xmm4);
|
|
Packit Service |
bb5c11 |
_mm_store_si128((__m128i*) dptr, xmm5);
|
|
Packit Service |
fa4841 |
dptr += 4;
|
|
Packit Service |
fa4841 |
}
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
/* Finish off the remainder. */
|
|
Packit Service |
fa4841 |
if (pixels)
|
|
Packit Service |
fa4841 |
{
|
|
Packit Service |
fa4841 |
pstatus_t status;
|
|
Packit Service |
bb5c11 |
status = generic->alphaComp_argb((const BYTE*) sptr1, src1Step,
|
|
Packit Service |
bb5c11 |
(const BYTE*) sptr2, src2Step,
|
|
Packit Service |
bb5c11 |
(BYTE*) dptr, dstStep, pixels, 1);
|
|
Packit Service |
fa4841 |
if (status != PRIMITIVES_SUCCESS)
|
|
Packit Service |
fa4841 |
return status;
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
sptr1 += pixels;
|
|
Packit Service |
fa4841 |
sptr2 += pixels;
|
|
Packit Service |
bb5c11 |
dptr += pixels;
|
|
Packit Service |
fa4841 |
}
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
/* Jump to next row. */
|
|
Packit Service |
fa4841 |
sptr1 += src1Jump;
|
|
Packit Service |
fa4841 |
sptr2 += src2Jump;
|
|
Packit Service |
bb5c11 |
dptr += dstJump;
|
|
Packit Service |
fa4841 |
}
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
return PRIMITIVES_SUCCESS;
|
|
Packit Service |
fa4841 |
}
|
|
Packit Service |
fa4841 |
#endif /* !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) */
|
|
Packit Service |
fa4841 |
#endif
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
#ifdef WITH_IPP
|
|
Packit Service |
fa4841 |
/* ------------------------------------------------------------------------- */
|
|
Packit Service |
bb5c11 |
static pstatus_t ipp_alphaComp_argb(
|
|
Packit Service |
bb5c11 |
const BYTE* pSrc1, INT32 src1Step,
|
|
Packit Service |
bb5c11 |
const BYTE* pSrc2, INT32 src2Step,
|
|
Packit Service |
bb5c11 |
BYTE* pDst, INT32 dstStep,
|
|
Packit Service |
bb5c11 |
INT32 width, INT32 height)
|
|
Packit Service |
fa4841 |
{
|
|
Packit Service |
fa4841 |
IppiSize sz;
|
|
Packit Service |
bb5c11 |
sz.width = width;
|
|
Packit Service |
fa4841 |
sz.height = height;
|
|
Packit Service |
bb5c11 |
return ippiAlphaComp_8u_AC4R(pSrc1, src1Step, pSrc2, src2Step,
|
|
Packit Service |
bb5c11 |
pDst, dstStep, sz, ippAlphaOver);
|
|
Packit Service |
fa4841 |
}
|
|
Packit Service |
fa4841 |
#endif
|
|
Packit Service |
fa4841 |
|
|
Packit Service |
fa4841 |
/* ------------------------------------------------------------------------- */
|
|
Packit Service |
fa4841 |
/**
 * Install the alpha compositing primitive into @p prims.
 *
 * The generic primitives table is fetched first, then
 * primitives_init_alphaComp() is called on @p prims. Afterwards the
 * alphaComp_argb entry is overridden:
 *  - with the IPP wrapper when built WITH_IPP,
 *  - otherwise, when built WITH_SSE2, with the SSE2 routine if the CPU
 *    reports both SSE2 and SSE3 support.
 *
 * @param prims primitives table to fill in
 */
void primitives_init_alphaComp_opt(primitives_t* prims)
{
	generic = primitives_get_generic();
	primitives_init_alphaComp(prims);
#ifdef WITH_IPP
	prims->alphaComp_argb = ipp_alphaComp_argb;
#elif defined(WITH_SSE2)

	if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)
	    && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
	{
		prims->alphaComp_argb = sse2_alphaComp_argb;
	}

#endif
}
|
|
Packit Service |
bb5c11 |
|