Blame libfreerdp/primitives/prim_copy.c

Packit 1fb8d4
/* FreeRDP: A Remote Desktop Protocol Client
Packit 1fb8d4
 * Copy operations.
Packit 1fb8d4
 * vi:ts=4 sw=4:
Packit 1fb8d4
 *
Packit 1fb8d4
 * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
Packit 1fb8d4
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
Packit 1fb8d4
 * not use this file except in compliance with the License. You may obtain
Packit 1fb8d4
 * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
Packit 1fb8d4
 * Unless required by applicable law or agreed to in writing, software
Packit 1fb8d4
 * distributed under the License is distributed on an "AS IS" BASIS,
Packit 1fb8d4
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
Packit 1fb8d4
 * or implied. See the License for the specific language governing
Packit 1fb8d4
 * permissions and limitations under the License.
Packit 1fb8d4
 */
Packit 1fb8d4
Packit 1fb8d4
#ifdef HAVE_CONFIG_H
Packit 1fb8d4
#include "config.h"
Packit 1fb8d4
#endif
Packit 1fb8d4
Packit 1fb8d4
#include <string.h>
Packit 1fb8d4
#include <freerdp/types.h>
Packit 1fb8d4
#include <freerdp/primitives.h>
Packit 1fb8d4
#ifdef WITH_IPP
Packit 1fb8d4
# include <ipps.h>
Packit 1fb8d4
# include <ippi.h>
Packit 1fb8d4
#endif /* WITH_IPP */
Packit 1fb8d4
#include "prim_internal.h"
Packit 1fb8d4
Packit 1fb8d4
/* Primitives table obtained from primitives_get_generic().
 * It starts out NULL and is assigned by primitives_init_copy_opt(). */
static primitives_t* generic = NULL;
Packit 1fb8d4
Packit 1fb8d4
/* ------------------------------------------------------------------------- */
Packit 1fb8d4
/*static inline BOOL memory_regions_overlap_1d(*/
Packit 1fb8d4
static BOOL memory_regions_overlap_1d(
Packit 1fb8d4
    const BYTE* p1,
Packit 1fb8d4
    const BYTE* p2,
Packit 1fb8d4
    size_t bytes)
Packit 1fb8d4
{
Packit 1fb8d4
	const ULONG_PTR p1m = (const ULONG_PTR) p1;
Packit 1fb8d4
	const ULONG_PTR p2m = (const ULONG_PTR) p2;
Packit 1fb8d4
Packit 1fb8d4
	if (p1m <= p2m)
Packit 1fb8d4
	{
Packit 1fb8d4
		if (p1m + bytes > p2m) return TRUE;
Packit 1fb8d4
	}
Packit 1fb8d4
	else
Packit 1fb8d4
	{
Packit 1fb8d4
		if (p2m + bytes > p1m) return TRUE;
Packit 1fb8d4
	}
Packit 1fb8d4
Packit 1fb8d4
	/* else */
Packit 1fb8d4
	return FALSE;
Packit 1fb8d4
}
Packit 1fb8d4
Packit 1fb8d4
/* ------------------------------------------------------------------------- */
Packit 1fb8d4
/*static inline BOOL memory_regions_overlap_2d( */
Packit 1fb8d4
static BOOL memory_regions_overlap_2d(
Packit 1fb8d4
    const BYTE* p1,  int p1Step,  int p1Size,
Packit 1fb8d4
    const BYTE* p2,  int p2Step,  int p2Size,
Packit 1fb8d4
    int width,  int height)
Packit 1fb8d4
{
Packit 1fb8d4
	ULONG_PTR p1m = (ULONG_PTR) p1;
Packit 1fb8d4
	ULONG_PTR p2m = (ULONG_PTR) p2;
Packit 1fb8d4
Packit 1fb8d4
	if (p1m <= p2m)
Packit 1fb8d4
	{
Packit 1fb8d4
		ULONG_PTR p1mEnd = p1m + (height - 1) * p1Step + width * p1Size;
Packit 1fb8d4
Packit 1fb8d4
		if (p1mEnd > p2m) return TRUE;
Packit 1fb8d4
	}
Packit 1fb8d4
	else
Packit 1fb8d4
	{
Packit 1fb8d4
		ULONG_PTR p2mEnd = p2m + (height - 1) * p2Step + width * p2Size;
Packit 1fb8d4
Packit 1fb8d4
		if (p2mEnd > p1m) return TRUE;
Packit 1fb8d4
	}
Packit 1fb8d4
Packit 1fb8d4
	/* else */
Packit 1fb8d4
	return FALSE;
Packit 1fb8d4
}
Packit 1fb8d4
Packit 1fb8d4
/* ------------------------------------------------------------------------- */
Packit 1fb8d4
static pstatus_t general_copy_8u(
Packit 1fb8d4
    const BYTE* pSrc,
Packit 1fb8d4
    BYTE* pDst,
Packit 1fb8d4
    INT32 len)
Packit 1fb8d4
{
Packit 1fb8d4
	if (memory_regions_overlap_1d(pSrc, pDst, (size_t) len))
Packit 1fb8d4
	{
Packit 1fb8d4
		memmove((void*) pDst, (const void*) pSrc, (size_t) len);
Packit 1fb8d4
	}
Packit 1fb8d4
	else
Packit 1fb8d4
	{
Packit 1fb8d4
		memcpy((void*) pDst, (const void*) pSrc, (size_t) len);
Packit 1fb8d4
	}
Packit 1fb8d4
Packit 1fb8d4
	return PRIMITIVES_SUCCESS;
Packit 1fb8d4
}
Packit 1fb8d4
Packit 1fb8d4
/* ------------------------------------------------------------------------- */
Packit 1fb8d4
/* Copy a block of pixels from one buffer to another.
Packit 1fb8d4
 * The addresses are assumed to have been already offset to the upper-left
Packit 1fb8d4
 * corners of the source and destination region of interest.
Packit 1fb8d4
 */
Packit 1fb8d4
static pstatus_t general_copy_8u_AC4r(
Packit 1fb8d4
    const BYTE* pSrc,  INT32 srcStep,
Packit 1fb8d4
    BYTE* pDst,  INT32 dstStep,
Packit 1fb8d4
    INT32 width,  INT32 height)
Packit 1fb8d4
{
Packit 1fb8d4
	const BYTE* src = (const BYTE*) pSrc;
Packit 1fb8d4
	BYTE* dst = (BYTE*) pDst;
Packit 1fb8d4
	int rowbytes = width * sizeof(UINT32);
Packit 1fb8d4
Packit 1fb8d4
	if ((width == 0) || (height == 0)) return PRIMITIVES_SUCCESS;
Packit 1fb8d4
Packit 1fb8d4
	if (memory_regions_overlap_2d(pSrc, srcStep, sizeof(UINT32),
Packit 1fb8d4
	                              pDst, dstStep, sizeof(UINT32), width, height))
Packit 1fb8d4
	{
Packit 1fb8d4
		do
Packit 1fb8d4
		{
Packit 1fb8d4
			generic->copy(src, dst, rowbytes);
Packit 1fb8d4
			src += srcStep;
Packit 1fb8d4
			dst += dstStep;
Packit 1fb8d4
		}
Packit 1fb8d4
		while (--height);
Packit 1fb8d4
	}
Packit 1fb8d4
	else
Packit 1fb8d4
	{
Packit 1fb8d4
		/* TODO: do it in one operation when the rowdata is adjacent. */
Packit 1fb8d4
		do
Packit 1fb8d4
		{
Packit 1fb8d4
			/* If we find a replacement for memcpy that is consistently
Packit 1fb8d4
			 * faster, this could be replaced with that.
Packit 1fb8d4
			 */
Packit 1fb8d4
			memcpy(dst, src, rowbytes);
Packit 1fb8d4
			src += srcStep;
Packit 1fb8d4
			dst += dstStep;
Packit 1fb8d4
		}
Packit 1fb8d4
		while (--height);
Packit 1fb8d4
	}
Packit 1fb8d4
Packit 1fb8d4
	return PRIMITIVES_SUCCESS;
Packit 1fb8d4
}
Packit 1fb8d4
Packit 1fb8d4
#ifdef WITH_IPP
Packit 1fb8d4
/* ------------------------------------------------------------------------- */
Packit 1fb8d4
/* This is just ippiCopy_8u_AC4R without the IppiSize structure parameter.   */
Packit 1fb8d4
static pstatus_t ippiCopy_8u_AC4r(
Packit 1fb8d4
    const BYTE* pSrc,  INT32 srcStep,
Packit 1fb8d4
    BYTE* pDst,  INT32 dstStep,
Packit 1fb8d4
    INT32 width,  INT32 height)
Packit 1fb8d4
{
Packit 1fb8d4
	IppiSize roi;
Packit 1fb8d4
	roi.width  = width;
Packit 1fb8d4
	roi.height = height;
Packit 1fb8d4
	return (pstatus_t) ippiCopy_8u_AC4R(pSrc, srcStep, pDst, dstStep, roi);
Packit 1fb8d4
}
Packit 1fb8d4
#endif /* WITH_IPP */
Packit 1fb8d4
Packit 1fb8d4
/* ------------------------------------------------------------------------- */
Packit 1fb8d4
void primitives_init_copy(
Packit 1fb8d4
    primitives_t* prims)
Packit 1fb8d4
{
Packit 1fb8d4
	/* Start with the default. */
Packit 1fb8d4
	prims->copy_8u = general_copy_8u;
Packit 1fb8d4
	prims->copy_8u_AC4r = general_copy_8u_AC4r;
Packit 1fb8d4
	/* This is just an alias with void* parameters */
Packit 1fb8d4
	prims->copy    = (__copy_t)(prims->copy_8u);
Packit 1fb8d4
}
Packit 1fb8d4
Packit 1fb8d4
void primitives_init_copy_opt(
Packit 1fb8d4
    primitives_t* prims)
Packit 1fb8d4
{
Packit 1fb8d4
	generic = primitives_get_generic();
Packit 1fb8d4
	primitives_init_copy(prims);
Packit 1fb8d4
	/* Pick tuned versions if possible. */
Packit 1fb8d4
#ifdef WITH_IPP
Packit 1fb8d4
	prims->copy_8u = (__copy_8u_t) ippsCopy_8u;
Packit 1fb8d4
	prims->copy_8u_AC4r = (__copy_8u_AC4r_t) ippiCopy_8u_AC4r;
Packit 1fb8d4
#endif
Packit 1fb8d4
	/* Performance with an SSE2 version with no prefetch seemed to be
Packit 1fb8d4
	 * all over the map vs. memcpy.
Packit 1fb8d4
	 * Sometimes it was significantly faster, sometimes dreadfully slower,
Packit 1fb8d4
	 * and it seemed to vary a lot depending on block size and processor.
Packit 1fb8d4
	 * Hence, no SSE version is used here unless once can be written that
Packit 1fb8d4
	 * is consistently faster than memcpy.
Packit 1fb8d4
	 */
Packit 1fb8d4
	/* This is just an alias with void* parameters */
Packit 1fb8d4
	prims->copy    = (__copy_t)(prims->copy_8u);
Packit 1fb8d4
}