/*
 * Copyright © 2008 Mozilla Corporation
 * Copyright © 2010 Nokia Corporation
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Mozilla Corporation not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission.  Mozilla Corporation makes no
 * representations about the suitability of this software for any purpose.  It
 * is provided "as is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author:  Jeff Muizelaar (jeff@infidigm.net)
 *
 */

/* Prevent the stack from becoming executable */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

	.text
	/* Accept ARMv6 instructions when assembling this file ... */
	.arch armv6
	/*
	 * ... but record ARMv4 as the architecture in the object attributes.
	 * NOTE(review): presumably so that the object can be linked into builds
	 * for older cores, with the routines selected at run time - confirm
	 * against the build system and the C dispatch code.
	 */
	.object_arch armv4
	/* ARM (not Thumb) instruction encoding */
	.arm
	/*
	 * Alternate macro mode: macro arguments are referenced by bare name and
	 * '&' pastes an argument onto a mnemonic (used for the ldr/str suffix).
	 */
	.altmacro
	/* 4-byte alignment for the code that follows */
	.p2align 2

#include "pixman-arm-asm.h"

/*
 * Note: This code only uses armv5te instructions (not even armv6),
 *       but is scheduled for the ARM Cortex-A8 pipeline. So it might need
 *       to be split into a few variants, tuned for each microarchitecture.
 *
 * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
 * have efficient write combining), it needs to be changed to use 16-byte
 * aligned writes using the STM instruction.
 *
 * The nearest scanline scaler macro template uses the following arguments:
 *  fname                     - name of the function to generate
 *  bpp_shift                 - (1 << bpp_shift) is the size of a pixel in bytes
 *  t                         - type suffix for LDR/STR instructions
 *  prefetch_distance         - prefetch in the source image by that many
 *                              pixels ahead
 *  prefetch_braking_distance - stop prefetching when that many pixels are
 *                              remaining before the end of the scanline
 */

/*
 * generate_nearest_scanline_func - emit one nearest-neighbour scanline
 * scaler named fname.
 *
 * Interface of the generated function, as read by the code below:
 *   r0 (W)        - number of destination pixels to write
 *   r1 (DST)      - destination pointer, post-incremented by one pixel
 *                   (1 << bpp_shift bytes) on every store
 *   r2 (SRC)      - source base pointer; never modified
 *   r3 (VX)       - source x coordinate with 16 fractional bits; its integer
 *                   part, scaled to a byte offset, indexes SRC
 *   [sp]          - UNIT_X, the step added to VX after every pixel
 *   [sp, #4]      - SRC_WIDTH_FIXED, repeatedly subtracted from VX for as
 *                   long as VX is non-negative after a step
 *
 * r0, r1, r3, ip and the condition flags are modified; r4-r8 and r10 are
 * saved on entry and restored before returning.
 *
 * NOTE(review): the wrap loops leave VX negative after every step, so SRC is
 * presumably expected to point past the end of the source scanline with VX
 * biased to match - confirm against the C caller.  A SRC_WIDTH_FIXED that is
 * not positive would make the wrap loops spin forever once VX is >= 0.
 *
 * The code is software pipelined: the byte offset of a pixel is computed
 * one step before the load that consumes it, alternating between TMP1 and
 * TMP2.  The instruction order is deliberate; do not reorder it casually.
 */
.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
                                      prefetch_distance,        \
                                      prefetch_braking_distance

pixman_asm_function fname
	W		.req	r0
	DST		.req	r1
	SRC		.req	r2
	VX		.req	r3
	UNIT_X		.req	ip
	TMP1		.req	r4
	TMP2		.req	r5
	VXMASK		.req	r6
	PF_OFFS		.req	r7
	SRC_WIDTH_FIXED	.req	r8

	/* first stack argument; read it before the push moves sp */
	ldr	UNIT_X, [sp]
	/*
	 * Six registers (24 bytes) are pushed, which is why the second stack
	 * argument (originally at [sp, #4]) is read from [sp, #28] below.
	 * r10 is saved and restored but not otherwise used in this function.
	 * NOTE(review): presumably it only keeps the register count even
	 * (8-byte stack alignment) - confirm before changing the list, and
	 * adjust the #28 offset if the list ever changes.
	 */
	push	{r4, r5, r6, r7, r8, r10}
	/* mask that clears the low bpp_shift bits of a byte offset */
	mvn	VXMASK, #((1 << bpp_shift) - 1)
	ldr	SRC_WIDTH_FIXED, [sp, #28]

	/*
	 * Helper macro: copy two pixels.
	 * On entry TMP1 holds the byte offset of the next source pixel; on
	 * exit it holds the byte offset of the pixel following the pair.
	 * In each half: load the pixel, compute the offset of the following
	 * pixel into the other temporary, step VX, store the pixel, then
	 * wrap VX.  The store does not touch the flags, so the first
	 * subpls/bpl still tests the result of adds.
	 */
	.macro	scale_2_pixels
		ldr&t	TMP1, [SRC, TMP1]
		and	TMP2, VXMASK, VX, asr #(16 - bpp_shift)
		adds	VX, VX, UNIT_X
		str&t	TMP1, [DST], #(1 << bpp_shift)
		/* while VX >= 0: VX -= SRC_WIDTH_FIXED */
9:		subpls	VX, VX, SRC_WIDTH_FIXED
		bpl	9b

		ldr&t	TMP2, [SRC, TMP2]
		and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
		adds	VX, VX, UNIT_X
		str&t	TMP2, [DST], #(1 << bpp_shift)
		/* while VX >= 0: VX -= SRC_WIDTH_FIXED */
9:		subpls	VX, VX, SRC_WIDTH_FIXED
		bpl	9b
	.endm

	/*
	 * Prime the pipeline: TMP1 = byte offset of the first pixel, then
	 * step and wrap VX so that it already refers to the second pixel.
	 */
	and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
	adds	VX, VX, UNIT_X
9:	subpls	VX, VX, SRC_WIDTH_FIXED
	bpl	9b
	/*
	 * Bias W so that it stays >= 0 exactly while at least
	 * (8 + prefetch_braking_distance) pixels remain; skip the
	 * prefetching loop when there are fewer than that to begin with.
	 */
	subs	W, W, #(8 + prefetch_braking_distance)
	blt	2f
	/* prefetch offset: PF_OFFS = UNIT_X * prefetch_distance + VX */
	mov	PF_OFFS, #prefetch_distance
	mla	PF_OFFS, UNIT_X, PF_OFFS, VX
1:	/* main loop, process 8 pixels per iteration with prefetch */
	/*
	 * PF_OFFS advances by 8 * UNIT_X per iteration and, unlike VX, is
	 * neither wrapped by SRC_WIDTH_FIXED nor masked to a pixel boundary.
	 */
	pld	[SRC, PF_OFFS, asr #(16 - bpp_shift)]
	add	PF_OFFS, UNIT_X, lsl #3
	scale_2_pixels
	scale_2_pixels
	scale_2_pixels
	scale_2_pixels
	subs	W, W, #8
	bge	1b
2:
	/*
	 * Re-bias the counter: the subtracted constant is negative, so this
	 * adds (4 + prefetch_braking_distance) and leaves W = remaining - 4.
	 */
	subs	W, W, #(4 - 8 - prefetch_braking_distance)
	blt	2f
1:	/* process the remaining pixels, 4 per iteration, without prefetch */
	scale_2_pixels
	scale_2_pixels
	subs	W, W, #4
	bge	1b
2:
	/*
	 * Here W = remaining - 4 with 0..3 pixels remaining, so the two low
	 * bits of W equal the two low bits of the remaining pixel count.
	 */
	tst	W, #2
	beq	2f
	scale_2_pixels
2:
	/* last odd pixel: TMP1 already holds its byte offset */
	tst	W, #1
	ldrne&t	TMP1, [SRC, TMP1]
	strne&t	TMP1, [DST]
	/* cleanup helper macro and register aliases so the template can be reused */
	.purgem	scale_2_pixels
	.unreq	DST
	.unreq	SRC
	.unreq	W
	.unreq	VX
	.unreq	UNIT_X
	.unreq	TMP1
	.unreq	TMP2
	.unreq	VXMASK
	.unreq	PF_OFFS
	.unreq	SRC_WIDTH_FIXED
	/* return */
	pop	{r4, r5, r6, r7, r8, r10}
	bx	lr
.endfunc
.endm

/*
 * Instantiate the scaler template.  Argument order:
 *   fname, bpp_shift, t, prefetch_distance, prefetch_braking_distance
 */

/* 0565 variant: 2-byte pixels (bpp_shift = 1), 'h' suffix -> ldrh/strh */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32

/* 8888 variant: 4-byte pixels (bpp_shift = 2), empty suffix -> ldr/str */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32