|
Packit |
030a23 |
/*
|
|
Packit |
030a23 |
* Copyright © 2012 Raspberry Pi Foundation
|
|
Packit |
030a23 |
* Copyright © 2012 RISC OS Open Ltd
|
|
Packit |
030a23 |
*
|
|
Packit |
030a23 |
* Permission to use, copy, modify, distribute, and sell this software and its
|
|
Packit |
030a23 |
* documentation for any purpose is hereby granted without fee, provided that
|
|
Packit |
030a23 |
* the above copyright notice appear in all copies and that both that
|
|
Packit |
030a23 |
* copyright notice and this permission notice appear in supporting
|
|
Packit |
030a23 |
* documentation, and that the name of the copyright holders not be used in
|
|
Packit |
030a23 |
* advertising or publicity pertaining to distribution of the software without
|
|
Packit |
030a23 |
* specific, written prior permission. The copyright holders make no
|
|
Packit |
030a23 |
* representations about the suitability of this software for any purpose. It
|
|
Packit |
030a23 |
* is provided "as is" without express or implied warranty.
|
|
Packit |
030a23 |
*
|
|
Packit |
030a23 |
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
|
|
Packit |
030a23 |
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
Packit |
030a23 |
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
Packit |
030a23 |
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
Packit |
030a23 |
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
|
|
Packit |
030a23 |
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
|
|
Packit |
030a23 |
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
|
|
Packit |
030a23 |
* SOFTWARE.
|
|
Packit |
030a23 |
*
|
|
Packit |
030a23 |
* Author: Ben Avison (bavison@riscosopen.org)
|
|
Packit |
030a23 |
*
|
|
Packit |
030a23 |
*/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Prevent the stack from becoming executable */
|
|
Packit |
030a23 |
/* On Linux/ELF, emit an empty .note.GNU-stack section so the linker does not
 * mark the stack executable because of this object. */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

/* Assembler state for the rest of the file:
 *   .arch armv6        accept ARMv6 instructions
 *   .object_arch armv4 record a lower architecture in the object attributes
 *   .arm               assemble ARM (not Thumb) encodings
 *   .altmacro          enable %(expr) evaluation and name&suffix pasting in macros
 *   .p2align 2         align to a 4-byte boundary
 */
        .text
        .arch armv6
        .object_arch armv4
        .arm
        .altmacro
        .p2align 2
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
#include "pixman-arm-asm.h"
|
|
Packit |
030a23 |
#include "pixman-arm-simd-asm.h"
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* A head macro should do all processing which results in an output of up to
 * 16 bytes, as far as the final load instruction. The corresponding tail macro
 * should complete the processing of the up-to-16 bytes. The calling macro will
 * sometimes choose to insert a preload or a decrement of X between them.
 *   cond           ARM condition code for code block
 *   numbytes       Number of output bytes that should be generated this time
 *   firstreg       First WK register in which to place output
 *   unaligned_src  Whether to use non-wordaligned loads of source image
 *   unaligned_mask Whether to use non-wordaligned loads of mask image
 *   preload        If outputting 16 bytes causes 64 bytes to be read, whether an extra preload should be output
 */
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Init hook for the same-format blit functions.
 * Hands STRIDE_D and STRIDE_S to line_saved_regs (defined in the included
 * header); blit_inner_loop aliases exactly these two registers as WK4/WK5. */
.macro blit_init
        line_saved_regs STRIDE_D, STRIDE_S
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Head macro for the blit functions: the load is the whole job.
 * cond, numbytes, firstreg and unaligned_src are forwarded to pixld with SRC
 * as the base register. unaligned_mask and preload belong to the common head
 * macro signature and are not used here. */
.macro blit_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
        pixld   cond, numbytes, firstreg, SRC, unaligned_src
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Custom inner loop for the blit functions: moves 32 bytes per iteration.
 * STRIDE_D, STRIDE_S, MASK and STRIDE_M are aliased as WK4-WK7 for the
 * duration of the loop, giving eight WK registers (two 16-byte groups).
 * Only unaligned_src is used; the other parameters belong to the inner-loop
 * macro signature and are not referenced in this body.
 * Each pass subtracts the pixel count of 32 bytes (32*8/src_bpp) from X and
 * loops while the subtraction does not borrow. */
.macro blit_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment
        WK4     .req    STRIDE_D
        WK5     .req    STRIDE_S
        WK6     .req    MASK
        WK7     .req    STRIDE_M
110:    pixld   , 16, 0, SRC, unaligned_src     /* first 16 bytes -> WK0.. */
        pixld   , 16, 4, SRC, unaligned_src     /* next 16 bytes  -> WK4.. */
        pld     [SRC, SCRATCH]                  /* preload at SRC + SCRATCH */
        pixst   , 16, 0, DST
        pixst   , 16, 4, DST
        subs    X, X, #32*8/src_bpp
        bhs     110b
        .unreq  WK4
        .unreq  WK5
        .unreq  WK6
        .unreq  WK7
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* 32bpp -> 32bpp copy, built from the blit macros.
 * The numeric triplet after the name is presumably source/mask/destination
 * bits per pixel; generate_composite_function is defined in the included
 * header, so confirm the parameter order there. */
generate_composite_function \
    pixman_composite_src_8888_8888_asm_armv6, 32, 0, 32, \
    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
    4, /* prefetch distance */ \
    blit_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    blit_process_head, \
    nop_macro, /* process tail */ \
    blit_inner_loop
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* 16bpp -> 16bpp copy, built from the blit macros.
 * The numeric triplet after the name is presumably source/mask/destination
 * bits per pixel; confirm against generate_composite_function in the
 * included header. */
generate_composite_function \
    pixman_composite_src_0565_0565_asm_armv6, 16, 0, 16, \
    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
    4, /* prefetch distance */ \
    blit_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    blit_process_head, \
    nop_macro, /* process tail */ \
    blit_inner_loop
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* 8bpp -> 8bpp copy, built from the blit macros (prefetch distance 3 rather
 * than the 4 used by the 16bpp and 32bpp variants).
 * The numeric triplet after the name is presumably source/mask/destination
 * bits per pixel; confirm against generate_composite_function in the
 * included header. */
generate_composite_function \
    pixman_composite_src_8_8_asm_armv6, 8, 0, 8, \
    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
    3, /* prefetch distance */ \
    blit_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    blit_process_head, \
    nop_macro, /* process tail */ \
    blit_inner_loop
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/******************************************************************************/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Solid fill, 32-bit colour: load the word at [sp, #ARGS_STACK_OFFSET] and
 * copy it into SRC, STRIDE_S, MASK and STRIDE_M - the four registers that
 * fill_process_tail stores through its WK4-WK7 aliases. */
.macro src_n_8888_init
        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
        mov     STRIDE_S, SRC
        mov     MASK, SRC
        mov     STRIDE_M, SRC
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Solid fill, 16-bit colour: load the halfword at [sp, #ARGS_STACK_OFFSET],
 * duplicate it into both halves of the word, then copy the word into the
 * registers that fill_process_tail stores (SRC, STRIDE_S, MASK, STRIDE_M).
 * The ORR is written in explicit three-operand form; it is the same
 * operation as the implicit-destination shorthand "orr SRC, SRC, lsl #16". */
.macro src_n_0565_init
        ldrh    SRC, [sp, #ARGS_STACK_OFFSET]
        orr     SRC, SRC, SRC, lsl #16          @ colour in both halfwords
        mov     STRIDE_S, SRC
        mov     MASK, SRC
        mov     STRIDE_M, SRC
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Solid fill, 8-bit value: load the byte at [sp, #ARGS_STACK_OFFSET],
 * replicate it into all four byte lanes, then copy the word into the
 * registers that fill_process_tail stores (SRC, STRIDE_S, MASK, STRIDE_M).
 * The ORRs are written in explicit three-operand form; they are the same
 * operations as the implicit-destination shorthand "orr SRC, SRC, lsl #n". */
.macro src_n_8_init
        ldrb    SRC, [sp, #ARGS_STACK_OFFSET]
        orr     SRC, SRC, SRC, lsl #8           @ value in bytes 0-1
        orr     SRC, SRC, SRC, lsl #16          @ value in bytes 0-3
        mov     STRIDE_S, SRC
        mov     MASK, SRC
        mov     STRIDE_M, SRC
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Tail macro for the solid-fill functions; it performs the store itself.
 * SRC, STRIDE_S, MASK and STRIDE_M (all preloaded with the fill pattern by
 * the src_n_*_init macros) are temporarily aliased as WK4-WK7 so that pixst
 * can be asked to store numbytes bytes starting from WK4.
 * firstreg is part of the common tail signature and is not used: the data
 * always comes from WK4 upwards. */
.macro fill_process_tail  cond, numbytes, firstreg
        WK4     .req    SRC
        WK5     .req    STRIDE_S
        WK6     .req    MASK
        WK7     .req    STRIDE_M
        pixst   cond, numbytes, 4, DST
        .unreq  WK4
        .unreq  WK5
        .unreq  WK6
        .unreq  WK7
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Solid fill of a 32bpp destination.
 * Every macro argument is comma-separated, matching the other
 * generate_composite_function invocations in this file, instead of relying
 * on whitespace to split arguments. */
generate_composite_function \
    pixman_composite_src_n_8888_asm_armv6, 0, 0, 32, \
    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH, \
    0, /* prefetch distance doesn't apply */ \
    src_n_8888_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    nop_macro, /* process head */ \
    fill_process_tail
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Solid fill of a 16bpp destination.
 * Every macro argument is comma-separated, matching the other
 * generate_composite_function invocations in this file, instead of relying
 * on whitespace to split arguments. */
generate_composite_function \
    pixman_composite_src_n_0565_asm_armv6, 0, 0, 16, \
    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH, \
    0, /* prefetch distance doesn't apply */ \
    src_n_0565_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    nop_macro, /* process head */ \
    fill_process_tail
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Solid fill of an 8bpp destination.
 * Every macro argument is comma-separated, matching the other
 * generate_composite_function invocations in this file, instead of relying
 * on whitespace to split arguments. */
generate_composite_function \
    pixman_composite_src_n_8_asm_armv6, 0, 0, 8, \
    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH, \
    0, /* prefetch distance doesn't apply */ \
    src_n_8_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    nop_macro, /* process head */ \
    fill_process_tail
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/******************************************************************************/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Force the top byte of one pixel to 0xFF.
 *   cond  ARM condition code suffix pasted onto the instruction
 *   reg   index of the WK register to modify (pasted after "WK")
 * The operands use the altmacro "&" pasting form WK&reg. */
.macro src_x888_8888_pixel, cond, reg
        orr&cond WK&reg, WK&reg, #0xFF000000
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Head macro for x888 -> 8888: only loads the source pixels.
 * cond, numbytes, firstreg and unaligned_src are forwarded to pixld with SRC
 * as the base register; unaligned_mask and preload are unused here. */
.macro pixman_composite_src_x888_8888_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
        pixld   cond, numbytes, firstreg, SRC, unaligned_src
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Tail macro for x888 -> 8888: sets the top byte of every loaded pixel to
 * 0xFF via src_x888_8888_pixel.
 * One register is always processed; a second when numbytes >= 8; the third
 * and fourth only when numbytes == 16. %(expr) evaluates the register index
 * at macro-expansion time. */
.macro pixman_composite_src_x888_8888_process_tail   cond, numbytes, firstreg
        src_x888_8888_pixel cond, %(firstreg+0)
 .if numbytes >= 8
        src_x888_8888_pixel cond, %(firstreg+1)
  .if numbytes == 16
        src_x888_8888_pixel cond, %(firstreg+2)
        src_x888_8888_pixel cond, %(firstreg+3)
  .endif
 .endif
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* x888 -> 8888 copy that forces the top byte of each pixel to 0xFF.
 * No init, per-line or cleanup work is needed, so those hooks are nop_macro. */
generate_composite_function \
    pixman_composite_src_x888_8888_asm_armv6, 32, 0, 32, \
    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_SCRATCH, \
    3, /* prefetch distance */ \
    nop_macro, /* init */ \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    pixman_composite_src_x888_8888_process_head, \
    pixman_composite_src_x888_8888_process_tail
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/******************************************************************************/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Setup for 0565 -> 8888 conversion.
 * Leaves MASK = 0x07E007E0 (the green field of two packed 565 pixels) and
 * STRIDE_M = 0xFF000000 (the opaque alpha byte), and primes the GE flags
 * that the SEL instructions in the pixel macros depend on. SCRATCH is
 * clobbered. */
.macro src_0565_8888_init
        /* Hold loop invariants in MASK and STRIDE_M */
        ldr     MASK, =0x07E007E0
        mov     STRIDE_M, #0xFF000000
        /* Set GE[3:0] to 1010 so SEL instructions do what we want */
        ldr     SCRATCH, =0x80008000
        uadd8   SCRATCH, SCRATCH, SCRATCH       @ bytes 1 and 3 carry out -> GE = 1010
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Expand two packed 565 pixels into two 8888 pixels with alpha 0xFF.
 *   reg1  index of the WK register holding both input pixels (lower-case
 *         letters in the bit diagrams = low halfword, upper-case = high
 *         halfword); receives the converted low pixel
 *   reg2  index of the WK register that receives the converted high pixel
 * Requires MASK = 0x07E007E0, STRIDE_M = 0xFF000000 and GE[3:0] = 1010, as
 * established by src_0565_8888_init. Clobbers SCRATCH.
 * Operands use the altmacro "&" pasting form WK&reg1 / WK&reg2. */
.macro src_0565_8888_2pixels, reg1, reg2
        and     SCRATCH, WK&reg1, MASK                  @ 00000GGGGGG0000000000gggggg00000
        bic     WK&reg2, WK&reg1, MASK                  @ RRRRR000000BBBBBrrrrr000000bbbbb
        orr     SCRATCH, SCRATCH, SCRATCH, lsr #6       @ 00000GGGGGGGGGGGG0000ggggggggggg
        mov     WK&reg1, WK&reg2, lsl #16               @ rrrrr000000bbbbb0000000000000000
        mov     SCRATCH, SCRATCH, ror #19               @ GGGG0000ggggggggggg00000GGGGGGGG
        bic     WK&reg2, WK&reg2, WK&reg1, lsr #16      @ RRRRR000000BBBBB0000000000000000
        orr     WK&reg1, WK&reg1, WK&reg1, lsr #5       @ rrrrrrrrrr0bbbbbbbbbb00000000000
        orr     WK&reg2, WK&reg2, WK&reg2, lsr #5       @ RRRRRRRRRR0BBBBBBBBBB00000000000
        pkhtb   WK&reg1, WK&reg1, WK&reg1, asr #5       @ rrrrrrrr--------bbbbbbbb--------
        sel     WK&reg1, WK&reg1, SCRATCH               @ rrrrrrrrggggggggbbbbbbbb--------
        mov     SCRATCH, SCRATCH, ror #16               @ ggg00000GGGGGGGGGGGG0000gggggggg
        pkhtb   WK&reg2, WK&reg2, WK&reg2, asr #5       @ RRRRRRRR--------BBBBBBBB--------
        sel     WK&reg2, WK&reg2, SCRATCH               @ RRRRRRRRGGGGGGGGBBBBBBBB--------
        orr     WK&reg1, STRIDE_M, WK&reg1, lsr #8      @ 11111111rrrrrrrrggggggggbbbbbbbb
        orr     WK&reg2, STRIDE_M, WK&reg2, lsr #8      @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* This version doesn't need STRIDE_M, but is one instruction longer.
   It would however be preferable for an XRGB target, since we could knock off the last 2 instructions, but is that a common case?
        and     SCRATCH, WK&reg1, MASK                  @ 00000GGGGGG0000000000gggggg00000
        bic     WK&reg1, WK&reg1, MASK                  @ RRRRR000000BBBBBrrrrr000000bbbbb
        orr     SCRATCH, SCRATCH, SCRATCH, lsr #6       @ 00000GGGGGGGGGGGG0000ggggggggggg
        mov     WK&reg2, WK&reg1, lsr #16               @ 0000000000000000RRRRR000000BBBBB
        mov     SCRATCH, SCRATCH, ror #27               @ GGGGGGGGGGGG0000ggggggggggg00000
        bic     WK&reg1, WK&reg1, WK&reg2, lsl #16      @ 0000000000000000rrrrr000000bbbbb
        mov     WK&reg2, WK&reg2, lsl #3                @ 0000000000000RRRRR000000BBBBB000
        mov     WK&reg1, WK&reg1, lsl #3                @ 0000000000000rrrrr000000bbbbb000
        orr     WK&reg2, WK&reg2, WK&reg2, lsr #5       @ 0000000000000RRRRRRRRRR0BBBBBBBB
        orr     WK&reg1, WK&reg1, WK&reg1, lsr #5       @ 0000000000000rrrrrrrrrr0bbbbbbbb
        pkhbt   WK&reg2, WK&reg2, WK&reg2, lsl #5       @ --------RRRRRRRR--------BBBBBBBB
        pkhbt   WK&reg1, WK&reg1, WK&reg1, lsl #5       @ --------rrrrrrrr--------bbbbbbbb
        sel     WK&reg2, SCRATCH, WK&reg2               @ --------RRRRRRRRGGGGGGGGBBBBBBBB
        sel     WK&reg1, SCRATCH, WK&reg1               @ --------rrrrrrrrggggggggbbbbbbbb
        orr     WK&reg2, WK&reg2, #0xFF000000           @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB
        orr     WK&reg1, WK&reg1, #0xFF000000           @ 11111111rrrrrrrrggggggggbbbbbbbb
*/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Expand a single 565 pixel, held in the low halfword of WK&reg, into an
 * 8888 pixel with alpha 0xFF, in place.
 *   reg  index of the WK register to convert
 * Requires MASK = 0x07E007E0 and GE[3:0] = 1010 (see src_0565_8888_init);
 * with that GE pattern SEL takes bytes 1 and 3 from its first operand and
 * bytes 0 and 2 from its second. Clobbers SCRATCH.
 * Operands use the altmacro "&" pasting form WK&reg. */
.macro src_0565_8888_1pixel, reg
        bic     SCRATCH, WK&reg, MASK                   @ 0000000000000000rrrrr000000bbbbb
        and     WK&reg, WK&reg, MASK                    @ 000000000000000000000gggggg00000
        mov     SCRATCH, SCRATCH, lsl #3                @ 0000000000000rrrrr000000bbbbb000
        mov     WK&reg, WK&reg, lsl #5                  @ 0000000000000000gggggg0000000000
        orr     SCRATCH, SCRATCH, SCRATCH, lsr #5       @ 0000000000000rrrrrrrrrr0bbbbbbbb
        orr     WK&reg, WK&reg, WK&reg, lsr #6          @ 000000000000000gggggggggggg00000
        pkhbt   SCRATCH, SCRATCH, SCRATCH, lsl #5       @ --------rrrrrrrr--------bbbbbbbb
        sel     WK&reg, WK&reg, SCRATCH                 @ --------rrrrrrrrggggggggbbbbbbbb
        orr     WK&reg, WK&reg, #0xFF000000             @ 11111111rrrrrrrrggggggggbbbbbbbb
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Head macro for 0565 -> 8888: loads half as many source bytes as output
 * bytes requested, since each 16-bit source pixel becomes 32 bits.
 *   numbytes == 16  8 source bytes into WK(firstreg) and WK(firstreg+2),
 *                   leaving the odd registers free for the expansion
 *   numbytes == 8   4 source bytes into WK(firstreg)
 *   numbytes == 4   2 source bytes into WK(firstreg)
 * The loads are issued with an empty condition argument; cond,
 * unaligned_mask and preload are not used. */
.macro src_0565_8888_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
 .if numbytes == 16
        pixldst ld,, 8, firstreg, %(firstreg+2),,, SRC, unaligned_src
 .elseif numbytes == 8
        pixld   , 4, firstreg, SRC, unaligned_src
 .elseif numbytes == 4
        pixld   , 2, firstreg, SRC, unaligned_src
 .endif
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Tail macro for 0565 -> 8888: expands the pixels loaded by the head macro.
 *   numbytes == 16  two pixel pairs: (firstreg, firstreg+1) and
 *                   (firstreg+2, firstreg+3)
 *   numbytes == 8   one pixel pair: (firstreg, firstreg+1)
 *   otherwise       a single pixel in firstreg
 * cond is not used; the expansion is emitted unconditionally. */
.macro src_0565_8888_process_tail   cond, numbytes, firstreg
 .if numbytes == 16
        src_0565_8888_2pixels firstreg, %(firstreg+1)
        src_0565_8888_2pixels %(firstreg+2), %(firstreg+3)
 .elseif numbytes == 8
        src_0565_8888_2pixels firstreg, %(firstreg+1)
 .else
        src_0565_8888_1pixel firstreg
 .endif
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* 0565 -> 8888 conversion copy, using the src_0565_8888_* macros. */
generate_composite_function \
    pixman_composite_src_0565_8888_asm_armv6, 16, 0, 32, \
    FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER, \
    3, /* prefetch distance */ \
    src_0565_8888_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    src_0565_8888_process_head, \
    src_0565_8888_process_tail
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/******************************************************************************/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Setup for x888 -> 0565 conversion.
 * MASK holds 0x001F001F, which picks a 5-bit field out of bytes 0 and 2
 * after a right shift by 3. STRIDE_S and ORIG_W are handed to
 * line_saved_regs (defined in the included header); the process macros
 * alias them as WK4 and WK7. */
.macro src_x888_0565_init
        /* Hold loop invariant in MASK */
        ldr     MASK, =0x001F001F
        line_saved_regs  STRIDE_S, ORIG_W
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Convert one x888 pixel to 565.
 *   s  index of the WK register holding the source pixel (left unmodified)
 *   d  index of the WK register receiving the result in its low halfword
 * Requires MASK = 0x001F001F. Writes STRIDE_S directly as a temporary. */
.macro src_x888_0565_1pixel  s, d
        and     WK&d, MASK, WK&s, lsr #3           @ 00000000000rrrrr00000000000bbbbb
        and     STRIDE_S, WK&s, #0xFC00            @ 0000000000000000gggggg0000000000
        orr     WK&d, WK&d, WK&d, lsr #5           @ 00000000000-----rrrrr000000bbbbb
        orr     WK&d, WK&d, STRIDE_S, lsr #5       @ 00000000000-----rrrrrggggggbbbbb
        /* Top 16 bits are discarded during the following STRH */
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Convert two x888 pixels to 565 and pack them into one word.
 *   slo  index of the WK register holding the pixel for the low halfword
 *   shi  index of the WK register holding the pixel for the high halfword
 *        (overwritten: reused as a temporary for the low result)
 *   d    index of the WK register receiving the packed pair
 *   tmp  index of a WK register used as a temporary (may equal d)
 * Requires MASK = 0x001F001F. Clobbers SCRATCH. */
.macro src_x888_0565_2pixels  slo, shi, d, tmp
        and     SCRATCH, WK&shi, #0xFC00           @ 0000000000000000GGGGGG0000000000
        and     WK&tmp, MASK, WK&shi, lsr #3       @ 00000000000RRRRR00000000000BBBBB
        and     WK&shi, MASK, WK&slo, lsr #3       @ 00000000000rrrrr00000000000bbbbb
        orr     WK&tmp, WK&tmp, WK&tmp, lsr #5     @ 00000000000-----RRRRR000000BBBBB
        orr     WK&tmp, WK&tmp, SCRATCH, lsr #5    @ 00000000000-----RRRRRGGGGGGBBBBB
        and     SCRATCH, WK&slo, #0xFC00           @ 0000000000000000gggggg0000000000
        orr     WK&shi, WK&shi, WK&shi, lsr #5     @ 00000000000-----rrrrr000000bbbbb
        orr     WK&shi, WK&shi, SCRATCH, lsr #5    @ 00000000000-----rrrrrggggggbbbbb
        pkhbt   WK&d, WK&shi, WK&tmp, lsl #16      @ RRRRRGGGGGGBBBBBrrrrrggggggbbbbb
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Head macro for x888 -> 0565.
 * Creates the aliases WK4-WK7 (STRIDE_S, STRIDE_M, WK3, ORIG_W). They are
 * deliberately left defined at the end of this macro: the matching
 * src_x888_0565_process_tail uses them and issues the .unreq directives.
 * For 16 output bytes (8 pixels, 32 source bytes) the first four pixels are
 * converted here, interleaved with the loads, and the remaining four are
 * left in WK4-WK7 for the tail. Otherwise numbytes*2 source bytes are loaded
 * starting at WK4 and all conversion happens in the tail.
 * All loads pass 0 for the unaligned flag; cond, firstreg, unaligned_src,
 * unaligned_mask and preload are not used. */
.macro src_x888_0565_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
        WK4     .req    STRIDE_S
        WK5     .req    STRIDE_M
        WK6     .req    WK3
        WK7     .req    ORIG_W
 .if numbytes == 16
        pixld   , 16, 4, SRC, 0
        src_x888_0565_2pixels  4, 5, 0, 0
        pixld   , 8, 4, SRC, 0
        src_x888_0565_2pixels  6, 7, 1, 1
        pixld   , 8, 6, SRC, 0
 .else
        pixld   , numbytes*2, 4, SRC, 0
 .endif
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Tail macro for x888 -> 0565: finishes the conversion, stores the result
 * and releases the WK4-WK7 aliases created by src_x888_0565_process_head.
 *   numbytes == 16  convert the last four pixels into WK2/WK3, store WK0..
 *   numbytes == 8   convert four pixels into WK1/WK2, store from WK1
 *   numbytes == 4   convert two pixels into WK1, store from WK1
 *   otherwise       convert one pixel into WK1, store from WK1
 * In the 16-byte case the last pair uses WK4 as its temporary because its
 * destination WK3 is the same register as the WK6 source alias.
 * cond and firstreg are not used. */
.macro src_x888_0565_process_tail   cond, numbytes, firstreg
 .if numbytes == 16
        src_x888_0565_2pixels  4, 5, 2, 2
        src_x888_0565_2pixels  6, 7, 3, 4
 .elseif numbytes == 8
        src_x888_0565_2pixels  4, 5, 1, 1
        src_x888_0565_2pixels  6, 7, 2, 2
 .elseif numbytes == 4
        src_x888_0565_2pixels  4, 5, 1, 1
 .else
        src_x888_0565_1pixel  4, 1
 .endif
 .if numbytes == 16
        pixst   , numbytes, 0, DST
 .else
        pixst   , numbytes, 1, DST
 .endif
        .unreq  WK4
        .unreq  WK5
        .unreq  WK6
        .unreq  WK7
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* x888 -> 0565 conversion copy, using the src_x888_0565_* macros. The tail
 * macro performs the store itself and the pixel macros overwrite SCRATCH,
 * which is what FLAG_PROCESS_DOES_STORE and FLAG_PROCESS_CORRUPTS_SCRATCH
 * appear to declare (flag definitions are in the included header). */
generate_composite_function \
    pixman_composite_src_x888_0565_asm_armv6, 32, 0, 16, \
    FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
    3, /* prefetch distance */ \
    src_x888_0565_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    src_x888_0565_process_head, \
    src_x888_0565_process_tail
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/******************************************************************************/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Saturating per-byte add of eight 8-bit source pixels onto the destination.
 *   cond        ARM condition code suffix pasted onto the instructions
 *   dst1, dst2  indices of the WK registers holding destination pixels;
 *               updated in place
 * The source pixels are taken from MASK (for dst1) and STRIDE_M (for dst2). */
.macro add_8_8_8pixels  cond, dst1, dst2
        uqadd8&cond  WK&dst1, WK&dst1, MASK
        uqadd8&cond  WK&dst2, WK&dst2, STRIDE_M
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Saturating per-byte add of up to four 8-bit source pixels, held in MASK,
 * onto the destination pixels in WK&dst (updated in place).
 *   cond  ARM condition code suffix pasted onto the instruction
 *   dst   index of the WK register holding the destination pixels */
.macro add_8_8_4pixels  cond, dst
        uqadd8&cond  WK&dst, WK&dst, MASK
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Head macro for ADD 8 + 8.
 * MASK and STRIDE_M are aliased as WK4/WK5 only so that pixld can load
 * source pixels into them; the add macros then read them by their real
 * names.
 *   numbytes == 16  load 8 source bytes and 16 destination bytes, add the
 *                   first 8 pixels immediately, then load the next 8 source
 *                   bytes for the tail macro to add
 *   otherwise       load numbytes source bytes and numbytes destination
 *                   bytes; the tail macro performs the add
 * Destination loads pass 0 for the unaligned flag. unaligned_mask and
 * preload are not used. */
.macro add_8_8_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
        WK4     .req    MASK
        WK5     .req    STRIDE_M
 .if numbytes == 16
        pixld   cond, 8, 4, SRC, unaligned_src
        pixld   cond, 16, firstreg, DST, 0
        add_8_8_8pixels cond, firstreg, %(firstreg+1)
        pixld   cond, 8, 4, SRC, unaligned_src
 .else
        pixld   cond, numbytes, 4, SRC, unaligned_src
        pixld   cond, numbytes, firstreg, DST, 0
 .endif
        .unreq  WK4
        .unreq  WK5
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Tail macro for ADD 8 + 8: performs the adds the head macro left pending.
 *   numbytes == 16  second group of 8 pixels: WK(firstreg+2), WK(firstreg+3)
 *   numbytes == 8   8 pixels: WK(firstreg), WK(firstreg+1)
 *   otherwise       up to 4 pixels in WK(firstreg) */
.macro add_8_8_process_tail  cond, numbytes, firstreg
 .if numbytes == 16
        add_8_8_8pixels cond, %(firstreg+2), %(firstreg+3)
 .elseif numbytes == 8
        add_8_8_8pixels cond, firstreg, %(firstreg+1)
 .else
        add_8_8_4pixels cond, firstreg
 .endif
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* ADD of an 8bpp source onto an 8bpp destination, using the add_8_8_*
 * macros. No init, per-line or cleanup work is required. */
generate_composite_function \
    pixman_composite_add_8_8_asm_armv6, 8, 0, 8, \
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_PRESERVES_SCRATCH, \
    2, /* prefetch distance */ \
    nop_macro, /* init */ \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    add_8_8_process_head, \
    add_8_8_process_tail
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/******************************************************************************/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Setup for OVER 8888 on 8888.
 * MASK holds the rounding constant 0x00800080 that the pixel macro adds in
 * its MLA instructions. The UADD8 is executed only for its effect on the GE
 * flags; its result in SCRATCH is not used. STRIDE_D, STRIDE_S and ORIG_W
 * are handed to line_saved_regs (defined in the included header) because
 * the process macros alias them as WK registers. */
.macro over_8888_8888_init
        /* Hold loop invariant in MASK */
        ldr     MASK, =0x00800080
        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
        uadd8   SCRATCH, MASK, MASK
        line_saved_regs STRIDE_D, STRIDE_S, ORIG_W
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Head macro for OVER 8888 on 8888.
 * Loads numbytes of source into WK(4+firstreg) upwards and numbytes of
 * destination into WK(firstreg) upwards. STRIDE_D, STRIDE_S, STRIDE_M and
 * ORIG_W are aliased as WK4-WK7 for the duration of the loads.
 * The loads are issued with an empty condition argument; cond,
 * unaligned_mask and preload are not used. */
.macro over_8888_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
        WK4     .req    STRIDE_D
        WK5     .req    STRIDE_S
        WK6     .req    STRIDE_M
        WK7     .req    ORIG_W
        pixld   , numbytes, %(4+firstreg), SRC, unaligned_src
        pixld   , numbytes, firstreg, DST, 0
        .unreq  WK4
        .unreq  WK5
        .unreq  WK6
        .unreq  WK7
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Test whether every source pixel in the group is zero.
 *   numbytes   size of the group; selects how many registers are tested
 *              (1 for up to 4 bytes, 2 for up to 8, otherwise 4)
 *   reg0-reg3  indices of the WK registers holding the source pixels
 * Leaves the Z flag set (EQ) only if all tested registers are zero; each
 * subsequent TEQ is conditional on the previous ones having matched.
 * Operands use the altmacro "&" pasting form WK&reg0 .. WK&reg3. */
.macro over_8888_8888_check_transparent  numbytes, reg0, reg1, reg2, reg3
        /* Since these colours are premultiplied by alpha, only 0 indicates transparent (any other colour with 0 in the alpha byte is luminous) */
        teq     WK&reg0, #0
 .if numbytes > 4
        teqeq   WK&reg1, #0
  .if numbytes > 8
        teqeq   WK&reg2, #0
        teqeq   WK&reg3, #0
  .endif
 .endif
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Reduce the pixel in WK&next to its top byte, moved down to bits 0-7.
 * over_8888_8888_1pixel expects its source register in this form on entry.
 *   next  index of the WK register to reduce */
.macro over_8888_8888_prepare  next
        mov     WK&next, WK&next, lsr #24
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* OVER for one pixel: dst = src + dst * (255 - src_alpha) / 255, per byte,
 * with a saturating final add.
 *   src     index of the WK register holding the source alpha in bits 0-7
 *           (see over_8888_8888_prepare); ends up holding the full source
 *           pixel reloaded from memory
 *   dst     index of the WK register holding the destination pixel;
 *           receives the result
 *   offset  byte offset from SRC at which the source pixel can be reloaded
 *   next    index of the WK register holding the following source pixel,
 *           which is reduced to its alpha here unless offset is -4 (i.e.
 *           unless this is the last pixel of the group)
 * Requires MASK = 0x00800080 and GE[3:0] = 0101 (see over_8888_8888_init).
 * Clobbers SCRATCH. */
.macro over_8888_8888_1pixel src, dst, offset, next
        /* src = destination component multiplier */
        rsb     WK&src, WK&src, #255
        /* Split even/odd bytes of dst into SCRATCH/dst */
        uxtb16  SCRATCH, WK&dst
        uxtb16  WK&dst, WK&dst, ror #8
        /* Multiply through, adding 0.5 to the upper byte of result for rounding */
        mla     SCRATCH, SCRATCH, WK&src, MASK
        mla     WK&dst, WK&dst, WK&src, MASK
        /* Where we would have had a stall between the result of the first MLA and the shifter input,
         * reload the complete source pixel */
        ldr     WK&src, [SRC, #offset]
        /* Multiply by 257/256 to approximate 256/255 */
        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
        /* In this stall, start processing the next pixel */
 .if offset < -4
        mov     WK&next, WK&next, lsr #24
 .endif
        uxtab16 WK&dst, WK&dst, WK&dst, ror #8
        /* Recombine even/odd bytes of multiplied destination */
        mov     SCRATCH, SCRATCH, ror #8
        sel     WK&dst, SCRATCH, WK&dst
        /* Saturated add of source to multiplied destination */
        uqadd8  WK&dst, WK&dst, WK&src
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Tail macro for OVER 8888 on 8888; it performs the store itself.
 * If every source pixel of the group is zero, the blend and the store are
 * skipped by branching to local label 10. Otherwise each of the numbytes/4
 * pixels is blended in turn: PROCESS_REG walks the destination registers
 * from firstreg and PROCESS_OFF walks the source reload offsets from
 * -numbytes up to -4.
 * The flags are modified by the transparency test. cond is not used. */
.macro over_8888_8888_process_tail  cond, numbytes, firstreg
        WK4     .req    STRIDE_D
        WK5     .req    STRIDE_S
        WK6     .req    STRIDE_M
        WK7     .req    ORIG_W
        over_8888_8888_check_transparent numbytes, %(4+firstreg), %(5+firstreg), %(6+firstreg), %(7+firstreg)
        beq     10f
        over_8888_8888_prepare  %(4+firstreg)
 .set PROCESS_REG, firstreg
 .set PROCESS_OFF, -numbytes
 .rept numbytes / 4
        over_8888_8888_1pixel %(4+PROCESS_REG), %(0+PROCESS_REG), PROCESS_OFF, %(5+PROCESS_REG)
  .set PROCESS_REG, PROCESS_REG+1
  .set PROCESS_OFF, PROCESS_OFF+4
 .endr
        pixst   , numbytes, firstreg, DST
10:
        .unreq  WK4
        .unreq  WK5
        .unreq  WK6
        .unreq  WK7
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* OVER of a 32bpp source onto a 32bpp destination, using the
 * over_8888_8888_* macros.
 * Every macro argument is comma-separated, matching the other
 * generate_composite_function invocations in this file, instead of relying
 * on whitespace to split arguments. */
generate_composite_function \
    pixman_composite_over_8888_8888_asm_armv6, 32, 0, 32, \
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS, \
    2, /* prefetch distance */ \
    over_8888_8888_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    over_8888_8888_process_head, \
    over_8888_8888_process_tail
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/******************************************************************************/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Multiply each byte of a word by a byte.
 * Useful when there aren't any obvious ways to fill the stalls with other instructions.
 *   word  Register containing 4 bytes (receives the result)
 *   byte  Register containing byte multiplier (bits 8-31 must be 0)
 *   tmp   Scratch register
 *   half  Register containing the constant 0x00800080
 * GE[3:0] bits must contain 0101
 * Unlike most macros in this file, the arguments are complete register
 * names, not WK indices.
 */
.macro mul_8888_8  word, byte, tmp, half
        /* Split even/odd bytes of word apart */
        uxtb16  tmp, word
        uxtb16  word, word, ror #8
        /* Multiply bytes together with rounding, then by 257/256 */
        mla     tmp, tmp, byte, half
        mla     word, word, byte, half /* 1 stall follows */
        uxtab16 tmp, tmp, tmp, ror #8  /* 1 stall follows */
        uxtab16 word, word, word, ror #8
        /* Recombine bytes */
        mov     tmp, tmp, ror #8
        sel     word, tmp, word
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/******************************************************************************/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Setup for OVER 8888 on 8888 with a constant mask.
 * MASK ends up holding only the top (alpha) byte of the mask word read from
 * [sp, #ARGS_STACK_OFFSET+8], in bits 0-7. STRIDE_M holds the rounding
 * constant 0x00800080 passed to mul_8888_8. The UADD8 is executed only for
 * its effect on the GE flags; its result in SCRATCH is not used.
 * Y, STRIDE_D, STRIDE_S and ORIG_W are handed to line_saved_regs (defined in
 * the included header) because the process macros alias them as WK4-WK7. */
.macro over_8888_n_8888_init
        /* Mask is constant */
        ldr     MASK, [sp, #ARGS_STACK_OFFSET+8]
        /* Hold loop invariant in STRIDE_M */
        ldr     STRIDE_M, =0x00800080
        /* We only want the alpha bits of the constant mask */
        mov     MASK, MASK, lsr #24
        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
        uadd8   SCRATCH, STRIDE_M, STRIDE_M
        line_saved_regs Y, STRIDE_D, STRIDE_S, ORIG_W
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Head macro for OVER 8888 on 8888 with a constant mask.
 * Loads numbytes of source starting at WK(4 + firstreg%2) and numbytes of
 * destination starting at WK(firstreg). Y, STRIDE_D, STRIDE_S and ORIG_W are
 * aliased as WK4-WK7 for the duration of the loads.
 * The loads are issued with an empty condition argument; cond,
 * unaligned_mask and preload are not used. */
.macro over_8888_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
        WK4     .req    Y
        WK5     .req    STRIDE_D
        WK6     .req    STRIDE_S
        WK7     .req    ORIG_W
        pixld   , numbytes, %(4+(firstreg%2)), SRC, unaligned_src
        pixld   , numbytes, firstreg, DST, 0
        .unreq  WK4
        .unreq  WK5
        .unreq  WK6
        .unreq  WK7
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* OVER for one pixel with a constant mask alpha.
 *   src  index of the WK register holding the source pixel; it is first
 *        scaled by the mask alpha in MASK
 *   dst  index of the WK register holding the destination pixel; receives
 *        the result
 * WK7 is used as a temporary for WK6 minus the alpha of the scaled source;
 * the caller is expected to have set WK6 to 255 (as
 * over_8888_n_8888_process_tail does). The destination is scaled by WK7 and
 * the scaled source is then added with per-byte saturation.
 * Requires STRIDE_M = 0x00800080 and GE[3:0] = 0101. Clobbers SCRATCH. */
.macro over_8888_n_8888_1pixel src, dst
        mul_8888_8  WK&src, MASK, SCRATCH, STRIDE_M
        sub     WK7, WK6, WK&src, lsr #24
        mul_8888_8  WK&dst, WK7, SCRATCH, STRIDE_M
        uqadd8  WK&dst, WK&dst, WK&src
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Tail macro for OVER 8888 on 8888 with a constant mask; it performs the
 * store itself.
 * If every source pixel of the group is zero, the blend and the store are
 * skipped by branching to local label 10. Otherwise WK6 is set to 255 and
 * each of the numbytes/4 pixels is blended, alternating between WK4 and WK5
 * as the source register (index 4 + PROCESS_REG%2).
 * The flags are modified by the transparency test. cond is not used. */
.macro over_8888_n_8888_process_tail  cond, numbytes, firstreg
        WK4     .req    Y
        WK5     .req    STRIDE_D
        WK6     .req    STRIDE_S
        WK7     .req    ORIG_W
        over_8888_8888_check_transparent numbytes, %(4+(firstreg%2)), %(5+(firstreg%2)), %(6+firstreg), %(7+firstreg)
        beq     10f
        mov     WK6, #255
 .set PROCESS_REG, firstreg
 .rept numbytes / 4
  .if numbytes == 16 && PROCESS_REG == 2
        /* We're using WK6 and WK7 as temporaries, so half way through
         * 4 pixels, reload the second two source pixels but this time
         * into WK4 and WK5 */
        ldmdb   SRC, {WK4, WK5}
  .endif
        over_8888_n_8888_1pixel  %(4+(PROCESS_REG%2)), %(PROCESS_REG)
  .set PROCESS_REG, PROCESS_REG+1
 .endr
        pixst   , numbytes, firstreg, DST
10:
        .unreq  WK4
        .unreq  WK5
        .unreq  WK6
        .unreq  WK7
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Instantiate pixman_composite_over_8888_n_8888_asm_armv6 from the macros
 * above.  Arguments, in order: function name; three numeric values
 * (presumably source, mask and destination bits per pixel - confirm against
 * the generate_composite_function definition); flags; prefetch distance;
 * then the init, newline, cleanup, process_head and process_tail hooks.
 */
generate_composite_function \
    pixman_composite_over_8888_n_8888_asm_armv6, 32, 0, 32 \
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \
    2, /* prefetch distance */ \
    over_8888_n_8888_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    over_8888_n_8888_process_head, \
    over_8888_n_8888_process_tail
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/******************************************************************************/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Init hook for the over_n_8_8888 operation (constant source, 8-bit mask,
 * 8888 destination).
 *
 * On exit:
 *   STRIDE_S = bytes 0 and 2 of the source word, each widened to 16 bits
 *   SRC      = bytes 1 and 3 of the source word, each widened to 16 bits
 *   GE[3:0]  = 0101
 *   SCRATCH  = clobbered (0x00800080 is not kept; the newline hook reloads
 *              it into STRIDE_D)
 */
.macro over_n_8_8888_init
        /* Source is constant, but splitting it into even/odd bytes is a loop invariant */
        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
        /* Not enough registers to hold this constant, but we still use it here to set GE[3:0] */
        ldr     SCRATCH, =0x00800080
        uxtb16  STRIDE_S, SRC
        uxtb16  SRC, SRC, ror #8
        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
        uadd8   SCRATCH, SCRATCH, SCRATCH
        line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Newline hook for over_n_8_8888: (re)load the 0x00800080 constant into
 * STRIDE_D, where over_n_8_8888_1pixel expects it.
 *
 * The .ltorg places a literal pool right here, so the code branches over it.
 */
.macro over_n_8_8888_newline
        ldr     STRIDE_D, =0x00800080
        b       1f
 .ltorg
1:
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Load stage for over_n_8_8888.
 *
 * STRIDE_M is temporarily aliased as WK4 so pixld can target it.  numbytes/4
 * bytes of mask (one byte per destination pixel) are requested into WK4 and
 * numbytes of destination into the registers starting at WK<firstreg>.
 *
 * cond, unaligned_src and preload are part of the common process_head
 * signature and are not used here.
 */
.macro over_n_8_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
 WK4    .req    STRIDE_M
        pixld   , numbytes/4, 4, MASK, unaligned_mask
        pixld   , numbytes, firstreg, DST, 0
        .unreq  WK4
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Composite one pixel for over_n_8_8888.
 *
 *   src - which byte (0-3) of WK4 holds this pixel's mask value
 *   dst - index N of the WK register holding the destination pixel; receives
 *         the result
 *
 * Expects STRIDE_S / SRC = even / odd source bytes widened to 16 bits (from
 * the init macro), STRIDE_D = 0x00800080 (from the newline macro) and
 * GE[3:0] = 0101.  Clobbers Y, SCRATCH and ORIG_W.
 *
 * Each product is rounded as t = x*m + 0x80; t += t >> 8; result = t >> 8,
 * carried out on two 16-bit lanes at a time.
 */
.macro over_n_8_8888_1pixel src, dst
        /* Y = mask byte for this pixel */
        uxtb    Y, WK4, ror #src*8
        /* Trailing part of multiplication of source */
        mla     SCRATCH, STRIDE_S, Y, STRIDE_D
        mla     Y, SRC, Y, STRIDE_D
        mov     ORIG_W, #255
        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
        uxtab16 Y, Y, Y, ror #8
        mov     SCRATCH, SCRATCH, ror #8
        /* ORIG_W = 255 - alpha of the masked source (top byte of Y) */
        sub     ORIG_W, ORIG_W, Y, lsr #24
        /* Y = masked source: bytes 0/2 from SCRATCH, bytes 1/3 from Y */
        sel     Y, SCRATCH, Y
        /* Then multiply the destination */
        mul_8888_8 WK&dst, ORIG_W, SCRATCH, STRIDE_D
        uqadd8  WK&dst, WK&dst, Y
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Compute-and-store stage for over_n_8_8888.
 *
 * If the whole mask word in WK4 (an alias of STRIDE_M) is zero, nothing is
 * computed or stored.  Otherwise numbytes/4 pixels are composited, pixel i
 * using mask byte i of WK4 and destination register WK<firstreg+i>, and the
 * results are stored with pixst.
 *
 * cond is part of the common process_tail signature and is not used here.
 */
.macro over_n_8_8888_process_tail cond, numbytes, firstreg
 WK4    .req    STRIDE_M
        teq     WK4, #0
        beq     10f
 .set PROCESS_REG, firstreg
 .rept numbytes / 4
        over_n_8_8888_1pixel  %(PROCESS_REG-firstreg), %(PROCESS_REG)
  .set PROCESS_REG, PROCESS_REG+1
 .endr
        pixst   , numbytes, firstreg, DST
10:
        .unreq  WK4
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Instantiate pixman_composite_over_n_8_8888_asm_armv6 from the macros above.
 * Arguments, in order: function name; three numeric values (presumably
 * source, mask and destination bits per pixel - confirm against the
 * generate_composite_function definition); flags; prefetch distance; then
 * the init, newline, cleanup, process_head and process_tail hooks.
 */
generate_composite_function \
    pixman_composite_over_n_8_8888_asm_armv6, 0, 8, 32 \
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \
    2, /* prefetch distance */ \
    over_n_8_8888_init, \
    over_n_8_8888_newline, \
    nop_macro, /* cleanup */ \
    over_n_8_8888_process_head, \
    over_n_8_8888_process_tail
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/******************************************************************************/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Init hook for the over_reverse_n_8888 operation (constant source placed
 * underneath an 8888 destination).
 *
 * On exit:
 *   SRC      = the constant source word read from the stacked arguments
 *   MASK     = 0x00800080
 *   STRIDE_S = bytes 0 and 2 of SRC, each widened to 16 bits
 *   STRIDE_M = bytes 1 and 3 of SRC, each widened to 16 bits
 *   GE[3:0]  = 0101
 *   SCRATCH  = clobbered
 */
.macro over_reverse_n_8888_init
        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
        ldr     MASK, =0x00800080
        /* Split source pixel into RB/AG parts */
        uxtb16  STRIDE_S, SRC
        uxtb16  STRIDE_M, SRC, ror #8
        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
        uadd8   SCRATCH, MASK, MASK
        line_saved_regs  STRIDE_D, ORIG_W
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Newline hook for over_reverse_n_8888: set STRIDE_D to 0xFF, the constant
 * that over_reverse_n_8888_1pixel bit-clears the destination alpha from.
 */
.macro over_reverse_n_8888_newline
        mov     STRIDE_D, #0xFF
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Load stage for over_reverse_n_8888: request numbytes of destination into
 * the registers starting at WK<firstreg>.  The source is a constant, so
 * nothing else needs loading; the remaining parameters belong to the common
 * process_head signature and are not used here.
 */
.macro over_reverse_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
        pixld   , numbytes, firstreg, DST, 0
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Composite one pixel for over_reverse_n_8888.
 *
 *   d       - index N of the WK register holding the destination pixel;
 *             receives the result
 *   is_only - 1 when this is the only pixel in the group, in which case an
 *             opaque destination jumps to label 49 (defined by the caller)
 *             to skip the store; otherwise the unchanged value is kept
 *
 * Cases:
 *   destination == 0    -> result is the constant source (label 8)
 *   destination alpha
 *        == 0xFF        -> destination left untouched
 *   otherwise           -> destination += source * (255 - destination alpha),
 *                          using per-byte saturating addition
 *
 * Expects the register set-up of over_reverse_n_8888_init and STRIDE_D = 0xFF.
 * Clobbers SCRATCH, ORIG_W and the flags.
 */
.macro over_reverse_n_8888_1pixel  d, is_only
        teq     WK&d, #0
        beq     8f       /* replace with source */
        /* ORIG_W = 0xFF & ~(dest alpha), i.e. 255 - dest alpha; Z if opaque */
        bics    ORIG_W, STRIDE_D, WK&d, lsr #24
 .if is_only == 1
        beq     49f      /* skip store */
 .else
        beq     9f       /* write same value back */
 .endif
        mla     SCRATCH, STRIDE_S, ORIG_W, MASK /* red/blue */
        mla     ORIG_W, STRIDE_M, ORIG_W, MASK  /* alpha/green */
        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
        uxtab16 ORIG_W, ORIG_W, ORIG_W, ror #8
        mov     SCRATCH, SCRATCH, ror #8
        sel     ORIG_W, SCRATCH, ORIG_W
        uqadd8  WK&d, WK&d, ORIG_W
        b       9f
8:      mov     WK&d, SRC
9:
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Compute-and-store stage for over_reverse_n_8888, with the destination
 * register indices already expanded.
 *
 *   numbytes  - 4, 8 or 16
 *   reg1-reg4 - indices of the WK registers holding the destination pixels
 *               (only as many as numbytes/4 are used)
 *
 * For a single pixel the per-pixel macro decides on its own whether the store
 * can be skipped.  For 2 or 4 pixels, the pixels are ANDed together first: if
 * the top byte of the AND is 0xFF every pixel is opaque and the whole group,
 * store included, is skipped via label 49.
 *
 * Clobbers SCRATCH and the flags, plus whatever the per-pixel macro clobbers.
 */
.macro over_reverse_n_8888_tail  numbytes, reg1, reg2, reg3, reg4
 .if numbytes == 4
        over_reverse_n_8888_1pixel  reg1, 1
 .else
        and     SCRATCH, WK&reg1, WK&reg2
  .if numbytes == 16
        and     SCRATCH, SCRATCH, WK&reg3
        and     SCRATCH, SCRATCH, WK&reg4
  .endif
        /* ~(x asr 24) is zero exactly when the top byte of x is 0xFF */
        mvns    SCRATCH, SCRATCH, asr #24
        beq     49f /* skip store if all opaque */
        over_reverse_n_8888_1pixel  reg1, 0
        over_reverse_n_8888_1pixel  reg2, 0
  .if numbytes == 16
        over_reverse_n_8888_1pixel  reg3, 0
        over_reverse_n_8888_1pixel  reg4, 0
  .endif
 .endif
        pixst   , numbytes, reg1, DST
49:
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * process_tail hook for over_reverse_n_8888: expand firstreg into four
 * consecutive register indices and hand over to over_reverse_n_8888_tail.
 * cond is not used.
 */
.macro over_reverse_n_8888_process_tail  cond, numbytes, firstreg
        over_reverse_n_8888_tail  numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Instantiate pixman_composite_over_reverse_n_8888_asm_armv6 from the macros
 * above.  Arguments, in order: function name; three numeric values
 * (presumably source, mask and destination bits per pixel - confirm against
 * the generate_composite_function definition); flags; prefetch distance;
 * then the init, newline, cleanup, process_head and process_tail hooks.
 */
generate_composite_function \
    pixman_composite_over_reverse_n_8888_asm_armv6, 0, 0, 32 \
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
    3, /* prefetch distance */ \
    over_reverse_n_8888_init, \
    over_reverse_n_8888_newline, \
    nop_macro, /* cleanup */ \
    over_reverse_n_8888_process_head, \
    over_reverse_n_8888_process_tail
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/******************************************************************************/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Init hook for over_white_8888_8888_ca (component-alpha mask, source fixed
 * at white).
 *
 * Creates the register aliases HALF, TMP0-TMP3 and WK4 used by the rest of
 * this operation; they stay defined until over_white_8888_8888_ca_cleanup
 * removes them.
 *
 * On exit:
 *   HALF (SRC)       = 0x80
 *   GE[3:0]          = 0101
 *   SCRATCH          = clobbered (it doubles as WK4 afterwards)
 *   DST_PRELOAD_BIAS = 8 (assembly-time symbol, reset to 0 by the cleanup)
 */
.macro over_white_8888_8888_ca_init
 HALF   .req    SRC
 TMP0   .req    STRIDE_D
 TMP1   .req    STRIDE_S
 TMP2   .req    STRIDE_M
 TMP3   .req    ORIG_W
 WK4    .req    SCRATCH
        line_saved_regs STRIDE_D, STRIDE_M, ORIG_W
        ldr     SCRATCH, =0x800080
        mov     HALF, #0x80
        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
        uadd8   SCRATCH, SCRATCH, SCRATCH
 .set DST_PRELOAD_BIAS, 8
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Cleanup hook for over_white_8888_8888_ca: undo the assembly-time state set
 * up by over_white_8888_8888_ca_init.  Resets DST_PRELOAD_BIAS to 0 and
 * removes the HALF, TMP0-TMP3 and WK4 aliases.  Emits no instructions.
 */
.macro over_white_8888_8888_ca_cleanup
 .set DST_PRELOAD_BIAS, 0
        .unreq  HALF
        .unreq  TMP0
        .unreq  TMP1
        .unreq  TMP2
        .unreq  TMP3
        .unreq  WK4
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * General-case pixel combine for over_white_8888_8888_ca.
 *
 *   m - register holding the component-alpha mask pixel
 *   d - register holding the destination pixel; receives the result
 *
 * Entry requirement: TMP0 must already hold the bitwise complement of m (the
 * callers execute "mvn TMP0, <mask>" first).  HALF must be 0x80 and GE[3:0]
 * must be 0101.
 *
 * Computes, per channel, d = saturate(d * ~m / 255 + m), with the division
 * rounded via the +0x80 / add-high-byte sequence.  Clobbers TMP0-TMP3.
 *
 * The instruction order is deliberate (see the stall notes); do not reorder.
 */
.macro over_white_8888_8888_ca_combine  m, d
        uxtb16  TMP1, TMP0                /* rb_notmask */
        uxtb16  TMP2, d                   /* rb_dest; 1 stall follows */
        smlatt  TMP3, TMP2, TMP1, HALF    /* red */
        smlabb  TMP2, TMP2, TMP1, HALF    /* blue */
        uxtb16  TMP0, TMP0, ror #8        /* ag_notmask */
        uxtb16  TMP1, d, ror #8           /* ag_dest; 1 stall follows */
        smlatt  d, TMP1, TMP0, HALF       /* alpha */
        smlabb  TMP1, TMP1, TMP0, HALF    /* green */
        pkhbt   TMP0, TMP2, TMP3, lsl #16 /* rb; 1 stall follows */
        pkhbt   TMP1, TMP1, d, lsl #16    /* ag */
        uxtab16 TMP0, TMP0, TMP0, ror #8
        uxtab16 TMP1, TMP1, TMP1, ror #8
        mov     TMP0, TMP0, ror #8
        sel     d, TMP0, TMP1
        uqadd8  d, d, m                   /* d is a late result */
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Load stage for a single pixel of over_white_8888_8888_ca: request 4 bytes
 * of mask into WK1 and 4 bytes of destination into WK3.
 */
.macro over_white_8888_8888_ca_1pixel_head
        pixld   , 4, 1, MASK, 0
        pixld   , 4, 3, DST, 0
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Compute-and-store stage for a single pixel of over_white_8888_8888_ca.
 * WK1 holds the mask pixel and WK3 the destination pixel.
 *
 * "teq x, x, asr #32" sets Z when x is 0 or 0xFFFFFFFF and copies bit 31 of
 * x into C, which separates three cases:
 *   mask == 0          (Z set, C clear) -> destination unchanged, no store
 *   mask == 0xFFFFFFFF (Z set, C set)   -> destination = mask
 *   anything else      (Z clear)        -> full combine
 *
 * TMP0 is loaded with ~mask up front because the combine macro expects it.
 */
.macro over_white_8888_8888_ca_1pixel_tail
        mvn     TMP0, WK1
        teq     WK1, WK1, asr #32
        bne     01f
        bcc     03f
        mov     WK3, WK1
        b       02f
01:     over_white_8888_8888_ca_combine WK1, WK3
02:     pixst   , 4, 3, DST
03:
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Load stage for a pair of pixels of over_white_8888_8888_ca: request 8 bytes
 * of mask into WK1 and WK2.  The destination is loaded later, by the matching
 * tail macro.
 */
.macro over_white_8888_8888_ca_2pixels_head
        pixld   , 8, 1, MASK, 0
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Compute-and-store stage for a pair of pixels of over_white_8888_8888_ca.
 * WK1 / WK2 hold the two mask pixels; the two destination pixels are loaded
 * here into WK3 / WK4.
 *
 * Each pixel is classified with "teq x, x, asr #32" (Z set when x is 0 or
 * 0xFFFFFFFF, C = bit 31 of x):
 *   mask == 0xFFFFFFFF -> destination = mask
 *   mask == 0          -> destination unchanged
 *   otherwise          -> full combine (TMP0 = ~mask is prepared first)
 * If the first mask is 0 and the second mask is 0 as well, the store is
 * skipped altogether (label 05).
 */
.macro over_white_8888_8888_ca_2pixels_tail
        pixld   , 8, 3, DST
        mvn     TMP0, WK1
        teq     WK1, WK1, asr #32
        bne     01f
        movcs   WK3, WK1
        bcs     02f
        teq     WK2, #0
        beq     05f
        b       02f
01:     over_white_8888_8888_ca_combine WK1, WK3
02:     mvn     TMP0, WK2
        teq     WK2, WK2, asr #32
        bne     03f
        movcs   WK4, WK2
        b       04f
03:     over_white_8888_8888_ca_combine WK2, WK4
04:     pixst   , 8, 3, DST
05:
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * process_head hook for over_white_8888_8888_ca.
 *
 *   numbytes == 4  : load one pixel (mask and destination)
 *   numbytes == 8  : load the mask for one pair of pixels
 *   numbytes == 16 : fully process the first pair (head + tail), then load
 *                    the mask for the second pair, which process_tail
 *                    finishes
 *
 * Only numbytes is used; the other parameters belong to the common
 * process_head signature.
 */
.macro over_white_8888_8888_ca_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
 .if numbytes == 4
        over_white_8888_8888_ca_1pixel_head
 .else
  .if numbytes == 16
        over_white_8888_8888_ca_2pixels_head
        over_white_8888_8888_ca_2pixels_tail
  .endif
        over_white_8888_8888_ca_2pixels_head
 .endif
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * process_tail hook for over_white_8888_8888_ca: finish the single pixel
 * (numbytes == 4) or the last pair of pixels (any other size) that
 * process_head left loaded.  cond and firstreg are not used.
 */
.macro over_white_8888_8888_ca_process_tail  cond, numbytes, firstreg
 .if numbytes == 4
        over_white_8888_8888_ca_1pixel_tail
 .else
        over_white_8888_8888_ca_2pixels_tail
 .endif
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Instantiate pixman_composite_over_white_8888_8888_ca_asm_armv6 from the
 * macros above.  Arguments, in order: function name; three numeric values
 * (presumably source, mask and destination bits per pixel - confirm against
 * the generate_composite_function definition); flags; prefetch distance;
 * then the init, newline, cleanup, process_head and process_tail hooks.
 */
generate_composite_function \
    pixman_composite_over_white_8888_8888_ca_asm_armv6, 0, 32, 32 \
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH \
    2, /* prefetch distance */ \
    over_white_8888_8888_ca_init, \
    nop_macro, /* newline */ \
    over_white_8888_8888_ca_cleanup, \
    over_white_8888_8888_ca_process_head, \
    over_white_8888_8888_ca_process_tail
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Init hook for over_n_8888_8888_ca (constant source, component-alpha mask,
 * 8888 destination).
 *
 * Run-time effects:
 *   - pushes four words; from the new sp upwards they are
 *       +0  RB_FLDS               0x00FF00FF
 *       +4  HALF, default         0x00000080
 *       +8  A_SRC                 source alpha (source >> 24)
 *       +12 HALF, alternate       0x00800080
 *     so an LDRD at +0 fetches {RB_FLDS, HALF} and one at +8 fetches
 *     {A_SRC, HALF}
 *   - SRC (alias RB_SRC)      = source bytes 0 and 2 widened to 16 bits
 *   - STRIDE_S (alias AG_SRC) = source bytes 1 and 3 widened to 16 bits
 *   - GE[3:0] = 0101
 *   - SCRATCH is clobbered
 *
 * Assembly-time effects:
 *   - ARGS_STACK_OFFSET grows by 16 to account for the push
 *   - WK0-WK3 are re-pointed and WK4-WK6, RB_SRC, AG_SRC, RB_FLDS, A_SRC and
 *     HALF are created; RB_FLDS and A_SRC deliberately share r8 because they
 *     are never live at the same time.  over_n_8888_8888_ca_cleanup restores
 *     the original aliases.
 */
.macro over_n_8888_8888_ca_init
        /* Set up constants. RB_SRC and AG_SRC are in registers;
         * RB_FLDS, A_SRC, and the two HALF values need to go on the
         * stack (and the full SRC value is already there) */
        ldr     SCRATCH, [sp, #ARGS_STACK_OFFSET]
        mov     WK0, #0x00FF0000
        orr     WK0, WK0, #0xFF        /* RB_FLDS (0x00FF00FF) */
        mov     WK1, #0x80             /* HALF default value */
        mov     WK2, SCRATCH, lsr #24  /* A_SRC */
        orr     WK3, WK1, WK1, lsl #16 /* HALF alternate value (0x00800080) */
        push    {WK0-WK3}
 .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET+16
        uxtb16  SRC, SCRATCH
        uxtb16  STRIDE_S, SCRATCH, ror #8

        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
        uadd8   SCRATCH, WK3, WK3

        .unreq  WK0
        .unreq  WK1
        .unreq  WK2
        .unreq  WK3
 WK0     .req    Y
 WK1     .req    STRIDE_D
 RB_SRC  .req    SRC
 AG_SRC  .req    STRIDE_S
 WK2     .req    STRIDE_M
 RB_FLDS .req    r8       /* the reloaded constants have to be at consecutive registers starting at an even one */
 A_SRC   .req    r8
 HALF    .req    r9
 WK3     .req    r10
 WK4     .req    r11
 WK5     .req    SCRATCH
 WK6     .req    ORIG_W

        line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Cleanup hook for over_n_8888_8888_ca: reverse over_n_8888_8888_ca_init.
 *
 * Releases the 16 bytes of constants that init pushed, moves
 * ARGS_STACK_OFFSET back by the same amount, removes every alias init
 * created, and re-points WK0-WK3 at r8-r11.
 */
.macro over_n_8888_8888_ca_cleanup
        add     sp, sp, #16
 .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET-16

        .unreq  WK0
        .unreq  WK1
        .unreq  RB_SRC
        .unreq  AG_SRC
        .unreq  WK2
        .unreq  RB_FLDS
        .unreq  A_SRC
        .unreq  HALF
        .unreq  WK3
        .unreq  WK4
        .unreq  WK5
        .unreq  WK6
 WK0     .req    r8
 WK1     .req    r9
 WK2     .req    r10
 WK3     .req    r11
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Load stage for a single pixel of over_n_8888_8888_ca: request 4 bytes of
 * mask into WK6 and 4 bytes of destination into WK0.
 */
.macro over_n_8888_8888_ca_1pixel_head
        pixld   , 4, 6, MASK, 0
        pixld   , 4, 0, DST, 0
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Compute-and-store stage for a single pixel of over_n_8888_8888_ca.
 * On entry WK6 holds the mask pixel and WK0 the destination pixel.
 *
 * The mask is classified with "teq x, x, asr #32" and handled as follows:
 *   mask == 0                       -> destination unchanged, no store (40)
 *   mask == 0xFFFFFFFF, source
 *        alpha == 0xFF              -> destination = source
 *   mask == 0xFFFFFFFF, otherwise   -> plain "over" with the constant
 *                                      source (label 10)
 *   any other mask                  -> full component-alpha computation
 *                                      (label 20)
 *
 * The constants are re-read from the block pushed by the init macro because
 * there are not enough registers to keep them live: the first LDRD fetches
 * {A_SRC, HALF = 0x00800080}, the one in the hard path fetches
 * {RB_FLDS, HALF = 0x80} and overwrites A_SRC, which shares r8.
 * NOTE(review): this relies on LOCALS_STACK_OFFSET (defined elsewhere)
 * addressing that pushed block - confirm.
 *
 * Clobbers WK1-WK6, r8, r9 and the flags.  The instruction order in the hard
 * path is interleaved for scheduling; do not reorder.
 */
.macro over_n_8888_8888_ca_1pixel_tail
        ldrd    A_SRC, HALF, [sp, #LOCALS_STACK_OFFSET+8]
        uxtb16  WK1, WK6                 /* rb_mask (first step of hard case placed in what would otherwise be a stall) */
        teq     WK6, WK6, asr #32        /* Zc if transparent, ZC if opaque */
        bne     20f
        bcc     40f
        /* Mask is fully opaque (all channels) */
        ldr     WK6, [sp, #ARGS_STACK_OFFSET] /* get SRC back */
        eors    A_SRC, A_SRC, #0xFF
        bne     10f
        /* Source is also opaque - same as src_8888_8888 */
        mov     WK0, WK6
        b       30f
10:     /* Same as over_8888_8888 */
        mul_8888_8 WK0, A_SRC, WK5, HALF
        uqadd8  WK0, WK0, WK6
        b       30f
20:     /* No simplifications possible - do it the hard way */
        uxtb16  WK2, WK6, ror #8         /* ag_mask */
        mla     WK3, WK1, A_SRC, HALF    /* rb_mul; 2 cycles */
        mla     WK4, WK2, A_SRC, HALF    /* ag_mul; 2 cycles */
        ldrd    RB_FLDS, HALF, [sp, #LOCALS_STACK_OFFSET]
        uxtb16  WK5, WK0                 /* rb_dest */
        uxtab16 WK3, WK3, WK3, ror #8
        uxtb16  WK6, WK0, ror #8         /* ag_dest */
        uxtab16 WK4, WK4, WK4, ror #8
        smlatt  WK0, RB_SRC, WK1, HALF   /* red1 */
        smlabb  WK1, RB_SRC, WK1, HALF   /* blue1 */
        bic     WK3, RB_FLDS, WK3, lsr #8
        bic     WK4, RB_FLDS, WK4, lsr #8
        pkhbt   WK1, WK1, WK0, lsl #16   /* rb1 */
        smlatt  WK0, WK5, WK3, HALF      /* red2 */
        smlabb  WK3, WK5, WK3, HALF      /* blue2 */
        uxtab16 WK1, WK1, WK1, ror #8
        smlatt  WK5, AG_SRC, WK2, HALF   /* alpha1 */
        pkhbt   WK3, WK3, WK0, lsl #16   /* rb2 */
        smlabb  WK0, AG_SRC, WK2, HALF   /* green1 */
        smlatt  WK2, WK6, WK4, HALF      /* alpha2 */
        smlabb  WK4, WK6, WK4, HALF      /* green2 */
        pkhbt   WK0, WK0, WK5, lsl #16   /* ag1 */
        uxtab16 WK3, WK3, WK3, ror #8
        pkhbt   WK4, WK4, WK2, lsl #16   /* ag2 */
        uxtab16 WK0, WK0, WK0, ror #8
        uxtab16 WK4, WK4, WK4, ror #8
        mov     WK1, WK1, ror #8
        mov     WK3, WK3, ror #8
        sel     WK2, WK1, WK0            /* recombine source*mask */
        sel     WK1, WK3, WK4            /* recombine dest*(1-source_alpha*mask) */
        uqadd8  WK0, WK1, WK2            /* followed by 1 stall */
30:     /* The destination buffer is already in the L1 cache, so
         * there's little point in amalgamating writes */
        pixst   , 4, 0, DST
40:
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * process_head hook for over_n_8888_8888_ca.
 *
 * This operation only ever works on one pixel at a time, so a group of
 * numbytes/4 pixels is handled by fully processing (load, compute, store) all
 * but the last pixel here and merely loading the last one; process_tail then
 * finishes it.  Only numbytes is used.
 */
.macro over_n_8888_8888_ca_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
 .rept (numbytes / 4) - 1
        over_n_8888_8888_ca_1pixel_head
        over_n_8888_8888_ca_1pixel_tail
 .endr
        over_n_8888_8888_ca_1pixel_head
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * process_tail hook for over_n_8888_8888_ca: compute and store the one pixel
 * that process_head left loaded.  None of the parameters are used.
 */
.macro over_n_8888_8888_ca_process_tail  cond, numbytes, firstreg
        over_n_8888_8888_ca_1pixel_tail
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Public entry point for over_n_8888_8888_ca.
 *
 * Reads the first stacked argument (the same slot the init macros read the
 * constant source from) and, when it is 0xFFFFFFFF, tail-branches to the
 * cheaper white-source implementation.  Any other value falls straight
 * through into the general-purpose helper, so the helper generated by the
 * following generate_composite_function must stay immediately after this
 * stub.  Only ip and the flags are modified here.
 */
pixman_asm_function pixman_composite_over_n_8888_8888_ca_asm_armv6
        ldr     ip, [sp]
        cmp     ip, #-1
        beq     pixman_composite_over_white_8888_8888_ca_asm_armv6
        /* else drop through... */
 .endfunc
|
|
Packit |
030a23 |
/*
 * Instantiate the general-case helper that the
 * pixman_composite_over_n_8888_8888_ca_asm_armv6 stub falls through into.
 * Arguments, in order: function name; three numeric values (presumably
 * source, mask and destination bits per pixel - confirm against the
 * generate_composite_function definition); flags; prefetch distance; then
 * the init, newline, cleanup, process_head and process_tail hooks.
 */
generate_composite_function \
    pixman_composite_over_n_8888_8888_ca_asm_armv6_helper, 0, 32, 32 \
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_PROCESS_CORRUPTS_WK0 \
    2, /* prefetch distance */ \
    over_n_8888_8888_ca_init, \
    nop_macro, /* newline */ \
    over_n_8888_8888_ca_cleanup, \
    over_n_8888_8888_ca_process_head, \
    over_n_8888_8888_ca_process_tail
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/******************************************************************************/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Init hook for in_reverse_8888_8888 (destination scaled by source alpha).
 *
 * On exit:
 *   MASK    = 0x00800080
 *   GE[3:0] = 0101
 *   SRC     = advanced by 3 bytes, so that byte loads with a stride of 4
 *             pick up byte 3 of each source pixel
 *   SCRATCH = clobbered
 */
.macro in_reverse_8888_8888_init
        /* Hold loop invariant in MASK */
        ldr     MASK, =0x00800080
        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
        uadd8   SCRATCH, MASK, MASK
        /* Offset the source pointer: we only need the alpha bytes */
        add     SRC, SRC, #3
        line_saved_regs  ORIG_W
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Load stage for in_reverse_8888_8888, with register indices already
 * expanded.
 *
 *   numbytes  - 4, 8 or 16
 *   reg1-reg3 - indices of the WK registers receiving the alpha bytes of the
 *               second, third and fourth pixel
 *
 * The first pixel's alpha byte always goes to ORIG_W; each byte load
 * post-increments SRC by one pixel.  Destination pixels are not loaded here:
 * DST is merely advanced by numbytes, and the tail macro addresses the
 * pixels with negative offsets.
 */
.macro in_reverse_8888_8888_head  numbytes, reg1, reg2, reg3
        ldrb    ORIG_W, [SRC], #4
 .if numbytes >= 8
        ldrb    WK&reg1, [SRC], #4
  .if numbytes == 16
        ldrb    WK&reg2, [SRC], #4
        ldrb    WK&reg3, [SRC], #4
  .endif
 .endif
        add     DST, DST, #numbytes
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * process_head hook for in_reverse_8888_8888: expand firstreg into three
 * consecutive register indices and hand over to in_reverse_8888_8888_head.
 * cond, unaligned_src, unaligned_mask and preload are not used.
 */
.macro in_reverse_8888_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
        in_reverse_8888_8888_head  numbytes, firstreg, %(firstreg+1), %(firstreg+2)
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Scale one destination pixel by a source alpha for in_reverse_8888_8888.
 *
 *   s       - register that holds / receives this pixel's source alpha
 *   d       - register holding the destination pixel; receives the result
 *   offset  - offset from SRC of the NEXT pixel's alpha byte, fetched into
 *             ORIG_W while this pixel is processed; 0 means "last pixel,
 *             nothing further to fetch"
 *   is_only - 1 when the group is a single pixel: the alpha is then already
 *             in s and the zero / opaque shortcuts are not emitted
 *
 * For is_only != 1 the alpha is first moved from ORIG_W into s; an alpha of
 * 0 zeroes d (label 01) and an alpha of 0xFF leaves d untouched (label 02).
 * LDRB does not alter the flags, so the BEQ after it still tests the MOVS.
 * NOTE(review): the opaque test names STRIDE_M rather than s; every
 * non-single call site in this file passes STRIDE_M as s, so the two are the
 * same register - keep them in step if that ever changes.
 *
 * When offset == 0, label 48 is additionally placed on the "mov d, #0"; the
 * tail macro jumps there for its all-zero shortcut.
 *
 * Expects MASK = 0x00800080 and GE[3:0] = 0101.  Clobbers SCRATCH and, for
 * is_only != 1, the flags and ORIG_W.
 */
.macro in_reverse_8888_8888_1pixel  s, d, offset, is_only
 .if is_only != 1
        movs    s, ORIG_W
  .if offset != 0
        ldrb    ORIG_W, [SRC, #offset]
  .endif
        beq     01f
        teq     STRIDE_M, #0xFF
        beq     02f
 .endif
        uxtb16  SCRATCH, d                 /* rb_dest */
        uxtb16  d, d, ror #8               /* ag_dest */
        mla     SCRATCH, SCRATCH, s, MASK
        mla     d, d, s, MASK
        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
        uxtab16 d, d, d, ror #8
        mov     SCRATCH, SCRATCH, ror #8
        sel     d, SCRATCH, d
        b       02f
 .if offset == 0
48:     /* Last mov d,#0 of the set - used as part of shortcut for
         * source values all 0 */
 .endif
01:     mov     d, #0
02:
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Compute-and-store stage for in_reverse_8888_8888, with register indices
 * already expanded.
 *
 *   numbytes  - 4, 8 or 16
 *   reg1-reg4 - indices of the WK registers used for the destination pixels
 *
 * On entry ORIG_W (and, for larger groups, WK<reg1>..WK<reg3>) hold the
 * source alpha bytes loaded by in_reverse_8888_8888_head, and DST already
 * points past the group.
 *
 * Step 1 checks whether all alphas are equal and either 0 or -1.  Only if
 *        they are not (NE) are the destination pixels loaded, overwriting
 *        the alpha copies in the WK registers; the per-pixel macro re-reads
 *        the alphas from SRC as it goes.
 * Step 2 dispatches on the flags: C set skips the store, Z set jumps to
 *        label 48 inside the last per-pixel expansion, which zeroes the last
 *        register (the others still contain the zero alphas) and falls into
 *        the store.
 * Step 3 processes each pixel and stores the group.
 *
 * NOTE(review): the alphas come from LDRB and are therefore zero-extended,
 * so "x, asr #32" always yields 0 with carry 0, and CMNNE clears C as well;
 * the "bcs 49f" all-opaque shortcut therefore looks unreachable.  Opaque
 * alphas are still handled correctly by the per-pixel code.  Confirm before
 * relying on or removing it.
 */
.macro in_reverse_8888_8888_tail  numbytes, reg1, reg2, reg3, reg4
 .if numbytes == 4
        teq     ORIG_W, ORIG_W, asr #32
        ldrne   WK&reg1, [DST, #-4]
 .elseif numbytes == 8
        teq     ORIG_W, WK&reg1
        teqeq   ORIG_W, ORIG_W, asr #32  /* all 0 or all -1? */
        ldmnedb DST, {WK&reg1-WK&reg2}
 .else
        teq     ORIG_W, WK&reg1
        teqeq   ORIG_W, WK&reg2
        teqeq   ORIG_W, WK&reg3
        teqeq   ORIG_W, ORIG_W, asr #32  /* all 0 or all -1? */
        ldmnedb DST, {WK&reg1-WK&reg4}
 .endif
        cmnne   DST, #0             /* clear C if NE */
        bcs     49f                 /* no writes to dest if source all -1 */
        beq     48f                 /* set dest to all 0 if source all 0 */
 .if numbytes == 4
        in_reverse_8888_8888_1pixel  ORIG_W, WK&reg1, 0, 1
        str     WK&reg1, [DST, #-4]
 .elseif numbytes == 8
        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg1, -4, 0
        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg2, 0, 0
        stmdb   DST, {WK&reg1-WK&reg2}
 .else
        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg1, -12, 0
        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg2, -8, 0
        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg3, -4, 0
        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg4, 0, 0
        stmdb   DST, {WK&reg1-WK&reg4}
 .endif
49:
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * process_tail hook for in_reverse_8888_8888: expand firstreg into four
 * consecutive register indices and hand over to in_reverse_8888_8888_tail.
 * cond is not used.
 */
.macro in_reverse_8888_8888_process_tail  cond, numbytes, firstreg
        in_reverse_8888_8888_tail  numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Instantiate pixman_composite_in_reverse_8888_8888_asm_armv6 from the macros
 * above.  Arguments, in order: function name; three numeric values
 * (presumably source, mask and destination bits per pixel - confirm against
 * the generate_composite_function definition); flags; prefetch distance;
 * then the init, newline, cleanup, process_head and process_tail hooks.
 */
generate_composite_function \
    pixman_composite_in_reverse_8888_8888_asm_armv6, 32, 0, 32 \
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_NO_PRELOAD_DST \
    2, /* prefetch distance */ \
    in_reverse_8888_8888_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    in_reverse_8888_8888_process_head, \
    in_reverse_8888_8888_process_tail
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/******************************************************************************/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Init hook for over_n_8888 (constant source over an 8888 destination).
 *
 * On exit:
 *   SRC      = the constant source word read from the stacked arguments
 *   MASK     = 0x00800080
 *   STRIDE_M = 255 - (SRC >> 24), the factor every destination pixel is
 *              multiplied by
 *   GE[3:0]  = 0101
 *   SCRATCH  = clobbered
 */
.macro over_n_8888_init
        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
        /* Hold loop invariant in MASK */
        ldr     MASK, =0x00800080
        /* Hold multiplier for destination in STRIDE_M */
        mov     STRIDE_M, #255
        sub     STRIDE_M, STRIDE_M, SRC, lsr #24
        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
        uadd8   SCRATCH, MASK, MASK
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Load stage for over_n_8888: request numbytes of destination into the
 * registers starting at WK<firstreg>.  The source is a constant, so nothing
 * else needs loading; the remaining parameters belong to the common
 * process_head signature and are not used here.
 */
.macro over_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
        pixld   , numbytes, firstreg, DST, 0
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Composite one pixel for over_n_8888.
 *
 *   dst - index N of the WK register holding the destination pixel; receives
 *         the result
 *
 * Scales the destination by STRIDE_M (255 - source alpha, from the init
 * macro) and then adds the constant source with per-byte saturation.
 * Expects MASK = 0x00800080; SCRATCH is handed to mul_8888_8 as its
 * temporary.
 */
.macro over_n_8888_1pixel dst
        mul_8888_8  WK&dst, STRIDE_M, SCRATCH, MASK
        uqadd8  WK&dst, WK&dst, SRC
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Compute-and-store stage for over_n_8888: composite numbytes/4 consecutive
 * destination registers, starting at WK<firstreg>, and store them with
 * pixst.  There are no shortcuts, so the store is unconditional.
 * cond is not used.
 */
.macro over_n_8888_process_tail cond, numbytes, firstreg
 .set PROCESS_REG, firstreg
 .rept numbytes / 4
        over_n_8888_1pixel %(PROCESS_REG)
  .set PROCESS_REG, PROCESS_REG+1
 .endr
        pixst   , numbytes, firstreg, DST
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Instantiate pixman_composite_over_n_8888_asm_armv6 from the macros above.
 * Arguments, in order: function name; three numeric values (presumably
 * source, mask and destination bits per pixel - confirm against the
 * generate_composite_function definition); flags; prefetch distance; then
 * the init, newline, cleanup, process_head and process_tail hooks.
 */
generate_composite_function \
    pixman_composite_over_n_8888_asm_armv6, 0, 0, 32 \
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE \
    2, /* prefetch distance */ \
    over_n_8888_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    over_n_8888_process_head, \
    over_n_8888_process_tail
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/******************************************************************************/
|