Blame pixman/pixman-arm-simd-asm.S

Packit 030a23
/*
 * Copyright © 2012 Raspberry Pi Foundation
 * Copyright © 2012 RISC OS Open Ltd
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of the copyright holders not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission.  The copyright holders make no
 * representations about the suitability of this software for any purpose.  It
 * is provided "as is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author:  Ben Avison (bavison@riscosopen.org)
 *
 */
Packit 030a23
Packit 030a23
/* Prevent the stack from becoming executable: on Linux/ELF targets emit an
 * empty .note.GNU-stack section for objects built from this file. */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
Packit 030a23
Packit 030a23
	/* Assembler mode for the whole file. */
	.text
	.arch armv6		/* accept ARMv6 instructions (uadd8, sel, uxtb16, ...) */
	.object_arch armv4	/* but record ARMv4 as the object file architecture */
	.arm			/* ARM (not Thumb) instruction encoding */
	.altmacro		/* the macros below rely on %(expr) argument evaluation */
	.p2align 2		/* align to a 4-byte boundary */
Packit 030a23
Packit 030a23
#include "pixman-arm-asm.h"
Packit 030a23
#include "pixman-arm-simd-asm.h"
Packit 030a23
Packit 030a23
/* A head macro should do all processing which results in an output of up to
 * 16 bytes, as far as the final load instruction. The corresponding tail macro
 * should complete the processing of the up-to-16 bytes. The calling macro will
 * sometimes choose to insert a preload or a decrement of X between them.
 *   cond           ARM condition code for code block
 *   numbytes       Number of output bytes that should be generated this time
 *   firstreg       First WK register in which to place output
 *   unaligned_src  Whether to use non-wordaligned loads of source image
 *   unaligned_mask Whether to use non-wordaligned loads of mask image
 *   preload        If outputting 16 bytes causes 64 bytes to be read, whether
 *                  an extra preload should be output
 */
Packit 030a23
Packit 030a23
/* Init hook shared by the plain copy (blit) functions.
 * Passes STRIDE_D and STRIDE_S to line_saved_regs (defined in
 * pixman-arm-simd-asm.h). blit_inner_loop reuses those two registers as
 * WK4/WK5, so they are presumably saved and restored per line - confirm
 * against the header.
 */
.macro blit_init
        line_saved_regs STRIDE_D, STRIDE_S
.endm
Packit 030a23
Packit 030a23
/* Head macro for the blit functions: load numbytes of source pixels from SRC
 * into the work registers starting at WK<firstreg>, under condition cond.
 * No processing is needed for a copy, so the matching tail is nop_macro.
 * unaligned_mask and preload are accepted for interface compatibility but
 * are not used here.
 */
.macro blit_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
        pixld   cond, numbytes, firstreg, SRC, unaligned_src
.endm
Packit 030a23
Packit 030a23
/* Custom inner loop for the blit functions, moving 32 bytes per iteration.
 * Four extra work registers are borrowed for the duration of the loop by
 * aliasing WK4-WK7 onto STRIDE_D, STRIDE_S, MASK and STRIDE_M; the aliases
 * are removed again before the macro ends.
 * process_head, process_tail, unaligned_mask and dst_alignment are part of
 * the inner-loop macro interface but are not referenced in this body.
 */
.macro blit_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment
    WK4     .req    STRIDE_D
    WK5     .req    STRIDE_S
    WK6     .req    MASK
    WK7     .req    STRIDE_M
        /* Two 16-byte loads into WK0-3 and WK4-7 */
110:    pixld   , 16, 0, SRC, unaligned_src
        pixld   , 16, 4, SRC, unaligned_src
        /* Prefetch ahead of SRC; SCRATCH supplies the offset */
        pld     [SRC, SCRATCH]
        pixst   , 16, 0, DST
        pixst   , 16, 4, DST
        /* X counts pixels: 32 bytes is 32*8/src_bpp of them.
         * Loop again while the subtraction did not borrow. */
        subs    X, X, #32*8/src_bpp
        bhs     110b
    .unreq  WK4
    .unreq  WK5
    .unreq  WK6
    .unreq  WK7
.endm
Packit 030a23
Packit 030a23
/* 32bpp to 32bpp copy. generate_composite_function (pixman-arm-simd-asm.h)
 * builds the function; the three numbers after the symbol name are
 * presumably source, mask and destination bits per pixel - confirm against
 * the header. The final argument supplies the custom inner loop.
 */
generate_composite_function \
    pixman_composite_src_8888_8888_asm_armv6, 32, 0, 32, \
    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
    4, /* prefetch distance */ \
    blit_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    blit_process_head, \
    nop_macro, /* process tail */ \
    blit_inner_loop
Packit 030a23
Packit 030a23
/* 16bpp to 16bpp copy; same hooks as the 32bpp blit, only the pixel sizes
 * passed to generate_composite_function differ.
 */
generate_composite_function \
    pixman_composite_src_0565_0565_asm_armv6, 16, 0, 16, \
    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
    4, /* prefetch distance */ \
    blit_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    blit_process_head, \
    nop_macro, /* process tail */ \
    blit_inner_loop
Packit 030a23
Packit 030a23
/* 8bpp to 8bpp copy; same hooks as the other blits, with a prefetch
 * distance of 3 instead of 4.
 */
generate_composite_function \
    pixman_composite_src_8_8_asm_armv6, 8, 0, 8, \
    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
    3, /* prefetch distance */ \
    blit_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    blit_process_head, \
    nop_macro, /* process tail */ \
    blit_inner_loop
Packit 030a23
Packit 030a23
/******************************************************************************/
Packit 030a23
Packit 030a23
/* Init hook for solid fill of a 32bpp destination.
 * Loads the 32-bit colour from the stack argument at ARGS_STACK_OFFSET and
 * copies it into SRC, STRIDE_S, MASK and STRIDE_M, the four registers that
 * fill_process_tail stores from.
 */
.macro src_n_8888_init
        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
        mov     STRIDE_S, SRC
        mov     MASK, SRC
        mov     STRIDE_M, SRC
.endm
Packit 030a23
Packit 030a23
/* Init hook for solid fill of a 16bpp destination.
 * Loads the 16-bit colour, replicates it into both halfwords of SRC and then
 * copies that word into the other three registers used by fill_process_tail.
 */
.macro src_n_0565_init
        ldrh    SRC, [sp, #ARGS_STACK_OFFSET]
        /* SRC = SRC | (SRC << 16) */
        orr     SRC, SRC, lsl #16
        mov     STRIDE_S, SRC
        mov     MASK, SRC
        mov     STRIDE_M, SRC
.endm
Packit 030a23
Packit 030a23
/* Init hook for solid fill of an 8bpp destination.
 * Loads the 8-bit value, replicates it into all four bytes of SRC and then
 * copies that word into the other three registers used by fill_process_tail.
 */
.macro src_n_8_init
        ldrb    SRC, [sp, #ARGS_STACK_OFFSET]
        /* Byte -> halfword -> word replication */
        orr     SRC, SRC, lsl #8
        orr     SRC, SRC, lsl #16
        mov     STRIDE_S, SRC
        mov     MASK, SRC
        mov     STRIDE_M, SRC
.endm
Packit 030a23
Packit 030a23
/* Tail macro for the solid fill functions: store numbytes of the replicated
 * colour to DST under condition cond.
 * WK4-WK7 are temporarily aliased onto SRC, STRIDE_S, MASK and STRIDE_M
 * (all loaded with the colour by the src_n_*_init macros), and the store is
 * always issued from WK4 onwards; firstreg is therefore not used.
 */
.macro fill_process_tail  cond, numbytes, firstreg
    WK4     .req    SRC
    WK5     .req    STRIDE_S
    WK6     .req    MASK
    WK7     .req    STRIDE_M
        pixst   cond, numbytes, 4, DST
    .unreq  WK4
    .unreq  WK5
    .unreq  WK6
    .unreq  WK7
.endm
Packit 030a23
Packit 030a23
/* Solid fill of a 32bpp destination. There is no source image (source bpp
 * argument is 0), so no head macro is needed and the tail does the store.
 * Every macro argument is separated by an explicit comma, matching the
 * other invocations in this file.
 */
generate_composite_function \
    pixman_composite_src_n_8888_asm_armv6, 0, 0, 32, \
    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH, \
    0, /* prefetch distance doesn't apply */ \
    src_n_8888_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    nop_macro, /* process head */ \
    fill_process_tail
Packit 030a23
Packit 030a23
/* Solid fill of a 16bpp destination; see src_n_0565_init for how the colour
 * is replicated. Every macro argument is separated by an explicit comma,
 * matching the other invocations in this file.
 */
generate_composite_function \
    pixman_composite_src_n_0565_asm_armv6, 0, 0, 16, \
    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH, \
    0, /* prefetch distance doesn't apply */ \
    src_n_0565_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    nop_macro, /* process head */ \
    fill_process_tail
Packit 030a23
Packit 030a23
/* Solid fill of an 8bpp destination; see src_n_8_init for how the value is
 * replicated. Every macro argument is separated by an explicit comma,
 * matching the other invocations in this file.
 */
generate_composite_function \
    pixman_composite_src_n_8_asm_armv6, 0, 0, 8, \
    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH, \
    0, /* prefetch distance doesn't apply */ \
    src_n_8_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    nop_macro, /* process head */ \
    fill_process_tail
Packit 030a23
Packit 030a23
/******************************************************************************/
Packit 030a23
Packit 030a23
/* Convert one x8r8g8b8 pixel held in WK<reg> to a8r8g8b8 by forcing the top
 * (alpha) byte to 0xFF. Executed under condition cond.
 */
.macro src_x888_8888_pixel, cond, reg
        orr&cond WK&reg, WK&reg, #0xFF000000
.endm
Packit 030a23
Packit 030a23
/* Head macro for x888 -> 8888: load numbytes of source pixels from SRC into
 * the work registers starting at WK<firstreg>. The alpha fix-up is done in
 * the tail macro.
 */
.macro pixman_composite_src_x888_8888_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
        pixld   cond, numbytes, firstreg, SRC, unaligned_src
.endm
Packit 030a23
Packit 030a23
/* Tail macro for x888 -> 8888: set alpha to 0xFF in each loaded pixel.
 * One register is always processed; a second when numbytes >= 8 and a
 * third and fourth when numbytes == 16.
 */
.macro pixman_composite_src_x888_8888_process_tail   cond, numbytes, firstreg
        src_x888_8888_pixel cond, %(firstreg+0)
 .if numbytes >= 8
        src_x888_8888_pixel cond, %(firstreg+1)
  .if numbytes == 16
        src_x888_8888_pixel cond, %(firstreg+2)
        src_x888_8888_pixel cond, %(firstreg+3)
  .endif
 .endif
.endm
Packit 030a23
Packit 030a23
/* x8r8g8b8 -> a8r8g8b8 copy (alpha forced to opaque). Uses the default inner
 * loop of generate_composite_function; no init, newline or cleanup hooks.
 */
generate_composite_function \
    pixman_composite_src_x888_8888_asm_armv6, 32, 0, 32, \
    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_SCRATCH, \
    3, /* prefetch distance */ \
    nop_macro, /* init */ \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    pixman_composite_src_x888_8888_process_head, \
    pixman_composite_src_x888_8888_process_tail
Packit 030a23
Packit 030a23
/******************************************************************************/
Packit 030a23
Packit 030a23
/* Init hook for r5g6b5 -> a8r8g8b8 conversion.
 * Sets up the two constants and the GE flags that the pixel macros rely on:
 *   MASK     = 0x07E007E0  green field of both packed 565 pixels
 *   STRIDE_M = 0xFF000000  opaque alpha byte
 *   GE[3:0]  = 1010        byte-select pattern for SEL
 * SCRATCH is overwritten.
 */
.macro src_0565_8888_init
        /* Hold loop invariants in MASK and STRIDE_M */
        ldr     MASK, =0x07E007E0
        mov     STRIDE_M, #0xFF000000
        /* Set GE[3:0] to 1010 so SEL instructions do what we want */
        ldr     SCRATCH, =0x80008000
        /* 0x80 + 0x80 carries out of bytes 3 and 1 only */
        uadd8   SCRATCH, SCRATCH, SCRATCH
.endm
Packit 030a23
Packit 030a23
/* Expand two packed r5g6b5 pixels into two a8r8g8b8 pixels.
 *   reg1  in:  WK<reg1> holds both 565 pixels (upper-case = high halfword,
 *              lower-case = low halfword in the bit diagrams below)
 *         out: WK<reg1> = 8888 form of the low-halfword pixel
 *   reg2  out: WK<reg2> = 8888 form of the high-halfword pixel
 * Requires MASK, STRIDE_M and GE[3:0] as set up by src_0565_8888_init.
 * Clobbers SCRATCH. The right-hand comments show the register contents
 * after each instruction, most significant bit first.
 */
.macro src_0565_8888_2pixels, reg1, reg2
        and     SCRATCH, WK&reg1, MASK             @ 00000GGGGGG0000000000gggggg00000
        bic     WK&reg2, WK&reg1, MASK             @ RRRRR000000BBBBBrrrrr000000bbbbb
        orr     SCRATCH, SCRATCH, SCRATCH, lsr #6  @ 00000GGGGGGGGGGGG0000ggggggggggg
        mov     WK&reg1, WK&reg2, lsl #16          @ rrrrr000000bbbbb0000000000000000
        mov     SCRATCH, SCRATCH, ror #19          @ GGGG0000ggggggggggg00000GGGGGGGG
        bic     WK&reg2, WK&reg2, WK&reg1, lsr #16 @ RRRRR000000BBBBB0000000000000000
        orr     WK&reg1, WK&reg1, WK&reg1, lsr #5  @ rrrrrrrrrr0bbbbbbbbbb00000000000
        orr     WK&reg2, WK&reg2, WK&reg2, lsr #5  @ RRRRRRRRRR0BBBBBBBBBB00000000000
        pkhtb   WK&reg1, WK&reg1, WK&reg1, asr #5  @ rrrrrrrr--------bbbbbbbb--------
        sel     WK&reg1, WK&reg1, SCRATCH          @ rrrrrrrrggggggggbbbbbbbb--------
        mov     SCRATCH, SCRATCH, ror #16          @ ggg00000GGGGGGGGGGGG0000gggggggg
        pkhtb   WK&reg2, WK&reg2, WK&reg2, asr #5  @ RRRRRRRR--------BBBBBBBB--------
        sel     WK&reg2, WK&reg2, SCRATCH          @ RRRRRRRRGGGGGGGGBBBBBBBB--------
        orr     WK&reg1, STRIDE_M, WK&reg1, lsr #8 @ 11111111rrrrrrrrggggggggbbbbbbbb
        orr     WK&reg2, STRIDE_M, WK&reg2, lsr #8 @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB
.endm
Packit 030a23
Packit 030a23
/* This version doesn't need STRIDE_M, but is one instruction longer.
   It would however be preferable for an XRGB target, since we could knock off
   the last 2 instructions, but is that a common case?
        and     SCRATCH, WK&reg1, MASK             @ 00000GGGGGG0000000000gggggg00000
        bic     WK&reg1, WK&reg1, MASK             @ RRRRR000000BBBBBrrrrr000000bbbbb
        orr     SCRATCH, SCRATCH, SCRATCH, lsr #6  @ 00000GGGGGGGGGGGG0000ggggggggggg
        mov     WK&reg2, WK&reg1, lsr #16          @ 0000000000000000RRRRR000000BBBBB
        mov     SCRATCH, SCRATCH, ror #27          @ GGGGGGGGGGGG0000ggggggggggg00000
        bic     WK&reg1, WK&reg1, WK&reg2, lsl #16 @ 0000000000000000rrrrr000000bbbbb
        mov     WK&reg2, WK&reg2, lsl #3           @ 0000000000000RRRRR000000BBBBB000
        mov     WK&reg1, WK&reg1, lsl #3           @ 0000000000000rrrrr000000bbbbb000
        orr     WK&reg2, WK&reg2, WK&reg2, lsr #5  @ 0000000000000RRRRRRRRRR0BBBBBBBB
        orr     WK&reg1, WK&reg1, WK&reg1, lsr #5  @ 0000000000000rrrrrrrrrr0bbbbbbbb
        pkhbt   WK&reg2, WK&reg2, WK&reg2, lsl #5  @ --------RRRRRRRR--------BBBBBBBB
        pkhbt   WK&reg1, WK&reg1, WK&reg1, lsl #5  @ --------rrrrrrrr--------bbbbbbbb
        sel     WK&reg2, SCRATCH, WK&reg2          @ --------RRRRRRRRGGGGGGGGBBBBBBBB
        sel     WK&reg1, SCRATCH, WK&reg1          @ --------rrrrrrrrggggggggbbbbbbbb
        orr     WK&reg2, WK&reg2, #0xFF000000      @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB
        orr     WK&reg1, WK&reg1, #0xFF000000      @ 11111111rrrrrrrrggggggggbbbbbbbb
*/
Packit 030a23
Packit 030a23
/* Expand a single r5g6b5 pixel, held in the low halfword of WK<reg>, into an
 * a8r8g8b8 pixel in the same register.
 * Requires MASK and GE[3:0] as set up by src_0565_8888_init.
 * Clobbers SCRATCH. The right-hand comments show the register contents
 * after each instruction, most significant bit first.
 */
.macro src_0565_8888_1pixel, reg
        bic     SCRATCH, WK&reg, MASK              @ 0000000000000000rrrrr000000bbbbb
        and     WK&reg, WK&reg, MASK               @ 000000000000000000000gggggg00000
        mov     SCRATCH, SCRATCH, lsl #3           @ 0000000000000rrrrr000000bbbbb000
        mov     WK&reg, WK&reg, lsl #5             @ 0000000000000000gggggg0000000000
        orr     SCRATCH, SCRATCH, SCRATCH, lsr #5  @ 0000000000000rrrrrrrrrr0bbbbbbbb
        orr     WK&reg, WK&reg, WK&reg, lsr #6     @ 000000000000000gggggggggggg00000
        pkhbt   SCRATCH, SCRATCH, SCRATCH, lsl #5  @ --------rrrrrrrr--------bbbbbbbb
        sel     WK&reg, WK&reg, SCRATCH            @ --------rrrrrrrrggggggggbbbbbbbb
        orr     WK&reg, WK&reg, #0xFF000000        @ 11111111rrrrrrrrggggggggbbbbbbbb
.endm
Packit 030a23
Packit 030a23
/* Head macro for 0565 -> 8888. numbytes counts OUTPUT bytes, so only half as
 * many source bytes are loaded:
 *   16 out: 8 source bytes, split between WK<firstreg> and WK<firstreg+2>
 *           so each packed pair can be expanded in place by the tail
 *    8 out: 4 source bytes into WK<firstreg>
 *    4 out: 2 source bytes into WK<firstreg>
 * The loads are unconditional; cond is not forwarded.
 */
.macro src_0565_8888_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
 .if numbytes == 16
        pixldst ld,, 8, firstreg, %(firstreg+2),,, SRC, unaligned_src
 .elseif numbytes == 8
        pixld   , 4, firstreg, SRC, unaligned_src
 .elseif numbytes == 4
        pixld   , 2, firstreg, SRC, unaligned_src
 .endif
.endm
Packit 030a23
Packit 030a23
/* Tail macro for 0565 -> 8888: expand the packed source pixels loaded by the
 * head into 32bpp pixels in consecutive work registers.
 *   16 out: two pairs, (firstreg, firstreg+1) and (firstreg+2, firstreg+3)
 *    8 out: one pair,  (firstreg, firstreg+1)
 *    else : one pixel in firstreg
 */
.macro src_0565_8888_process_tail   cond, numbytes, firstreg
 .if numbytes == 16
        src_0565_8888_2pixels firstreg, %(firstreg+1)
        src_0565_8888_2pixels %(firstreg+2), %(firstreg+3)
 .elseif numbytes == 8
        src_0565_8888_2pixels firstreg, %(firstreg+1)
 .else
        src_0565_8888_1pixel firstreg
 .endif
.endm
Packit 030a23
Packit 030a23
/* r5g6b5 -> a8r8g8b8 conversion (16bpp source, 32bpp destination).
 * Uses FLAG_BRANCH_OVER rather than FLAG_COND_EXEC: the pixel macros above
 * are not written with condition codes.
 */
generate_composite_function \
    pixman_composite_src_0565_8888_asm_armv6, 16, 0, 32, \
    FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER, \
    3, /* prefetch distance */ \
    src_0565_8888_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    src_0565_8888_process_head, \
    src_0565_8888_process_tail
Packit 030a23
Packit 030a23
/******************************************************************************/
Packit 030a23
Packit 030a23
/* Init hook for x8r8g8b8 -> r5g6b5 conversion.
 * MASK holds 0x001F001F, the 5-bit field mask applied to red and blue after
 * shifting. STRIDE_S and ORIG_W are handed to line_saved_regs because the
 * head macro reuses them as work registers (WK4 and WK7).
 */
.macro src_x888_0565_init
        /* Hold loop invariant in MASK */
        ldr     MASK, =0x001F001F
        line_saved_regs  STRIDE_S, ORIG_W
.endm
Packit 030a23
Packit 030a23
/* Convert one x8r8g8b8 pixel in WK<s> to r5g6b5 in the low halfword of
 * WK<d>. The upper halfword of WK<d> is left holding junk.
 * Requires MASK = 0x001F001F. Uses STRIDE_S as a temporary for the green
 * field (STRIDE_S may itself be the register aliased as WK<s>; that is safe
 * because WK<s> is not read again after STRIDE_S is written).
 */
.macro src_x888_0565_1pixel  s, d
        and     WK&d, MASK, WK&s, lsr #3           @ 00000000000rrrrr00000000000bbbbb
        and     STRIDE_S, WK&s, #0xFC00            @ 0000000000000000gggggg0000000000
        orr     WK&d, WK&d, WK&d, lsr #5           @ 00000000000-----rrrrr000000bbbbb
        orr     WK&d, WK&d, STRIDE_S, lsr #5       @ 00000000000-----rrrrrggggggbbbbb
        /* Top 16 bits are discarded during the following STRH */
.endm
Packit 030a23
Packit 030a23
/* Convert two x8r8g8b8 pixels to r5g6b5 and pack them into one word.
 *   slo  WK register with the pixel destined for the low halfword (read only)
 *   shi  WK register with the pixel destined for the high halfword
 *        (overwritten: reused as workspace for the low pixel)
 *   d    WK register receiving the packed result
 *   tmp  WK register used as workspace for the high pixel. It is written
 *        before slo is last read, so it must not be the same register as
 *        slo; it may be the same as d.
 * Requires MASK = 0x001F001F. Clobbers SCRATCH.
 */
.macro src_x888_0565_2pixels  slo, shi, d, tmp
        and     SCRATCH, WK&shi, #0xFC00           @ 0000000000000000GGGGGG0000000000
        and     WK&tmp, MASK, WK&shi, lsr #3       @ 00000000000RRRRR00000000000BBBBB
        and     WK&shi, MASK, WK&slo, lsr #3       @ 00000000000rrrrr00000000000bbbbb
        orr     WK&tmp, WK&tmp, WK&tmp, lsr #5     @ 00000000000-----RRRRR000000BBBBB
        orr     WK&tmp, WK&tmp, SCRATCH, lsr #5    @ 00000000000-----RRRRRGGGGGGBBBBB
        and     SCRATCH, WK&slo, #0xFC00           @ 0000000000000000gggggg0000000000
        orr     WK&shi, WK&shi, WK&shi, lsr #5     @ 00000000000-----rrrrr000000bbbbb
        orr     WK&shi, WK&shi, SCRATCH, lsr #5    @ 00000000000-----rrrrrggggggbbbbb
        pkhbt   WK&d, WK&shi, WK&tmp, lsl #16      @ RRRRRGGGGGGBBBBBrrrrrggggggbbbbb
.endm
Packit 030a23
Packit 030a23
/* Head macro for x888 -> 0565. numbytes counts OUTPUT bytes, so twice as many
 * source bytes are loaded.
 * Defines the aliases WK4-WK7 and deliberately leaves them defined: they are
 * released by src_x888_0565_process_tail. Note that WK6 is the same
 * register as WK3.
 *   16 out: load 4 pixels into WK4-7, convert them into WK0 and WK1, then
 *           reload WK4-5 and WK6-7 with the next 4 pixels for the tail
 *   else  : load numbytes*2 source bytes starting at WK4
 * Source loads always pass 0 for the unaligned flag; firstreg is not used.
 */
.macro src_x888_0565_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
        WK4     .req    STRIDE_S
        WK5     .req    STRIDE_M
        WK6     .req    WK3
        WK7     .req    ORIG_W
 .if numbytes == 16
        pixld   , 16, 4, SRC, 0
        src_x888_0565_2pixels  4, 5, 0, 0
        pixld   , 8, 4, SRC, 0
        src_x888_0565_2pixels  6, 7, 1, 1
        pixld   , 8, 6, SRC, 0
 .else
        pixld   , numbytes*2, 4, SRC, 0
 .endif
.endm
Packit 030a23
Packit 030a23
/* Tail macro for x888 -> 0565: convert the pixels still pending in WK4-7,
 * store the result to DST, and release the WK4-WK7 aliases that were set up
 * by src_x888_0565_process_head.
 *   16 out: results go to WK2 and WK3; together with WK0/WK1 from the head
 *           the store is issued from WK0. The last conversion uses WK4 as
 *           its temporary because its destination WK3 is the same register
 *           as its low source WK6.
 *   8 / 4 / 2 out: results are built from WK1 upwards and stored from WK1.
 */
.macro src_x888_0565_process_tail   cond, numbytes, firstreg
 .if numbytes == 16
        src_x888_0565_2pixels  4, 5, 2, 2
        src_x888_0565_2pixels  6, 7, 3, 4
 .elseif numbytes == 8
        src_x888_0565_2pixels  4, 5, 1, 1
        src_x888_0565_2pixels  6, 7, 2, 2
 .elseif numbytes == 4
        src_x888_0565_2pixels  4, 5, 1, 1
 .else
        src_x888_0565_1pixel  4, 1
 .endif
 .if numbytes == 16
        pixst   , numbytes, 0, DST
 .else
        pixst   , numbytes, 1, DST
 .endif
        .unreq  WK4
        .unreq  WK5
        .unreq  WK6
        .unreq  WK7
.endm
Packit 030a23
Packit 030a23
/* x8r8g8b8 -> r5g6b5 conversion (32bpp source, 16bpp destination).
 * The process macros perform the store themselves and overwrite SCRATCH,
 * hence FLAG_PROCESS_DOES_STORE and FLAG_PROCESS_CORRUPTS_SCRATCH.
 */
generate_composite_function \
    pixman_composite_src_x888_0565_asm_armv6, 32, 0, 16, \
    FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
    3, /* prefetch distance */ \
    src_x888_0565_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    src_x888_0565_process_head, \
    src_x888_0565_process_tail
Packit 030a23
Packit 030a23
/******************************************************************************/
Packit 030a23
Packit 030a23
/* Saturating add of eight 8-bit source pixels onto eight destination pixels.
 * The source bytes are expected in MASK and STRIDE_M (loaded there as
 * WK4/WK5 by add_8_8_process_head); the destination words are WK<dst1> and
 * WK<dst2>, updated in place. Executed under condition cond.
 */
.macro add_8_8_8pixels  cond, dst1, dst2
        uqadd8&cond  WK&dst1, WK&dst1, MASK
        uqadd8&cond  WK&dst2, WK&dst2, STRIDE_M
.endm
Packit 030a23
Packit 030a23
/* Saturating add of up to four 8-bit source pixels (in MASK) onto the
 * destination word WK<dst>, updated in place. Executed under condition cond.
 */
.macro add_8_8_4pixels  cond, dst
        uqadd8&cond  WK&dst, WK&dst, MASK
.endm
Packit 030a23
Packit 030a23
/* Head macro for 8bpp ADD. MASK and STRIDE_M are borrowed as WK4/WK5 to hold
 * source bytes while the destination is loaded into WK<firstreg> onwards.
 *   16 bytes: only two source registers are available, so the first 8
 *             source bytes are loaded and added immediately, then the
 *             second 8 source bytes are loaded for the tail to add
 *   else    : load numbytes of source and of destination; the tail adds
 */
.macro add_8_8_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
    WK4     .req    MASK
    WK5     .req    STRIDE_M
 .if numbytes == 16
        pixld   cond, 8, 4, SRC, unaligned_src
        pixld   cond, 16, firstreg, DST, 0
        add_8_8_8pixels cond, firstreg, %(firstreg+1)
        pixld   cond, 8, 4, SRC, unaligned_src
 .else
        pixld   cond, numbytes, 4, SRC, unaligned_src
        pixld   cond, numbytes, firstreg, DST, 0
 .endif
    .unreq  WK4
    .unreq  WK5
.endm
Packit 030a23
Packit 030a23
/* Tail macro for 8bpp ADD: add the source bytes left in MASK/STRIDE_M by the
 * head onto the destination registers that have not been processed yet.
 *   16 bytes: second half only (firstreg+2, firstreg+3)
 *    8 bytes: firstreg and firstreg+1
 *    else   : firstreg only
 */
.macro add_8_8_process_tail  cond, numbytes, firstreg
 .if numbytes == 16
        add_8_8_8pixels cond, %(firstreg+2), %(firstreg+3)
 .elseif numbytes == 8
        add_8_8_8pixels cond, firstreg, %(firstreg+1)
 .else
        add_8_8_4pixels cond, firstreg
 .endif
.endm
Packit 030a23
Packit 030a23
/* 8bpp saturating ADD of source onto destination. The destination is read
 * as well as written (FLAG_DST_READWRITE).
 */
generate_composite_function \
    pixman_composite_add_8_8_asm_armv6, 8, 0, 8, \
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_PRESERVES_SCRATCH, \
    2, /* prefetch distance */ \
    nop_macro, /* init */ \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    add_8_8_process_head, \
    add_8_8_process_tail
Packit 030a23
Packit 030a23
/******************************************************************************/
Packit 030a23
Packit 030a23
/* Init hook for 8888 OVER 8888.
 *   MASK    = 0x00800080  rounding constant (0.5 in each 16-bit lane)
 *   GE[3:0] = 0101        byte-select pattern for SEL
 * SCRATCH is overwritten. STRIDE_D, STRIDE_S and ORIG_W are handed to
 * line_saved_regs because the process macros reuse them as WK4, WK5, WK7.
 */
.macro over_8888_8888_init
        /* Hold loop invariant in MASK */
        ldr     MASK, =0x00800080
        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
        uadd8   SCRATCH, MASK, MASK
        line_saved_regs STRIDE_D, STRIDE_S, ORIG_W
.endm
Packit 030a23
Packit 030a23
/* Head macro for 8888 OVER 8888: load numbytes of source pixels into the
 * upper work registers (WK<4+firstreg> onwards) and the same number of
 * destination pixels into WK<firstreg> onwards.
 * WK4-WK7 are aliased onto STRIDE_D, STRIDE_S, STRIDE_M and ORIG_W only for
 * the duration of this macro; the tail re-establishes the same aliases.
 */
.macro over_8888_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
    WK4     .req    STRIDE_D
    WK5     .req    STRIDE_S
    WK6     .req    STRIDE_M
    WK7     .req    ORIG_W
        pixld   , numbytes, %(4+firstreg), SRC, unaligned_src
        pixld   , numbytes, firstreg, DST, 0
    .unreq  WK4
    .unreq  WK5
    .unreq  WK6
    .unreq  WK7
.endm
Packit 030a23
Packit 030a23
/* Test whether every source pixel in a group is fully transparent.
 *   numbytes   size of the group in bytes (4 bytes per pixel)
 *   reg0-reg3  WK register numbers holding the source pixels; only the
 *              first numbytes/4 of them are examined
 * On exit the Z flag is set (EQ) only if all examined pixels are zero. Each
 * later test is conditional on EQ, so the chain stops updating the flags at
 * the first non-zero pixel.
 */
.macro over_8888_8888_check_transparent  numbytes, reg0, reg1, reg2, reg3
        /* Since these colours are premultiplied by alpha, only 0 indicates transparent (any other colour with 0 in the alpha byte is luminous) */
        teq     WK&reg0, #0
 .if numbytes > 4
        teqeq   WK&reg1, #0
  .if numbytes > 8
        teqeq   WK&reg2, #0
        teqeq   WK&reg3, #0
  .endif
 .endif
.endm
Packit 030a23
Packit 030a23
/* Reduce the source pixel in WK<next> to just its alpha byte (bits 31:24
 * moved down to bits 7:0), as expected on entry to over_8888_8888_1pixel.
 */
.macro over_8888_8888_prepare  next
        mov     WK&next, WK&next, lsr #24
.endm
Packit 030a23
Packit 030a23
/* Composite one premultiplied source pixel OVER one destination pixel.
 *   src     WK register holding the source ALPHA only on entry (see
 *           over_8888_8888_prepare); the full source pixel is reloaded
 *           into it part-way through
 *   dst     WK register holding the destination pixel; receives the result
 *   offset  byte offset from SRC at which this source pixel can be reloaded
 *           (negative: SRC is expected to point past the loaded pixels)
 *   next    WK register holding the following source pixel, reduced to its
 *           alpha here unless this is the last pixel (offset == -4)
 * Requires MASK = 0x00800080 and GE[3:0] = 0101. Clobbers SCRATCH.
 * The instruction order is chosen to fill pipeline stalls; keep it.
 */
.macro over_8888_8888_1pixel src, dst, offset, next
        /* src = destination component multiplier */
        rsb     WK&src, WK&src, #255
        /* Split even/odd bytes of dst into SCRATCH/dst */
        uxtb16  SCRATCH, WK&dst
        uxtb16  WK&dst, WK&dst, ror #8
        /* Multiply through, adding 0.5 to the upper byte of result for rounding */
        mla     SCRATCH, SCRATCH, WK&src, MASK
        mla     WK&dst, WK&dst, WK&src, MASK
        /* Where we would have had a stall between the result of the first MLA and the shifter input,
         * reload the complete source pixel */
        ldr     WK&src, [SRC, #offset]
        /* Multiply by 257/256 to approximate 256/255 */
        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
        /* In this stall, start processing the next pixel */
 .if offset < -4
        mov     WK&next, WK&next, lsr #24
 .endif
        uxtab16 WK&dst, WK&dst, WK&dst, ror #8
        /* Recombine even/odd bytes of multiplied destination */
        mov     SCRATCH, SCRATCH, ror #8
        sel     WK&dst, SCRATCH, WK&dst
        /* Saturated add of source to multiplied destination */
        uqadd8  WK&dst, WK&dst, WK&src
.endm
Packit 030a23
Packit 030a23
/* Tail macro for 8888 OVER 8888.
 * If every source pixel of the group is zero the destination is left
 * untouched and the store is skipped (branch to local label 10). Otherwise
 * each pixel is composited in turn and the group is stored to DST.
 * PROCESS_REG / PROCESS_OFF are assembly-time counters: the destination
 * register index and the matching negative offset from SRC used to reload
 * each source pixel. Alters the condition flags.
 */
.macro over_8888_8888_process_tail  cond, numbytes, firstreg
    WK4     .req    STRIDE_D
    WK5     .req    STRIDE_S
    WK6     .req    STRIDE_M
    WK7     .req    ORIG_W
        over_8888_8888_check_transparent numbytes, %(4+firstreg), %(5+firstreg), %(6+firstreg), %(7+firstreg)
        beq     10f
        /* Extract alpha of the first pixel; later ones are prepared inside
         * over_8888_8888_1pixel */
        over_8888_8888_prepare  %(4+firstreg)
 .set PROCESS_REG, firstreg
 .set PROCESS_OFF, -numbytes
 .rept numbytes / 4
        over_8888_8888_1pixel %(4+PROCESS_REG), %(0+PROCESS_REG), PROCESS_OFF, %(5+PROCESS_REG)
  .set PROCESS_REG, PROCESS_REG+1
  .set PROCESS_OFF, PROCESS_OFF+4
 .endr
        pixst   , numbytes, firstreg, DST
10:
    .unreq  WK4
    .unreq  WK5
    .unreq  WK6
    .unreq  WK7
.endm
Packit 030a23
Packit 030a23
/* a8r8g8b8 OVER a8r8g8b8 with premultiplied alpha. The tail macro does its
 * own store and alters the flags, hence FLAG_PROCESS_DOES_STORE and
 * FLAG_PROCESS_CORRUPTS_PSR. Every macro argument is separated by an
 * explicit comma, matching the other invocations in this file.
 */
generate_composite_function \
    pixman_composite_over_8888_8888_asm_armv6, 32, 0, 32, \
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS, \
    2, /* prefetch distance */ \
    over_8888_8888_init, \
    nop_macro, /* newline */ \
    nop_macro, /* cleanup */ \
    over_8888_8888_process_head, \
    over_8888_8888_process_tail
Packit 030a23
Packit 030a23
/******************************************************************************/
Packit 030a23
Packit 030a23
/* Multiply each byte of a word by a byte.
 * Useful when there aren't any obvious ways to fill the stalls with other instructions.
 * word  Register containing 4 bytes
 * byte  Register containing byte multiplier (bits 8-31 must be 0)
 * tmp   Scratch register
 * half  Register containing the constant 0x00800080
 * GE[3:0] bits must contain 0101
 *
 * The result replaces word; each byte becomes approximately
 * (byte_of_word * byte) / 255, rounded. tmp is overwritten.
 */
.macro mul_8888_8  word, byte, tmp, half
        /* Split even/odd bytes of word apart */
        uxtb16  tmp, word
        uxtb16  word, word, ror #8
        /* Multiply bytes together with rounding, then by 257/256 */
        mla     tmp, tmp, byte, half
        mla     word, word, byte, half /* 1 stall follows */
        uxtab16 tmp, tmp, tmp, ror #8  /* 1 stall follows */
        uxtab16 word, word, word, ror #8
        /* Recombine bytes */
        mov     tmp, tmp, ror #8
        sel     word, tmp, word
.endm
Packit 030a23
Packit 030a23
/******************************************************************************/
Packit 030a23
Packit 030a23
/* Init hook for 8888 OVER 8888 with a constant (solid) mask.
 *   MASK     = alpha byte of the constant mask, read from the stack
 *              argument at ARGS_STACK_OFFSET+8
 *   STRIDE_M = 0x00800080  rounding constant for mul_8888_8
 *   GE[3:0]  = 0101        byte-select pattern for SEL
 * SCRATCH is overwritten. Y, STRIDE_D, STRIDE_S and ORIG_W are handed to
 * line_saved_regs because the process macros reuse them as WK4-WK7.
 */
.macro over_8888_n_8888_init
        /* Mask is constant */
        ldr     MASK, [sp, #ARGS_STACK_OFFSET+8]
        /* Hold loop invariant in STRIDE_M */
        ldr     STRIDE_M, =0x00800080
        /* We only want the alpha bits of the constant mask */
        mov     MASK, MASK, lsr #24
        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
        uadd8   SCRATCH, STRIDE_M, STRIDE_M
        line_saved_regs Y, STRIDE_D, STRIDE_S, ORIG_W
.endm
Packit 030a23
Packit 030a23
/* Head macro for 8888 OVER 8888 with solid mask: load numbytes of source
 * pixels starting at WK4 or WK5 (selected by firstreg%2) and the same number
 * of destination pixels into WK<firstreg> onwards.
 * WK4-WK7 are aliased onto Y, STRIDE_D, STRIDE_S and ORIG_W only for the
 * duration of this macro; the tail re-establishes the same aliases.
 */
.macro over_8888_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
    WK4     .req    Y
    WK5     .req    STRIDE_D
    WK6     .req    STRIDE_S
    WK7     .req    ORIG_W
        pixld   , numbytes, %(4+(firstreg%2)), SRC, unaligned_src
        pixld   , numbytes, firstreg, DST, 0
    .unreq  WK4
    .unreq  WK5
    .unreq  WK6
    .unreq  WK7
.endm
Packit 030a23
Packit 030a23
/* Composite one pixel for 8888 OVER 8888 with solid mask.
 *   src  WK register holding the source pixel (overwritten with src*mask)
 *   dst  WK register holding the destination pixel; receives the result
 * Steps: scale the source by the mask alpha in MASK; compute the destination
 * multiplier WK7 = WK6 - alpha(scaled source); scale the destination by it;
 * saturating-add the two.
 * Must be expanded while the WK6/WK7 aliases are in effect and WK6 holds
 * 255 (both are set up by over_8888_n_8888_process_tail). Requires
 * STRIDE_M = 0x00800080 and GE[3:0] = 0101. Clobbers SCRATCH and WK7.
 */
.macro over_8888_n_8888_1pixel src, dst
        mul_8888_8  WK&src, MASK, SCRATCH, STRIDE_M
        sub     WK7, WK6, WK&src, lsr #24
        mul_8888_8  WK&dst, WK7, SCRATCH, STRIDE_M
        uqadd8  WK&dst, WK&dst, WK&src
.endm
Packit 030a23
Packit 030a23
/* Tail macro for 8888 OVER 8888 with solid mask.
 * If every source pixel of the group is zero the destination is left
 * untouched and the store is skipped (branch to local label 10). Otherwise
 * each pixel is composited and the group is stored to DST.
 * WK6 and WK7 double as temporaries (255 and the destination multiplier),
 * so source pixels are always taken from WK4/WK5, alternating via
 * PROCESS_REG%2. Alters the condition flags.
 */
.macro over_8888_n_8888_process_tail  cond, numbytes, firstreg
    WK4     .req    Y
    WK5     .req    STRIDE_D
    WK6     .req    STRIDE_S
    WK7     .req    ORIG_W
        over_8888_8888_check_transparent numbytes, %(4+(firstreg%2)), %(5+(firstreg%2)), %(6+firstreg), %(7+firstreg)
        beq     10f
        /* Constant used to form (255 - alpha) in over_8888_n_8888_1pixel */
        mov     WK6, #255
 .set PROCESS_REG, firstreg
 .rept numbytes / 4
  .if numbytes == 16 && PROCESS_REG == 2
        /* We're using WK6 and WK7 as temporaries, so half way through
         * 4 pixels, reload the second two source pixels but this time
         * into WK4 and WK5 */
        ldmdb   SRC, {WK4, WK5}
  .endif
        over_8888_n_8888_1pixel  %(4+(PROCESS_REG%2)), %(PROCESS_REG)
  .set PROCESS_REG, PROCESS_REG+1
 .endr
        pixst   , numbytes, firstreg, DST
10:
    .unreq  WK4
    .unreq  WK5
    .unreq  WK6
    .unreq  WK7
.endm
Packit 030a23
Packit 030a23
/*
 * Instantiate pixman_composite_over_8888_n_8888_asm_armv6 through
 * generate_composite_function (defined elsewhere). The "32, 0, 32" triple
 * presumably gives source/mask/destination bits per pixel - TODO confirm
 * against the macro definition. No per-line or cleanup hook is needed, so
 * nop_macro fills those two slots.
 */
generate_composite_function \
Packit 030a23
    pixman_composite_over_8888_n_8888_asm_armv6, 32, 0, 32 \
Packit 030a23
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \
Packit 030a23
    2, /* prefetch distance */ \
Packit 030a23
    over_8888_n_8888_init, \
Packit 030a23
    nop_macro, /* newline */ \
Packit 030a23
    nop_macro, /* cleanup */ \
Packit 030a23
    over_8888_n_8888_process_head, \
Packit 030a23
    over_8888_n_8888_process_tail
Packit 030a23
Packit 030a23
/******************************************************************************/
Packit 030a23
Packit 030a23
/*
 * Init hook for over_n_8_8888 (constant source, 8-bit mask, 32-bit dest).
 * On exit: STRIDE_S holds bytes 0 and 2 of the source colour and SRC holds
 * bytes 1 and 3, each zero-extended into a 16-bit lane; GE[3:0] = 0101.
 * SCRATCH is overwritten.
 */
.macro over_n_8_8888_init
Packit 030a23
        /* Source is constant, but splitting it into even/odd bytes is a loop invariant */
Packit 030a23
        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
Packit 030a23
        /* Not enough registers to hold this constant, but we still use it here to set GE[3:0] */
Packit 030a23
        ldr     SCRATCH, =0x00800080
Packit 030a23
        uxtb16  STRIDE_S, SRC
Packit 030a23
        uxtb16  SRC, SRC, ror #8
Packit 030a23
        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
Packit 030a23
        /* (0x80+0x80 carries out of bytes 0 and 2; bytes 1 and 3 are 0+0) */
        uadd8   SCRATCH, SCRATCH, SCRATCH
Packit 030a23
        /* line_saved_regs is defined elsewhere; presumably it declares the
         * registers to be preserved per scanline - TODO confirm */
        line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Newline hook for over_n_8_8888: (re)load the 0x00800080 rounding constant
 * into STRIDE_D. The literal pool is emitted right here with .ltorg, and the
 * branch jumps over it so the pool data is never executed.
 */
.macro over_n_8_8888_newline
Packit 030a23
        ldr     STRIDE_D, =0x00800080
Packit 030a23
        b       1f
Packit 030a23
 .ltorg
Packit 030a23
1:
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Process-head hook for over_n_8_8888: load the inputs for numbytes/4
 * pixels. STRIDE_M is aliased as WK4 while the macro runs so that pixld
 * (defined elsewhere) can be told to load the mask into register index 4.
 *   numbytes       - destination bytes in this group; the mask is 1/4 of it.
 *   firstreg       - index of the first WK register to receive dest pixels.
 *   unaligned_mask - forwarded to pixld for the mask load.
 *   cond, unaligned_src, preload - not referenced in this macro body.
 */
.macro over_n_8_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
Packit 030a23
    WK4     .req    STRIDE_M
Packit 030a23
        pixld   , numbytes/4, 4, MASK, unaligned_mask
Packit 030a23
        pixld   , numbytes, firstreg, DST, 0
Packit 030a23
    .unreq  WK4
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Composite one pixel for over_n_8_8888.
 *   src - index (0-3) of the mask byte inside WK4 to use.
 *   dst - numeric suffix of the WK register holding the destination pixel.
 * Expects STRIDE_S / SRC to hold the even / odd source bytes as 16-bit
 * lanes, STRIDE_D to hold 0x00800080 and GE[3:0] = 0101, as set up by
 * over_n_8_8888_init and over_n_8_8888_newline. WK4 must be aliased by the
 * caller. Y, SCRATCH and ORIG_W are overwritten.
 */
.macro over_n_8_8888_1pixel src, dst
Packit 030a23
        /* Y = mask byte number src, zero-extended */
        uxtb    Y, WK4, ror #src*8
Packit 030a23
        /* Trailing part of multiplication of source */
Packit 030a23
        mla     SCRATCH, STRIDE_S, Y, STRIDE_D
Packit 030a23
        mla     Y, SRC, Y, STRIDE_D
Packit 030a23
        mov     ORIG_W, #255
Packit 030a23
        /* Add each 16-bit lane's own high byte to itself; the lane result
         * is then taken from the high byte */
        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
Packit 030a23
        uxtab16 Y, Y, Y, ror #8
Packit 030a23
        /* Bring the even-byte results down into byte positions 0 and 2 */
        mov     SCRATCH, SCRATCH, ror #8
Packit 030a23
        /* ORIG_W = 255 - top byte of Y (byte 3 of the masked source) */
        sub     ORIG_W, ORIG_W, Y, lsr #24
Packit 030a23
        /* With GE = 0101: bytes 0,2 from SCRATCH, bytes 1,3 from Y */
        sel     Y, SCRATCH, Y
Packit 030a23
        /* Then multiply the destination */
Packit 030a23
        mul_8888_8  WK&dst, ORIG_W, SCRATCH, STRIDE_D
Packit 030a23
        uqadd8  WK&dst, WK&dst, Y
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Process-tail hook for over_n_8_8888: composite numbytes/4 pixels and
 * store them, or do nothing at all if the mask register is zero.
 *   cond     - not referenced in this macro body.
 *   numbytes - destination bytes in this group.
 *   firstreg - index of the first WK register holding destination pixels.
 * STRIDE_M is aliased as WK4 (the loaded mask) while the macro runs.
 */
.macro over_n_8_8888_process_tail  cond, numbytes, firstreg
Packit 030a23
    WK4     .req    STRIDE_M
Packit 030a23
        /* Whole mask register zero: skip both the maths and the store */
        teq     WK4, #0
Packit 030a23
        beq     10f
Packit 030a23
 .set PROCESS_REG, firstreg
Packit 030a23
 .rept numbytes / 4
Packit 030a23
        /* First argument is the mask byte index, second the dest register */
        over_n_8_8888_1pixel  %(PROCESS_REG-firstreg), %(PROCESS_REG)
Packit 030a23
  .set PROCESS_REG, PROCESS_REG+1
Packit 030a23
 .endr
Packit 030a23
        pixst   , numbytes, firstreg, DST
Packit 030a23
10:
Packit 030a23
    .unreq  WK4
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Instantiate pixman_composite_over_n_8_8888_asm_armv6 through
 * generate_composite_function (defined elsewhere). The "0, 8, 32" triple
 * presumably gives source/mask/destination bits per pixel - TODO confirm.
 * A newline hook is supplied to reload the rounding constant each line.
 */
generate_composite_function \
Packit 030a23
    pixman_composite_over_n_8_8888_asm_armv6, 0, 8, 32 \
Packit 030a23
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \
Packit 030a23
    2, /* prefetch distance */ \
Packit 030a23
    over_n_8_8888_init, \
Packit 030a23
    over_n_8_8888_newline, \
Packit 030a23
    nop_macro, /* cleanup */ \
Packit 030a23
    over_n_8_8888_process_head, \
Packit 030a23
    over_n_8_8888_process_tail
Packit 030a23
Packit 030a23
/******************************************************************************/
Packit 030a23
Packit 030a23
/*
 * Init hook for over_reverse_n_8888 (constant source composited underneath
 * the destination). On exit: SRC = source colour, MASK = 0x00800080,
 * STRIDE_S = source bytes 0 and 2, STRIDE_M = source bytes 1 and 3 (each as
 * 16-bit lanes), GE[3:0] = 0101. SCRATCH is overwritten.
 */
.macro over_reverse_n_8888_init
Packit 030a23
        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
Packit 030a23
        ldr     MASK, =0x00800080
Packit 030a23
        /* Split source pixel into RB/AG parts */
Packit 030a23
        uxtb16  STRIDE_S, SRC
Packit 030a23
        uxtb16  STRIDE_M, SRC, ror #8
Packit 030a23
        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
Packit 030a23
        uadd8   SCRATCH, MASK, MASK
Packit 030a23
        /* line_saved_regs is defined elsewhere; presumably it declares the
         * registers to be preserved per scanline - TODO confirm */
        line_saved_regs  STRIDE_D, ORIG_W
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Newline hook for over_reverse_n_8888: set STRIDE_D to 0xFF, the constant
 * over_reverse_n_8888_1pixel uses to form 255 - destination alpha.
 */
.macro over_reverse_n_8888_newline
Packit 030a23
        mov     STRIDE_D, #0xFF
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Process-head hook for over_reverse_n_8888: load numbytes of destination
 * into WK registers starting at firstreg. The remaining parameters are not
 * referenced in this macro body.
 */
.macro over_reverse_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
Packit 030a23
        pixld   , numbytes, firstreg, DST, 0
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Composite one pixel for over_reverse_n_8888:
 * WK[d] = WK[d] + source * (255 - alpha of WK[d]) / 255.
 *   d       - numeric suffix of the WK register holding the dest pixel.
 *   is_only - 1 when this is the only pixel of the group; an opaque
 *             destination then branches to label 49 (defined by the
 *             enclosing over_reverse_n_8888_tail) to skip the store.
 * Uses local labels 8 and 9. ORIG_W, SCRATCH and the flags are overwritten.
 * Expects STRIDE_D = 0xFF, MASK = 0x00800080, STRIDE_S / STRIDE_M = split
 * source and GE[3:0] = 0101 (see the init and newline hooks).
 */
.macro over_reverse_n_8888_1pixel  d, is_only
Packit 030a23
        /* Destination word entirely zero: result is simply the source */
        teq     WK&d, #0
Packit 030a23
        beq     8f       /* replace with source */
Packit 030a23
        /* ORIG_W = 0xFF AND NOT (dest >> 24), i.e. 255 - dest alpha;
         * Z is set when the destination is fully opaque */
        bics    ORIG_W, STRIDE_D, WK&d, lsr #24
Packit 030a23
 .if is_only == 1
Packit 030a23
        beq     49f      /* skip store */
Packit 030a23
 .else
Packit 030a23
        beq     9f       /* write same value back */
Packit 030a23
 .endif
Packit 030a23
        mla     SCRATCH, STRIDE_S, ORIG_W, MASK /* red/blue */
Packit 030a23
        mla     ORIG_W, STRIDE_M, ORIG_W, MASK  /* alpha/green */
Packit 030a23
        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
Packit 030a23
        uxtab16 ORIG_W, ORIG_W, ORIG_W, ror #8
Packit 030a23
        mov     SCRATCH, SCRATCH, ror #8
Packit 030a23
        /* With GE = 0101: bytes 0,2 from SCRATCH, bytes 1,3 from ORIG_W */
        sel     ORIG_W, SCRATCH, ORIG_W
Packit 030a23
        uqadd8  WK&d, WK&d, ORIG_W
Packit 030a23
        b       9f
Packit 030a23
8:      mov     WK&d, SRC
Packit 030a23
9:
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Composite and store a group of 1, 2 or 4 destination pixels for
 * over_reverse_n_8888.
 *   numbytes  - 4, 8 or 16.
 *   reg1-reg4 - numeric suffixes of the WK registers holding the pixels
 *               (reg3/reg4 are only used when numbytes == 16).
 * Defines local label 49, the "skip the store" target also used by
 * over_reverse_n_8888_1pixel when is_only == 1.
 */
.macro over_reverse_n_8888_tail  numbytes, reg1, reg2, reg3, reg4
Packit 030a23
 .if numbytes == 4
Packit 030a23
        over_reverse_n_8888_1pixel  reg1, 1
Packit 030a23
 .else
Packit 030a23
        /* AND all destination words together: the top byte of the result
         * is 0xFF only if every pixel's top byte is 0xFF */
        and     SCRATCH, WK&reg1, WK&reg2
Packit 030a23
  .if numbytes == 16
Packit 030a23
        and     SCRATCH, SCRATCH, WK&reg3
Packit 030a23
        and     SCRATCH, SCRATCH, WK&reg4
Packit 030a23
  .endif
Packit 030a23
        /* NOT of the sign-extended top byte is zero exactly in that case */
        mvns    SCRATCH, SCRATCH, asr #24
Packit 030a23
        beq     49f /* skip store if all opaque */
Packit 030a23
        over_reverse_n_8888_1pixel  reg1, 0
Packit 030a23
        over_reverse_n_8888_1pixel  reg2, 0
Packit 030a23
  .if numbytes == 16
Packit 030a23
        over_reverse_n_8888_1pixel  reg3, 0
Packit 030a23
        over_reverse_n_8888_1pixel  reg4, 0
Packit 030a23
  .endif
Packit 030a23
 .endif
Packit 030a23
        pixst   , numbytes, reg1, DST
Packit 030a23
49:
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Process-tail hook for over_reverse_n_8888: expands firstreg into four
 * consecutive register indices and forwards to over_reverse_n_8888_tail.
 * cond is not referenced in this macro body.
 */
.macro over_reverse_n_8888_process_tail  cond, numbytes, firstreg
Packit 030a23
        over_reverse_n_8888_tail  numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Instantiate pixman_composite_over_reverse_n_8888_asm_armv6 through
 * generate_composite_function (defined elsewhere). The "0, 0, 32" triple
 * presumably gives source/mask/destination bits per pixel - TODO confirm.
 * NOTE(review): unlike the other instantiations in this file, the flags
 * expression here is followed by a comma - confirm this is intentional.
 */
generate_composite_function \
Packit 030a23
    pixman_composite_over_reverse_n_8888_asm_armv6, 0, 0, 32 \
Packit 030a23
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
Packit 030a23
    3, /* prefetch distance */ \
Packit 030a23
    over_reverse_n_8888_init, \
Packit 030a23
    over_reverse_n_8888_newline, \
Packit 030a23
    nop_macro, /* cleanup */ \
Packit 030a23
    over_reverse_n_8888_process_head, \
Packit 030a23
    over_reverse_n_8888_process_tail
Packit 030a23
Packit 030a23
/******************************************************************************/
Packit 030a23
Packit 030a23
/*
 * Init hook for over_white_8888_8888_ca (source fixed at white, 32-bit
 * component-alpha mask, 32-bit destination). Creates the register aliases
 * HALF, TMP0-TMP3 and WK4 used by the other over_white_* macros (released
 * again by over_white_8888_8888_ca_cleanup), sets HALF = 0x80 and
 * GE[3:0] = 0101, and sets DST_PRELOAD_BIAS to 8. SCRATCH is overwritten.
 */
.macro over_white_8888_8888_ca_init
Packit 030a23
        HALF    .req    SRC
Packit 030a23
        TMP0    .req    STRIDE_D
Packit 030a23
        TMP1    .req    STRIDE_S
Packit 030a23
        TMP2    .req    STRIDE_M
Packit 030a23
        TMP3    .req    ORIG_W
Packit 030a23
        WK4     .req    SCRATCH
Packit 030a23
        line_saved_regs STRIDE_D, STRIDE_M, ORIG_W
Packit 030a23
        /* Loaded only to derive the GE flags below */
        ldr     SCRATCH, =0x800080
Packit 030a23
        mov     HALF, #0x80
Packit 030a23
        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
Packit 030a23
        uadd8   SCRATCH, SCRATCH, SCRATCH
Packit 030a23
        /* Consumed by code outside this macro; meaning not visible here */
        .set DST_PRELOAD_BIAS, 8
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Cleanup hook for over_white_8888_8888_ca: reset DST_PRELOAD_BIAS to 0 and
 * release every register alias created by over_white_8888_8888_ca_init.
 */
.macro over_white_8888_8888_ca_cleanup
Packit 030a23
        .set DST_PRELOAD_BIAS, 0
Packit 030a23
        .unreq  HALF
Packit 030a23
        .unreq  TMP0
Packit 030a23
        .unreq  TMP1
Packit 030a23
        .unreq  TMP2
Packit 030a23
        .unreq  TMP3
Packit 030a23
        .unreq  WK4
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * General-case pixel for over_white_8888_8888_ca: per channel,
 * d = d * (bitwise NOT of mask) / 255, then saturating-add the mask m.
 *   m - register holding the component-alpha mask pixel.
 *   d - register holding the destination pixel; receives the result.
 * On entry TMP0 must already hold NOT m (both tail macros execute
 * "mvn TMP0, WKn" before reaching this macro). HALF must be 0x80 and
 * GE[3:0] must be 0101. TMP0-TMP3 are overwritten.
 */
.macro over_white_8888_8888_ca_combine  m, d
Packit 030a23
        uxtb16  TMP1, TMP0                /* rb_notmask */
Packit 030a23
        uxtb16  TMP2, d                   /* rb_dest; 1 stall follows */
Packit 030a23
        smlatt  TMP3, TMP2, TMP1, HALF    /* red */
Packit 030a23
        smlabb  TMP2, TMP2, TMP1, HALF    /* blue */
Packit 030a23
        uxtb16  TMP0, TMP0, ror #8        /* ag_notmask */
Packit 030a23
        uxtb16  TMP1, d, ror #8           /* ag_dest; 1 stall follows */
Packit 030a23
        smlatt  d, TMP1, TMP0, HALF       /* alpha */
Packit 030a23
        smlabb  TMP1, TMP1, TMP0, HALF    /* green */
Packit 030a23
        pkhbt   TMP0, TMP2, TMP3, lsl #16 /* rb; 1 stall follows */
Packit 030a23
        pkhbt   TMP1, TMP1, d, lsl #16    /* ag */
Packit 030a23
        /* Add each 16-bit lane's high byte to the lane; the wanted result
         * is then the lane's high byte */
        uxtab16 TMP0, TMP0, TMP0, ror #8
Packit 030a23
        uxtab16 TMP1, TMP1, TMP1, ror #8
Packit 030a23
        /* Move the rb results down into byte positions 0 and 2 */
        mov     TMP0, TMP0, ror #8
Packit 030a23
        /* With GE = 0101: bytes 0,2 from TMP0, bytes 1,3 from TMP1 */
        sel     d, TMP0, TMP1
Packit 030a23
        uqadd8  d, d, m                   /* d is a late result */
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Load the inputs for one pixel: 4 bytes of mask into WK1 and 4 bytes of
 * destination into WK3 (register indices passed to pixld, defined
 * elsewhere).
 */
.macro over_white_8888_8888_ca_1pixel_head
Packit 030a23
        pixld   , 4, 1, MASK, 0
Packit 030a23
        pixld   , 4, 3, DST, 0
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Finish one pixel loaded by over_white_8888_8888_ca_1pixel_head
 * (mask in WK1, destination in WK3) and store it unless the mask is zero.
 * Uses local labels 01, 02 and 03; TMP0 and the flags are overwritten.
 */
.macro over_white_8888_8888_ca_1pixel_tail
Packit 030a23
        /* TMP0 = NOT mask, as required by the combine macro */
        mvn     TMP0, WK1
Packit 030a23
        /* Z is set when WK1 is 0 or 0xFFFFFFFF; C receives bit 31 of WK1,
         * which tells the two cases apart */
        teq     WK1, WK1, asr #32
Packit 030a23
        bne     01f
Packit 030a23
        /* Mask is zero: destination is unchanged, so skip the store */
        bcc     03f
Packit 030a23
        /* Mask is all ones: the result is the mask value itself */
        mov     WK3, WK1
Packit 030a23
        b       02f
Packit 030a23
01:     over_white_8888_8888_ca_combine WK1, WK3
Packit 030a23
02:     pixst   , 4, 3, DST
Packit 030a23
03:
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Load 8 bytes (two pixels) of mask into WK1 and WK2. The destination is
 * not loaded here; over_white_8888_8888_ca_2pixels_tail does that.
 */
.macro over_white_8888_8888_ca_2pixels_head
Packit 030a23
        pixld   , 8, 1, MASK, 0
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Finish two pixels whose masks are in WK1 and WK2: load the two
 * destination pixels into WK3 and WK4, composite each, and store both
 * unless both masks are zero. Uses local labels 01-05; TMP0 and the flags
 * are overwritten.
 * NOTE(review): this pixld call omits the fifth argument that every other
 * pixld call in this part of the file passes - confirm that pixld treats a
 * missing value the same as 0.
 */
.macro over_white_8888_8888_ca_2pixels_tail
Packit 030a23
        pixld   , 8, 3, DST
Packit 030a23
        mvn     TMP0, WK1
Packit 030a23
        /* Z when WK1 is 0 or 0xFFFFFFFF; C = bit 31 of WK1 */
        teq     WK1, WK1, asr #32
Packit 030a23
        bne     01f
Packit 030a23
        /* First mask all ones: result is the mask value; go to pixel 2 */
        movcs   WK3, WK1
Packit 030a23
        bcs     02f
Packit 030a23
        /* First mask zero: if the second is zero as well, skip the store */
        teq     WK2, #0
Packit 030a23
        beq     05f
Packit 030a23
        b       02f
Packit 030a23
01:     over_white_8888_8888_ca_combine WK1, WK3
Packit 030a23
        /* Second pixel: same three-way test on WK2 */
02:     mvn     TMP0, WK2
Packit 030a23
        teq     WK2, WK2, asr #32
Packit 030a23
        bne     03f
Packit 030a23
        /* All ones: take the mask value; zero: leave WK4 unchanged */
        movcs   WK4, WK2
Packit 030a23
        b       04f
Packit 030a23
03:     over_white_8888_8888_ca_combine WK2, WK4
Packit 030a23
04:     pixst   , 8, 3, DST
Packit 030a23
05:
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Process-head hook for over_white_8888_8888_ca. Only numbytes is used:
 *   4  - load one pixel's inputs;
 *   8  - load the mask for one pair of pixels;
 *   16 - fully process the first pair here (head + tail), then load the
 *        mask for the second pair, leaving it for the process-tail hook.
 */
.macro over_white_8888_8888_ca_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
Packit 030a23
 .if numbytes == 4
Packit 030a23
        over_white_8888_8888_ca_1pixel_head
Packit 030a23
 .else
Packit 030a23
  .if numbytes == 16
Packit 030a23
        over_white_8888_8888_ca_2pixels_head
Packit 030a23
        over_white_8888_8888_ca_2pixels_tail
Packit 030a23
  .endif
Packit 030a23
        over_white_8888_8888_ca_2pixels_head
Packit 030a23
 .endif
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Process-tail hook for over_white_8888_8888_ca: finish the single pixel
 * (numbytes == 4) or the pending pair of pixels (any other size) whose
 * inputs the process-head hook loaded. cond and firstreg are not used.
 */
.macro over_white_8888_8888_ca_process_tail  cond, numbytes, firstreg
Packit 030a23
 .if numbytes == 4
Packit 030a23
        over_white_8888_8888_ca_1pixel_tail
Packit 030a23
 .else
Packit 030a23
        over_white_8888_8888_ca_2pixels_tail
Packit 030a23
 .endif
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Instantiate pixman_composite_over_white_8888_8888_ca_asm_armv6 through
 * generate_composite_function (defined elsewhere). The "0, 32, 32" triple
 * presumably gives source/mask/destination bits per pixel - TODO confirm.
 * This symbol is also the branch target of the entry stub
 * pixman_composite_over_n_8888_8888_ca_asm_armv6 when its first stack word
 * equals -1.
 */
generate_composite_function \
Packit 030a23
    pixman_composite_over_white_8888_8888_ca_asm_armv6, 0, 32, 32 \
Packit 030a23
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH \
Packit 030a23
    2, /* prefetch distance */ \
Packit 030a23
    over_white_8888_8888_ca_init, \
Packit 030a23
    nop_macro, /* newline */ \
Packit 030a23
    over_white_8888_8888_ca_cleanup, \
Packit 030a23
    over_white_8888_8888_ca_process_head, \
Packit 030a23
    over_white_8888_8888_ca_process_tail
Packit 030a23
Packit 030a23
Packit 030a23
/*
 * Init hook for over_n_8888_8888_ca (constant source, 32-bit
 * component-alpha mask, 32-bit destination).
 * Pushes four constant words, lowest address first:
 *   RB_FLDS (0x00FF00FF), HALF default (0x80), A_SRC (source alpha),
 *   HALF alternate (0x00800080)
 * and bumps ARGS_STACK_OFFSET by 16 to match. It then splits the source
 * into RB_SRC / AG_SRC, sets GE[3:0] = 0101, and replaces the WK0-WK3
 * aliases with the register layout used by the per-pixel macros
 * (over_n_8888_8888_ca_cleanup restores the original r8-r11 aliases).
 */
.macro over_n_8888_8888_ca_init
Packit 030a23
        /* Set up constants. RB_SRC and AG_SRC are in registers;
Packit 030a23
         * RB_FLDS, A_SRC, and the two HALF values need to go on the
Packit 030a23
         * stack (and the full SRC value is already there) */
Packit 030a23
        ldr     SCRATCH, [sp, #ARGS_STACK_OFFSET]
Packit 030a23
        mov     WK0, #0x00FF0000
Packit 030a23
        orr     WK0, WK0, #0xFF        /* RB_FLDS (0x00FF00FF) */
Packit 030a23
        mov     WK1, #0x80             /* HALF default value */
Packit 030a23
        mov     WK2, SCRATCH, lsr #24  /* A_SRC */
Packit 030a23
        orr     WK3, WK1, WK1, lsl #16 /* HALF alternate value (0x00800080) */
Packit 030a23
        push    {WK0-WK3}
Packit 030a23
        /* Keep stack-argument offsets valid after the 16-byte push */
 .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET+16
Packit 030a23
        uxtb16  SRC, SCRATCH
Packit 030a23
        uxtb16  STRIDE_S, SCRATCH, ror #8
Packit 030a23
Packit 030a23
        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
Packit 030a23
        uadd8   SCRATCH, WK3, WK3
Packit 030a23
Packit 030a23
        .unreq  WK0
Packit 030a23
        .unreq  WK1
Packit 030a23
        .unreq  WK2
Packit 030a23
        .unreq  WK3
Packit 030a23
        WK0     .req    Y
Packit 030a23
        WK1     .req    STRIDE_D
Packit 030a23
        RB_SRC  .req    SRC
Packit 030a23
        AG_SRC  .req    STRIDE_S
Packit 030a23
        WK2     .req    STRIDE_M
Packit 030a23
        RB_FLDS .req    r8       /* the reloaded constants have to be at consecutive registers starting at an even one */
Packit 030a23
        /* A_SRC deliberately shares r8 with RB_FLDS: each is reloaded
         * from the stack (together with a HALF value in r9) when needed */
        A_SRC   .req    r8
Packit 030a23
        HALF    .req    r9
Packit 030a23
        WK3     .req    r10
Packit 030a23
        WK4     .req    r11
Packit 030a23
        WK5     .req    SCRATCH
Packit 030a23
        WK6     .req    ORIG_W
Packit 030a23
Packit 030a23
        line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Cleanup hook for over_n_8888_8888_ca: drop the four constant words that
 * over_n_8888_8888_ca_init pushed, undo its ARGS_STACK_OFFSET adjustment,
 * release all of its register aliases and re-establish WK0-WK3 as r8-r11.
 */
.macro over_n_8888_8888_ca_cleanup
Packit 030a23
        add     sp, sp, #16
Packit 030a23
 .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET-16
Packit 030a23
Packit 030a23
        .unreq  WK0
Packit 030a23
        .unreq  WK1
Packit 030a23
        .unreq  RB_SRC
Packit 030a23
        .unreq  AG_SRC
Packit 030a23
        .unreq  WK2
Packit 030a23
        .unreq  RB_FLDS
Packit 030a23
        .unreq  A_SRC
Packit 030a23
        .unreq  HALF
Packit 030a23
        .unreq  WK3
Packit 030a23
        .unreq  WK4
Packit 030a23
        .unreq  WK5
Packit 030a23
        .unreq  WK6
Packit 030a23
        WK0     .req    r8
Packit 030a23
        WK1     .req    r9
Packit 030a23
        WK2     .req    r10
Packit 030a23
        WK3     .req    r11
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Load the inputs for one pixel: 4 bytes of mask into WK6 and 4 bytes of
 * destination into WK0 (register indices passed to pixld, defined
 * elsewhere), using the aliases set up by over_n_8888_8888_ca_init.
 */
.macro over_n_8888_8888_ca_1pixel_head
Packit 030a23
        pixld   , 4, 6, MASK, 0
Packit 030a23
        pixld   , 4, 0, DST, 0
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Finish one pixel loaded by over_n_8888_8888_ca_1pixel_head (mask in WK6,
 * destination in WK0) and store it unless the mask is zero.
 * Four cases are distinguished:
 *   mask == 0                      - nothing to do, no store (label 40);
 *   mask all ones, source opaque   - result is the source colour;
 *   mask all ones, source not opaque - plain "over" (label 10);
 *   anything else                  - full component-alpha maths (label 20).
 * The constants are reloaded in pairs from the block pushed by the init
 * macro; this assumes LOCALS_STACK_OFFSET (defined elsewhere) addresses the
 * start of that block - TODO confirm. WK0-WK6, A_SRC/RB_FLDS, HALF and the
 * flags are overwritten.
 */
.macro over_n_8888_8888_ca_1pixel_tail
Packit 030a23
        /* Third and fourth pushed words: A_SRC and the 0x00800080 HALF */
        ldrd    A_SRC, HALF, [sp, #LOCALS_STACK_OFFSET+8]
Packit 030a23
        uxtb16  WK1, WK6                 /* rb_mask (first step of hard case placed in what would otherwise be a stall) */
Packit 030a23
        teq     WK6, WK6, asr #32        /* Zc if transparent, ZC if opaque */
Packit 030a23
        bne     20f
Packit 030a23
        bcc     40f
Packit 030a23
        /* Mask is fully opaque (all channels) */
Packit 030a23
        ldr     WK6, [sp, #ARGS_STACK_OFFSET] /* get SRC back */
Packit 030a23
        /* A_SRC becomes 255 - source alpha; Z set if the source is opaque */
        eors    A_SRC, A_SRC, #0xFF
Packit 030a23
        bne     10f
Packit 030a23
        /* Source is also opaque - same as src_8888_8888 */
Packit 030a23
        mov     WK0, WK6
Packit 030a23
        b       30f
Packit 030a23
10:     /* Same as over_8888_8888 */
Packit 030a23
        mul_8888_8 WK0, A_SRC, WK5, HALF
Packit 030a23
        uqadd8  WK0, WK0, WK6
Packit 030a23
        b       30f
Packit 030a23
20:     /* No simplifications possible - do it the hard way */
Packit 030a23
        uxtb16  WK2, WK6, ror #8         /* ag_mask */
Packit 030a23
        mla     WK3, WK1, A_SRC, HALF    /* rb_mul; 2 cycles */
Packit 030a23
        mla     WK4, WK2, A_SRC, HALF    /* ag_mul; 2 cycles */
Packit 030a23
        /* First and second pushed words: RB_FLDS and the 0x80 HALF. This
         * overwrites A_SRC, which shares a register with RB_FLDS */
        ldrd    RB_FLDS, HALF, [sp, #LOCALS_STACK_OFFSET]
Packit 030a23
        uxtb16  WK5, WK0                 /* rb_dest */
Packit 030a23
        uxtab16 WK3, WK3, WK3, ror #8
Packit 030a23
        uxtb16  WK6, WK0, ror #8         /* ag_dest */
Packit 030a23
        uxtab16 WK4, WK4, WK4, ror #8
Packit 030a23
        smlatt  WK0, RB_SRC, WK1, HALF   /* red1 */
Packit 030a23
        smlabb  WK1, RB_SRC, WK1, HALF   /* blue1 */
Packit 030a23
        /* WK3/WK4 = 0x00FF00FF AND NOT (source_alpha*mask), per lane */
        bic     WK3, RB_FLDS, WK3, lsr #8
Packit 030a23
        bic     WK4, RB_FLDS, WK4, lsr #8
Packit 030a23
        pkhbt   WK1, WK1, WK0, lsl #16   /* rb1 */
Packit 030a23
        smlatt  WK0, WK5, WK3, HALF      /* red2 */
Packit 030a23
        smlabb  WK3, WK5, WK3, HALF      /* blue2 */
Packit 030a23
        uxtab16 WK1, WK1, WK1, ror #8
Packit 030a23
        smlatt  WK5, AG_SRC, WK2, HALF   /* alpha1 */
Packit 030a23
        pkhbt   WK3, WK3, WK0, lsl #16   /* rb2 */
Packit 030a23
        smlabb  WK0, AG_SRC, WK2, HALF   /* green1 */
Packit 030a23
        smlatt  WK2, WK6, WK4, HALF      /* alpha2 */
Packit 030a23
        smlabb  WK4, WK6, WK4, HALF      /* green2 */
Packit 030a23
        pkhbt   WK0, WK0, WK5, lsl #16   /* ag1 */
Packit 030a23
        uxtab16 WK3, WK3, WK3, ror #8
Packit 030a23
        pkhbt   WK4, WK4, WK2, lsl #16   /* ag2 */
Packit 030a23
        uxtab16 WK0, WK0, WK0, ror #8
Packit 030a23
        uxtab16 WK4, WK4, WK4, ror #8
Packit 030a23
        mov     WK1, WK1, ror #8
Packit 030a23
        mov     WK3, WK3, ror #8
Packit 030a23
        sel     WK2, WK1, WK0            /* recombine source*mask */
Packit 030a23
        sel     WK1, WK3, WK4            /* recombine dest*(1-source_alpha*mask) */
Packit 030a23
        uqadd8  WK0, WK1, WK2            /* followed by 1 stall */
Packit 030a23
30:     /* The destination buffer is already in the L1 cache, so
Packit 030a23
         * there's little point in amalgamating writes */
Packit 030a23
        pixst   , 4, 0, DST
Packit 030a23
40:
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Process-head hook for over_n_8888_8888_ca. Pixels are handled strictly
 * one at a time: all but the last pixel of the group are loaded, processed
 * and stored here, and only the load for the last pixel is left pending
 * for the process-tail hook. Only numbytes is used.
 */
.macro over_n_8888_8888_ca_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
Packit 030a23
 .rept (numbytes / 4) - 1
Packit 030a23
        over_n_8888_8888_ca_1pixel_head
Packit 030a23
        over_n_8888_8888_ca_1pixel_tail
Packit 030a23
 .endr
Packit 030a23
        over_n_8888_8888_ca_1pixel_head
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Process-tail hook for over_n_8888_8888_ca: finish the one pixel whose
 * inputs the process-head hook left loaded. No parameter is used.
 */
.macro over_n_8888_8888_ca_process_tail  cond, numbytes, firstreg
Packit 030a23
        over_n_8888_8888_ca_1pixel_tail
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Public entry point. Reads the first stack word and, if it equals -1
 * (0xFFFFFFFF), tail-branches to the specialised white-source routine;
 * otherwise execution falls through into the _helper function generated
 * immediately below. The first stack word is presumably the solid source
 * colour that the init macros later fetch via ARGS_STACK_OFFSET - TODO
 * confirm against the caller's argument layout. Clobbers ip and the flags.
 */
pixman_asm_function pixman_composite_over_n_8888_8888_ca_asm_armv6
Packit 030a23
        ldr     ip, [sp]
Packit 030a23
        cmp     ip, #-1
Packit 030a23
        beq     pixman_composite_over_white_8888_8888_ca_asm_armv6
Packit 030a23
        /* else drop through... */
Packit 030a23
 .endfunc
Packit 030a23
/*
 * Instantiate the general-case routine that the entry stub directly above
 * falls through into; it must therefore stay immediately after that stub.
 * The "0, 32, 32" triple presumably gives source/mask/destination bits per
 * pixel - TODO confirm against generate_composite_function.
 */
generate_composite_function \
Packit 030a23
    pixman_composite_over_n_8888_8888_ca_asm_armv6_helper, 0, 32, 32 \
Packit 030a23
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_PROCESS_CORRUPTS_WK0 \
Packit 030a23
    2, /* prefetch distance */ \
Packit 030a23
    over_n_8888_8888_ca_init, \
Packit 030a23
    nop_macro, /* newline */ \
Packit 030a23
    over_n_8888_8888_ca_cleanup, \
Packit 030a23
    over_n_8888_8888_ca_process_head, \
Packit 030a23
    over_n_8888_8888_ca_process_tail
Packit 030a23
Packit 030a23
/******************************************************************************/
Packit 030a23
Packit 030a23
/*
 * Init hook for in_reverse_8888_8888 (destination scaled by source alpha).
 * On exit: MASK = 0x00800080, GE[3:0] = 0101, and SRC has been advanced by
 * 3 so that byte loads from it fetch the alpha byte of each source pixel.
 * SCRATCH is overwritten.
 */
.macro in_reverse_8888_8888_init
Packit 030a23
        /* Hold loop invariant in MASK */
Packit 030a23
        ldr     MASK, =0x00800080
Packit 030a23
        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
Packit 030a23
        uadd8   SCRATCH, MASK, MASK
Packit 030a23
        /* Offset the source pointer: we only need the alpha bytes */
Packit 030a23
        add     SRC, SRC, #3
Packit 030a23
        line_saved_regs  ORIG_W
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Load the source alpha bytes for a group of 1, 2 or 4 pixels and step DST
 * past the group without reading it.
 *   numbytes  - 4, 8 or 16.
 *   reg1-reg3 - numeric suffixes of the WK registers that receive the
 *               alpha of the 2nd, 3rd and 4th pixel (as needed).
 * The first pixel's alpha goes into ORIG_W. All values are zero-extended
 * bytes (ldrb). SRC advances by 4 per pixel. The destination pixels are
 * only read later, by in_reverse_8888_8888_tail, using negative offsets
 * from the advanced DST.
 */
.macro in_reverse_8888_8888_head  numbytes, reg1, reg2, reg3
Packit 030a23
        ldrb    ORIG_W, [SRC], #4
Packit 030a23
 .if numbytes >= 8
Packit 030a23
        ldrb    WK&reg1, [SRC], #4
Packit 030a23
  .if numbytes == 16
Packit 030a23
        ldrb    WK&reg2, [SRC], #4
Packit 030a23
        ldrb    WK&reg3, [SRC], #4
Packit 030a23
  .endif
Packit 030a23
 .endif
Packit 030a23
        add     DST, DST, #numbytes
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Process-head hook for in_reverse_8888_8888: expands firstreg into three
 * consecutive register indices and forwards to in_reverse_8888_8888_head.
 * Only numbytes and firstreg are used.
 */
.macro in_reverse_8888_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
Packit 030a23
        in_reverse_8888_8888_head  numbytes, firstreg, %(firstreg+1), %(firstreg+2)
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Scale one destination pixel by a source alpha: d = d * s / 255 on each
 * of the four channels.
 *   s       - register holding (is_only == 1) or receiving (otherwise) the
 *             alpha value, a zero-extended byte.
 *   d       - register holding the destination pixel; receives the result.
 *   offset  - when non-zero, byte offset from SRC of the NEXT pixel's
 *             alpha, which is fetched into ORIG_W for the following
 *             expansion of this macro; 0 marks the last pixel of a group.
 *   is_only - 1 when the group is a single pixel: s is then used as-is and
 *             the alpha 0 / alpha 255 shortcuts are not emitted.
 * Expects MASK = 0x00800080 and GE[3:0] = 0101. SCRATCH and the flags are
 * overwritten. Defines local labels 01 and 02 and, when offset == 0, also
 * label 48, which in_reverse_8888_8888_tail branches to.
 *
 * The opaque-alpha shortcut tests the s parameter itself. It used to test
 * STRIDE_M by name, which was only correct because every existing caller
 * passes STRIDE_M as s; the generated code for those callers is unchanged.
 */
.macro in_reverse_8888_8888_1pixel  s, d, offset, is_only
Packit 030a23
 .if is_only != 1
Packit 030a23
        /* Take the current alpha out of ORIG_W, setting Z if it is zero */
        movs    s, ORIG_W
Packit 030a23
  .if offset != 0
Packit 030a23
        /* Fetch the next alpha early; ldrb leaves the flags untouched */
        ldrb    ORIG_W, [SRC, #offset]
Packit 030a23
  .endif
Packit 030a23
        /* Alpha 0: result is 0 */
        beq     01f
Packit 030a23
        /* Alpha 255: destination pixel is unchanged */
        teq     s, #0xFF
Packit 030a23
        beq     02f
Packit 030a23
 .endif
Packit 030a23
        uxtb16  SCRATCH, d                 /* rb_dest */
Packit 030a23
        uxtb16  d, d, ror #8               /* ag_dest */
Packit 030a23
        mla     SCRATCH, SCRATCH, s, MASK
Packit 030a23
        mla     d, d, s, MASK
Packit 030a23
        /* Add each 16-bit lane's high byte to the lane; the wanted result
         * is then the lane's high byte */
        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
Packit 030a23
        uxtab16 d, d, d, ror #8
Packit 030a23
        /* Move the rb results down into byte positions 0 and 2 */
        mov     SCRATCH, SCRATCH, ror #8
Packit 030a23
        /* With GE = 0101: bytes 0,2 from SCRATCH, bytes 1,3 from d */
        sel     d, SCRATCH, d
Packit 030a23
        b       02f
Packit 030a23
 .if offset == 0
Packit 030a23
48:     /* Last mov d,#0 of the set - used as part of shortcut for
Packit 030a23
         * source values all 0 */
Packit 030a23
 .endif
Packit 030a23
01:     mov     d, #0
Packit 030a23
02:
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Finish a group of 1, 2 or 4 pixels for in_reverse_8888_8888.
 *   numbytes  - 4, 8 or 16.
 *   reg1-reg4 - numeric suffixes of the WK registers for the group. On
 *               entry reg1-reg3 hold the alphas of pixels 2-4 (loaded by
 *               in_reverse_8888_8888_head) and ORIG_W holds the first one.
 * If all alphas of the group are equal and zero, the destination is not
 * read at all: the registers that hold the (zero) alphas double as the
 * zero pixels, label 48 clears the last register, and the group is stored.
 * Otherwise the destination pixels are loaded over the alpha registers and
 * each pixel is scaled individually, with the alphas re-read from SRC at
 * negative offsets. Label 49 (no store) is defined at the end.
 * NOTE(review): ORIG_W is loaded with ldrb (zero-extended) in
 * in_reverse_8888_8888_head, so "ORIG_W, asr #32" is always 0 with a
 * carry-out of 0; the C-set "all -1" case tested by bcs therefore appears
 * unreachable - confirm whether the all-opaque shortcut is intended to be
 * live.
 */
.macro in_reverse_8888_8888_tail  numbytes, reg1, reg2, reg3, reg4
Packit 030a23
 .if numbytes == 4
Packit 030a23
        teq     ORIG_W, ORIG_W, asr #32
Packit 030a23
        /* Shortcut not applicable: fetch the destination pixel */
        ldrne   WK&reg1, [DST, #-4]
Packit 030a23
 .elseif numbytes == 8
Packit 030a23
        teq     ORIG_W, WK&reg1
Packit 030a23
        teqeq   ORIG_W, ORIG_W, asr #32  /* all 0 or all -1? */
Packit 030a23
        ldmnedb DST, {WK&reg1-WK&reg2}
Packit 030a23
 .else
Packit 030a23
        /* Chain of conditional compares: Z survives only if all four
         * alphas are equal and pass the final test */
        teq     ORIG_W, WK&reg1
Packit 030a23
        teqeq   ORIG_W, WK&reg2
Packit 030a23
        teqeq   ORIG_W, WK&reg3
Packit 030a23
        teqeq   ORIG_W, ORIG_W, asr #32  /* all 0 or all -1? */
Packit 030a23
        ldmnedb DST, {WK&reg1-WK&reg4}
Packit 030a23
 .endif
Packit 030a23
        cmnne   DST, #0   /* clear C if NE */
Packit 030a23
        bcs     49f       /* no writes to dest if source all -1 */
Packit 030a23
        beq     48f       /* set dest to all 0 if source all 0 */
Packit 030a23
 .if numbytes == 4
Packit 030a23
        in_reverse_8888_8888_1pixel  ORIG_W, WK&reg1, 0, 1
Packit 030a23
        str     WK&reg1, [DST, #-4]
Packit 030a23
 .elseif numbytes == 8
Packit 030a23
        /* Offsets are relative to SRC after the head's post-increments */
        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg1, -4, 0
Packit 030a23
        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg2, 0, 0
Packit 030a23
        stmdb   DST, {WK&reg1-WK&reg2}
Packit 030a23
 .else
Packit 030a23
        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg1, -12, 0
Packit 030a23
        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg2, -8, 0
Packit 030a23
        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg3, -4, 0
Packit 030a23
        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg4, 0, 0
Packit 030a23
        stmdb   DST, {WK&reg1-WK&reg4}
Packit 030a23
 .endif
Packit 030a23
49:
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Process-tail hook for in_reverse_8888_8888: expands firstreg into four
 * consecutive register indices and forwards to in_reverse_8888_8888_tail.
 * cond is not referenced in this macro body.
 */
.macro in_reverse_8888_8888_process_tail  cond, numbytes, firstreg
Packit 030a23
        in_reverse_8888_8888_tail  numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Instantiate pixman_composite_in_reverse_8888_8888_asm_armv6 through
 * generate_composite_function (defined elsewhere). The "32, 0, 32" triple
 * presumably gives source/mask/destination bits per pixel - TODO confirm.
 * FLAG_NO_PRELOAD_DST is passed in addition to the usual flags; the
 * process macros above only read the destination conditionally.
 */
generate_composite_function \
Packit 030a23
    pixman_composite_in_reverse_8888_8888_asm_armv6, 32, 0, 32 \
Packit 030a23
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_NO_PRELOAD_DST \
Packit 030a23
    2, /* prefetch distance */ \
Packit 030a23
    in_reverse_8888_8888_init, \
Packit 030a23
    nop_macro, /* newline */ \
Packit 030a23
    nop_macro, /* cleanup */ \
Packit 030a23
    in_reverse_8888_8888_process_head, \
Packit 030a23
    in_reverse_8888_8888_process_tail
Packit 030a23
Packit 030a23
/******************************************************************************/
Packit 030a23
Packit 030a23
/*
 * Init hook for over_n_8888 (constant source over 32-bit destination).
 * On exit: SRC = source colour, MASK = 0x00800080,
 * STRIDE_M = 255 - (source >> 24), GE[3:0] = 0101. SCRATCH is overwritten.
 */
.macro over_n_8888_init
Packit 030a23
        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
Packit 030a23
        /* Hold loop invariant in MASK */
Packit 030a23
        ldr     MASK, =0x00800080
Packit 030a23
        /* Hold multiplier for destination in STRIDE_M */
Packit 030a23
        mov     STRIDE_M, #255
Packit 030a23
        sub     STRIDE_M, STRIDE_M, SRC, lsr #24
Packit 030a23
        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
Packit 030a23
        uadd8   SCRATCH, MASK, MASK
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Process-head hook for over_n_8888: load numbytes of destination into WK
 * registers starting at firstreg. The remaining parameters are not
 * referenced in this macro body.
 */
.macro over_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
Packit 030a23
        pixld   , numbytes, firstreg, DST, 0
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Composite one pixel for over_n_8888: scale WK[dst] by STRIDE_M
 * (255 - source alpha, see over_n_8888_init) and add the source colour
 * with per-byte saturation.
 *   dst - numeric suffix of the WK register holding the destination pixel.
 * mul_8888_8 is defined elsewhere; presumably SCRATCH is its temporary and
 * MASK its rounding constant - TODO confirm.
 */
.macro over_n_8888_1pixel dst
Packit 030a23
        mul_8888_8  WK&dst, STRIDE_M, SCRATCH, MASK
Packit 030a23
        uqadd8  WK&dst, WK&dst, SRC
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Process-tail hook for over_n_8888: composite numbytes/4 pixels held in
 * consecutive WK registers starting at firstreg, then store them. There is
 * no early-out path; the store is always performed. cond is not used.
 */
.macro over_n_8888_process_tail  cond, numbytes, firstreg
Packit 030a23
 .set PROCESS_REG, firstreg
Packit 030a23
 .rept numbytes / 4
Packit 030a23
        over_n_8888_1pixel %(PROCESS_REG)
Packit 030a23
  .set PROCESS_REG, PROCESS_REG+1
Packit 030a23
 .endr
Packit 030a23
        pixst   , numbytes, firstreg, DST
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Instantiate pixman_composite_over_n_8888_asm_armv6 through
 * generate_composite_function (defined elsewhere). The "0, 0, 32" triple
 * presumably gives source/mask/destination bits per pixel - TODO confirm.
 * Unlike the other instantiations in this part of the file, neither
 * FLAG_PROCESS_CORRUPTS_PSR nor FLAG_SPILL_LINE_VARS is passed.
 */
generate_composite_function \
Packit 030a23
    pixman_composite_over_n_8888_asm_armv6, 0, 0, 32 \
Packit 030a23
    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE \
Packit 030a23
    2, /* prefetch distance */ \
Packit 030a23
    over_n_8888_init, \
Packit 030a23
    nop_macro, /* newline */ \
Packit 030a23
    nop_macro, /* cleanup */ \
Packit 030a23
    over_n_8888_process_head, \
Packit 030a23
    over_n_8888_process_tail
Packit 030a23
Packit 030a23
/******************************************************************************/