Blame pixman/pixman-mips-dspr2-asm.h

Packit 030a23
/*
Packit 030a23
 * Copyright (c) 2012
Packit 030a23
 *      MIPS Technologies, Inc., California.
Packit 030a23
 *
Packit 030a23
 * Redistribution and use in source and binary forms, with or without
Packit 030a23
 * modification, are permitted provided that the following conditions
Packit 030a23
 * are met:
Packit 030a23
 * 1. Redistributions of source code must retain the above copyright
Packit 030a23
 *    notice, this list of conditions and the following disclaimer.
Packit 030a23
 * 2. Redistributions in binary form must reproduce the above copyright
Packit 030a23
 *    notice, this list of conditions and the following disclaimer in the
Packit 030a23
 *    documentation and/or other materials provided with the distribution.
Packit 030a23
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
Packit 030a23
 *    contributors may be used to endorse or promote products derived from
Packit 030a23
 *    this software without specific prior written permission.
Packit 030a23
 *
Packit 030a23
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
Packit 030a23
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
Packit 030a23
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Packit 030a23
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
Packit 030a23
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
Packit 030a23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
Packit 030a23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
Packit 030a23
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
Packit 030a23
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
Packit 030a23
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
Packit 030a23
 * SUCH DAMAGE.
Packit 030a23
 *
Packit 030a23
 * Author:  Nemanja Lukic (nemanja.lukic@rt-rk.com)
Packit 030a23
 */
Packit 030a23
Packit 030a23
#ifndef PIXMAN_MIPS_DSPR2_ASM_H
Packit 030a23
#define PIXMAN_MIPS_DSPR2_ASM_H
Packit 030a23
Packit 030a23
#define zero $0
Packit 030a23
#define AT   $1
Packit 030a23
#define v0   $2
Packit 030a23
#define v1   $3
Packit 030a23
#define a0   $4
Packit 030a23
#define a1   $5
Packit 030a23
#define a2   $6
Packit 030a23
#define a3   $7
Packit 030a23
#define t0   $8
Packit 030a23
#define t1   $9
Packit 030a23
#define t2   $10
Packit 030a23
#define t3   $11
Packit 030a23
#define t4   $12
Packit 030a23
#define t5   $13
Packit 030a23
#define t6   $14
Packit 030a23
#define t7   $15
Packit 030a23
#define s0   $16
Packit 030a23
#define s1   $17
Packit 030a23
#define s2   $18
Packit 030a23
#define s3   $19
Packit 030a23
#define s4   $20
Packit 030a23
#define s5   $21
Packit 030a23
#define s6   $22
Packit 030a23
#define s7   $23
Packit 030a23
#define t8   $24
Packit 030a23
#define t9   $25
Packit 030a23
#define k0   $26
Packit 030a23
#define k1   $27
Packit 030a23
#define gp   $28
Packit 030a23
#define sp   $29
Packit 030a23
#define fp   $30
Packit 030a23
#define s8   $30
Packit 030a23
#define ra   $31
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2
Packit 030a23
 */
Packit 030a23
#define LEAF_MIPS32R2(symbol)                           \
Packit 030a23
                .globl  symbol;                         \
Packit 030a23
                .align  2;                              \
Packit 030a23
                .hidden symbol;                         \
Packit 030a23
                .type   symbol, @function;              \
Packit 030a23
                .ent    symbol, 0;                      \
Packit 030a23
symbol:         .frame  sp, 0, ra;                      \
Packit 030a23
                .set    push;                           \
Packit 030a23
                .set    arch=mips32r2;                  \
Packit 030a23
                .set    noreorder;                      \
Packit 030a23
                .set    noat;
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2
Packit 030a23
 */
Packit 030a23
#define LEAF_MIPS_DSPR2(symbol)                         \
Packit 030a23
LEAF_MIPS32R2(symbol)                                   \
Packit 030a23
                .set    dspr2;
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * END - mark end of function
Packit 030a23
 */
Packit 030a23
#define END(function)                                   \
Packit 030a23
                .set    pop;                            \
Packit 030a23
                .end    function;                       \
Packit 030a23
                .size   function,.-function
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * Checks if stack offset is big enough for storing/restoring regs_num
Packit 030a23
 * number of register to/from stack. Stack offset must be greater than
Packit 030a23
 * or equal to the number of bytes needed for storing registers (regs_num*4).
Packit 030a23
 * Since MIPS ABI allows usage of first 16 bytes of stack frame (this is
Packit 030a23
 * preserved for input arguments of the functions, already stored in a0-a3),
Packit 030a23
 * stack size can be further optimized by utilizing this space.
Packit 030a23
 */
Packit 030a23
.macro CHECK_STACK_OFFSET regs_num, stack_offset
Packit 030a23
/*
 * Assemble-time check only; emits no instructions. regs_num * 4 bytes are
 * needed in total, of which 16 are taken from the argument area described
 * in the comment above, so stack_offset has to cover just the remainder.
 */
.if \stack_offset < \regs_num * 4 - 16
Packit 030a23
.error "Stack offset too small."
Packit 030a23
.endif
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * Saves set of registers on stack. Maximum number of registers that
Packit 030a23
 * can be saved on stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
Packit 030a23
 * Stack offset is number of bytes that are subtracted from stack pointer (sp)
Packit 030a23
 * before registers are pushed in order to provide enough space on stack
Packit 030a23
 * (offset must be multiple of 4, and must be big enough, as described by
Packit 030a23
 * CHECK_STACK_OFFSET macro). This macro is intended to be used in
Packit 030a23
 * combination with RESTORE_REGS_FROM_STACK macro. Example:
Packit 030a23
 *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
Packit 030a23
 *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
Packit 030a23
 */
Packit 030a23
.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
Packit 030a23
                          r2  = 0, r3  = 0, r4  = 0, \
Packit 030a23
                          r5  = 0, r6  = 0, r7  = 0, \
Packit 030a23
                          r8  = 0, r9  = 0, r10 = 0, \
Packit 030a23
                          r11 = 0, r12 = 0, r13 = 0, \
Packit 030a23
                          r14 = 0
Packit 030a23
    /* Reject a negative offset or one with a non-zero remainder modulo 4. */
    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
Packit 030a23
    .error "Stack offset must be non-negative and a multiple of 4."
Packit 030a23
    .endif
Packit 030a23
    /* Reserve stack_offset bytes by moving sp down (skipped when it is 0). */
    .if \stack_offset != 0
Packit 030a23
    addiu           sp, sp, -\stack_offset
Packit 030a23
    .endif
Packit 030a23
    /*
     * r1 is mandatory; r2..r14 are stored only when they differ from the
     * default 0. Register N is stored at offset (N - 1) * 4 from the new sp.
     */
    sw              \r1, 0(sp)
Packit 030a23
    .if \r2 != 0
Packit 030a23
    sw              \r2, 4(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r3 != 0
Packit 030a23
    sw              \r3, 8(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r4 != 0
Packit 030a23
    sw              \r4, 12(sp)
Packit 030a23
    .endif
Packit 030a23
    /*
     * From the fifth register on, the slot lies at offset 16 or above, so
     * stack_offset is validated for every additional register.
     */
    .if \r5 != 0
Packit 030a23
    CHECK_STACK_OFFSET 5, \stack_offset
Packit 030a23
    sw              \r5, 16(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r6 != 0
Packit 030a23
    CHECK_STACK_OFFSET 6, \stack_offset
Packit 030a23
    sw              \r6, 20(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r7 != 0
Packit 030a23
    CHECK_STACK_OFFSET 7, \stack_offset
Packit 030a23
    sw              \r7, 24(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r8 != 0
Packit 030a23
    CHECK_STACK_OFFSET 8, \stack_offset
Packit 030a23
    sw              \r8, 28(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r9 != 0
Packit 030a23
    CHECK_STACK_OFFSET 9, \stack_offset
Packit 030a23
    sw              \r9, 32(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r10 != 0
Packit 030a23
    CHECK_STACK_OFFSET 10, \stack_offset
Packit 030a23
    sw              \r10, 36(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r11 != 0
Packit 030a23
    CHECK_STACK_OFFSET 11, \stack_offset
Packit 030a23
    sw              \r11, 40(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r12 != 0
Packit 030a23
    CHECK_STACK_OFFSET 12, \stack_offset
Packit 030a23
    sw              \r12, 44(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r13 != 0
Packit 030a23
    CHECK_STACK_OFFSET 13, \stack_offset
Packit 030a23
    sw              \r13, 48(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r14 != 0
Packit 030a23
    CHECK_STACK_OFFSET 14, \stack_offset
Packit 030a23
    sw              \r14, 52(sp)
Packit 030a23
    .endif
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * Restores set of registers from stack. Maximum number of registers that
Packit 030a23
 * can be restored from stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
Packit 030a23
 * Stack offset is number of bytes that are added to stack pointer (sp)
Packit 030a23
 * after registers are restored (offset must be multiple of 4, and must
Packit 030a23
 * be big enough, as described by CHECK_STACK_OFFSET macro). This macro is
Packit 030a23
 * intended to be used in combination with SAVE_REGS_ON_STACK macro.
Packit 030a23
 * Example:
Packit 030a23
 *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
Packit 030a23
 *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
Packit 030a23
 */
Packit 030a23
.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
Packit 030a23
                               r2  = 0, r3  = 0, r4  = 0, \
Packit 030a23
                               r5  = 0, r6  = 0, r7  = 0, \
Packit 030a23
                               r8  = 0, r9  = 0, r10 = 0, \
Packit 030a23
                               r11 = 0, r12 = 0, r13 = 0, \
Packit 030a23
                               r14 = 0
Packit 030a23
    /* Reject a negative offset or one with a non-zero remainder modulo 4. */
    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
Packit 030a23
    .error "Stack offset must be non-negative and a multiple of 4."
Packit 030a23
    .endif
Packit 030a23
    /*
     * r1 is mandatory; r2..r14 are loaded only when they differ from the
     * default 0. Register N is loaded from offset (N - 1) * 4 from sp.
     */
    lw              \r1, 0(sp)
Packit 030a23
    .if \r2 != 0
Packit 030a23
    lw              \r2, 4(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r3 != 0
Packit 030a23
    lw              \r3, 8(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r4 != 0
Packit 030a23
    lw              \r4, 12(sp)
Packit 030a23
    .endif
Packit 030a23
    /*
     * From the fifth register on, the slot lies at offset 16 or above, so
     * stack_offset is validated for every additional register.
     */
    .if \r5 != 0
Packit 030a23
    CHECK_STACK_OFFSET 5, \stack_offset
Packit 030a23
    lw              \r5, 16(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r6 != 0
Packit 030a23
    CHECK_STACK_OFFSET 6, \stack_offset
Packit 030a23
    lw              \r6, 20(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r7 != 0
Packit 030a23
    CHECK_STACK_OFFSET 7, \stack_offset
Packit 030a23
    lw              \r7, 24(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r8 != 0
Packit 030a23
    CHECK_STACK_OFFSET 8, \stack_offset
Packit 030a23
    lw              \r8, 28(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r9 != 0
Packit 030a23
    CHECK_STACK_OFFSET 9, \stack_offset
Packit 030a23
    lw              \r9, 32(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r10 != 0
Packit 030a23
    CHECK_STACK_OFFSET 10, \stack_offset
Packit 030a23
    lw              \r10, 36(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r11 != 0
Packit 030a23
    CHECK_STACK_OFFSET 11, \stack_offset
Packit 030a23
    lw              \r11, 40(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r12 != 0
Packit 030a23
    CHECK_STACK_OFFSET 12, \stack_offset
Packit 030a23
    lw              \r12, 44(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r13 != 0
Packit 030a23
    CHECK_STACK_OFFSET 13, \stack_offset
Packit 030a23
    lw              \r13, 48(sp)
Packit 030a23
    .endif
Packit 030a23
    .if \r14 != 0
Packit 030a23
    CHECK_STACK_OFFSET 14, \stack_offset
Packit 030a23
    lw              \r14, 52(sp)
Packit 030a23
    .endif
Packit 030a23
    /* Release the stack_offset bytes by moving sp back up (skipped when 0). */
    .if \stack_offset != 0
Packit 030a23
    addiu           sp, sp, \stack_offset
Packit 030a23
    .endif
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * Conversion of single r5g6b5 pixel (in_565) to single a8r8g8b8 pixel
Packit 030a23
 * returned in (out_8888) register. Requires two temporary registers
Packit 030a23
 * (scratch1 and scratch2).
Packit 030a23
 */
Packit 030a23
.macro CONVERT_1x0565_TO_1x8888 in_565,   \
Packit 030a23
                                out_8888, \
Packit 030a23
                                scratch1, scratch2
Packit 030a23
    /*
     * Each 5/6-bit channel is widened to 8 bits by shifting it to the top of
     * the byte and copying its own top bits into the freed low bits.
     * out_8888 is written first and in_565 is read afterwards, so the two
     * must be different registers.
     */
    lui     \out_8888, 0xff00                /* out = 0xff000000 (alpha byte) */
Packit 030a23
    sll     \scratch1, \in_565,   0x3
Packit 030a23
    andi    \scratch2, \scratch1, 0xff       /* blue << 3, bits 7..3 */
Packit 030a23
    ext     \scratch1, \in_565,   0x2, 0x3   /* top 3 bits of blue, bits 2..0 */
Packit 030a23
    or      \scratch1, \scratch2, \scratch1  /* 8-bit blue in bits 7..0 */
Packit 030a23
    or      \out_8888, \out_8888, \scratch1
Packit 030a23
Packit 030a23
    sll     \scratch1, \in_565,   0x5
Packit 030a23
    andi    \scratch1, \scratch1, 0xfc00     /* green moved to bits 15..10 */
Packit 030a23
    srl     \scratch2, \in_565,   0x1
Packit 030a23
    andi    \scratch2, \scratch2, 0x300      /* top 2 bits of green, bits 9..8 */
Packit 030a23
    or      \scratch2, \scratch1, \scratch2  /* 8-bit green in bits 15..8 */
Packit 030a23
    or      \out_8888, \out_8888, \scratch2
Packit 030a23
Packit 030a23
    andi    \scratch1, \in_565,   0xf800     /* red in bits 15..11 */
Packit 030a23
    srl     \scratch2, \scratch1, 0x5
Packit 030a23
    andi    \scratch2, \scratch2, 0xff00     /* top 3 bits of red, bits 10..8 */
Packit 030a23
    or      \scratch1, \scratch1, \scratch2  /* 8-bit red in bits 15..8 */
Packit 030a23
    sll     \scratch1, \scratch1, 0x8        /* move red to bits 23..16 */
Packit 030a23
    or      \out_8888, \out_8888, \scratch1
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * Conversion of two r5g6b5 pixels (in1_565 and in2_565) to two a8r8g8b8 pixels
Packit 030a23
 * returned in (out1_8888 and out2_8888) registers. Requires four scratch
Packit 030a23
 * registers (scratch1 ... scratch4). It also requires maskG and maskB for
Packit 030a23
 * color component extractions. These masks must have following values:
Packit 030a23
 *   li       maskG, 0x07e007e0
Packit 030a23
 *   li       maskB, 0x001F001F
Packit 030a23
 */
Packit 030a23
.macro CONVERT_2x0565_TO_2x8888 in1_565, in2_565,     \
Packit 030a23
                                out1_8888, out2_8888, \
Packit 030a23
                                maskG, maskB,         \
Packit 030a23
                                scratch1, scratch2, scratch3, scratch4
Packit 030a23
    /*
     * Both pixels are packed into one word (in1 in the upper halfword, in2
     * in the lower) and widened in parallel with paired-halfword operations.
     * NOTE(review): the plain "or" below relies on the upper 16 bits of
     * in2_565 being zero - confirm against callers.
     */
    sll               \scratch1,  \in1_565,   16
Packit 030a23
    or                \scratch1,  \scratch1,  \in2_565      /* in1 | in2 */
Packit 030a23
    lui               \out2_8888, 0xff00
Packit 030a23
    ori               \out2_8888, \out2_8888, 0xff00        /* 0xff00ff00: alpha of both pixels */
Packit 030a23
    shrl.ph           \scratch2,  \scratch1,  11            /* 5-bit red per halfword */
Packit 030a23
    and               \scratch3,  \scratch1,  \maskG        /* green, still at bits 10..5 */
Packit 030a23
    shra.ph           \scratch4,  \scratch2,  2             /* top 3 bits of red */
Packit 030a23
    shll.ph           \scratch2,  \scratch2,  3             /* red << 3 */
Packit 030a23
    shll.ph           \scratch3,  \scratch3,  5             /* green moved to bits 15..10 */
Packit 030a23
    or                \scratch2,  \scratch2,  \scratch4     /* 8-bit red in the low byte */
Packit 030a23
    shrl.qb           \scratch4,  \scratch3,  6             /* top 2 bits of green, bits 9..8 */
Packit 030a23
    or                \out2_8888, \out2_8888, \scratch2     /* A | R per halfword */
Packit 030a23
    or                \scratch3,  \scratch3,  \scratch4     /* 8-bit green in the high byte */
Packit 030a23
    and               \scratch1,  \scratch1,  \maskB        /* 5-bit blue per halfword */
Packit 030a23
    shll.ph           \scratch2,  \scratch1,  3             /* blue << 3 */
Packit 030a23
    shra.ph           \scratch4,  \scratch1,  2             /* top 3 bits of blue */
Packit 030a23
    or                \scratch2,  \scratch2,  \scratch4     /* 8-bit blue in the low byte */
Packit 030a23
    or                \scratch3,  \scratch2,  \scratch3     /* G | B per halfword */
Packit 030a23
    precrq.ph.w       \out1_8888, \out2_8888, \scratch3     /* upper halfwords: pixel from in1 */
Packit 030a23
    precr_sra.ph.w    \out2_8888, \scratch3,  0             /* lower halfwords: pixel from in2 */
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * Conversion of single a8r8g8b8 pixel (in_8888) to single r5g6b5 pixel
Packit 030a23
 * returned in (out_565) register. Requires two temporary registers
Packit 030a23
 * (scratch1 and scratch2).
Packit 030a23
 */
Packit 030a23
.macro CONVERT_1x8888_TO_1x0565 in_8888, \
Packit 030a23
                                out_565, \
Packit 030a23
                                scratch1, scratch2
Packit 030a23
    /*
     * Keeps the top 5/6/5 bits of the blue/green/red bytes; the alpha byte is
     * dropped. out_565 is written first and in_8888 is read afterwards, so
     * the two must be different registers.
     */
    ext     \out_565,  \in_8888,  0x3, 0x5   /* blue: bits 7..3 -> bits 4..0 */
Packit 030a23
    srl     \scratch1, \in_8888,  0x5
Packit 030a23
    andi    \scratch1, \scratch1, 0x07e0     /* green: bits 15..10 -> bits 10..5 */
Packit 030a23
    srl     \scratch2, \in_8888,  0x8
Packit 030a23
    andi    \scratch2, \scratch2, 0xf800     /* red: bits 23..19 -> bits 15..11 */
Packit 030a23
    or      \out_565,  \out_565,  \scratch1
Packit 030a23
    or      \out_565,  \out_565,  \scratch2
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * Conversion of two a8r8g8b8 pixels (in1_8888 and in2_8888) to two r5g6b5
Packit 030a23
 * pixels returned in (out1_565 and out2_565) registers. Requires two temporary
Packit 030a23
 * registers (scratch1 and scratch2; scratch2 is currently unused). It also
 * requires maskR, maskG and maskB
Packit 030a23
 * for color component extractions. These masks must have following values:
Packit 030a23
 *   li       maskR, 0xf800f800
Packit 030a23
 *   li       maskG, 0x07e007e0
Packit 030a23
 *   li       maskB, 0x001F001F
Packit 030a23
 * Value of input register in2_8888 is lost.
Packit 030a23
 */
Packit 030a23
.macro CONVERT_2x8888_TO_2x0565 in1_8888, in2_8888,  \
Packit 030a23
                                out1_565, out2_565,  \
Packit 030a23
                                maskR, maskG, maskB, \
Packit 030a23
                                scratch1, scratch2
Packit 030a23
    /*
     * Notes on register usage:
     *  - in2_8888 is overwritten and used as a temporary.
     *  - scratch2 is accepted as an argument but never referenced.
     *  - out1_565 ends up holding both results (pixel from in2 in the upper
     *    halfword, pixel from in1 in the lower); out2_565 is that upper
     *    halfword shifted down.
     */
    precr.qb.ph    \scratch1, \in2_8888, \in1_8888   /* R2 | B2 | R1 | B1 */
Packit 030a23
    precrq.qb.ph   \in2_8888, \in2_8888, \in1_8888   /* A2 | G2 | A1 | G1 */
Packit 030a23
    and            \out1_565, \scratch1, \maskR      /* top 5 bits of red, bits 15..11 */
Packit 030a23
    shrl.ph        \scratch1, \scratch1, 3
Packit 030a23
    shll.ph        \in2_8888, \in2_8888, 3
Packit 030a23
    and            \scratch1, \scratch1, \maskB      /* top 5 bits of blue, bits 4..0 */
Packit 030a23
    or             \out1_565, \out1_565, \scratch1
Packit 030a23
    and            \in2_8888, \in2_8888, \maskG      /* top 6 bits of green, bits 10..5 */
Packit 030a23
    or             \out1_565, \out1_565, \in2_8888
Packit 030a23
    srl            \out2_565, \out1_565, 16          /* second pixel on its own */
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * Multiply pixel (a8) with single pixel (a8r8g8b8). It requires maskLSR needed
Packit 030a23
 * for rounding process. maskLSR must have following value:
Packit 030a23
 *   li       maskLSR, 0x00ff00ff
Packit 030a23
 */
Packit 030a23
.macro MIPS_UN8x4_MUL_UN8 s_8888,  \
Packit 030a23
                          m_8,     \
Packit 030a23
                          d_8888,  \
Packit 030a23
                          maskLSR, \
Packit 030a23
                          scratch1, scratch2, scratch3
Packit 030a23
    /*
     * For every channel product x = C*M the result byte is the low byte of
     * (x + ((x >> 8) & 0xff)) >> 8, where both shifts are the rounding
     * shra_r.ph. m_8 is overwritten by the replv.ph below. d_8888 is first
     * written after the last read of s_8888, so the same register may be
     * passed for both (OVER_8888_8_8888 does this).
     */
    replv.ph          \m_8,      \m_8                 /*   0 | M | 0 | M */
Packit 030a23
    muleu_s.ph.qbl    \scratch1, \s_8888,   \m_8      /*    A*M  |  R*M */
Packit 030a23
    muleu_s.ph.qbr    \scratch2, \s_8888,   \m_8      /*    G*M  |  B*M */
Packit 030a23
    shra_r.ph         \scratch3, \scratch1, 8         /* rounded >> 8 */
Packit 030a23
    shra_r.ph         \d_8888,   \scratch2, 8         /* rounded >> 8 */
Packit 030a23
    and               \scratch3, \scratch3, \maskLSR  /* keep low byte of each halfword (A, R) */
Packit 030a23
    and               \d_8888,   \d_8888,   \maskLSR  /* keep low byte of each halfword (G, B) */
Packit 030a23
    addq.ph           \scratch1, \scratch1, \scratch3 /* x + (x >> 8) for A, R */
Packit 030a23
    addq.ph           \scratch2, \scratch2, \d_8888   /* x + (x >> 8) for G, B */
Packit 030a23
    shra_r.ph         \scratch1, \scratch1, 8         /* rounded >> 8 */
Packit 030a23
    shra_r.ph         \scratch2, \scratch2, 8         /* rounded >> 8 */
Packit 030a23
    precr.qb.ph       \d_8888,   \scratch1, \scratch2 /* pack low bytes: A | R | G | B */
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * Multiply two pixels (a8) with two pixels (a8r8g8b8). It requires maskLSR
Packit 030a23
 * needed for rounding process. maskLSR must have following value:
Packit 030a23
 *   li       maskLSR, 0x00ff00ff
Packit 030a23
 */
Packit 030a23
.macro MIPS_2xUN8x4_MUL_2xUN8 s1_8888, \
Packit 030a23
                              s2_8888, \
Packit 030a23
                              m1_8,    \
Packit 030a23
                              m2_8,    \
Packit 030a23
                              d1_8888, \
Packit 030a23
                              d2_8888, \
Packit 030a23
                              maskLSR, \
Packit 030a23
                              scratch1, scratch2, scratch3, \
Packit 030a23
                              scratch4, scratch5, scratch6
Packit 030a23
    /*
     * Two interleaved copies of the single-pixel UN8x4 * UN8 sequence.
     * m1_8 and m2_8 are overwritten by the replv.ph instructions. All reads
     * of s1_8888, s2_8888, m1_8 and m2_8 happen in the first six
     * instructions, before any destination or scratch5/scratch6 write.
     */
    replv.ph          \m1_8,     \m1_8                /*  0 | M1 | 0 | M1 */
Packit 030a23
    replv.ph          \m2_8,     \m2_8                /*  0 | M2 | 0 | M2 */
Packit 030a23
    muleu_s.ph.qbl    \scratch1, \s1_8888,  \m1_8     /*  A1*M1  |  R1*M1 */
Packit 030a23
    muleu_s.ph.qbr    \scratch2, \s1_8888,  \m1_8     /*  G1*M1  |  B1*M1 */
Packit 030a23
    muleu_s.ph.qbl    \scratch3, \s2_8888,  \m2_8     /*  A2*M2  |  R2*M2 */
Packit 030a23
    muleu_s.ph.qbr    \scratch4, \s2_8888,  \m2_8     /*  G2*M2  |  B2*M2 */
Packit 030a23
    shra_r.ph         \scratch5, \scratch1, 8
Packit 030a23
    shra_r.ph         \d1_8888,  \scratch2, 8
Packit 030a23
    shra_r.ph         \scratch6, \scratch3, 8
Packit 030a23
    shra_r.ph         \d2_8888,  \scratch4, 8
Packit 030a23
    and               \scratch5, \scratch5, \maskLSR  /* 0 |A1*M1| 0 |R1*M1 */
Packit 030a23
    and               \d1_8888,  \d1_8888,  \maskLSR  /* 0 |G1*M1| 0 |B1*M1 */
Packit 030a23
    and               \scratch6, \scratch6, \maskLSR  /* 0 |A2*M2| 0 |R2*M2 */
Packit 030a23
    and               \d2_8888,  \d2_8888,  \maskLSR  /* 0 |G2*M2| 0 |B2*M2 */
Packit 030a23
    addq.ph           \scratch1, \scratch1, \scratch5
Packit 030a23
    addq.ph           \scratch2, \scratch2, \d1_8888
Packit 030a23
    addq.ph           \scratch3, \scratch3, \scratch6
Packit 030a23
    addq.ph           \scratch4, \scratch4, \d2_8888
Packit 030a23
    shra_r.ph         \scratch1, \scratch1, 8
Packit 030a23
    shra_r.ph         \scratch2, \scratch2, 8
Packit 030a23
    shra_r.ph         \scratch3, \scratch3, 8
Packit 030a23
    shra_r.ph         \scratch4, \scratch4, 8
Packit 030a23
    precr.qb.ph       \d1_8888,  \scratch1, \scratch2 /* pack pixel 1: A | R | G | B */
Packit 030a23
    precr.qb.ph       \d2_8888,  \scratch3, \scratch4 /* pack pixel 2: A | R | G | B */
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * Multiply pixel (a8r8g8b8) with single pixel (a8r8g8b8). It requires maskLSR
Packit 030a23
 * needed for rounding process. maskLSR must have following value:
Packit 030a23
 *   li       maskLSR, 0x00ff00ff
Packit 030a23
 */
Packit 030a23
.macro MIPS_UN8x4_MUL_UN8x4 s_8888,  \
Packit 030a23
                            m_8888,  \
Packit 030a23
                            d_8888,  \
Packit 030a23
                            maskLSR, \
Packit 030a23
                            scratch1, scratch2, scratch3, scratch4
Packit 030a23
    /*
     * Per-channel multiply: each byte of s_8888 is multiplied by the byte of
     * m_8888 in the same position. s_8888 and m_8888 are only read; d_8888
     * is written once, by the final instruction.
     */
    preceu.ph.qbl     \scratch1, \m_8888              /*   0 | A | 0 | R */
Packit 030a23
    preceu.ph.qbr     \scratch2, \m_8888              /*   0 | G | 0 | B */
Packit 030a23
    muleu_s.ph.qbl    \scratch3, \s_8888,   \scratch1 /*    A*A  |  R*R */
Packit 030a23
    muleu_s.ph.qbr    \scratch4, \s_8888,   \scratch2 /*    G*G  |  B*B */
Packit 030a23
    shra_r.ph         \scratch1, \scratch3, 8
Packit 030a23
    shra_r.ph         \scratch2, \scratch4, 8
Packit 030a23
    and               \scratch1, \scratch1, \maskLSR  /*   0 |A*A| 0 |R*R */
Packit 030a23
    and               \scratch2, \scratch2, \maskLSR  /*   0 |G*G| 0 |B*B */
Packit 030a23
    addq.ph           \scratch1, \scratch1, \scratch3
Packit 030a23
    addq.ph           \scratch2, \scratch2, \scratch4
Packit 030a23
    shra_r.ph         \scratch1, \scratch1, 8
Packit 030a23
    shra_r.ph         \scratch2, \scratch2, 8
Packit 030a23
    precr.qb.ph       \d_8888,   \scratch1, \scratch2 /* pack low bytes: A | R | G | B */
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * Multiply two pixels (a8r8g8b8) with two pixels (a8r8g8b8). It requires
Packit 030a23
 * maskLSR needed for rounding process. maskLSR must have following value:
Packit 030a23
 *   li       maskLSR, 0x00ff00ff
Packit 030a23
 */
Packit 030a23
Packit 030a23
.macro MIPS_2xUN8x4_MUL_2xUN8x4 s1_8888,  \
Packit 030a23
                                s2_8888,  \
Packit 030a23
                                m1_8888,  \
Packit 030a23
                                m2_8888,  \
Packit 030a23
                                d1_8888,  \
Packit 030a23
                                d2_8888,  \
Packit 030a23
                                maskLSR,  \
Packit 030a23
                                scratch1, scratch2, scratch3, \
Packit 030a23
                                scratch4, scratch5, scratch6
Packit 030a23
    /*
     * Register reuse in this macro:
     *  - scratch5/scratch6 hold the products of pixel 1 (s1 * m1);
     *  - scratch1/scratch2 are reused for the products of pixel 2 (s2 * m2)
     *    once the unpacked m1 bytes have been consumed;
     *  - d1_8888/d2_8888 serve as temporaries for the A|R and G|B halves of
     *    pixel 2 until the final two pack instructions.
     * All four inputs are only read, and only in the first eight
     * instructions.
     */
    preceu.ph.qbl     \scratch1, \m1_8888             /*   0 | A | 0 | R */
Packit 030a23
    preceu.ph.qbr     \scratch2, \m1_8888             /*   0 | G | 0 | B */
Packit 030a23
    preceu.ph.qbl     \scratch3, \m2_8888             /*   0 | A | 0 | R */
Packit 030a23
    preceu.ph.qbr     \scratch4, \m2_8888             /*   0 | G | 0 | B */
Packit 030a23
    muleu_s.ph.qbl    \scratch5, \s1_8888,  \scratch1 /*    A*A  |  R*R */
Packit 030a23
    muleu_s.ph.qbr    \scratch6, \s1_8888,  \scratch2 /*    G*G  |  B*B */
Packit 030a23
    muleu_s.ph.qbl    \scratch1, \s2_8888,  \scratch3 /*    A*A  |  R*R */
Packit 030a23
    muleu_s.ph.qbr    \scratch2, \s2_8888,  \scratch4 /*    G*G  |  B*B */
Packit 030a23
    shra_r.ph         \scratch3, \scratch5, 8
Packit 030a23
    shra_r.ph         \scratch4, \scratch6, 8
Packit 030a23
    shra_r.ph         \d1_8888,  \scratch1, 8
Packit 030a23
    shra_r.ph         \d2_8888,  \scratch2, 8
Packit 030a23
    and               \scratch3, \scratch3, \maskLSR  /*   0 |A*A| 0 |R*R */
Packit 030a23
    and               \scratch4, \scratch4, \maskLSR  /*   0 |G*G| 0 |B*B */
Packit 030a23
    and               \d1_8888,  \d1_8888,  \maskLSR  /*   0 |A*A| 0 |R*R */
Packit 030a23
    and               \d2_8888,  \d2_8888,  \maskLSR  /*   0 |G*G| 0 |B*B */
Packit 030a23
    addq.ph           \scratch3, \scratch3, \scratch5
Packit 030a23
    addq.ph           \scratch4, \scratch4, \scratch6
Packit 030a23
    addq.ph           \d1_8888,  \d1_8888,  \scratch1
Packit 030a23
    addq.ph           \d2_8888,  \d2_8888,  \scratch2
Packit 030a23
    shra_r.ph         \scratch3, \scratch3, 8
Packit 030a23
    shra_r.ph         \scratch4, \scratch4, 8
Packit 030a23
    shra_r.ph         \scratch5, \d1_8888,  8
Packit 030a23
    shra_r.ph         \scratch6, \d2_8888,  8
Packit 030a23
    precr.qb.ph       \d1_8888,  \scratch3, \scratch4 /* pack pixel 1: A | R | G | B */
Packit 030a23
    precr.qb.ph       \d2_8888,  \scratch5, \scratch6 /* pack pixel 2: A | R | G | B */
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8
Packit 030a23
 * destination pixel (d_8888) using a8 mask (m_8). It also requires maskLSR
Packit 030a23
 * needed for rounding process. maskLSR must have following value:
Packit 030a23
 *   li       maskLSR, 0x00ff00ff
Packit 030a23
 */
Packit 030a23
.macro OVER_8888_8_8888 s_8888,   \
Packit 030a23
                        m_8,      \
Packit 030a23
                        d_8888,   \
Packit 030a23
                        out_8888, \
Packit 030a23
                        maskLSR,  \
Packit 030a23
                        scratch1, scratch2, scratch3, scratch4
Packit 030a23
    /*
     * Besides the scratch registers, this macro overwrites m_8 (inside the
     * first multiply) and d_8888 (destination of the second multiply).
     */
    /* scratch1 = s_8888 * m_8 */
    MIPS_UN8x4_MUL_UN8 \s_8888,   \m_8, \
Packit 030a23
                       \scratch1, \maskLSR, \
Packit 030a23
                       \scratch2, \scratch3, \scratch4
Packit 030a23
Packit 030a23
    /* scratch2 = 255 - top (alpha) byte of the masked source */
    not                \scratch2, \scratch1
Packit 030a23
    srl                \scratch2, \scratch2, 24
Packit 030a23
Packit 030a23
    /* d_8888 = d_8888 * scratch2, in place; out_8888 is used as a temporary */
    MIPS_UN8x4_MUL_UN8 \d_8888,   \scratch2, \
Packit 030a23
                       \d_8888,   \maskLSR,  \
Packit 030a23
                       \scratch3, \scratch4, \out_8888
Packit 030a23
Packit 030a23
    /* out_8888 = per-byte saturating sum of the two products */
    addu_s.qb          \out_8888, \d_8888,   \scratch1
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
Packit 030a23
 * a8r8g8b8 destination pixels (d1_8888 and d2_8888) using a8 masks (m1_8 and
Packit 030a23
 * m2_8). It also requires maskLSR needed for rounding process. maskLSR must
Packit 030a23
 * have following value:
Packit 030a23
 *   li       maskLSR, 0x00ff00ff
Packit 030a23
 */
Packit 030a23
.macro OVER_2x8888_2x8_2x8888 s1_8888,   \
Packit 030a23
                              s2_8888,   \
Packit 030a23
                              m1_8,      \
Packit 030a23
                              m2_8,      \
Packit 030a23
                              d1_8888,   \
Packit 030a23
                              d2_8888,   \
Packit 030a23
                              out1_8888, \
Packit 030a23
                              out2_8888, \
Packit 030a23
                              maskLSR,   \
Packit 030a23
                              scratch1, scratch2, scratch3, \
Packit 030a23
                              scratch4, scratch5, scratch6
Packit 030a23
    /*
     * Besides the scratch registers, this macro overwrites m1_8/m2_8 (inside
     * the first multiply) and d1_8888/d2_8888 (destinations of the second
     * multiply). out1_8888/out2_8888 are used as temporaries by both
     * multiplies before receiving the final result.
     */
    /* scratch1 = s1_8888 * m1_8, scratch2 = s2_8888 * m2_8 */
    MIPS_2xUN8x4_MUL_2xUN8 \s1_8888,   \s2_8888, \
Packit 030a23
                           \m1_8,      \m2_8, \
Packit 030a23
                           \scratch1,  \scratch2, \
Packit 030a23
                           \maskLSR, \
Packit 030a23
                           \scratch3,  \scratch4, \out1_8888, \
Packit 030a23
                           \out2_8888, \scratch5, \scratch6
Packit 030a23
Packit 030a23
    /* scratch3/scratch4 = 255 - top (alpha) byte of each masked source */
    not                    \scratch3,  \scratch1
Packit 030a23
    srl                    \scratch3,  \scratch3, 24
Packit 030a23
    not                    \scratch4,  \scratch2
Packit 030a23
    srl                    \scratch4,  \scratch4, 24
Packit 030a23
Packit 030a23
    /* d1_8888 *= scratch3, d2_8888 *= scratch4, both in place */
    MIPS_2xUN8x4_MUL_2xUN8 \d1_8888,   \d2_8888, \
Packit 030a23
                           \scratch3,  \scratch4, \
Packit 030a23
                           \d1_8888,   \d2_8888, \
Packit 030a23
                           \maskLSR, \
Packit 030a23
                           \scratch5,  \scratch6, \out1_8888, \
Packit 030a23
                           \out2_8888, \scratch3, \scratch4
Packit 030a23
Packit 030a23
    /* per-byte saturating sum of the two products of each pixel */
    addu_s.qb              \out1_8888, \d1_8888,  \scratch1
Packit 030a23
    addu_s.qb              \out2_8888, \d2_8888,  \scratch2
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8
Packit 030a23
 * destination pixel (d_8888). It also requires maskLSR needed for rounding
Packit 030a23
 * process. maskLSR must have following value:
Packit 030a23
 *   li       maskLSR, 0x00ff00ff
Packit 030a23
 */
Packit 030a23
.macro OVER_8888_8888 s_8888,   \
Packit 030a23
                      d_8888,   \
Packit 030a23
                      out_8888, \
Packit 030a23
                      maskLSR,  \
Packit 030a23
                      scratch1, scratch2, scratch3, scratch4
Packit 030a23
    /*
     * s_8888 and d_8888 are only read. s_8888 is read again after out_8888
     * has been written, so out_8888 must not be the same register as s_8888.
     */
    /* scratch1 = 255 - top (alpha) byte of the source */
    not                \scratch1, \s_8888
Packit 030a23
    srl                \scratch1, \scratch1, 24
Packit 030a23
Packit 030a23
    /* out_8888 = d_8888 * scratch1 */
    MIPS_UN8x4_MUL_UN8 \d_8888,   \scratch1, \
Packit 030a23
                       \out_8888, \maskLSR, \
Packit 030a23
                       \scratch2, \scratch3, \scratch4
Packit 030a23
Packit 030a23
    /* out_8888 += s_8888, per byte with unsigned saturation */
    addu_s.qb          \out_8888, \out_8888, \s_8888
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
Packit 030a23
 * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
Packit 030a23
 * a8r8g8b8 destination pixels (d1_8888 and d2_8888). It also requires maskLSR
Packit 030a23
 * needed for rounding process. maskLSR must have following value:
Packit 030a23
 *   li       maskLSR, 0x00ff00ff
Packit 030a23
 */
Packit 030a23
.macro OVER_2x8888_2x8888 s1_8888,   \
Packit 030a23
                          s2_8888,   \
Packit 030a23
                          d1_8888,   \
Packit 030a23
                          d2_8888,   \
Packit 030a23
                          out1_8888, \
Packit 030a23
                          out2_8888, \
Packit 030a23
                          maskLSR,   \
Packit 030a23
                          scratch1, scratch2, scratch3, \
Packit 030a23
                          scratch4, scratch5, scratch6
Packit 030a23
    /*
     * d1_8888 and d2_8888 are also passed to the multiply as its last two
     * scratch registers, so their values are overwritten. scratch6 is passed
     * as well; scratch5 is passed as the third scratch register.
     */
    /* scratch1/scratch2 = 255 - top (alpha) byte of each source */
    not                    \scratch1,  \s1_8888
Packit 030a23
    srl                    \scratch1,  \scratch1,  24
Packit 030a23
    not                    \scratch2,  \s2_8888
Packit 030a23
    srl                    \scratch2,  \scratch2,  24
Packit 030a23
    /* out1_8888 = d1_8888 * scratch1, out2_8888 = d2_8888 * scratch2 */
    MIPS_2xUN8x4_MUL_2xUN8 \d1_8888,   \d2_8888, \
Packit 030a23
                           \scratch1,  \scratch2,  \
Packit 030a23
                           \out1_8888, \out2_8888, \
Packit 030a23
                           \maskLSR, \
Packit 030a23
                           \scratch3,  \scratch4, \scratch5, \
Packit 030a23
                           \scratch6,  \d1_8888,  \d2_8888
Packit 030a23
Packit 030a23
    /* add the sources, per byte with unsigned saturation */
    addu_s.qb              \out1_8888, \out1_8888, \s1_8888
Packit 030a23
    addu_s.qb              \out2_8888, \out2_8888, \s2_8888
Packit 030a23
.endm
Packit 030a23
Packit 030a23
/*
 * Multiply-then-add on a single a8r8g8b8 pixel:
 *   out_8888 = result of MIPS_UN8x4_MUL_UN8 applied to s_8888 and m_8
 *              (helper defined elsewhere in this file; expected to scale
 *              each channel of s_8888 by the 8-bit value m_8)
 *   out_8888 = out_8888 + d_8888   per byte, unsigned saturating
 *
 * maskLSR is forwarded unchanged to MIPS_UN8x4_MUL_UN8; the OVER macros in
 * this file document its required value as 0x00ff00ff.
 *
 * scratch1..scratch3 are handed to the helper as temporaries, so treat them
 * as clobbered. d_8888 is read after out_8888 has been produced, so out_8888
 * should not be the same register as d_8888.
 */
.macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888,   \
                                    m_8,      \
                                    d_8888,   \
                                    out_8888, \
                                    maskLSR,  \
                                    scratch1, scratch2, scratch3
    MIPS_UN8x4_MUL_UN8 \s_8888, \m_8, \
                       \out_8888, \maskLSR, \
                       \scratch1, \scratch2, \scratch3

    /* accumulate the destination, saturating each byte */
    addu_s.qb          \out_8888, \out_8888, \d_8888
.endm
Packit 030a23
Packit 030a23
/*
 * Two-pixel variant of MIPS_UN8x4_MUL_UN8_ADD_UN8x4 (N is 1 or 2):
 *   outN_8888 = result of MIPS_2xUN8x4_MUL_2xUN8 applied to sN_8888 and
 *               mN_8 (helper defined elsewhere in this file; expected to
 *               scale each channel of sN_8888 by the 8-bit value mN_8)
 *   outN_8888 = outN_8888 + dN_8888   per byte, unsigned saturating
 *
 * maskLSR is forwarded unchanged to MIPS_2xUN8x4_MUL_2xUN8; the OVER macros
 * in this file document its required value as 0x00ff00ff.
 *
 * scratch1..scratch6 are handed to the helper as temporaries, so treat them
 * as clobbered. d1_8888/d2_8888 are read after the outputs have been
 * produced, so the output registers should not alias them.
 */
.macro MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 s1_8888,   \
                                          s2_8888,   \
                                          m1_8,      \
                                          m2_8,      \
                                          d1_8888,   \
                                          d2_8888,   \
                                          out1_8888, \
                                          out2_8888, \
                                          maskLSR,   \
                                          scratch1, scratch2, scratch3, \
                                          scratch4, scratch5, scratch6
    MIPS_2xUN8x4_MUL_2xUN8 \s1_8888,   \s2_8888, \
                           \m1_8,      \m2_8, \
                           \out1_8888, \out2_8888, \
                           \maskLSR, \
                           \scratch1,  \scratch2, \scratch3, \
                           \scratch4,  \scratch5, \scratch6

    /* accumulate the destinations, saturating each byte */
    addu_s.qb              \out1_8888, \out1_8888, \d1_8888
    addu_s.qb              \out2_8888, \out2_8888, \d2_8888
.endm
Packit 030a23
Packit 030a23
/*
 * Weighted sum of four 32-bit pixels (tl, tr, bl, br), computed separately
 * for each of their four 8-bit channels. For every channel c:
 *
 *   sum_c = wt1 * tl_c + wt2 * tr_c + wb1 * bl_c + wb2 * br_c
 *
 * The four sums are built in the DSP accumulators:
 *   ac0 <- bits  7..0  of each pixel, low word read into "blue"
 *   ac1 <- bits 15..8  of each pixel, low word read into "green"
 *   ac2 <- bits 23..16 of each pixel, low word read into "red"
 *   ac3 <- bits 31..24 of each pixel, low word read into "alpha"
 *
 * The final precr.qb.ph / precrq.qb.ph sequence packs bits 23..16 of each
 * sum into one word, in the order alpha, red, green, blue from the most
 * significant byte down. That is (sum_c >> 16) & 0xff per channel, so the
 * weights are presumably 16-bit fixed-point factors that add up to 0x10000.
 * Confirm with the callers.
 *
 * Result:   written to tl (the top-left input register is overwritten).
 * Clobbers: scratch1, scratch2, alpha, red, green, blue and the
 *           accumulators ac0..ac3.
 * Read-only: tr, bl, br, wt1, wt2, wb1, wb2.
 *
 * "alpha" and "red" double as temporaries for the bl/br channel bytes until
 * the very end. Pass a distinct register for every argument: temporaries
 * are overwritten while the inputs and weights are still being read.
 *
 * The mflo reads of ac0 and ac1 are placed between later multiply groups,
 * presumably to overlap them with the multiplier latency. Keep that
 * ordering unless it has been measured.
 */
.macro BILINEAR_INTERPOLATE_SINGLE_PIXEL tl, tr, bl, br,          \
                                         scratch1, scratch2,      \
                                         alpha, red, green, blue, \
                                         wt1, wt2, wb1, wb2
    /* channel in bits 7..0 -> ac0 */
    andi            \scratch1, \tl,  0xff
    andi            \scratch2, \tr,  0xff
    andi            \alpha,    \bl,  0xff
    andi            \red,      \br,  0xff

    multu           $ac0,      \wt1, \scratch1
    maddu           $ac0,      \wt2, \scratch2
    maddu           $ac0,      \wb1, \alpha
    maddu           $ac0,      \wb2, \red

    /* channel in bits 15..8 -> ac1 */
    ext             \scratch1, \tl,  8, 8
    ext             \scratch2, \tr,  8, 8
    ext             \alpha,    \bl,  8, 8
    ext             \red,      \br,  8, 8

    multu           $ac1,      \wt1, \scratch1
    maddu           $ac1,      \wt2, \scratch2
    maddu           $ac1,      \wb1, \alpha
    maddu           $ac1,      \wb2, \red

    /* channel in bits 23..16 -> ac2 */
    ext             \scratch1, \tl,  16, 8
    ext             \scratch2, \tr,  16, 8
    ext             \alpha,    \bl,  16, 8
    ext             \red,      \br,  16, 8

    mflo            \blue,     $ac0

    multu           $ac2,      \wt1, \scratch1
    maddu           $ac2,      \wt2, \scratch2
    maddu           $ac2,      \wb1, \alpha
    maddu           $ac2,      \wb2, \red

    /* channel in bits 31..24 -> ac3 */
    ext             \scratch1, \tl,  24, 8
    ext             \scratch2, \tr,  24, 8
    ext             \alpha,    \bl,  24, 8
    ext             \red,      \br,  24, 8

    mflo            \green,    $ac1

    multu           $ac3,      \wt1, \scratch1
    maddu           $ac3,      \wt2, \scratch2
    maddu           $ac3,      \wb1, \alpha
    maddu           $ac3,      \wb2, \red

    mflo            \red,      $ac2
    mflo            \alpha,    $ac3

    /* pack bits 23..16 of the four sums into tl */
    precr.qb.ph     \alpha,    \alpha, \red
    precr.qb.ph     \scratch1, \green, \blue
    precrq.qb.ph    \tl,       \alpha, \scratch1
.endm
Packit 030a23
Packit 030a23
#endif //PIXMAN_MIPS_DSPR2_ASM_H