/*
 * Copyright (c) 2012
 *      MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "pixman-mips-dspr2-asm.h"

/*
 * This routine could be optimized for MIPS64. The current code only
 * uses MIPS32 instructions.
 */

/*
 * Endian-neutral names for the MIPS partial-word load/store instructions.
 *
 * "HI" is the instruction that, given the address of the first byte of a
 * (possibly unaligned) word, accesses the bytes from that address up to the
 * end of the enclosing aligned word.  "LO" is its partner, used with the
 * address of the last byte of the word.  The routine below pairs them as
 *      LWHI  reg, 0(ptr)
 *      LWLO  reg, 3(ptr)
 * to read one word from an unaligned pointer.
 *
 * NOTE(review): EB is presumably defined by the build for big-endian
 * targets -- confirm where it comes from.
 */
#ifdef EB
# define LWHI   lwl     /* high part is left in big-endian */
# define SWHI   swl     /* high part is left in big-endian */
# define LWLO   lwr     /* low part is right in big-endian */
# define SWLO   swr     /* low part is right in big-endian */
#else
# define LWHI   lwr     /* high part is right in little-endian */
# define SWHI   swr     /* high part is right in little-endian */
# define LWLO   lwl     /* low part is left in little-endian */
# define SWLO   swl     /* low part is left in little-endian */
#endif

/*
 * pixman_mips_fast_memcpy -- copy a2 bytes from a1 to a0.
 *
 * In:      a0 = dst, a1 = src, a2 = byte count
 * Out:     v0 = dst as passed in
 * Scratch: AT, v1, a3, t0-t9 (no stack is used)
 *
 * Strategy:
 *   - fewer than 8 bytes: plain byte loop ($last8);
 *   - src and dst share the same low two address bits: copy 1-3 head bytes
 *     to reach a word boundary, then use lw/sw in 64-byte, 32-byte and
 *     4-byte steps, and finish with a byte loop;
 *   - otherwise ($unaligned): align dst only, and read every source word
 *     with an LWHI/LWLO pair while still storing with aligned sw.
 *
 * The code is scheduled by hand.  The instruction that follows each branch
 * or jump is its delay slot and executes whether or not the branch is taken;
 * those instructions are indented by one extra space.
 * NOTE(review): this relies on LEAF_MIPS32R2 selecting ".set noreorder" --
 * confirm in pixman-mips-dspr2-asm.h.
 *
 * NOTE(review): the byte count is tested with signed instructions (slti,
 * blez), so a count with bit 31 set copies nothing.
 */
LEAF_MIPS32R2(pixman_mips_fast_memcpy)

    slti    AT, a2, 8
    bne     AT, zero, $last8        /* fewer than 8 bytes: byte loop only */
     move   v0, a0                  /* memcpy returns the dst pointer */

    /* Test if the src and dst are word-aligned, or can be made word-aligned */
    xor     t8, a1, a0
    andi    t8, t8, 0x3             /* t8 is a0/a1 word-displacement */

    bne     t8, zero, $unaligned
     negu   a3, a0

    andi    a3, a3, 0x3             /* we need to copy a3 bytes to make a0/a1 aligned */
    beq     a3, zero, $chk16w       /* when a3=0 then the dst (a0) is word-aligned */
     subu   a2, a2, a3              /* now a2 is the remaining byte count */

    /*
     * src and dst have the same offset inside their words, so one partial
     * load/store pair moves exactly the a3 head bytes.
     */
    LWHI    t8, 0(a1)
    addu    a1, a1, a3
    SWHI    t8, 0(a0)
    addu    a0, a0, a3

    /* Now the dst/src are mutually word-aligned with word-aligned addresses */
$chk16w:
    andi    t8, a2, 0x3f            /* any whole 64-byte chunks? */
                                    /* t8 is the byte count after 64-byte chunks */

    beq     a2, t8, $chk8w          /* if a2==t8, no 64-byte chunks */
                                    /* There will be at most 1 32-byte chunk after it */
     subu   a3, a2, t8              /* subtract the remainder from a2 */
                                    /* Here a3 counts bytes in 16w chunks */
    addu    a3, a0, a3              /* Now a3 is the final dst after 64-byte chunks */

    addu    t0, a0, a2              /* t0 is the "past the end" address */

    /*
     * When in the loop we exercise "pref 30, x(a0)", the a0+x should not be past
     * the "t0-32" address
     * This means: for x=128 the last "safe" a0 address is "t0-160"
     * Alternatively, for x=64 the last "safe" a0 address is "t0-96"
     * In the current version we use "pref 30, 128(a0)", so "t0-160" is the limit
     *
     * NOTE(review): the 32-byte margin looks like a 32-byte cache line
     * assumption -- confirm for the targeted cores.
     */
    subu    t9, t0, 160             /* t9 is the "last safe pref 30, 128(a0)" address */

    pref    0, 0(a1)                /* bring the first line of src, addr 0 */
    pref    0, 32(a1)               /* bring the second line of src, addr 32 */
    pref    0, 64(a1)               /* bring the third line of src, addr 64 */
    pref    30, 32(a0)              /* safe, as we have at least 64 bytes ahead */
    /* In case the a0 > t9 don't use "pref 30" at all */
    sgtu    v1, a0, t9              /* v1 = 1 when dst is past the safe limit */
    bgtz    v1, $loop16w            /* skip "pref 30, 64(a0)" for too short arrays */
     nop
    /* otherwise, start with using pref30 */
    pref    30, 64(a0)

    /* Main aligned loop: 64 bytes per iteration, as two 32-byte halves */
$loop16w:
    pref    0, 96(a1)
    lw      t0, 0(a1)
    bgtz    v1, $skip_pref30_96     /* skip "pref 30, 96(a0)" */
     lw     t1, 4(a1)
    pref    30, 96(a0)              /* continue setting up the dest, addr 96 */
$skip_pref30_96:
    lw      t2, 8(a1)
    lw      t3, 12(a1)
    lw      t4, 16(a1)
    lw      t5, 20(a1)
    lw      t6, 24(a1)
    lw      t7, 28(a1)
    pref    0, 128(a1)              /* bring the next lines of src, addr 128 */

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)

    lw      t0, 32(a1)
    bgtz    v1, $skip_pref30_128    /* skip "pref 30, 128(a0)" */
     lw     t1, 36(a1)
    pref    30, 128(a0)             /* continue setting up the dest, addr 128 */
$skip_pref30_128:
    lw      t2, 40(a1)
    lw      t3, 44(a1)
    lw      t4, 48(a1)
    lw      t5, 52(a1)
    lw      t6, 56(a1)
    lw      t7, 60(a1)
    pref    0, 160(a1)              /* bring the next lines of src, addr 160 */

    sw      t0, 32(a0)
    sw      t1, 36(a0)
    sw      t2, 40(a0)
    sw      t3, 44(a0)
    sw      t4, 48(a0)
    sw      t5, 52(a0)
    sw      t6, 56(a0)
    sw      t7, 60(a0)

    addiu   a0, a0, 64              /* adding 64 to dest */
    sgtu    v1, a0, t9              /* re-evaluate the "pref 30" guard */
    bne     a0, a3, $loop16w
     addiu  a1, a1, 64              /* adding 64 to src */
    move    a2, t8                  /* a2 = bytes left after the 64-byte chunks */

    /* Here we have src and dest word-aligned but less than 64-bytes to go */

$chk8w:
    pref    0, 0x0(a1)
    andi    t8, a2, 0x1f            /* is there a 32-byte chunk? */
                                    /* the t8 is the remainder count past 32-bytes */
    beq     a2, t8, $chk1w          /* when a2=t8, no 32-byte chunk */
     nop

    lw      t0, 0(a1)
    lw      t1, 4(a1)
    lw      t2, 8(a1)
    lw      t3, 12(a1)
    lw      t4, 16(a1)
    lw      t5, 20(a1)
    lw      t6, 24(a1)
    lw      t7, 28(a1)
    addiu   a1, a1, 32

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)
    addiu   a0, a0, 32

$chk1w:
    andi    a2, t8, 0x3             /* now a2 is the remainder past 1w chunks */
    beq     a2, t8, $last8          /* no whole word left */
     subu   a3, t8, a2              /* a3 is count of bytes in 1w chunks */
    addu    a3, a0, a3              /* now a3 is the dst address past the 1w chunks */

    /* copying in words (4-byte chunks) */
$wordCopy_loop:
    lw      t3, 0(a1)               /* the first t3 may be equal t0 ... optimize? */
    addiu   a1, a1, 4
    addiu   a0, a0, 4
    bne     a0, a3, $wordCopy_loop
     sw     t3, -4(a0)              /* a0 was already advanced, hence -4 */

    /* For the last (<8) bytes */
$last8:
    blez    a2, $leave              /* nothing left to copy */
     addu   a3, a0, a2              /* a3 is the dst end (one past the last byte) */
$last8loop:
    lb      v1, 0(a1)
    addiu   a1, a1, 1
    addiu   a0, a0, 1
    bne     a0, a3, $last8loop
     sb     v1, -1(a0)              /* a0 was already advanced, hence -1 */

$leave:
    j       ra
     nop

    /*
     * UNALIGNED case: src and dst differ in their low two address bits.
     */

$unaligned:
    /* got here with a3="negu a0" */
    andi    a3, a3, 0x3             /* test if the a0 is word aligned */
    beqz    a3, $ua_chk16w
     subu   a2, a2, a3              /* bytes left after initial a3 bytes */

    LWHI    v1, 0(a1)               /* unaligned load of one full source word */
    LWLO    v1, 3(a1)
    addu    a1, a1, a3              /* a3 may be here 1, 2 or 3 */
    SWHI    v1, 0(a0)               /* stores only the a3 bytes up to the word boundary */
    addu    a0, a0, a3              /* below the dst will be word aligned (NOTE1) */

$ua_chk16w:
    andi    t8, a2, 0x3f            /* any whole 64-byte chunks? */
                                    /* t8 is the byte count after 64-byte chunks */
    beq     a2, t8, $ua_chk8w       /* if a2==t8, no 64-byte chunks */
                                    /* There will be at most 1 32-byte chunk after it */
     subu   a3, a2, t8              /* subtract the remainder from a2 */
                                    /* Here a3 counts bytes in 16w chunks */
    addu    a3, a0, a3              /* Now a3 is the final dst after 64-byte chunks */

    addu    t0, a0, a2              /* t0 is the "past the end" address */

    subu    t9, t0, 160             /* t9 is the "last safe pref 30, 128(a0)" address */

    pref    0, 0(a1)                /* bring the first line of src, addr 0 */
    pref    0, 32(a1)               /* bring the second line of src, addr 32 */
    pref    0, 64(a1)               /* bring the third line of src, addr 64 */
    pref    30, 32(a0)              /* safe, as we have at least 64 bytes ahead */
    /* In case the a0 > t9 don't use "pref 30" at all */
    sgtu    v1, a0, t9              /* v1 = 1 when dst is past the safe limit */
    bgtz    v1, $ua_loop16w         /* skip "pref 30, 64(a0)" for too short arrays */
     nop
    /* otherwise, start with using pref30 */
    pref    30, 64(a0)

    /* Main unaligned loop: 64 bytes per iteration, as two 32-byte halves */
$ua_loop16w:
    pref    0, 96(a1)
    LWHI    t0, 0(a1)
    LWLO    t0, 3(a1)
    LWHI    t1, 4(a1)
    bgtz    v1, $ua_skip_pref30_96  /* skip "pref 30, 96(a0)" */
     LWLO   t1, 7(a1)
    pref    30, 96(a0)              /* continue setting up the dest, addr 96 */
$ua_skip_pref30_96:
    LWHI    t2, 8(a1)
    LWLO    t2, 11(a1)
    LWHI    t3, 12(a1)
    LWLO    t3, 15(a1)
    LWHI    t4, 16(a1)
    LWLO    t4, 19(a1)
    LWHI    t5, 20(a1)
    LWLO    t5, 23(a1)
    LWHI    t6, 24(a1)
    LWLO    t6, 27(a1)
    LWHI    t7, 28(a1)
    LWLO    t7, 31(a1)
    pref    0, 128(a1)              /* bring the next lines of src, addr 128 */

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)

    LWHI    t0, 32(a1)
    LWLO    t0, 35(a1)
    LWHI    t1, 36(a1)
    bgtz    v1, $ua_skip_pref30_128 /* skip "pref 30, 128(a0)" */
     LWLO   t1, 39(a1)
    pref    30, 128(a0)             /* continue setting up the dest, addr 128 */
$ua_skip_pref30_128:
    LWHI    t2, 40(a1)
    LWLO    t2, 43(a1)
    LWHI    t3, 44(a1)
    LWLO    t3, 47(a1)
    LWHI    t4, 48(a1)
    LWLO    t4, 51(a1)
    LWHI    t5, 52(a1)
    LWLO    t5, 55(a1)
    LWHI    t6, 56(a1)
    LWLO    t6, 59(a1)
    LWHI    t7, 60(a1)
    LWLO    t7, 63(a1)
    pref    0, 160(a1)              /* bring the next lines of src, addr 160 */

    sw      t0, 32(a0)
    sw      t1, 36(a0)
    sw      t2, 40(a0)
    sw      t3, 44(a0)
    sw      t4, 48(a0)
    sw      t5, 52(a0)
    sw      t6, 56(a0)
    sw      t7, 60(a0)

    addiu   a0, a0, 64              /* adding 64 to dest */
    sgtu    v1, a0, t9              /* re-evaluate the "pref 30" guard */
    bne     a0, a3, $ua_loop16w
     addiu  a1, a1, 64              /* adding 64 to src */
    move    a2, t8                  /* a2 = bytes left after the 64-byte chunks */

    /* Here dst is word-aligned (src is not) and less than 64 bytes remain */

$ua_chk8w:
    pref    0, 0x0(a1)
    andi    t8, a2, 0x1f            /* is there a 32-byte chunk? */
                                    /* the t8 is the remainder count */
    beq     a2, t8, $ua_chk1w       /* when a2=t8, no 32-byte chunk */
    /*
     * The first load below sits in the delay slot, so it also runs when
     * the branch is taken; t0 is not read on that path.  src is never
     * word-aligned here, so the access stays inside an aligned word that
     * also holds an already-copied source byte.
     */
     LWHI   t0, 0(a1)
    LWLO    t0, 3(a1)
    LWHI    t1, 4(a1)
    LWLO    t1, 7(a1)
    LWHI    t2, 8(a1)
    LWLO    t2, 11(a1)
    LWHI    t3, 12(a1)
    LWLO    t3, 15(a1)
    LWHI    t4, 16(a1)
    LWLO    t4, 19(a1)
    LWHI    t5, 20(a1)
    LWLO    t5, 23(a1)
    LWHI    t6, 24(a1)
    LWLO    t6, 27(a1)
    LWHI    t7, 28(a1)
    LWLO    t7, 31(a1)
    addiu   a1, a1, 32

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)
    addiu   a0, a0, 32

$ua_chk1w:
    andi    a2, t8, 0x3             /* now a2 is the remainder past 1w chunks */
    beq     a2, t8, $ua_smallCopy   /* no whole word left */
     subu   a3, t8, a2              /* a3 is count of bytes in 1w chunks */
    addu    a3, a0, a3              /* now a3 is the dst address past the 1w chunks */

    /* copying in words (4-byte chunks) */
$ua_wordCopy_loop:
    LWHI    v1, 0(a1)
    LWLO    v1, 3(a1)
    addiu   a1, a1, 4
    addiu   a0, a0, 4               /* note: dst=a0 is word aligned here, see NOTE1 */
    bne     a0, a3, $ua_wordCopy_loop
     sw     v1, -4(a0)              /* a0 was already advanced, hence -4 */

    /* Now less than 4 bytes (value in a2) left to copy */
$ua_smallCopy:
    beqz    a2, $leave              /* nothing left to copy */
     addu   a3, a0, a2              /* a3 is the dst end (one past the last byte) */
$ua_smallCopy_loop:
    lb      v1, 0(a1)
    addiu   a1, a1, 1
    addiu   a0, a0, 1
    bne     a0, a3, $ua_smallCopy_loop
     sb     v1, -1(a0)              /* a0 was already advanced, hence -1 */

    j       ra
     nop

END(pixman_mips_fast_memcpy)