Blame sysdeps/mips/memcpy.S

Packit Service 82fcde
/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
Packit Service 82fcde
   This file is part of the GNU C Library.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is free software; you can redistribute it and/or
Packit Service 82fcde
   modify it under the terms of the GNU Lesser General Public
Packit Service 82fcde
   License as published by the Free Software Foundation; either
Packit Service 82fcde
   version 2.1 of the License, or (at your option) any later version.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is distributed in the hope that it will be useful,
Packit Service 82fcde
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 82fcde
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service 82fcde
   Lesser General Public License for more details.
Packit Service 82fcde
Packit Service 82fcde
   You should have received a copy of the GNU Lesser General Public
Packit Service 82fcde
   License along with the GNU C Library.  If not, see
Packit Service 82fcde
   <http://www.gnu.org/licenses/>.  */
Packit Service 82fcde
Packit Service 82fcde
#ifdef ANDROID_CHANGES
Packit Service 82fcde
# include "machine/asm.h"
Packit Service 82fcde
# include "machine/regdef.h"
Packit Service 82fcde
# define USE_MEMMOVE_FOR_OVERLAP
Packit Service 82fcde
# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
Packit Service 82fcde
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
Packit Service 82fcde
#elif _LIBC
Packit Service 82fcde
# include <sysdep.h>
Packit Service 82fcde
# include <regdef.h>
Packit Service 82fcde
# include <sys/asm.h>
Packit Service 82fcde
# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
Packit Service 82fcde
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
Packit Service 82fcde
#elif defined _COMPILING_NEWLIB
Packit Service 82fcde
# include "machine/asm.h"
Packit Service 82fcde
# include "machine/regdef.h"
Packit Service 82fcde
# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
Packit Service 82fcde
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
Packit Service 82fcde
#else
Packit Service 82fcde
# include <regdef.h>
Packit Service 82fcde
# include <sys/asm.h>
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
/* Enable the prefetch code paths (USE_PREFETCH) when _MIPS_ISA is one of
   MIPS4, MIPS5, MIPS32 or MIPS64, unless the build defines
   DISABLE_PREFETCH.  */
#if (_MIPS_ISA == _MIPS_ISA_MIPS4) || (_MIPS_ISA == _MIPS_ISA_MIPS5) || \
Packit Service 82fcde
    (_MIPS_ISA == _MIPS_ISA_MIPS32) || (_MIPS_ISA == _MIPS_ISA_MIPS64)
Packit Service 82fcde
# ifndef DISABLE_PREFETCH
Packit Service 82fcde
#  define USE_PREFETCH
Packit Service 82fcde
# endif
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
/* Copy in 64-bit units (USE_DOUBLE) when _MIPS_SIM selects the N64 or N32
   ABI, unless the build defines DISABLE_DOUBLE.  */
#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABI64) || (_MIPS_SIM == _ABIN32))
Packit Service 82fcde
# ifndef DISABLE_DOUBLE
Packit Service 82fcde
#  define USE_DOUBLE
Packit Service 82fcde
# endif
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
/* Some asm.h files do not have the L macro definition.  */
Packit Service 82fcde
#ifndef L
Packit Service 82fcde
# if _MIPS_SIM == _ABIO32
Packit Service 82fcde
#  define L(label) $L ## label
Packit Service 82fcde
# else
Packit Service 82fcde
#  define L(label) .L ## label
Packit Service 82fcde
# endif
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
/* Some asm.h files do not have the PTR_ADDIU macro definition.
   The fallback is chosen from the pointer width, not from USE_DOUBLE:
   USE_DOUBLE is also defined for the N32 ABI (32-bit pointers) and can be
   turned off with DISABLE_DOUBLE on N64 (64-bit pointers), so it says
   nothing reliable about how wide a pointer is.  Only N64 needs the
   doubleword form, which matches the PTR_LSA fallback in this file.  */
#ifndef PTR_ADDIU
# if defined(_MIPS_SIM) && (_MIPS_SIM == _ABI64)
#  define PTR_ADDIU	daddiu
# else
#  define PTR_ADDIU	addiu
# endif
#endif
Packit Service 82fcde
Packit Service 82fcde
/* Some asm.h files do not have the PTR_SRA macro definition.
   The fallback is chosen from the pointer width, not from USE_DOUBLE:
   USE_DOUBLE is also defined for the N32 ABI (32-bit pointers) and can be
   turned off with DISABLE_DOUBLE on N64 (64-bit pointers).  Only N64 needs
   the doubleword shift, which matches the PTR_LSA fallback in this file.  */
#ifndef PTR_SRA
# if defined(_MIPS_SIM) && (_MIPS_SIM == _ABI64)
#  define PTR_SRA		dsra
# else
#  define PTR_SRA		sra
# endif
#endif
Packit Service 82fcde
Packit Service 82fcde
/* New R6 instructions that may not be in asm.h.  */
Packit Service 82fcde
#ifndef PTR_LSA
Packit Service 82fcde
# if _MIPS_SIM == _ABI64
Packit Service 82fcde
#  define PTR_LSA	dlsa
Packit Service 82fcde
# else
Packit Service 82fcde
#  define PTR_LSA	lsa
Packit Service 82fcde
# endif
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
/*
Packit Service 82fcde
 * Using PREFETCH_HINT_LOAD_STREAMED instead of PREFETCH_LOAD on load
Packit Service 82fcde
 * prefetches appears to offer a slight performance advantage.
Packit Service 82fcde
 *
Packit Service 82fcde
 * Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE
Packit Service 82fcde
 * or PREFETCH_STORE_STREAMED offers a large performance advantage
Packit Service 82fcde
 * but PREPAREFORSTORE has some special restrictions to consider.
Packit Service 82fcde
 *
Packit Service 82fcde
 * Prefetch with the 'prepare for store' hint does not copy a memory
Packit Service 82fcde
 * location into the cache, it just allocates a cache line and zeros
Packit Service 82fcde
 * it out.  This means that if you do not write to the entire cache
Packit Service 82fcde
 * line before writing it out to memory some data will get zero'ed out
Packit Service 82fcde
 * when the cache line is written back to memory and data will be lost.
Packit Service 82fcde
 *
Packit Service 82fcde
 * Also if you are using this memcpy to copy overlapping buffers it may
Packit Service 82fcde
 * not behave correctly when using the 'prepare for store' hint.  If you
Packit Service 82fcde
 * use the 'prepare for store' prefetch on a memory area that is in the
Packit Service 82fcde
 * memcpy source (as well as the memcpy destination), then you will get
Packit Service 82fcde
 * some data zero'ed out before you have a chance to read it and data will
Packit Service 82fcde
 * be lost.
Packit Service 82fcde
 *
Packit Service 82fcde
 * If you are going to use this memcpy routine with the 'prepare for store'
Packit Service 82fcde
 * prefetch you may want to set USE_MEMMOVE_FOR_OVERLAP in order to avoid
Packit Service 82fcde
 * the problem of running memcpy on overlapping buffers.
Packit Service 82fcde
 *
Packit Service 82fcde
 * There are ifdef'ed sections of this memcpy to make sure that it does not
Packit Service 82fcde
 * do prefetches on cache lines that are not going to be completely written.
Packit Service 82fcde
 * This code is only needed and only used when PREFETCH_STORE_HINT is set to
Packit Service 82fcde
 * PREFETCH_HINT_PREPAREFORSTORE.  This code assumes that cache lines are
Packit Service 82fcde
 * 32 bytes and if the cache line is larger it will not work correctly.
Packit Service 82fcde
 */
Packit Service 82fcde
Packit Service 82fcde
#ifdef USE_PREFETCH
Packit Service 82fcde
# define PREFETCH_HINT_LOAD		0
Packit Service 82fcde
# define PREFETCH_HINT_STORE		1
Packit Service 82fcde
# define PREFETCH_HINT_LOAD_STREAMED	4
Packit Service 82fcde
# define PREFETCH_HINT_STORE_STREAMED	5
Packit Service 82fcde
# define PREFETCH_HINT_LOAD_RETAINED	6
Packit Service 82fcde
# define PREFETCH_HINT_STORE_RETAINED	7
Packit Service 82fcde
# define PREFETCH_HINT_WRITEBACK_INVAL	25
Packit Service 82fcde
# define PREFETCH_HINT_PREPAREFORSTORE	30
Packit Service 82fcde
Packit Service 82fcde
/*
Packit Service 82fcde
 * If we have not picked out what hints to use at this point use the
Packit Service 82fcde
 * standard load and store prefetch hints.
Packit Service 82fcde
 */
Packit Service 82fcde
# ifndef PREFETCH_STORE_HINT
Packit Service 82fcde
#  define PREFETCH_STORE_HINT PREFETCH_HINT_STORE
Packit Service 82fcde
# endif
Packit Service 82fcde
# ifndef PREFETCH_LOAD_HINT
Packit Service 82fcde
#  define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD
Packit Service 82fcde
# endif
Packit Service 82fcde
Packit Service 82fcde
/*
Packit Service 82fcde
 * We double everything when USE_DOUBLE is true so we do 2 prefetches to
Packit Service 82fcde
 * get 64 bytes in that case.  The assumption is that each individual
Packit Service 82fcde
 * prefetch brings in 32 bytes.
Packit Service 82fcde
 */
Packit Service 82fcde
Packit Service 82fcde
# ifdef USE_DOUBLE
Packit Service 82fcde
#  define PREFETCH_CHUNK 64
Packit Service 82fcde
#  define PREFETCH_FOR_LOAD(chunk, reg) \
Packit Service 82fcde
 pref PREFETCH_LOAD_HINT, (chunk)*64(reg); \
Packit Service 82fcde
 pref PREFETCH_LOAD_HINT, ((chunk)*64)+32(reg)
Packit Service 82fcde
#  define PREFETCH_FOR_STORE(chunk, reg) \
Packit Service 82fcde
 pref PREFETCH_STORE_HINT, (chunk)*64(reg); \
Packit Service 82fcde
 pref PREFETCH_STORE_HINT, ((chunk)*64)+32(reg)
Packit Service 82fcde
# else
Packit Service 82fcde
#  define PREFETCH_CHUNK 32
Packit Service 82fcde
#  define PREFETCH_FOR_LOAD(chunk, reg) \
Packit Service 82fcde
 pref PREFETCH_LOAD_HINT, (chunk)*32(reg)
Packit Service 82fcde
#  define PREFETCH_FOR_STORE(chunk, reg) \
Packit Service 82fcde
 pref PREFETCH_STORE_HINT, (chunk)*32(reg)
Packit Service 82fcde
# endif
Packit Service 82fcde
/* MAX_PREFETCH_SIZE is the maximum size of a prefetch, it must not be less
Packit Service 82fcde
 * than PREFETCH_CHUNK, the assumed size of each prefetch.  If the real size
Packit Service 82fcde
 * of a prefetch is greater than MAX_PREFETCH_SIZE and the PREPAREFORSTORE
Packit Service 82fcde
 * hint is used, the code will not work correctly.  If PREPAREFORSTORE is not
Packit Service 82fcde
 * used then MAX_PREFETCH_SIZE does not matter.  */
Packit Service 82fcde
# define MAX_PREFETCH_SIZE 128
Packit Service 82fcde
/* PREFETCH_LIMIT is set based on the fact that we never use an offset greater
 * than 5 on a STORE prefetch and that a single prefetch can never be larger
 * than MAX_PREFETCH_SIZE.  We add the extra 32 when USE_DOUBLE is set because
 * we actually do two prefetches in that case, one 32 bytes after the other.
 * The expansion is fully parenthesized so that it always behaves as a single
 * operand, whatever operators surround the macro at the point of use.  */
# ifdef USE_DOUBLE
#  define PREFETCH_LIMIT ((5 * PREFETCH_CHUNK) + 32 + MAX_PREFETCH_SIZE)
# else
#  define PREFETCH_LIMIT ((5 * PREFETCH_CHUNK) + MAX_PREFETCH_SIZE)
# endif
Packit Service 82fcde
# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) \
Packit Service 82fcde
    && ((PREFETCH_CHUNK * 4) < MAX_PREFETCH_SIZE)
Packit Service 82fcde
/* We cannot handle this because the initial prefetches may fetch bytes that
Packit Service 82fcde
 * are before the buffer being copied.  We start copies with an offset
Packit Service 82fcde
 * of 4 so avoid this situation when using PREPAREFORSTORE.  */
Packit Service 82fcde
#error "PREFETCH_CHUNK is too large and/or MAX_PREFETCH_SIZE is too small."
Packit Service 82fcde
# endif
Packit Service 82fcde
#else /* USE_PREFETCH not defined */
Packit Service 82fcde
# define PREFETCH_FOR_LOAD(offset, reg)
Packit Service 82fcde
# define PREFETCH_FOR_STORE(offset, reg)
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
/* For MIPS ISA revision 6 and later: if the store hint selected so far is
   PREFETCH_HINT_PREPAREFORSTORE, replace it with
   PREFETCH_HINT_STORE_STREAMED, and define R6_CODE to select the
   R6-specific code paths in the routine below.  */
#if __mips_isa_rev > 5
Packit Service 82fcde
# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
Packit Service 82fcde
#  undef PREFETCH_STORE_HINT
Packit Service 82fcde
#  define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED
Packit Service 82fcde
# endif
Packit Service 82fcde
# define R6_CODE
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
/* Allow the routine to be named something else if desired.  */
Packit Service 82fcde
#ifndef MEMCPY_NAME
Packit Service 82fcde
# define MEMCPY_NAME memcpy
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
/* We use these 32/64 bit registers as temporaries to do the copying.
   REG0-REG3 are always t0-t3.  REG4-REG7 are t4-t7 when _MIPS_SIM is
   _ABIO32 or _ABIO64, and ta0-ta3 in every other case (including when
   _MIPS_SIM is not defined).  */
Packit Service 82fcde
#define REG0 t0
Packit Service 82fcde
#define REG1 t1
Packit Service 82fcde
#define REG2 t2
Packit Service 82fcde
#define REG3 t3
Packit Service 82fcde
#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABIO32) || (_MIPS_SIM == _ABIO64))
Packit Service 82fcde
# define REG4 t4
Packit Service 82fcde
# define REG5 t5
Packit Service 82fcde
# define REG6 t6
Packit Service 82fcde
# define REG7 t7
Packit Service 82fcde
#else
Packit Service 82fcde
# define REG4 ta0
Packit Service 82fcde
# define REG5 ta1
Packit Service 82fcde
# define REG6 ta2
Packit Service 82fcde
# define REG7 ta3
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
/* We load/store 64 bits at a time when USE_DOUBLE is true.
Packit Service 82fcde
 * The C_ prefix stands for CHUNK and is used to avoid macro name
Packit Service 82fcde
 * conflicts with system header files.  */
Packit Service 82fcde
Packit Service 82fcde
#ifdef USE_DOUBLE
Packit Service 82fcde
# define C_ST	sd
Packit Service 82fcde
# define C_LD	ld
Packit Service 82fcde
# ifdef __MIPSEB
Packit Service 82fcde
#  define C_LDHI	ldl	/* high part is left in big-endian	*/
Packit Service 82fcde
#  define C_STHI	sdl	/* high part is left in big-endian	*/
Packit Service 82fcde
#  define C_LDLO	ldr	/* low part is right in big-endian	*/
Packit Service 82fcde
#  define C_STLO	sdr	/* low part is right in big-endian	*/
Packit Service 82fcde
# else
Packit Service 82fcde
#  define C_LDHI	ldr	/* high part is right in little-endian	*/
Packit Service 82fcde
#  define C_STHI	sdr	/* high part is right in little-endian	*/
Packit Service 82fcde
#  define C_LDLO	ldl	/* low part is left in little-endian	*/
Packit Service 82fcde
#  define C_STLO	sdl	/* low part is left in little-endian	*/
Packit Service 82fcde
# endif
Packit Service 82fcde
# define C_ALIGN	dalign	/* r6 align instruction			*/
Packit Service 82fcde
#else
Packit Service 82fcde
# define C_ST	sw
Packit Service 82fcde
# define C_LD	lw
Packit Service 82fcde
# ifdef __MIPSEB
Packit Service 82fcde
#  define C_LDHI	lwl	/* high part is left in big-endian	*/
Packit Service 82fcde
#  define C_STHI	swl	/* high part is left in big-endian	*/
Packit Service 82fcde
#  define C_LDLO	lwr	/* low part is right in big-endian	*/
Packit Service 82fcde
#  define C_STLO	swr	/* low part is right in big-endian	*/
Packit Service 82fcde
# else
Packit Service 82fcde
#  define C_LDHI	lwr	/* high part is right in little-endian	*/
Packit Service 82fcde
#  define C_STHI	swr	/* high part is right in little-endian	*/
Packit Service 82fcde
#  define C_LDLO	lwl	/* low part is left in little-endian	*/
Packit Service 82fcde
#  define C_STLO	swl	/* low part is left in little-endian	*/
Packit Service 82fcde
# endif
Packit Service 82fcde
# define C_ALIGN	align	/* r6 align instruction			*/
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
/* Bookkeeping values for 32 vs. 64 bit mode.
   NSIZE is the size in bytes of one C_LD/C_ST unit (8 with USE_DOUBLE,
   4 otherwise).  NSIZEMASK is 8 * NSIZE - 1 and NSIZEDMASK is
   16 * NSIZE - 1; the routine ANDs the byte count with them to get what is
   left over after the 8-unit and 16-unit chunks respectively.  */
Packit Service 82fcde
#ifdef USE_DOUBLE
Packit Service 82fcde
# define NSIZE 8
Packit Service 82fcde
# define NSIZEMASK 0x3f
Packit Service 82fcde
# define NSIZEDMASK 0x7f
Packit Service 82fcde
#else
Packit Service 82fcde
# define NSIZE 4
Packit Service 82fcde
# define NSIZEMASK 0x1f
Packit Service 82fcde
# define NSIZEDMASK 0x3f
Packit Service 82fcde
#endif
Packit Service 82fcde
/* UNIT(n) is the byte offset of the n-th unit.  */
#define UNIT(unit) ((unit)*NSIZE)
Packit Service 82fcde
/* UNITM1(n) is the byte offset of the last byte before the n-th unit,
   i.e. UNIT(n) - 1.  */
#define UNITM1(unit) (((unit)*NSIZE)-1)
Packit Service 82fcde
Packit Service 82fcde
#ifdef ANDROID_CHANGES
Packit Service 82fcde
LEAF(MEMCPY_NAME, 0)
Packit Service 82fcde
#else
Packit Service 82fcde
LEAF(MEMCPY_NAME)
Packit Service 82fcde
#endif
Packit Service 82fcde
	.set	nomips16
Packit Service 82fcde
	.set	noreorder
Packit Service 82fcde
/*
Packit Service 82fcde
 * Below we handle the case where memcpy is called with overlapping src and dst.
Packit Service 82fcde
 * Although memcpy is not required to handle this case, some parts of Android
Packit Service 82fcde
 * like Skia rely on such usage. We call memmove to handle such cases.
Packit Service 82fcde
 */
Packit Service 82fcde
#ifdef USE_MEMMOVE_FOR_OVERLAP
Packit Service 82fcde
	PTR_SUBU t0,a0,a1
Packit Service 82fcde
	PTR_SRA	t2,t0,31
Packit Service 82fcde
	xor	t1,t0,t2
Packit Service 82fcde
	PTR_SUBU t0,t1,t2
Packit Service 82fcde
	sltu	t2,t0,a2
Packit Service 82fcde
	beq	t2,zero,L(memcpy)
Packit Service 82fcde
	la	t9,memmove
Packit Service 82fcde
	jr	t9
Packit Service 82fcde
	 nop
Packit Service 82fcde
L(memcpy):
Packit Service 82fcde
#endif
Packit Service 82fcde
/*
Packit Service 82fcde
 * If the size is less than 2*NSIZE (8 or 16), go to L(lasts).  Regardless of
Packit Service 82fcde
 * size, copy dst pointer to v0 for the return value.
Packit Service 82fcde
 */
Packit Service 82fcde
	slti	t2,a2,(2 * NSIZE)
Packit Service 82fcde
	bne	t2,zero,L(lasts)
Packit Service 82fcde
#if defined(RETURN_FIRST_PREFETCH) || defined(RETURN_LAST_PREFETCH)
Packit Service 82fcde
	move	v0,zero
Packit Service 82fcde
#else
Packit Service 82fcde
	move	v0,a0
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
#ifndef R6_CODE
Packit Service 82fcde
Packit Service 82fcde
/*
Packit Service 82fcde
 * If src and dst have different alignments, go to L(unaligned), if they
Packit Service 82fcde
 * have the same alignment (but are not actually aligned) do a partial
Packit Service 82fcde
 * load/store to make them aligned.  If they are both already aligned
Packit Service 82fcde
 * we can start copying at L(aligned).
Packit Service 82fcde
 */
Packit Service 82fcde
	xor	t8,a1,a0
Packit Service 82fcde
	andi	t8,t8,(NSIZE-1)		/* t8 is a0/a1 word-displacement */
Packit Service 82fcde
	bne	t8,zero,L(unaligned)
Packit Service 82fcde
	PTR_SUBU a3, zero, a0
Packit Service 82fcde
Packit Service 82fcde
	andi	a3,a3,(NSIZE-1)		/* copy a3 bytes to align a0/a1	  */
Packit Service 82fcde
	beq	a3,zero,L(aligned)	/* if a3=0, it is already aligned */
Packit Service 82fcde
	PTR_SUBU a2,a2,a3		/* a2 is the remining bytes count */
Packit Service 82fcde
Packit Service 82fcde
	C_LDHI	t8,0(a1)
Packit Service 82fcde
	PTR_ADDU a1,a1,a3
Packit Service 82fcde
	C_STHI	t8,0(a0)
Packit Service 82fcde
	PTR_ADDU a0,a0,a3
Packit Service 82fcde
Packit Service 82fcde
#else /* R6_CODE */
Packit Service 82fcde
Packit Service 82fcde
/*
Packit Service 82fcde
 * Align the destination and hope that the source gets aligned too.  If it
Packit Service 82fcde
 * doesn't we jump to L(r6_unaligned*) to do unaligned copies using the r6
Packit Service 82fcde
 * align instruction.
Packit Service 82fcde
 */
Packit Service 82fcde
	andi	t8,a0,7
Packit Service 82fcde
	lapc	t9,L(atable)
Packit Service 82fcde
	PTR_LSA	t9,t8,t9,2
Packit Service 82fcde
	jrc	t9
Packit Service 82fcde
L(atable):
Packit Service 82fcde
	bc	L(lb0)
Packit Service 82fcde
	bc	L(lb7)
Packit Service 82fcde
	bc	L(lb6)
Packit Service 82fcde
	bc	L(lb5)
Packit Service 82fcde
	bc	L(lb4)
Packit Service 82fcde
	bc	L(lb3)
Packit Service 82fcde
	bc	L(lb2)
Packit Service 82fcde
	bc	L(lb1)
Packit Service 82fcde
L(lb7):
Packit Service 82fcde
	lb	a3, 6(a1)
Packit Service 82fcde
	sb	a3, 6(a0)
Packit Service 82fcde
L(lb6):
Packit Service 82fcde
	lb	a3, 5(a1)
Packit Service 82fcde
	sb	a3, 5(a0)
Packit Service 82fcde
L(lb5):
Packit Service 82fcde
	lb	a3, 4(a1)
Packit Service 82fcde
	sb	a3, 4(a0)
Packit Service 82fcde
L(lb4):
Packit Service 82fcde
	lb	a3, 3(a1)
Packit Service 82fcde
	sb	a3, 3(a0)
Packit Service 82fcde
L(lb3):
Packit Service 82fcde
	lb	a3, 2(a1)
Packit Service 82fcde
	sb	a3, 2(a0)
Packit Service 82fcde
L(lb2):
Packit Service 82fcde
	lb	a3, 1(a1)
Packit Service 82fcde
	sb	a3, 1(a0)
Packit Service 82fcde
L(lb1):
Packit Service 82fcde
	lb	a3, 0(a1)
Packit Service 82fcde
	sb	a3, 0(a0)
Packit Service 82fcde
Packit Service 82fcde
	li	t9,8
Packit Service 82fcde
	subu	t8,t9,t8
Packit Service 82fcde
	PTR_SUBU a2,a2,t8
Packit Service 82fcde
	PTR_ADDU a0,a0,t8
Packit Service 82fcde
	PTR_ADDU a1,a1,t8
Packit Service 82fcde
L(lb0):
Packit Service 82fcde
Packit Service 82fcde
	andi	t8,a1,(NSIZE-1)
Packit Service 82fcde
	lapc	t9,L(jtable)
Packit Service 82fcde
	PTR_LSA	t9,t8,t9,2
Packit Service 82fcde
	jrc	t9
Packit Service 82fcde
L(jtable):
Packit Service 82fcde
        bc      L(aligned)
Packit Service 82fcde
        bc      L(r6_unaligned1)
Packit Service 82fcde
        bc      L(r6_unaligned2)
Packit Service 82fcde
        bc      L(r6_unaligned3)
Packit Service 82fcde
# ifdef USE_DOUBLE
Packit Service 82fcde
        bc      L(r6_unaligned4)
Packit Service 82fcde
        bc      L(r6_unaligned5)
Packit Service 82fcde
        bc      L(r6_unaligned6)
Packit Service 82fcde
        bc      L(r6_unaligned7)
Packit Service 82fcde
# endif
Packit Service 82fcde
#endif /* R6_CODE */
Packit Service 82fcde
Packit Service 82fcde
L(aligned):
Packit Service 82fcde
Packit Service 82fcde
/*
Packit Service 82fcde
 * Now dst/src are both aligned to (word or double word) aligned addresses
Packit Service 82fcde
 * Set a2 to count how many bytes we have to copy after all the 64/128 byte
Packit Service 82fcde
 * chunks are copied and a3 to the dst pointer after all the 64/128 byte
Packit Service 82fcde
 * chunks have been copied.  We will loop, incrementing a0 and a1 until a0
Packit Service 82fcde
 * equals a3.
Packit Service 82fcde
 */
Packit Service 82fcde
Packit Service 82fcde
	andi	t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
Packit Service 82fcde
	beq	a2,t8,L(chkw)	 /* if a2==t8, no 64-byte/128-byte chunks */
Packit Service 82fcde
	PTR_SUBU a3,a2,t8	 /* subtract from a2 the reminder */
Packit Service 82fcde
	PTR_ADDU a3,a0,a3	 /* Now a3 is the final dst after loop */
Packit Service 82fcde
Packit Service 82fcde
/* When in the loop we may prefetch with the 'prepare to store' hint,
Packit Service 82fcde
 * in this case the a0+x should not be past the "t0-32" address.  This
Packit Service 82fcde
 * means: for x=128 the last "safe" a0 address is "t0-160".  Alternatively,
Packit Service 82fcde
 * for x=64 the last "safe" a0 address is "t0-96" In the current version we
Packit Service 82fcde
 * will use "prefetch hint,128(a0)", so "t0-160" is the limit.
Packit Service 82fcde
 */
Packit Service 82fcde
#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
Packit Service 82fcde
	PTR_ADDU t0,a0,a2		/* t0 is the "past the end" address */
Packit Service 82fcde
	PTR_SUBU t9,t0,PREFETCH_LIMIT	/* t9 is the "last safe pref" address */
Packit Service 82fcde
#endif
Packit Service 82fcde
	PREFETCH_FOR_LOAD  (0, a1)
Packit Service 82fcde
	PREFETCH_FOR_LOAD  (1, a1)
Packit Service 82fcde
	PREFETCH_FOR_LOAD  (2, a1)
Packit Service 82fcde
	PREFETCH_FOR_LOAD  (3, a1)
Packit Service 82fcde
#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
Packit Service 82fcde
	PREFETCH_FOR_STORE (1, a0)
Packit Service 82fcde
	PREFETCH_FOR_STORE (2, a0)
Packit Service 82fcde
	PREFETCH_FOR_STORE (3, a0)
Packit Service 82fcde
#endif
Packit Service 82fcde
#if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
Packit Service 82fcde
# if PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE
Packit Service 82fcde
	sltu    v1,t9,a0
Packit Service 82fcde
	bgtz    v1,L(skip_set)
Packit Service 82fcde
	nop
Packit Service 82fcde
	PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
Packit Service 82fcde
L(skip_set):
Packit Service 82fcde
# else
Packit Service 82fcde
	PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1)
Packit Service 82fcde
# endif
Packit Service 82fcde
#endif
Packit Service 82fcde
#if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH) \
Packit Service 82fcde
    && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
Packit Service 82fcde
	PTR_ADDIU v0,a0,(PREFETCH_CHUNK*3)
Packit Service 82fcde
# ifdef USE_DOUBLE
Packit Service 82fcde
	PTR_ADDIU v0,v0,32
Packit Service 82fcde
# endif
Packit Service 82fcde
#endif
Packit Service 82fcde
L(loop16w):
Packit Service 82fcde
	C_LD	t0,UNIT(0)(a1)
Packit Service 82fcde
#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
Packit Service 82fcde
	sltu	v1,t9,a0		/* If a0 > t9 don't use next prefetch */
Packit Service 82fcde
	bgtz	v1,L(skip_pref)
Packit Service 82fcde
#endif
Packit Service 82fcde
	C_LD	t1,UNIT(1)(a1)
Packit Service 82fcde
#ifdef R6_CODE
Packit Service 82fcde
	PREFETCH_FOR_STORE (2, a0)
Packit Service 82fcde
#else
Packit Service 82fcde
	PREFETCH_FOR_STORE (4, a0)
Packit Service 82fcde
	PREFETCH_FOR_STORE (5, a0)
Packit Service 82fcde
#endif
Packit Service 82fcde
#if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH)
Packit Service 82fcde
	PTR_ADDIU v0,a0,(PREFETCH_CHUNK*5)
Packit Service 82fcde
# ifdef USE_DOUBLE
Packit Service 82fcde
	PTR_ADDIU v0,v0,32
Packit Service 82fcde
# endif
Packit Service 82fcde
#endif
Packit Service 82fcde
L(skip_pref):
Packit Service 82fcde
	C_LD	REG2,UNIT(2)(a1)
Packit Service 82fcde
	C_LD	REG3,UNIT(3)(a1)
Packit Service 82fcde
	C_LD	REG4,UNIT(4)(a1)
Packit Service 82fcde
	C_LD	REG5,UNIT(5)(a1)
Packit Service 82fcde
	C_LD	REG6,UNIT(6)(a1)
Packit Service 82fcde
	C_LD	REG7,UNIT(7)(a1)
Packit Service 82fcde
#ifdef R6_CODE
Packit Service 82fcde
	PREFETCH_FOR_LOAD (3, a1)
Packit Service 82fcde
#else
Packit Service 82fcde
	PREFETCH_FOR_LOAD (4, a1)
Packit Service 82fcde
#endif
Packit Service 82fcde
	C_ST	t0,UNIT(0)(a0)
Packit Service 82fcde
	C_ST	t1,UNIT(1)(a0)
Packit Service 82fcde
	C_ST	REG2,UNIT(2)(a0)
Packit Service 82fcde
	C_ST	REG3,UNIT(3)(a0)
Packit Service 82fcde
	C_ST	REG4,UNIT(4)(a0)
Packit Service 82fcde
	C_ST	REG5,UNIT(5)(a0)
Packit Service 82fcde
	C_ST	REG6,UNIT(6)(a0)
Packit Service 82fcde
	C_ST	REG7,UNIT(7)(a0)
Packit Service 82fcde
Packit Service 82fcde
	C_LD	t0,UNIT(8)(a1)
Packit Service 82fcde
	C_LD	t1,UNIT(9)(a1)
Packit Service 82fcde
	C_LD	REG2,UNIT(10)(a1)
Packit Service 82fcde
	C_LD	REG3,UNIT(11)(a1)
Packit Service 82fcde
	C_LD	REG4,UNIT(12)(a1)
Packit Service 82fcde
	C_LD	REG5,UNIT(13)(a1)
Packit Service 82fcde
	C_LD	REG6,UNIT(14)(a1)
Packit Service 82fcde
	C_LD	REG7,UNIT(15)(a1)
Packit Service 82fcde
#ifndef R6_CODE
Packit Service 82fcde
        PREFETCH_FOR_LOAD (5, a1)
Packit Service 82fcde
#endif
Packit Service 82fcde
	C_ST	t0,UNIT(8)(a0)
Packit Service 82fcde
	C_ST	t1,UNIT(9)(a0)
Packit Service 82fcde
	C_ST	REG2,UNIT(10)(a0)
Packit Service 82fcde
	C_ST	REG3,UNIT(11)(a0)
Packit Service 82fcde
	C_ST	REG4,UNIT(12)(a0)
Packit Service 82fcde
	C_ST	REG5,UNIT(13)(a0)
Packit Service 82fcde
	C_ST	REG6,UNIT(14)(a0)
Packit Service 82fcde
	C_ST	REG7,UNIT(15)(a0)
Packit Service 82fcde
	PTR_ADDIU a0,a0,UNIT(16)	/* adding 64/128 to dest */
Packit Service 82fcde
	bne	a0,a3,L(loop16w)
Packit Service 82fcde
	PTR_ADDIU a1,a1,UNIT(16)	/* adding 64/128 to src */
Packit Service 82fcde
	move	a2,t8
Packit Service 82fcde
Packit Service 82fcde
/* Here we have src and dest word-aligned but less than 64-bytes or
Packit Service 82fcde
 * 128 bytes to go.  Check for a 32(64) byte chunk and copy if there
Packit Service 82fcde
 * is one.  Otherwise jump down to L(chk1w) to handle the tail end of
Packit Service 82fcde
 * the copy.
Packit Service 82fcde
 */
Packit Service 82fcde
Packit Service 82fcde
L(chkw):
Packit Service 82fcde
	PREFETCH_FOR_LOAD (0, a1)
Packit Service 82fcde
	andi	t8,a2,NSIZEMASK	/* Is there a 32-byte/64-byte chunk.  */
Packit Service 82fcde
				/* The t8 is the remainder count past 32-bytes */
Packit Service 82fcde
	beq	a2,t8,L(chk1w)	/* When a2=t8, no 32-byte chunk  */
Packit Service 82fcde
	nop
Packit Service 82fcde
	C_LD	t0,UNIT(0)(a1)
Packit Service 82fcde
	C_LD	t1,UNIT(1)(a1)
Packit Service 82fcde
	C_LD	REG2,UNIT(2)(a1)
Packit Service 82fcde
	C_LD	REG3,UNIT(3)(a1)
Packit Service 82fcde
	C_LD	REG4,UNIT(4)(a1)
Packit Service 82fcde
	C_LD	REG5,UNIT(5)(a1)
Packit Service 82fcde
	C_LD	REG6,UNIT(6)(a1)
Packit Service 82fcde
	C_LD	REG7,UNIT(7)(a1)
Packit Service 82fcde
	PTR_ADDIU a1,a1,UNIT(8)
Packit Service 82fcde
	C_ST	t0,UNIT(0)(a0)
Packit Service 82fcde
	C_ST	t1,UNIT(1)(a0)
Packit Service 82fcde
	C_ST	REG2,UNIT(2)(a0)
Packit Service 82fcde
	C_ST	REG3,UNIT(3)(a0)
Packit Service 82fcde
	C_ST	REG4,UNIT(4)(a0)
Packit Service 82fcde
	C_ST	REG5,UNIT(5)(a0)
Packit Service 82fcde
	C_ST	REG6,UNIT(6)(a0)
Packit Service 82fcde
	C_ST	REG7,UNIT(7)(a0)
Packit Service 82fcde
	PTR_ADDIU a0,a0,UNIT(8)
Packit Service 82fcde
Packit Service 82fcde
/*
Packit Service 82fcde
 * Here we have less than 32(64) bytes to copy.  Set up for a loop to
Packit Service 82fcde
 * copy one word (or double word) at a time.  Set a2 to count how many
Packit Service 82fcde
 * bytes we have to copy after all the word (or double word) chunks are
Packit Service 82fcde
 * copied and a3 to the dst pointer after all the (d)word chunks have
Packit Service 82fcde
 * been copied.  We will loop, incrementing a0 and a1 until a0 equals a3.
Packit Service 82fcde
 */
Packit Service 82fcde
L(chk1w):
Packit Service 82fcde
	andi	a2,t8,(NSIZE-1)	/* a2 is the reminder past one (d)word chunks */
Packit Service 82fcde
	beq	a2,t8,L(lastw)
Packit Service 82fcde
	PTR_SUBU a3,t8,a2	/* a3 is count of bytes in one (d)word chunks */
Packit Service 82fcde
	PTR_ADDU a3,a0,a3	/* a3 is the dst address after loop */
Packit Service 82fcde
Packit Service 82fcde
/* copying in words (4-byte or 8-byte chunks) */
Packit Service 82fcde
L(wordCopy_loop):
Packit Service 82fcde
	C_LD	REG3,UNIT(0)(a1)
Packit Service 82fcde
	PTR_ADDIU a0,a0,UNIT(1)
Packit Service 82fcde
	PTR_ADDIU a1,a1,UNIT(1)
Packit Service 82fcde
	bne	a0,a3,L(wordCopy_loop)
Packit Service 82fcde
	C_ST	REG3,UNIT(-1)(a0)
Packit Service 82fcde
Packit Service 82fcde
/* If we have been copying double words, see if we can copy a single word
Packit Service 82fcde
   before doing byte copies.  We can have, at most, one word to copy.  */
Packit Service 82fcde
Packit Service 82fcde
L(lastw):
Packit Service 82fcde
#ifdef USE_DOUBLE
Packit Service 82fcde
	andi    t8,a2,3		/* a2 is the remainder past 4 byte chunks.  */
Packit Service 82fcde
	beq	t8,a2,L(lastb)
Packit Service 82fcde
	move	a2,t8
Packit Service 82fcde
	lw	REG3,0(a1)
Packit Service 82fcde
	sw	REG3,0(a0)
Packit Service 82fcde
	PTR_ADDIU a0,a0,4
Packit Service 82fcde
	PTR_ADDIU a1,a1,4
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
/* Copy the last 8 (or 16) bytes */
Packit Service 82fcde
L(lastb):
Packit Service 82fcde
	blez	a2,L(leave)
Packit Service 82fcde
	PTR_ADDU a3,a0,a2	/* a3 is the last dst address */
Packit Service 82fcde
L(lastbloop):
Packit Service 82fcde
	lb	v1,0(a1)
Packit Service 82fcde
	PTR_ADDIU a0,a0,1
Packit Service 82fcde
	PTR_ADDIU a1,a1,1
Packit Service 82fcde
	bne	a0,a3,L(lastbloop)
Packit Service 82fcde
	sb	v1,-1(a0)
Packit Service 82fcde
L(leave):
Packit Service 82fcde
	j	ra
Packit Service 82fcde
	nop
Packit Service 82fcde
Packit Service 82fcde
/* We jump here with a memcpy of less than 8 or 16 bytes, depending on
Packit Service 82fcde
   whether or not USE_DOUBLE is defined.  Instead of just doing byte
Packit Service 82fcde
   copies, check the alignment and size and use lw/sw if possible.
Packit Service 82fcde
   Otherwise, do byte copies.  */
Packit Service 82fcde
Packit Service 82fcde
L(lasts):
Packit Service 82fcde
	andi	t8,a2,3
Packit Service 82fcde
	beq	t8,a2,L(lastb)
Packit Service 82fcde
Packit Service 82fcde
	andi	t9,a0,3
Packit Service 82fcde
	bne	t9,zero,L(lastb)
Packit Service 82fcde
	andi	t9,a1,3
Packit Service 82fcde
	bne	t9,zero,L(lastb)
Packit Service 82fcde
Packit Service 82fcde
	PTR_SUBU a3,a2,t8
Packit Service 82fcde
	PTR_ADDU a3,a0,a3
Packit Service 82fcde
Packit Service 82fcde
L(wcopy_loop):
Packit Service 82fcde
	lw	REG3,0(a1)
Packit Service 82fcde
	PTR_ADDIU a0,a0,4
Packit Service 82fcde
	PTR_ADDIU a1,a1,4
Packit Service 82fcde
	bne	a0,a3,L(wcopy_loop)
Packit Service 82fcde
	sw	REG3,-4(a0)
Packit Service 82fcde
Packit Service 82fcde
	b	L(lastb)
Packit Service 82fcde
	move	a2,t8
Packit Service 82fcde
Packit Service 82fcde
#ifndef R6_CODE
Packit Service 82fcde
/*
Packit Service 82fcde
 * UNALIGNED case, got here with a3 = "negu a0"
Packit Service 82fcde
 * This code is nearly identical to the aligned code above
Packit Service 82fcde
 * but only the destination (not the source) gets aligned
Packit Service 82fcde
 * so we need to do partial loads of the source followed
Packit Service 82fcde
 * by normal stores to the destination (once we have aligned
Packit Service 82fcde
 * the destination).
Packit Service 82fcde
 *
 * The C_LDHI/C_LDLO pair and C_STHI are macros defined outside this
 * chunk; presumably the load pair assembles one whole unit from an
 * unaligned address and C_STHI stores only the bytes up to the next
 * aligned dst boundary -- confirm against their definitions.
 */
Packit Service 82fcde
Packit Service 82fcde
L(unaligned):
Packit Service 82fcde
	andi	a3,a3,(NSIZE-1)	/* a3 = byte count needed to align a0 (dst) */
Packit Service 82fcde
	beqz	a3,L(ua_chk16w) /* if a3=0, it is already aligned */
Packit Service 82fcde
	PTR_SUBU a2,a2,a3	/* (delay slot) a2 is the remaining bytes count */
Packit Service 82fcde
Packit Service 82fcde
	C_LDHI	v1,UNIT(0)(a1)	/* partial loads of the first source unit */
Packit Service 82fcde
	C_LDLO	v1,UNITM1(1)(a1)
Packit Service 82fcde
	PTR_ADDU a1,a1,a3	/* advance src by the bytes consumed */
Packit Service 82fcde
	C_STHI	v1,UNIT(0)(a0)	/* partial store of the leading dst bytes */
Packit Service 82fcde
	PTR_ADDU a0,a0,a3	/* advance dst by the same a3 bytes */
Packit Service 82fcde
Packit Service 82fcde
/*
Packit Service 82fcde
 *  Now the destination (but not the source) is aligned
Packit Service 82fcde
 * Set a2 to count how many bytes we have to copy after all the 64/128 byte
Packit Service 82fcde
 * chunks are copied and a3 to the dst pointer after all the 64/128 byte
Packit Service 82fcde
 * chunks have been copied.  We will loop, incrementing a0 and a1 until a0
Packit Service 82fcde
 * equals a3.
Packit Service 82fcde
 *
 * Note: the remainder is first computed into t8; it is only moved into
 * a2 after L(ua_loop16w) has finished.
 */
Packit Service 82fcde
Packit Service 82fcde
L(ua_chk16w):
Packit Service 82fcde
	andi	t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
Packit Service 82fcde
	beq	a2,t8,L(ua_chkw) /* if a2==t8, no 64-byte/128-byte chunks */
Packit Service 82fcde
	PTR_SUBU a3,a2,t8	 /* (delay slot) subtract from a2 the remainder */
Packit Service 82fcde
	PTR_ADDU a3,a0,a3	 /* Now a3 is the final dst after loop */
Packit Service 82fcde
Packit Service 82fcde
/* With the PREPAREFORSTORE hint, t9 bounds the dst addresses for which
   the loop below still issues store prefetches.  */
# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
Packit Service 82fcde
	PTR_ADDU t0,a0,a2	  /* t0 is the "past the end" address */
Packit Service 82fcde
	PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */
Packit Service 82fcde
# endif
Packit Service 82fcde
	PREFETCH_FOR_LOAD  (0, a1)
Packit Service 82fcde
	PREFETCH_FOR_LOAD  (1, a1)
Packit Service 82fcde
	PREFETCH_FOR_LOAD  (2, a1)
Packit Service 82fcde
# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
Packit Service 82fcde
	PREFETCH_FOR_STORE (1, a0)
Packit Service 82fcde
	PREFETCH_FOR_STORE (2, a0)
Packit Service 82fcde
	PREFETCH_FOR_STORE (3, a0)
Packit Service 82fcde
# endif
Packit Service 82fcde
/* RETURN_FIRST_PREFETCH builds overwrite v0 with an address derived
   from a0 and PREFETCH_CHUNK instead of leaving it untouched
   (presumably a testing aid -- the macro is defined outside this chunk).  */
# if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
Packit Service 82fcde
#  if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
Packit Service 82fcde
	sltu    v1,t9,a0	/* v1 = 1 when a0 is already past t9 */
Packit Service 82fcde
	bgtz    v1,L(ua_skip_set)	/* past the limit: leave v0 alone */
Packit Service 82fcde
	nop
Packit Service 82fcde
	PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
Packit Service 82fcde
L(ua_skip_set):
Packit Service 82fcde
#  else
Packit Service 82fcde
	PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1)
Packit Service 82fcde
#  endif
Packit Service 82fcde
# endif
Packit Service 82fcde
/* Main loop of the unaligned-source path.  Each pass copies UNIT(16)
   bytes (the "64/128 byte chunk" of the comments above) in two halves
   of eight units: every unit is read with a C_LDHI/C_LDLO partial-load
   pair from the source and written with a plain C_ST to the aligned
   destination.  The loop ends when a0 reaches a3; afterwards a2 takes
   the remainder that L(ua_chk16w) left in t8.  */
L(ua_loop16w):
Packit Service 82fcde
	PREFETCH_FOR_LOAD  (3, a1)
Packit Service 82fcde
	C_LDHI	t0,UNIT(0)(a1)
Packit Service 82fcde
	C_LDHI	t1,UNIT(1)(a1)
Packit Service 82fcde
	C_LDHI	REG2,UNIT(2)(a1)
Packit Service 82fcde
/* With the PREPAREFORSTORE hint, skip the store prefetches once a0 is
   past t9 (the "last safe pref" address).  The C_LDHI of REG3 that
   follows is then the bgtz delay slot, so it runs on both paths.  */
# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
Packit Service 82fcde
	sltu	v1,t9,a0
Packit Service 82fcde
	bgtz	v1,L(ua_skip_pref)
Packit Service 82fcde
# endif
Packit Service 82fcde
	C_LDHI	REG3,UNIT(3)(a1)
Packit Service 82fcde
	PREFETCH_FOR_STORE (4, a0)
Packit Service 82fcde
	PREFETCH_FOR_STORE (5, a0)
Packit Service 82fcde
L(ua_skip_pref):
Packit Service 82fcde
	C_LDHI	REG4,UNIT(4)(a1)
Packit Service 82fcde
	C_LDHI	REG5,UNIT(5)(a1)
Packit Service 82fcde
	C_LDHI	REG6,UNIT(6)(a1)
Packit Service 82fcde
	C_LDHI	REG7,UNIT(7)(a1)
Packit Service 82fcde
	/* Second partial load of units 0-7 completes each register.  */
	C_LDLO	t0,UNITM1(1)(a1)
Packit Service 82fcde
	C_LDLO	t1,UNITM1(2)(a1)
Packit Service 82fcde
	C_LDLO	REG2,UNITM1(3)(a1)
Packit Service 82fcde
	C_LDLO	REG3,UNITM1(4)(a1)
Packit Service 82fcde
	C_LDLO	REG4,UNITM1(5)(a1)
Packit Service 82fcde
	C_LDLO	REG5,UNITM1(6)(a1)
Packit Service 82fcde
	C_LDLO	REG6,UNITM1(7)(a1)
Packit Service 82fcde
	C_LDLO	REG7,UNITM1(8)(a1)
Packit Service 82fcde
        PREFETCH_FOR_LOAD (4, a1)
Packit Service 82fcde
	/* Store units 0-7 to the aligned destination.  */
	C_ST	t0,UNIT(0)(a0)
Packit Service 82fcde
	C_ST	t1,UNIT(1)(a0)
Packit Service 82fcde
	C_ST	REG2,UNIT(2)(a0)
Packit Service 82fcde
	C_ST	REG3,UNIT(3)(a0)
Packit Service 82fcde
	C_ST	REG4,UNIT(4)(a0)
Packit Service 82fcde
	C_ST	REG5,UNIT(5)(a0)
Packit Service 82fcde
	C_ST	REG6,UNIT(6)(a0)
Packit Service 82fcde
	C_ST	REG7,UNIT(7)(a0)
Packit Service 82fcde
	/* Second half: the same load/load/store pattern for units 8-15.  */
	C_LDHI	t0,UNIT(8)(a1)
Packit Service 82fcde
	C_LDHI	t1,UNIT(9)(a1)
Packit Service 82fcde
	C_LDHI	REG2,UNIT(10)(a1)
Packit Service 82fcde
	C_LDHI	REG3,UNIT(11)(a1)
Packit Service 82fcde
	C_LDHI	REG4,UNIT(12)(a1)
Packit Service 82fcde
	C_LDHI	REG5,UNIT(13)(a1)
Packit Service 82fcde
	C_LDHI	REG6,UNIT(14)(a1)
Packit Service 82fcde
	C_LDHI	REG7,UNIT(15)(a1)
Packit Service 82fcde
	C_LDLO	t0,UNITM1(9)(a1)
Packit Service 82fcde
	C_LDLO	t1,UNITM1(10)(a1)
Packit Service 82fcde
	C_LDLO	REG2,UNITM1(11)(a1)
Packit Service 82fcde
	C_LDLO	REG3,UNITM1(12)(a1)
Packit Service 82fcde
	C_LDLO	REG4,UNITM1(13)(a1)
Packit Service 82fcde
	C_LDLO	REG5,UNITM1(14)(a1)
Packit Service 82fcde
	C_LDLO	REG6,UNITM1(15)(a1)
Packit Service 82fcde
	C_LDLO	REG7,UNITM1(16)(a1)
Packit Service 82fcde
        PREFETCH_FOR_LOAD (5, a1)
Packit Service 82fcde
	C_ST	t0,UNIT(8)(a0)
Packit Service 82fcde
	C_ST	t1,UNIT(9)(a0)
Packit Service 82fcde
	C_ST	REG2,UNIT(10)(a0)
Packit Service 82fcde
	C_ST	REG3,UNIT(11)(a0)
Packit Service 82fcde
	C_ST	REG4,UNIT(12)(a0)
Packit Service 82fcde
	C_ST	REG5,UNIT(13)(a0)
Packit Service 82fcde
	C_ST	REG6,UNIT(14)(a0)
Packit Service 82fcde
	C_ST	REG7,UNIT(15)(a0)
Packit Service 82fcde
	PTR_ADDIU a0,a0,UNIT(16)	/* adding 64/128 to dest */
Packit Service 82fcde
	bne	a0,a3,L(ua_loop16w)
Packit Service 82fcde
	PTR_ADDIU a1,a1,UNIT(16)	/* (delay slot) adding 64/128 to src */
Packit Service 82fcde
	move	a2,t8			/* a2 = bytes left after the big chunks */
Packit Service 82fcde
Packit Service 82fcde
/* Here the dest is aligned (the source need not be, hence the partial
Packit Service 82fcde
 * loads below) and less than 64 or 128 bytes remain.  Check for a
Packit Service 82fcde
 * 32(64) byte chunk and copy it if there is one.  Either way control
Packit Service 82fcde
 * continues at L(ua_chk1w), which handles the tail from the count in t8.  */
Packit Service 82fcde
Packit Service 82fcde
L(ua_chkw):
Packit Service 82fcde
	PREFETCH_FOR_LOAD (0, a1)
Packit Service 82fcde
	andi	t8,a2,NSIZEMASK	  /* Is there a 32-byte/64-byte chunk.  */
Packit Service 82fcde
				  /* t8 is the remainder count past 32-bytes */
Packit Service 82fcde
	beq	a2,t8,L(ua_chk1w) /* When a2=t8, no 32-byte chunk */
Packit Service 82fcde
	nop			  /* (delay slot) */
Packit Service 82fcde
	/* First partial load of each of the eight units.  */
	C_LDHI	t0,UNIT(0)(a1)
Packit Service 82fcde
	C_LDHI	t1,UNIT(1)(a1)
Packit Service 82fcde
	C_LDHI	REG2,UNIT(2)(a1)
Packit Service 82fcde
	C_LDHI	REG3,UNIT(3)(a1)
Packit Service 82fcde
	C_LDHI	REG4,UNIT(4)(a1)
Packit Service 82fcde
	C_LDHI	REG5,UNIT(5)(a1)
Packit Service 82fcde
	C_LDHI	REG6,UNIT(6)(a1)
Packit Service 82fcde
	C_LDHI	REG7,UNIT(7)(a1)
Packit Service 82fcde
	/* Second partial load completes each register.  */
	C_LDLO	t0,UNITM1(1)(a1)
Packit Service 82fcde
	C_LDLO	t1,UNITM1(2)(a1)
Packit Service 82fcde
	C_LDLO	REG2,UNITM1(3)(a1)
Packit Service 82fcde
	C_LDLO	REG3,UNITM1(4)(a1)
Packit Service 82fcde
	C_LDLO	REG4,UNITM1(5)(a1)
Packit Service 82fcde
	C_LDLO	REG5,UNITM1(6)(a1)
Packit Service 82fcde
	C_LDLO	REG6,UNITM1(7)(a1)
Packit Service 82fcde
	C_LDLO	REG7,UNITM1(8)(a1)
Packit Service 82fcde
	PTR_ADDIU a1,a1,UNIT(8)	/* src advances past the chunk */
Packit Service 82fcde
	C_ST	t0,UNIT(0)(a0)
Packit Service 82fcde
	C_ST	t1,UNIT(1)(a0)
Packit Service 82fcde
	C_ST	REG2,UNIT(2)(a0)
Packit Service 82fcde
	C_ST	REG3,UNIT(3)(a0)
Packit Service 82fcde
	C_ST	REG4,UNIT(4)(a0)
Packit Service 82fcde
	C_ST	REG5,UNIT(5)(a0)
Packit Service 82fcde
	C_ST	REG6,UNIT(6)(a0)
Packit Service 82fcde
	C_ST	REG7,UNIT(7)(a0)
Packit Service 82fcde
	PTR_ADDIU a0,a0,UNIT(8)	/* dst advances past the chunk */
Packit Service 82fcde
/*
Packit Service 82fcde
 * Here we have less than 32(64) bytes to copy.  Set up for a loop to
Packit Service 82fcde
 * copy one word (or double word) at a time.
Packit Service 82fcde
 *
 * On entry t8 holds the byte count still to copy.  On exit a2 holds
 * the bytes that do not fill a whole (d)word; L(ua_smallCopy) copies
 * those.
 */
Packit Service 82fcde
L(ua_chk1w):
Packit Service 82fcde
	andi	a2,t8,(NSIZE-1)	/* a2 is the remainder past one (d)word chunks */
Packit Service 82fcde
	beq	a2,t8,L(ua_smallCopy)	/* no whole (d)word to copy */
Packit Service 82fcde
	PTR_SUBU a3,t8,a2	/* (delay slot) a3 is count of bytes in one (d)word chunks */
Packit Service 82fcde
	PTR_ADDU a3,a0,a3	/* a3 is the dst address after loop */
Packit Service 82fcde
Packit Service 82fcde
/* copying in words (4-byte or 8-byte chunks) */
Packit Service 82fcde
L(ua_wordCopy_loop):
Packit Service 82fcde
	C_LDHI	v1,UNIT(0)(a1)	/* partial-load pair reads one source unit */
Packit Service 82fcde
	C_LDLO	v1,UNITM1(1)(a1)
Packit Service 82fcde
	PTR_ADDIU a0,a0,UNIT(1)
Packit Service 82fcde
	PTR_ADDIU a1,a1,UNIT(1)
Packit Service 82fcde
	bne	a0,a3,L(ua_wordCopy_loop)
Packit Service 82fcde
	C_ST	v1,UNIT(-1)(a0)	/* (delay slot) store at the pre-increment dst */
Packit Service 82fcde
Packit Service 82fcde
/* Copy the final a2 bytes one at a time and return.  On the paths
   visible here a2 was masked with (NSIZE-1) in L(ua_chk1w), so fewer
   than NSIZE bytes remain.  This mirrors L(lastb)/L(leave) but carries
   its own return sequence.  */
Packit Service 82fcde
L(ua_smallCopy):
Packit Service 82fcde
	beqz	a2,L(leave)	/* nothing left: return via L(leave) */
Packit Service 82fcde
	PTR_ADDU a3,a0,a2	/* (delay slot) a3 = dst end, one past the last byte */
Packit Service 82fcde
L(ua_smallCopy_loop):
Packit Service 82fcde
	lb	v1,0(a1)	/* fetch the next source byte */
Packit Service 82fcde
	PTR_ADDIU a0,a0,1
Packit Service 82fcde
	PTR_ADDIU a1,a1,1
Packit Service 82fcde
	bne	a0,a3,L(ua_smallCopy_loop)
Packit Service 82fcde
	sb	v1,-1(a0)	/* (delay slot) a0 was already bumped, hence -1 */
Packit Service 82fcde
Packit Service 82fcde
	j	ra		/* return to the caller */
Packit Service 82fcde
	nop			/* (delay slot) */
Packit Service 82fcde
Packit Service 82fcde
#else /* R6_CODE */
Packit Service 82fcde
Packit Service 82fcde
/* Endianness helpers for the C_ALIGN operands used by
   R6_UNALIGNED_WORD_COPY below.  Big-endian builds pass the two
   registers in the order given and use the byte offset N directly;
   little-endian builds swap the registers and use NSIZE-N.
   NOTE(review): N is not parenthesized in the little-endian
   ALIGN_OFFSET; the uses visible here pass plain integer literals,
   so the expansion is correct as written.  */
# ifdef __MIPSEB
Packit Service 82fcde
#  define SWAP_REGS(X,Y) X, Y
Packit Service 82fcde
#  define ALIGN_OFFSET(N) (N)
Packit Service 82fcde
# else
Packit Service 82fcde
#  define SWAP_REGS(X,Y) Y, X
Packit Service 82fcde
#  define ALIGN_OFFSET(N) (NSIZE-N)
Packit Service 82fcde
# endif
Packit Service 82fcde
/* R6_UNALIGNED_WORD_COPY(BYTEOFFSET): copy all whole (d)words of the
   remaining a2 bytes when the source is BYTEOFFSET bytes past an
   aligned address, then jump to L(lastb) for the leftover bytes.
   Expects t8 to hold the source misalignment (it is subtracted from a1
   to form the aligned pointer REG2).  The loop keeps the previous
   aligned unit in t0, loads the next one into t1, and combines the two
   with C_ALIGN into REG3.  The PTR_SUBU after the beq and the C_ST
   after the bne are in branch delay slots.
   Clobbers REG2, REG3, REG6, REG7, t0, t1 and a3; updates a0, a1, a2.  */
# define R6_UNALIGNED_WORD_COPY(BYTEOFFSET) \
Packit Service 82fcde
	andi	REG7, a2, (NSIZE-1);/* REG7 is # of bytes to copy by bytes. */ \
Packit Service 82fcde
	beq	REG7, a2, L(lastb); /* Check for bytes to copy by word	   */ \
Packit Service 82fcde
	PTR_SUBU a3, a2, REG7;	/* a3 is number of bytes to be copied in   */ \
Packit Service 82fcde
				/* (d)word chunks.			   */ \
Packit Service 82fcde
	move	a2, REG7;	/* a2 is # of bytes to copy byte by byte   */ \
Packit Service 82fcde
				/* after word loop is finished.		   */ \
Packit Service 82fcde
	PTR_ADDU REG6, a0, a3;	/* REG6 is the dst address after loop.	   */ \
Packit Service 82fcde
	PTR_SUBU REG2, a1, t8;	/* REG2 is the aligned src address.	   */ \
Packit Service 82fcde
	PTR_ADDU a1, a1, a3;	/* a1 is addr of source after word loop.   */ \
Packit Service 82fcde
	C_LD	t0, UNIT(0)(REG2);  /* Load first part of source.	   */ \
Packit Service 82fcde
L(r6_ua_wordcopy##BYTEOFFSET):						      \
Packit Service 82fcde
	C_LD	t1, UNIT(1)(REG2);  /* Load second part of source.	   */ \
Packit Service 82fcde
	C_ALIGN	REG3, SWAP_REGS(t1,t0), ALIGN_OFFSET(BYTEOFFSET);	      \
Packit Service 82fcde
	PTR_ADDIU a0, a0, UNIT(1);  /* Increment destination pointer.	   */ \
Packit Service 82fcde
	PTR_ADDIU REG2, REG2, UNIT(1); /* Increment aligned source pointer.*/ \
Packit Service 82fcde
	move	t0, t1;		/* Move second part of source to first.	   */ \
Packit Service 82fcde
	bne	a0, REG6,L(r6_ua_wordcopy##BYTEOFFSET);			      \
Packit Service 82fcde
	C_ST	REG3, UNIT(-1)(a0);					      \
Packit Service 82fcde
	j	L(lastb);						      \
Packit Service 82fcde
	nop
Packit Service 82fcde
Packit Service 82fcde
	/* We are generating R6 code, the destination is 4 byte aligned and
Packit Service 82fcde
	   the source is not 4 byte aligned. t8 is 1, 2, or 3 depending on the
Packit Service 82fcde
           alignment of the source.
	   With USE_DOUBLE, entry points for offsets 4 to 7 exist as well
	   (presumably the alignment unit is then 8 bytes -- confirm at the
	   dispatch site, which is outside this chunk).
	   Every macro expansion ends with "j L(lastb)", so control never
	   falls through from one entry point into the next.  */
Packit Service 82fcde
Packit Service 82fcde
L(r6_unaligned1):
Packit Service 82fcde
	R6_UNALIGNED_WORD_COPY(1)
Packit Service 82fcde
L(r6_unaligned2):
Packit Service 82fcde
	R6_UNALIGNED_WORD_COPY(2)
Packit Service 82fcde
L(r6_unaligned3):
Packit Service 82fcde
	R6_UNALIGNED_WORD_COPY(3)
Packit Service 82fcde
# ifdef USE_DOUBLE
Packit Service 82fcde
L(r6_unaligned4):
Packit Service 82fcde
	R6_UNALIGNED_WORD_COPY(4)
Packit Service 82fcde
L(r6_unaligned5):
Packit Service 82fcde
	R6_UNALIGNED_WORD_COPY(5)
Packit Service 82fcde
L(r6_unaligned6):
Packit Service 82fcde
	R6_UNALIGNED_WORD_COPY(6)
Packit Service 82fcde
L(r6_unaligned7):
Packit Service 82fcde
	R6_UNALIGNED_WORD_COPY(7)
Packit Service 82fcde
# endif
Packit Service 82fcde
#endif /* R6_CODE */
Packit Service 82fcde
Packit Service 82fcde
	/* End of the function: allow the assembler to use $at and to
	   reorder instructions / fill delay slots again.  */
	.set	at
Packit Service 82fcde
	.set	reorder
Packit Service 82fcde
END(MEMCPY_NAME)
Packit Service 82fcde
/* Non-Android libc builds additionally invoke libc_hidden_builtin_def
   on the function (macro defined outside this file; presumably it
   sets up the hidden internal alias -- confirm in the libc headers).  */
#ifndef ANDROID_CHANGES
Packit Service 82fcde
# ifdef _LIBC
Packit Service 82fcde
libc_hidden_builtin_def (MEMCPY_NAME)
Packit Service 82fcde
# endif
Packit Service 82fcde
#endif