/* Blame view of sysdeps/alpha/stxcpy.S.  */

/* Copyright (C) 1996-2018 Free Software Foundation, Inc.
   Contributed by Richard Henderson (rth@tamu.edu)
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Copy a null-terminated string from SRC to DST.

   This is an internal routine used by strcpy, stpcpy, and strcat.
   As such, it uses special linkage conventions to make implementation
   of these public functions more efficient.

   On input:
	t9 = return address
	a0 = DST
	a1 = SRC

   On output:
	t8  = bitmask (with one bit set) indicating the last byte written,
	      i.e. the position of the terminating null within the last
	      destination word
	a0  = unaligned address of the last *word* written

   Furthermore, v0, a3-a5, t11, and t12 are untouched.
*/

/* This is generally scheduled for the EV5, but should still be pretty
   good for the EV4 too.  */

#include <sysdep.h>

	/* The code below is scheduled by hand: keep the assembler from
	   quietly using the AT register and from reordering instructions.  */
	.set noat
	.set noreorder

	.text
	.type	__stxcpy, @function
	.globl	__stxcpy
	/* No ldgp sequence follows the entry label, so declare that the
	   procedure value register is not used by this function.  */
	.usepv	__stxcpy, no

	cfi_startproc
	/* The special linkage passes the return address in t9 (see the
	   description of the input registers above), so that is where
	   unwind information must look for it.  */
	cfi_return_column (t9)

	/* Co-aligned copy path: SRC and DST have the same offset within a
	   quadword, so source words can be stored without any shifting.
	   Only the first and the last word may have to be merged with
	   bytes that are already present in the destination.

	   On entry to this basic block:
	   t0 == the first destination word for masking back in
	   t1 == the first source word
	   a1 == SRC + 8 (the low three bits still give the start offset).  */
	.align 3
stxcpy_aligned:
	/* Create the 1st output word and detect 0's in the 1st input word.
	   Bytes below the start offset are not part of the string: they
	   are forced to 0xff for the null test and are taken from t0 for
	   the store.  With a start offset of zero the mskql clears all of
	   t0, which is why the entry code may skip loading it.  */
	lda	t2, -1		# e1    : build a mask against false zero
	mskqh	t2, a1, t2	# e0    :   detection in the src word
	mskqh	t1, a1, t3	# e0    : keep src bytes at or above the offset
	ornot	t1, t2, t2	# .. e1 : src with leading bytes forced to 0xff
	mskql	t0, a1, t0	# e0    : assemble the first output word
	cmpbge	zero, t2, t7	# .. e1 : bits set iff null found
	or	t0, t3, t1	# e0    : merge dst low bytes with src bytes
	bne	t7, $a_eos	# .. e1 :

	/* Main loop: store the pending word, then fetch and test the next.
	   On entry to this basic block:
	   t1 == a word to store that does not contain a null
	   a0 == address to store it to
	   a1 == address of the next source word.  */
$a_loop:
	stq_u	t1, 0(a0)	# e0    : write out the pending word
	addq	a0, 8, a0	# .. e1 :
	ldq_u	t1, 0(a1)	# e0    : fetch the next source word
	addq	a1, 8, a1	# .. e1 :
	cmpbge	zero, t1, t7	# e0 (stall)
	beq	t7, $a_loop	# .. e1 (zdb)

	/* Take care of the final (partial) word store.
	   On entry to this basic block we have:
	   t1 == the source word containing the null
	   t7 == the cmpbge mask that found it.  */
$a_eos:
	negq	t7, t6		# e0    : find low bit set
	and	t7, t6, t8	# e1 (stall)

	/* For the sake of the cache, don't read a destination word
	   if we're not going to need it.  A null in byte 7 (mask bit
	   0x80) means that all eight bytes of t1 get written.  */
	and	t8, 0x80, t6	# e0    :
	bne	t6, 1f		# .. e1 (zdb)

	/* We're doing a partial word store and so need to combine
	   our source and original destination words.  */
	ldq_u	t0, 0(a0)	# e0    :
	subq	t8, 1, t6	# .. e1 : byte mask of the bytes below the null
	zapnot	t1, t6, t1	# e0    : clear src bytes >= null
	or	t8, t6, t7	# .. e1 : byte mask of the bytes up to the null
	zap	t0, t7, t0	# e0    : clear dst bytes <= null
	or	t0, t1, t1	# e1    :

1:	stq_u	t1, 0(a0)	# e0    :
	ret	(t9)		# .. e1 :

	/* Entry point.  Chooses between the co-aligned and the unaligned
	   copy paths based on the low three bits of SRC and DST.  */
	.align 3
__stxcpy:
	/* Are source and destination co-aligned?  */
	xor	a0, a1, t0	# e0    :
	unop			#       :
	and	t0, 7, t0	# e0    : nonzero iff the low three bits differ
	bne	t0, $unaligned	# .. e1 :

	/* We are co-aligned; take care of a partial first word.  The
	   first destination word is only fetched when DST is not on a
	   quadword boundary, since only then are there bytes to keep.  */
	ldq_u	t1, 0(a1)	# e0    : load first src word
	and	a0, 7, t0	# .. e1 : take care not to load a word ...
	addq	a1, 8, a1		# e0    :
	beq	t0, stxcpy_aligned	# .. e1 : ... if we wont need it
	ldq_u	t0, 0(a0)	# e0    :
	br	stxcpy_aligned	# .. e1 :


/* The source and destination are not co-aligned.  Align the destination
   and cope.  We have to be very careful about not reading too much and
   causing a SEGV.  */

	.align 3
$u_head:
	/* We know just enough now to be able to assemble the first
	   full source word.  We can still find a zero at the end of it
	   that prevents us from outputting the whole thing.

	   On entry to this basic block:
	   t0 == the first dest word, for masking back in, if needed else 0
	   t1 == the low bits of the first source word
	   t6 == bytemask that is -1 in dest word bytes
	   a1 == SRC minus the destination misalignment, so that its low
		 three bits give the shift between source and destination.  */

	ldq_u	t2, 8(a1)	# e0    : second word feeding the first output
	addq	a1, 8, a1	# .. e1 :

	extql	t1, a1, t1	# e0    : shift down bytes of the first word
	extqh	t2, a1, t4	# e0    : shift up bytes of the second word
	mskql	t0, a0, t0	# e0    : keep dst bytes below the dst offset
	or	t1, t4, t1	# .. e1 :
	mskqh	t1, a0, t1	# e0    : keep src bytes from the dst offset up
	or	t0, t1, t1	# e1    : first output word complete

	/* Test the output word for a null.  The preserved destination
	   bytes are forced to 0xff via t6 so that they cannot match.  */
	or	t1, t6, t6	# e0    :
	cmpbge	zero, t6, t7	# .. e1 :
	lda	t6, -1		# e0    : for masking just below
	bne	t7, $u_final	# .. e1 :

	/* The bytes of t2 that did not go into the first output word
	   have not been tested yet.  If one of them is a null we must
	   not load another source word.  */
	mskql	t6, a1, t6		# e0    : mask out the bits we have
	or	t6, t2, t2		# e1    :   already extracted before
	cmpbge	zero, t2, t7		# e0    :   testing eos
	bne	t7, $u_late_head_exit	# .. e1 (zdb)

	/* Finally, we've got all the stupid leading edge cases taken care
	   of and we can set up to enter the main loop.  */

	stq_u	t1, 0(a0)	# e0    : store first output word
	addq	a0, 8, a0	# .. e1 :
	extql	t2, a1, t0	# e0    : position ho-bits of lo word
	ldq_u	t2, 8(a1)	# .. e1 : read next high-order source word
	addq	a1, 8, a1	# e0    :
	cmpbge	zero, t2, t7	# .. e1 :
	nop			# e0    :
	bne	t7, $u_eos	# .. e1 :

	/* Unaligned copy main loop.  In order to avoid reading too much,
	   the loop is structured to detect zeros in aligned source words.
	   This has, unfortunately, effectively pulled half of a loop
	   iteration out into the head and half into the tail, but it does
	   prevent nastiness from accumulating in the very thing we want
	   to run as fast as possible.

	   Each iteration combines the leftover bytes of the previous
	   source word (t0) with the leading bytes of the current one (t2)
	   into one output word, then loads and tests the next source word.

	   On entry to this basic block:
	   t0 == the shifted high-order bits from the previous source word
	   t2 == the unshifted current source word

	   We further know that t2 does not contain a null terminator.  */

	.align 3
$u_loop:
	extqh	t2, a1, t1	# e0    : extract high bits for current word
	addq	a1, 8, a1	# .. e1 :
	extql	t2, a1, t3	# e0    : extract low bits for next time
	addq	a0, 8, a0	# .. e1 :
	or	t0, t1, t1	# e0    : current dst word now complete
	ldq_u	t2, 0(a1)	# .. e1 : load high word for next time
	stq_u	t1, -8(a0)	# e0    : save the current word
	mov	t3, t0		# .. e1 : leftover bytes become the new t0
	cmpbge	zero, t2, t7	# e0    : test new word for eos
	beq	t7, $u_loop	# .. e1 :

	/* We've found a zero somewhere in the source word we just read.
	   If it resides in the lower half, we have one (probably partial)
	   word to write out, and if it resides in the upper half, we
	   have one full and one partial word left to write out.

	   On entry to this basic block:
	   t0 == the shifted high-order bits from the previous source word
	   t2 == the unshifted current source word.  */
$u_eos:
	extqh	t2, a1, t1	# e0    :
	or	t0, t1, t1	# e1    : first (partial) source word complete

	cmpbge	zero, t1, t7	# e0    : is the null in this first bit?
	bne	t7, $u_final	# .. e1 (zdb)

	/* The output word in t1 holds no null, so store it whole and make
	   the remaining bytes of t2 the final word.  This label is also
	   reached from $u_head with the same register contents.  Control
	   falls through into $u_final.  */
$u_late_head_exit:
	stq_u	t1, 0(a0)	# e0    : the null was in the high-order bits
	addq	a0, 8, a0	# .. e1 :
	extql	t2, a1, t1	# e0    : remaining bytes, zero filled on top
	cmpbge	zero, t1, t7	# .. e1 :

	/* Take care of a final (probably partial) result word.
	   On entry to this basic block:
	   t1 == assembled source word
	   t7 == cmpbge mask that found the null.

	   On exit t8 holds the single mask bit of the first null byte.  */
$u_final:
	negq	t7, t6		# e0    : isolate low bit set
	and	t6, t7, t8	# e1    :

	/* A null in byte 7 (mask bit 0x80) means the whole word is
	   written, so there is nothing to merge from the destination.  */
	and	t8, 0x80, t6	# e0    : avoid dest word load if we can
	bne	t6, 1f		# .. e1 (zdb)

	ldq_u	t0, 0(a0)	# e0    :
	subq	t8, 1, t6	# .. e1 : byte mask of the bytes below the null
	or	t6, t8, t7	# e0    : byte mask of the bytes up to the null
	zapnot	t1, t6, t1	# .. e1 : kill source bytes >= null
	zap	t0, t7, t0	# e0    : kill dest bytes <= null
	or	t0, t1, t1	# e1    :

1:	stq_u	t1, 0(a0)	# e0    :
	ret	(t9)		# .. e1 :

	/* Unaligned copy entry point.  Reached from __stxcpy when the low
	   three bits of SRC and DST differ.  Sets up t0, t1 and t6 as
	   expected by $u_head, and handles on its own the case where the
	   string ends inside the first source word.  */
	.align 3
$unaligned:

	ldq_u	t1, 0(a1)	# e0    : load first source word

	and	a0, 7, t4	# .. e1 : find dest misalignment
	and	a1, 7, t5	# e0    : find src misalignment

	/* Conditionally load the first destination word and a bytemask
	   with 0xff indicating that the destination byte is sacrosanct.  */

	mov	zero, t0	# .. e1 :
	mov	zero, t6	# e0    :
	beq	t4, 1f		# .. e1 :
	ldq_u	t0, 0(a0)	# e0    :
	lda	t6, -1		# .. e1 :
	mskql	t6, a0, t6	# e0    : 0xff in bytes below the dest offset
1:
	subq	a1, t4, a1	# .. e1 : sub dest misalignment from src addr

	/* If source misalignment is larger than dest misalignment, we need
	   extra startup checks to avoid SEGV.  In that case the 8(a1) load
	   in $u_head touches the source word after the first one, which
	   must not happen if the string already ends in the first word.
	   In the other case a1 has moved back into the preceding quadword
	   and that load merely fetches the first source word again.  */

	cmplt	t4, t5, t8	# e0    :
	beq	t8, $u_head	# .. e1 (zdb)

	lda	t2, -1		# e1    : mask out leading garbage in source
	mskqh	t2, t5, t2	# e0    :
	nop			# e0    :
	ornot	t1, t2, t3	# .. e1 : bytes before the string forced to 0xff
	cmpbge	zero, t3, t7	# e0    : is there a zero?
	beq	t7, $u_head	# .. e1 (zdb)

	/* At this point we've found a zero in the first partial word of
	   the source.  We need to isolate the valid source data and mask
	   it into the original destination data.  (Incidentally, we know
	   that we'll need at least one byte of that original dest word.) */

	ldq_u	t0, 0(a0)	# e0    :

	negq	t7, t6		# .. e1 : build bitmask of bytes <= zero
	and	t6, t7, t8	# e0    :
	and	a1, 7, t5	# .. e1 : byte shift from src to dest position
	subq	t8, 1, t6	# e0    :
	or	t6, t8, t7	# e1    :
	srl	t8, t5, t8	# e0    : adjust final null return value

	/* t2 still holds the validity mask built above (0xff in the
	   bytes at or after the start of the string).  Trim it to end at
	   the null, then shift both the mask and the data into place.  */
	zapnot	t2, t7, t2	# .. e1 : prepare source word; mirror changes
	and	t1, t2, t1	# e1    : to source validity mask
	extql	t2, a1, t2	# .. e0 :
	extql	t1, a1, t1	# e0    :

	andnot	t0, t2, t0	# .. e1 : zero place for source to reside
	or	t0, t1, t1	# e1    : and put it there
	stq_u	t1, 0(a0)	# .. e0 :
	ret	(t9)

	cfi_endproc