Blame sysdeps/alpha/strcmp.S

Packit 6c4009
/* Copyright (C) 1996-2018 Free Software Foundation, Inc.
Packit 6c4009
   Contributed by Richard Henderson (rth@tamu.edu)
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library.  If not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
/* Bytewise compare two null-terminated strings.  */
Packit 6c4009
Packit 6c4009
#include <sysdep.h>
Packit 6c4009
Packit 6c4009
	.set noat
Packit 6c4009
	.set noreorder
Packit 6c4009
Packit 6c4009
	.text
Packit 6c4009
Packit 6c4009
/* int strcmp (const char *s1, const char *s2)

   In:   a0 = s1, a1 = s2.
   Out:  v0 = 0 when the strings compare equal, otherwise 1 or -1
	 depending on the first differing byte (1 when the s1 byte is
	 the larger one under the unsigned byte compare done by cmpbge).
   Uses: t0-t8 as scratch; gp and AT are touched only when built
	 with PROF.

   The strings are compared one quadword at a time.  cmpbge against
   zero yields one mask bit per null byte, which is how the terminator
   is located.  When s1 and s2 share the same alignment within a
   quadword the simple $a_loop is used; otherwise each s2 word is
   assembled from two aligned loads with extql/extqh so that it lines
   up with the aligned s1 word ($u_head / $u_loop / $u_final).  */
ENTRY(strcmp)
Packit 6c4009
#ifdef PROF
Packit 6c4009
	/* Profiling build: establish gp, then call _mcount through AT
	   (AT holds both the target and the return address).  */
	ldgp	gp, 0(pv)
Packit 6c4009
	lda	AT, _mcount
Packit 6c4009
	jmp	AT, (AT), _mcount
Packit 6c4009
	.prologue 1
Packit 6c4009
#else
Packit 6c4009
	.prologue 0
Packit 6c4009
#endif
Packit 6c4009

	/* Load the aligned quadwords containing the first byte of each
	   string and test whether the low three address bits agree.
	   t3 is preset to all-ones for the masking done below.  */
Packit 6c4009
	ldq_u	t0, 0(a0)	# e0    : give cache time to catch up
Packit 6c4009
	xor	a0, a1, t2	# .. e1 : are s1 and s2 co-aligned?
Packit 6c4009
	ldq_u	t1, 0(a1)	# e0    :
Packit 6c4009
	and	t2, 7, t2	# .. e1 :
Packit 6c4009
	lda	t3, -1		# e0    :
Packit 6c4009
	bne	t2, $unaligned	# .. e1 :
Packit 6c4009

Packit 6c4009
	/* On entry to this basic block:
Packit 6c4009
	   t0 == the first s1 word (may contain bytes before the string).
Packit 6c4009
	   t1 == the first s2 word (may contain bytes before the string).
Packit 6c4009
	   t3 == -1.  */
Packit 6c4009

Packit 6c4009
$aligned:
Packit 6c4009
	/* mskqh clears the bytes of t3 below offset (a0 & 7), so ~t3
	   selects exactly the bytes that precede the strings.  Forcing
	   those bytes to 0xff in both words makes them compare equal
	   and keeps them from being mistaken for a terminator.  */
	mskqh	t3, a0, t3	# e0    :
Packit 6c4009
	nop			# .. e1 :
Packit 6c4009
	ornot	t1, t3, t1	# e0    :
Packit 6c4009
	ornot	t0, t3, t0	# .. e1 :
Packit 6c4009
	cmpbge	zero, t1, t7	# e0    : bits set iff null found
Packit 6c4009
	bne	t7, $eos	# e1 (zdb)
Packit 6c4009

Packit 6c4009
	/* Aligned compare main loop.
Packit 6c4009
	   On entry to this basic block:
Packit 6c4009
	   t0 == an s1 word.
Packit 6c4009
	   t1 == an s2 word not containing a null.
	   Only the s2 word is tested for a null: a null present only
	   in the s1 word makes the two words differ and is therefore
	   caught by the xor below.  */
Packit 6c4009

Packit 6c4009
$a_loop:
Packit 6c4009
	xor	t0, t1, t2	# e0	:
Packit 6c4009
	bne	t2, $wordcmp	# .. e1 (zdb)
Packit 6c4009
	ldq_u	t1, 8(a1)	# e0    :
Packit 6c4009
	ldq_u	t0, 8(a0)	# .. e1 :
Packit 6c4009
	addq	a1, 8, a1	# e0    :
Packit 6c4009
	addq	a0, 8, a0	# .. e1 :
Packit 6c4009
	cmpbge	zero, t1, t7	# e0    :
Packit 6c4009
	beq	t7, $a_loop	# .. e1 (zdb)
Packit 6c4009
	br	$eos		# e1    :
Packit 6c4009

Packit 6c4009
	/* The two strings are not co-aligned.  Align s1 and cope.
	   a1 is moved back by the s1 misalignment so that the unaligned
	   quadword at a1 lines up byte-for-byte with the aligned s1
	   quadword containing a0.  */
Packit 6c4009

Packit 6c4009
$unaligned:
Packit 6c4009
	and	a0, 7, t4	# e0    : find s1 misalignment
Packit 6c4009
	and	a1, 7, t5	# .. e1 : find s2 misalignment
Packit 6c4009
	subq	a1, t4, a1	# e0    :
Packit 6c4009

Packit 6c4009
	/* If s2 misalignment is larger than s1 misalignment, we need
Packit 6c4009
	   extra startup checks to avoid SEGV: in that case $u_head
	   loads the quadword after the one already in t1, which must
	   not be touched if s2 ends inside the first quadword.  */
Packit 6c4009

Packit 6c4009
	cmplt	t4, t5, t8	# .. e1 :
Packit 6c4009
	beq	t8, $u_head	# e1    :
Packit 6c4009

	/* Look for a null among the valid bytes of the first s2 word:
	   bytes below offset t5 (before s2) are forced to 0xff in the
	   temporary t3 so they cannot match.  */
Packit 6c4009
	mskqh	t3, t5, t3	# e0    :
Packit 6c4009
	ornot	t1, t3, t3	# e0    :
Packit 6c4009
	cmpbge	zero, t3, t7	# e1    : is there a zero?
Packit 6c4009
	beq	t7, $u_head	# e1    :
Packit 6c4009

Packit 6c4009
	/* We've found a zero in the first partial word of s2.  Align
Packit 6c4009
	   our current s1 and s2 words and compare what we've got.
	   Both words are shifted down so that byte 0 holds the first
	   character of the respective string.  */
Packit 6c4009

Packit 6c4009
	extql	t1, t5, t1	# e0    :
Packit 6c4009
	extql	t0, a0, t0	# e0    :
Packit 6c4009
	cmpbge	zero, t1, t7	# .. e1 : find that zero again
Packit 6c4009
	br	$eos		# e1    : and finish up
Packit 6c4009

Packit 6c4009
	.align 3
Packit 6c4009
$u_head:
Packit 6c4009
	/* We know just enough now to be able to assemble the first
Packit 6c4009
	   full word of s2.  We can still find a zero at the end of it.
Packit 6c4009

Packit 6c4009
	   On entry to this basic block:
Packit 6c4009
	   t0 == first word of s1
Packit 6c4009
	   t1 == first partial word of s2.

	   The s2 word is built as extql (t1) | extqh (t2) using the
	   adjusted a1, then the bytes in front of the strings are
	   forced to 0xff in both words.  Afterwards t3 is rebuilt with
	   mskql so that the bytes of t2 already merged into the first
	   s2 word are ignored when the rest of t2 is scanned for a
	   null; a hit there diverts to $u_final so that no further s2
	   quadword is loaded.  */
Packit 6c4009

Packit 6c4009
	ldq_u	t2, 8(a1)	# e0    : load second partial s2 word
Packit 6c4009
	lda	t3, -1		# .. e1 : create leading garbage mask
Packit 6c4009
	extql	t1, a1, t1	# e0    : create first s2 word
Packit 6c4009
	mskqh	t3, a0, t3	# e0    :
Packit 6c4009
	extqh	t2, a1, t4	# e0    :
Packit 6c4009
	ornot	t0, t3, t0	# .. e1 : kill s1 garbage
Packit 6c4009
	or	t1, t4, t1	# e0    : s2 word now complete
Packit 6c4009
	cmpbge	zero, t0, t7	# .. e1 : find zero in first s1 word
Packit 6c4009
	ornot	t1, t3, t1	# e0    : kill s2 garbage
Packit 6c4009
	lda	t3, -1		# .. e1 :
Packit 6c4009
	mskql	t3, a1, t3	# e0    : mask for s2[1] bits we have seen
Packit 6c4009
	bne	t7, $eos	# .. e1 :
Packit 6c4009
	xor	t0, t1, t4	# e0    : compare aligned words
Packit 6c4009
	bne	t4, $wordcmp	# .. e1 (zdb)
Packit 6c4009
	or	t2, t3, t3	# e0    :
Packit 6c4009
	cmpbge	zero, t3, t7	# e1    :
Packit 6c4009
	bne	t7, $u_final	# e1    :
Packit 6c4009

Packit 6c4009
	/* Unaligned compare main loop.  In order to avoid reading too much,
Packit 6c4009
	   the loop is structured to detect zeros in aligned words from s2.
Packit 6c4009
	   This has, unfortunately, effectively pulled half of a loop
Packit 6c4009
	   iteration out into the head and half into the tail, but it does
Packit 6c4009
	   prevent nastiness from accumulating in the very thing we want
Packit 6c4009
	   to run as fast as possible.
Packit 6c4009

Packit 6c4009
	   On entry to this basic block:
Packit 6c4009
	   t2 == the unshifted low-bits from the next s2 word.

	   Each iteration forms the next s2 word from the remaining
	   bytes of the old t2 (extql) and the leading bytes of the
	   newly loaded t2 (extqh), loads the next s1 word, and leaves
	   via $eos on a null in the s1 word or via $wordcmp on a
	   mismatch.  The loop only continues while the newly loaded
	   s2 quadword contains no null at all.  */
Packit 6c4009

Packit 6c4009
	.align 3
Packit 6c4009
$u_loop:
Packit 6c4009
	extql	t2, a1, t3	# e0    :
Packit 6c4009
	ldq_u	t2, 16(a1)	# .. e1 : load next s2 high bits
Packit 6c4009
	ldq_u	t0, 8(a0)	# e0    : load next s1 word
Packit 6c4009
	addq	a1, 8, a1	# .. e1 :
Packit 6c4009
	addq	a0, 8, a0	# e0    :
Packit 6c4009
	nop			# .. e1 :
Packit 6c4009
	extqh	t2, a1, t1	# e0    :
Packit 6c4009
	cmpbge	zero, t0, t7	# .. e1 : find zero in current s1 word
Packit 6c4009
	or	t1, t3, t1	# e0    :
Packit 6c4009
	bne	t7, $eos	# .. e1 :
Packit 6c4009
	xor	t0, t1, t4	# e0    : compare the words
Packit 6c4009
	bne	t4, $wordcmp	# .. e1 (zdb)
Packit 6c4009
	cmpbge	zero, t2, t4	# e0    : find zero in next low bits
Packit 6c4009
	beq	t4, $u_loop	# .. e1 (zdb)
Packit 6c4009

Packit 6c4009
	/* We've found a zero in the low bits of the last s2 word.  Get
Packit 6c4009
	   the next s1 word and align them.  The final s2 word is taken
	   from t2 alone (extql zero-fills the upper bytes), so no
	   further s2 quadword is loaded.  */
Packit 6c4009
$u_final:
Packit 6c4009
	ldq_u	t0, 8(a0)	# e1    :
Packit 6c4009
	extql	t2, a1, t1	# .. e0 :
Packit 6c4009
	cmpbge	zero, t1, t7	# e0    :
Packit 6c4009

Packit 6c4009
	/* We've found a zero somewhere in a word we just read.
Packit 6c4009
	   On entry to this basic block:
Packit 6c4009
	   t0 == s1 word
Packit 6c4009
	   t1 == s2 word
Packit 6c4009
	   t7 == cmpbge mask containing the zero.

	   t7 & -t7 isolates the lowest set bit, i.e. the first null;
	   OR-ing in all bits below it gives a byte mask covering the
	   string data up to and including that null.  zapnot keeps
	   only those bytes in both words, so anything after the
	   terminator cannot influence the result.  If the trimmed
	   words are equal, v0 is already 0 and is returned as is.  */
Packit 6c4009

Packit 6c4009
	.align 3
Packit 6c4009
$eos:
Packit 6c4009
	negq	t7, t6		# e0    : create bytemask of valid data
Packit 6c4009
	and	t6, t7, t8	# e1    :
Packit 6c4009
	subq	t8, 1, t6	# e0    :
Packit 6c4009
	or	t6, t8, t7	# e1    :
Packit 6c4009
	zapnot	t0, t7, t0	# e0    : kill the garbage
Packit 6c4009
	zapnot	t1, t7, t1	# .. e1 :
Packit 6c4009
	xor	t0, t1, v0	# e0    : and compare
Packit 6c4009
	beq	v0, $done	# .. e1 :
Packit 6c4009

Packit 6c4009
	/* Here we have two differing co-aligned words in t0 & t1.
Packit 6c4009
	   Locate the least significant differing byte (the earliest
	   differing character) and return 1 if the t0 (s1) byte is the
	   larger one, otherwise -1.  The two cmpbge masks agree on
	   equal bytes, so their xor marks exactly the differing bytes;
	   x & -x then keeps only the lowest such bit.  */
Packit 6c4009
$wordcmp:
Packit 6c4009
	cmpbge	t0, t1, t2	# e0    : comparison yields bit mask of ge
Packit 6c4009
	cmpbge	t1, t0, t3	# .. e1 :
Packit 6c4009
	xor	t2, t3, t0	# e0    : bits set iff t0/t1 bytes differ
Packit 6c4009
	negq	t0, t1		# e1    : clear all but least bit
Packit 6c4009
	and	t0, t1, t0	# e0    :
Packit 6c4009
	lda	v0, -1		# .. e1 :
Packit 6c4009
	and	t0, t2, t1	# e0    : was bit set in t0 > t1?
Packit 6c4009
	cmovne	t1, 1, v0	# .. e1 (zdb)
Packit 6c4009

Packit 6c4009
$done:
Packit 6c4009
	ret			# e1    :
Packit 6c4009

Packit 6c4009
	END(strcmp)
Packit 6c4009
libc_hidden_builtin_def (strcmp)