Blame sysdeps/alpha/strncmp.S

Packit 6c4009
/* Copyright (C) 1996-2018 Free Software Foundation, Inc.
Packit 6c4009
   Contributed by Richard Henderson (rth@tamu.edu)
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library.  If not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
/* Bytewise compare two null-terminated strings of length no longer than N.  */
Packit 6c4009
Packit 6c4009
#include <sysdep.h>
Packit 6c4009
Packit 6c4009
	/* The PROF prologue of strncmp names AT explicitly, so tell the
	   assembler that the register is under our control.  */
	.set noat
Packit 6c4009
	/* The code below is hand-scheduled (see the e0/e1 notes on the loop
	   instructions); ask the assembler to leave the order alone.  */
	.set noreorder
Packit 6c4009
Packit 6c4009
/* EV6 only predicts one branch per octaword.  We'll use these to push
Packit 6c4009
   subsequent branches back to the next bundle.  This will generally add
Packit 6c4009
   a fetch+decode cycle to older machines, so skip in that case.  */
/* ev6_unop expands to a unop when __alpha_fix__ is defined and to nothing
   otherwise.  NOTE(review): __alpha_fix__ is presumably the compiler
   predefine that stands in for "EV6 or later" here -- confirm.  */
Packit 6c4009
#ifdef __alpha_fix__
Packit 6c4009
# define ev6_unop	unop
Packit 6c4009
#else
Packit 6c4009
# define ev6_unop
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
	.text
Packit 6c4009
Packit 6c4009
/* strncmp for Alpha.

   Inputs, as used by the instructions below:
     a0 = s1, a1 = s2, a2 = byte count.
   Result in v0: 0 when the compared bytes are equal or the count is zero,
   otherwise 1 or -1 from the unsigned comparison of the first differing
   byte (see $wordcmp).

   This entry block starts the first two loads, clamps the count, and
   computes the bookkeeping used by every later block:
     t3  = -1 (all ones, the source for byte masks)
     t4  = s1 & 7, t5 = s2 & 7
     a3  = s2 + count
     a2  = (count + t4) >> 3	full s1 words covered by the count
     t10 = (count + t4) & 7	counted bytes in the last, partial s1 word
   and then selects the co-aligned ($aligned) or the $unaligned path.  */
ENTRY(strncmp)
Packit 6c4009
#ifdef PROF
	/* Profiling build: establish gp and call _mcount through AT.  */
Packit 6c4009
	ldgp	gp, 0(pv)
Packit 6c4009
	lda	AT, _mcount
Packit 6c4009
	jsr	AT, (AT), _mcount
Packit 6c4009
	.prologue 1
Packit 6c4009
#else
Packit 6c4009
	.prologue 0
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
	xor	a0, a1, t2	# are s1 and s2 co-aligned?
Packit 6c4009
	beq	a2, $zerolength	# count == 0: return 0 before any load is issued
Packit 6c4009
	ldq_u	t0, 0(a0)	# load asap to give cache time to catch up
Packit 6c4009
	ldq_u	t1, 0(a1)
Packit 6c4009
	lda	t3, -1		# t3 = all ones
Packit 6c4009
	and	t2, 7, t2	# nonzero iff the low three address bits differ
Packit 6c4009
	srl	t3, 1, t6	# t6 = LONG_MAX (logical shift of -1)
Packit 6c4009
	and	a0, 7, t4	# find s1 misalignment
Packit 6c4009
	and	a1, 7, t5	# find s2 misalignment
Packit 6c4009
	cmovlt	a2, t6, a2	# bound neg count to LONG_MAX
Packit 6c4009
	addq	a1, a2, a3	# s2+count
Packit 6c4009
	addq	a2, t4, a2	# bias count by s1 misalignment
Packit 6c4009
	and	a2, 7, t10	# ofs of last byte in s1 last word
Packit 6c4009
	srl	a2, 3, a2	# remaining full words in s1 count
Packit 6c4009
	bne	t2, $unaligned	# different misalignment: take the shifting path
Packit 6c4009
Packit 6c4009
	/* On entry to this basic block:
Packit 6c4009
	   t0 == the first word of s1.
Packit 6c4009
	   t1 == the first word of s2.
Packit 6c4009
	   t3 == -1.  */
	/* Co-aligned start.  Both strings begin at the same byte offset
	   inside their first word, so one mask serves both: the bytes in
	   front of the strings are forced to 0xff, which makes them equal
	   in t0 and t1 and keeps them from looking like a terminator.
	   Then dispatch on where the count ends.  */
Packit 6c4009
$aligned:
Packit 6c4009
	mskqh	t3, a1, t8	# mask off leading garbage
Packit 6c4009
	ornot	t1, t8, t1	# t1 |= ~t8: bytes before the string become 0xff
Packit 6c4009
	ornot	t0, t8, t0	# same for the s1 word
Packit 6c4009
	cmpbge	zero, t1, t7	# bits set iff null found
Packit 6c4009
	beq	a2, $eoc	# check end of count
Packit 6c4009
	bne	t7, $eos	# null in the first s2 word
Packit 6c4009
	beq	t10, $ant_loop	# count ends on a word boundary: use the loop that tests the count before loading
Packit 6c4009
Packit 6c4009
	/* Aligned compare main loop.
Packit 6c4009
	   On entry to this basic block:
Packit 6c4009
	   t0 == an s1 word.
Packit 6c4009
	   t1 == an s2 word not containing a null.  */
	/* This loop is only entered with t10 != 0 (see the branch to
	   $ant_loop just before it), so when a2 reaches zero the words
	   just loaded still hold t10 counted bytes and $eoc trims them.
	   Each pass compares the current pair, then loads the next pair
	   and advances both pointers by one word.  */
Packit 6c4009
Packit 6c4009
	.align 4
Packit 6c4009
$a_loop:
Packit 6c4009
	xor	t0, t1, t2	# e0	:
Packit 6c4009
	bne	t2, $wordcmp	# .. e1 (zdb)
Packit 6c4009
	ldq_u	t1, 8(a1)	# e0    :
Packit 6c4009
	ldq_u	t0, 8(a0)	# .. e1 :
Packit 6c4009
Packit 6c4009
	subq	a2, 1, a2	# e0    :
Packit 6c4009
	addq	a1, 8, a1	# .. e1 :
Packit 6c4009
	addq	a0, 8, a0	# e0    :
Packit 6c4009
	beq	a2, $eoc	# .. e1 :
Packit 6c4009
Packit 6c4009
	cmpbge	zero, t1, t7	# e0    :
Packit 6c4009
	beq	t7, $a_loop	# .. e1 :
Packit 6c4009
Packit 6c4009
	br	$eos		# null in the s2 word just loaded
Packit 6c4009
Packit 6c4009
	/* Alternate aligned compare loop, for when there's no trailing
Packit 6c4009
	   bytes on the count.  We have to avoid reading too much data.  */
	/* Unlike $a_loop, the count is decremented and tested before the
	   next pair of words is loaded.  When it reaches zero every
	   counted word has compared equal, so the result is 0 and the
	   words past the count are never touched.  */
Packit 6c4009
	.align 4
Packit 6c4009
$ant_loop:
Packit 6c4009
	xor	t0, t1, t2	# e0	:
Packit 6c4009
	ev6_unop
Packit 6c4009
	ev6_unop
Packit 6c4009
	bne	t2, $wordcmp	# .. e1 (zdb)
Packit 6c4009
Packit 6c4009
	subq	a2, 1, a2	# e0    :
Packit 6c4009
	beq	a2, $zerolength	# .. e1 :
Packit 6c4009
	ldq_u	t1, 8(a1)	# e0    :
Packit 6c4009
	ldq_u	t0, 8(a0)	# .. e1 :
Packit 6c4009
Packit 6c4009
	addq	a1, 8, a1	# e0    :
Packit 6c4009
	addq	a0, 8, a0	# .. e1 :
Packit 6c4009
	cmpbge	zero, t1, t7	# e0    :
Packit 6c4009
	beq	t7, $ant_loop	# .. e1 :
Packit 6c4009
Packit 6c4009
	br	$eos		# null in the s2 word just loaded
Packit 6c4009
Packit 6c4009
	/* The two strings are not co-aligned.  Align s1 and cope.  */
Packit 6c4009
	/* On entry to this basic block:
Packit 6c4009
	   t0 == the first word of s1.
Packit 6c4009
	   t1 == the first word of s2.
Packit 6c4009
	   t3 == -1.
Packit 6c4009
	   t4 == misalignment of s1.
Packit 6c4009
	   t5 == misalignment of s2.
Packit 6c4009
	  t10 == misalignment of s1 end.  */
	/* From here on a1 is moved back by the s1 misalignment, so its low
	   three bits are the byte shift that lines s2 data up with aligned
	   s1 words.  a3 is turned from "s2 + count" into a word index:
	   (address of the last counted s2 byte - (a1 & ~7)) >> 3.  */
Packit 6c4009
	.align	4
Packit 6c4009
$unaligned:
Packit 6c4009
	/* If s2 misalignment is larger than s1 misalignment (t4 < t5), the
Packit 6c4009
	   word $u_head loads next is a new s2 word, so we need extra
	   startup checks to avoid SEGV.  Otherwise 8(a1) rounds down to
	   the s2 word that is already in t1 and $u_head can load it.  */
Packit 6c4009
	subq	a1, t4, a1	# adjust s2 for s1 misalignment
Packit 6c4009
	cmpult	t4, t5, t9	# t9 = 1 when the extra checks are required
Packit 6c4009
	subq	a3, 1, a3	# last byte of s2
Packit 6c4009
	bic	a1, 7, t8	# aligned word containing the adjusted a1
Packit 6c4009
	mskqh	t3, t5, t7	# mask garbage in s2
Packit 6c4009
	subq	a3, t8, a3	# byte distance from that word to the last s2 byte
Packit 6c4009
	ornot	t1, t7, t7	# t7 = first s2 word with the leading bytes forced to 0xff (t1 is kept)
Packit 6c4009
	srl	a3, 3, a3	# remaining full words in s2 count
Packit 6c4009
	beq	t9, $u_head
Packit 6c4009
Packit 6c4009
	/* Failing that, we need to look for both eos and eoc within the
Packit 6c4009
	   first word of s2.  If we find either, we can continue by
Packit 6c4009
	   pretending that the next word of s2 is all zeros.  */
Packit 6c4009
	lda	t2, 0		# next = zero
Packit 6c4009
	cmpeq	a3, 0, t8	# eoc in the first word of s2?
Packit 6c4009
	cmpbge	zero, t7, t7	# eos in the first word of s2?
Packit 6c4009
	or	t7, t8, t8
Packit 6c4009
	bne	t8, $u_head_nl	# yes: skip the load and keep t2 == 0
Packit 6c4009
Packit 6c4009
	/* We know just enough now to be able to assemble the first
Packit 6c4009
	   full word of s2.  We can still find a zero at the end of it.
Packit 6c4009
Packit 6c4009
	   On entry to this basic block:
Packit 6c4009
	   t0 == first word of s1
Packit 6c4009
	   t1 == first partial word of s2.
Packit 6c4009
	   t3 == -1.
Packit 6c4009
	   t10 == ofs of last byte in s1 last word.
Packit 6c4009
	   a3 == index of the aligned word holding the last counted s2
	   byte, counted from the word containing the adjusted a1.
	   (t11, named by older versions of this comment, is not used
	   anywhere in this routine.)  */
	/* $u_head loads the s2 word at 8(a1) into t2 and accounts for it
	   in a3; $u_head_nl is the entry that keeps the t2 == 0 set up by
	   the caller instead.  Both then build the s2 word that lines up
	   with the first s1 word, compare it, and decide whether the main
	   loop may run.  t4 and t8 are reused as scratch from here on.  */
Packit 6c4009
$u_head:
Packit 6c4009
	ldq_u	t2, 8(a1)	# load second partial s2 word
Packit 6c4009
	subq	a3, 1, a3	# one s2 word fewer before the end of count
Packit 6c4009
$u_head_nl:
Packit 6c4009
	extql	t1, a1, t1	# create first s2 word
Packit 6c4009
	mskqh	t3, a0, t8	# t8 = 0xff in the bytes at or after the s1 start offset
Packit 6c4009
	extqh	t2, a1, t4	# high part of the s2 word, taken from t2
Packit 6c4009
	ornot	t0, t8, t0	# kill s1 garbage
Packit 6c4009
	or	t1, t4, t1	# s2 word now complete
Packit 6c4009
	cmpbge	zero, t0, t7	# find eos in first s1 word
Packit 6c4009
	ornot	t1, t8, t1	# kill s2 garbage
Packit 6c4009
	beq	a2, $eoc	# count ends inside the first s1 word
Packit 6c4009
	subq	a2, 1, a2
Packit 6c4009
	bne	t7, $eos
Packit 6c4009
	mskql	t3, a1, t8	# mask out s2[1] bits we have seen
Packit 6c4009
	xor	t0, t1, t4	# compare aligned words
Packit 6c4009
	or	t2, t8, t8	# t8 = t2 with the already-used bytes forced nonzero
Packit 6c4009
	bne	t4, $wordcmp
Packit 6c4009
	cmpbge	zero, t8, t7	# eos in high bits of s2[1]?
Packit 6c4009
	cmpeq	a3, 0, t8	# eoc in s2[1]?
Packit 6c4009
	or	t7, t8, t7
Packit 6c4009
	bne	t7, $u_final	# either one: do not enter the main loop
Packit 6c4009
Packit 6c4009
	/* Unaligned compare main loop.  In order to avoid reading too much,
Packit 6c4009
	   the loop is structured to detect zeros in aligned words from s2.
Packit 6c4009
	   This has, unfortunately, effectively pulled half of a loop
Packit 6c4009
	   iteration out into the head and half into the tail, but it does
Packit 6c4009
	   prevent nastiness from accumulating in the very thing we want
Packit 6c4009
	   to run as fast as possible.
Packit 6c4009
Packit 6c4009
	   On entry to this basic block:
Packit 6c4009
	   t2 == the unshifted low-bits from the next s2 word.
Packit 6c4009
	   t10 == ofs of last byte in s1 last word.
Packit 6c4009
	   a3 == aligned s2 words left before the one holding the last
	   counted s2 byte; the loop exits when it reaches zero.
	   (t11, named by older versions of this comment, is not used
	   anywhere in this routine.)  */
	/* Each pass builds one s2 word out of the leftover bytes of the
	   previous aligned s2 word (t3) and the low bytes of the next one
	   (t1), loads the matching s1 word, and checks in this order:
	   end of count for s1, null in s1, word mismatch, and finally a
	   null or the end of count in the new s2 word.  */
Packit 6c4009
	.align 4
Packit 6c4009
$u_loop:
Packit 6c4009
	extql	t2, a1, t3	# e0    :
Packit 6c4009
	ldq_u	t2, 16(a1)	# .. e1 : load next s2 high bits
Packit 6c4009
	ldq_u	t0, 8(a0)	# e0    : load next s1 word
Packit 6c4009
	addq	a1, 8, a1	# .. e1 :
Packit 6c4009
Packit 6c4009
	addq	a0, 8, a0	# e0    :
Packit 6c4009
	subq	a3, 1, a3	# .. e1 :
Packit 6c4009
	extqh	t2, a1, t1	# e0    :
Packit 6c4009
	cmpbge	zero, t0, t7	# .. e1 : eos in current s1 word
Packit 6c4009
Packit 6c4009
	or	t1, t3, t1	# e0    :
Packit 6c4009
	beq	a2, $eoc	# .. e1 : eoc in current s1 word
Packit 6c4009
	subq	a2, 1, a2	# e0    :
Packit 6c4009
	cmpbge	zero, t2, t4	# .. e1 : eos in s2[1]
Packit 6c4009
Packit 6c4009
	xor	t0, t1, t3	# e0    : compare the words
Packit 6c4009
	ev6_unop
Packit 6c4009
	ev6_unop
Packit 6c4009
	bne	t7, $eos	# .. e1 :
Packit 6c4009
Packit 6c4009
	cmpeq	a3, 0, t5	# e0    : eoc in s2[1]
Packit 6c4009
	ev6_unop
Packit 6c4009
	ev6_unop
Packit 6c4009
	bne	t3, $wordcmp	# .. e1 :
Packit 6c4009
Packit 6c4009
	or	t4, t5, t4	# e0    : eos or eoc in s2[1].
Packit 6c4009
	beq	t4, $u_loop	# .. e1 (zdb)
Packit 6c4009
Packit 6c4009
	/* We get here when the part of the last s2 word (t2) that has not
	   been compared yet holds a null, or when that word holds the last
	   counted s2 byte.  Every word compared so far was equal and the
	   s1 words contained no null.

	   If the count ended exactly with the s1 word just compared
	   (a2 == 0 and t10 == 0) there is nothing left to look at.  The
	   next s1 word then starts at s1 + count and may sit on an
	   unmapped page, so return 0 without loading it; $eoc would have
	   produced the same 0 after the load.

	   Otherwise get the next s1 word and align the remaining s2 bytes
	   with it.  t8 is free for scratch use here; $eos overwrites it.  */
Packit 6c4009
	.align 3
Packit 6c4009
$u_final:
	or	a2, t10, t8	# zero iff no counted byte is left in s1
Packit 6c4009
	extql	t2, a1, t1	# remaining s2 bytes, shifted into place
	beq	t8, $zerolength	# equal through the whole count: return 0
Packit 6c4009
	ldq_u	t0, 8(a0)	# next s1 word; it holds at least one counted byte
Packit 6c4009
	cmpbge	zero, t1, t7	# nulls in s2, including the zero bytes extql shifted in
Packit 6c4009
	bne	a2, $eos	# a full s1 word is still counted: the s2 null sets the limit
	/* Otherwise fall through to $eoc, which trims both words to the
	   t10 counted bytes.  */
Packit 6c4009
Packit 6c4009
	/* We've hit end of count.  Zero everything after the count
Packit 6c4009
	   and compare what's left.  */
	/* mskql keeps the low t10 bytes of each word and clears the rest,
	   so the first byte past the count reads as a null in the s2 word
	   and $eos stops there.  With t10 == 0 both words are cleared
	   completely and the result is 0.  Falls through to $eos.  */
Packit 6c4009
	.align 3
Packit 6c4009
$eoc:
Packit 6c4009
	mskql	t0, t10, t0	# keep only the counted bytes of the s1 word
Packit 6c4009
	mskql	t1, t10, t1	# same for the s2 word
Packit 6c4009
	cmpbge	zero, t1, t7	# t7 = zero bytes in the trimmed s2 word
Packit 6c4009
Packit 6c4009
	/* We've found a zero somewhere in a word we just read.
Packit 6c4009
	   On entry to this basic block:
Packit 6c4009
	   t0 == s1 word
Packit 6c4009
	   t1 == s2 word
Packit 6c4009
	   t7 == cmpbge mask containing the zero.  */
	/* Turn t7 into a byte mask covering everything up to and including
	   the first flagged byte, drop the bytes after it from both words,
	   and compare what remains.  Equal words leave v0 == 0 and return
	   through $done; otherwise fall through to $wordcmp.  */
Packit 6c4009
	.align 3
Packit 6c4009
$eos:
Packit 6c4009
	negq	t7, t6		# create bytemask of valid data
Packit 6c4009
	and	t6, t7, t8	# t8 = lowest set bit of t7 (first flagged byte)
Packit 6c4009
	subq	t8, 1, t6	# t6 = all bits below it
Packit 6c4009
	or	t6, t8, t7	# t7 = bytes up to and including the first flagged one
Packit 6c4009
	zapnot	t0, t7, t0	# kill the garbage
Packit 6c4009
	zapnot	t1, t7, t1
Packit 6c4009
	xor	t0, t1, v0	# ... and compare
Packit 6c4009
	beq	v0, $done	# identical: v0 is already 0
Packit 6c4009
Packit 6c4009
	/* Here we have two differing co-aligned words in t0 & t1.
Packit 6c4009
	   Bytewise compare them and return (t0 > t1 ? 1 : -1).  */
	/* The two cmpbge results differ exactly in the byte lanes where
	   the words differ.  The lowest such lane is isolated; if t0 was
	   the greater-or-equal side there, it was strictly greater and the
	   result is 1, otherwise the preloaded -1 stands.  The byte
	   comparison done by cmpbge is unsigned.  */
Packit 6c4009
	.align 3
Packit 6c4009
$wordcmp:
Packit 6c4009
	cmpbge	t0, t1, t2	# comparison yields bit mask of ge
Packit 6c4009
	cmpbge	t1, t0, t3
Packit 6c4009
	xor	t2, t3, t0	# bits set iff t0/t1 bytes differ
Packit 6c4009
	negq	t0, t1		# clear all but least bit
Packit 6c4009
	and	t0, t1, t0
Packit 6c4009
	lda	v0, -1		# default result: s1 byte is the smaller one
Packit 6c4009
	and	t0, t2, t1	# was bit set in t0 > t1?
Packit 6c4009
	cmovne	t1, 1, v0	# yes: result is 1
Packit 6c4009
$done:
Packit 6c4009
	ret
Packit 6c4009
Packit 6c4009
	/* Return 0.  Reached when the count is zero on entry and from
	   $ant_loop when the count runs out with every word equal.  */
	.align 3
Packit 6c4009
$zerolength:
Packit 6c4009
	clr	v0		# result = 0
Packit 6c4009
	ret
Packit 6c4009
Packit 6c4009
	END(strncmp)
/* libc_hidden_builtin_def is defined outside this file.  NOTE(review):
   presumably it provides the hidden alias glibc uses for its own calls
   to strncmp -- confirm against the macro definition.  */
Packit 6c4009
libc_hidden_builtin_def (strncmp)