/* Copyright (C) 2000-2018 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@tamu.edu>, 1996.
   EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Return the address of a given character within a null-terminated
   string, or null if it is not found.  */

#include <sysdep.h>

	/* Assemble for the EV6 family: the routine below uses cttz, a
	   count-extension instruction that the base Alpha instruction
	   set does not have.  */
	.arch ev6
	/* The instruction stream below is scheduled by hand (see the
	   per-line issue notes), so the assembler must leave the order
	   alone.  */
	.set noreorder
	/* Keep the assembler from using AT as its own temporary; the
	   PROF prologue below names AT explicitly.  */
	.set noat

/* strchr: find the first occurrence of a byte in a null-terminated
   string.

   In:	a0 = address of the string
	a1 = character to look for (only its low byte is used)
   Out:	v0 = address of the first byte equal to the character, or 0
	     if the terminating null comes first.  Searching for 0
	     itself returns the address of the terminator.
   Scratch: t0-t5, a1-a3 (plus gp and AT when built with PROF).

   The string is read one aligned quadword at a time.  For each
   quadword two 8-bit masks are built with cmpbge: one flagging bytes
   equal to zero and one flagging bytes equal to the character.  The
   lowest set bit of their union is the first interesting byte; it is
   a hit only if it also belongs to the character mask.

   Because only whole aligned quadwords are loaded, bytes that lie in
   the same quadword before the start of the string or after its
   terminator are read as well; no quadword beyond the one holding
   the terminator is touched.

   The tag after `#' on each instruction is the original author's
   scheduling note (presumably the EV6 issue slot: L, E, U, U0, L0).
   Instructions are kept in groups of four; do not reorder them
   without re-checking the schedule.  */
ENTRY(strchr)
#ifdef PROF
	/* Profiling build: establish gp and call _mcount first.  */
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.prologue 1
#else
	.prologue 0
#endif

	/* Load the aligned quadword containing the first byte and start
	   replicating the character into every byte lane of a1.  */
	ldq_u   t0, 0(a0)	# L : load first quadword Latency=3
	and	a1, 0xff, t3	# E : 00000000000000ch
	insbl	a1, 1, t5	# U : 000000000000ch00
	insbl	a1, 7, a2	# U : ch00000000000000

	insbl	t3, 6, a3	# U : 00ch000000000000
	or	t5, t3, a1	# E : 000000000000chch
	andnot  a0, 7, v0	# E : align our loop pointer
	lda	t4, -1		# E : build garbage mask

	/* mskqh clears the low (a0 & 7) bytes of the all-ones value, so
	   exactly the bytes that precede the string become zero.  */
	mskqh	t4, a0, t4	# U : only want relevant part of first quad
	or	a2, a3, a2	# E : chch000000000000
	inswl	a1, 2, t5	# E : 00000000chch0000
	inswl	a1, 4, a3	# E : 0000chch00000000

	/* cmpbge zero, x sets bit i iff byte i of x is zero.  Applied to
	   the mask above it flags the bytes before the string.  */
	or	a1, a2, a1	# E : chch00000000chch
	or	a3, t5, t5	# E : 0000chchchch0000
	cmpbge  zero, t0, t2	# E : bits set iff byte == zero
	cmpbge	zero, t4, t4	# E : bits set iff byte is garbage

	/* This quad is _very_ serialized.  Lots of stalling happens */
	or	t5, a1, a1	# E : chchchchchchchch
	xor	t0, a1, t1	# E : make bytes == c zero
	cmpbge  zero, t1, t3	# E : bits set iff byte == c
	or	t2, t3, t0	# E : bits set iff char match or zero match

	/* Ignore hits in the bytes before the string.  t3 itself is not
	   masked; that is harmless because $found only tests t3 against
	   the lowest bit that survives in t0.  */
	andnot	t0, t4, t0	# E : clear garbage bits
	cttz	t0, a2		# U0 : speculative (in case we get a match)
	nop			# E :
	bne	t0, $found	# U :

	/*
	 * Yuk.  This loop is going to stall like crazy waiting for the
	 * data to be loaded.  Not much can be done about it unless it's
	 * unrolled multiple times, which is generally unsafe.
	 *
	 * v0 is quadword aligned here, so each pass examines the next
	 * aligned quadword.  a2 is taken from the character mask t3
	 * only: when the first hit is a match, no lower bit is set in
	 * either mask, so the count is the hit's byte index; otherwise
	 * v0 is zeroed at $found and a2 does not matter.
	 */
$loop:
	ldq	t0, 8(v0)	# L : Latency=3
	addq	v0, 8, v0	# E :
	xor	t0, a1, t1	# E :
	cmpbge	zero, t0, t2	# E : bits set iff byte == 0

	cmpbge	zero, t1, t3	# E : bits set iff byte == c
	or	t2, t3, t0	# E :
	cttz	t3, a2		# U0 : speculative (in case we get a match)
	beq	t0, $loop	# U :

	/* On entry: v0 = aligned address of the current quadword,
	   t0 = union of zero and character masks (non-zero),
	   t3 = character mask, a2 = byte index of a candidate match.
	   t0 & -t0 isolates the first interesting byte.  */
$found:
	negq    t0, t1		# E : clear all but least set bit
	and     t0, t1, t0	# E :
	and	t0, t3, t1	# E : bit set iff byte was the char
	addq	v0, a2, v0	# E : Add in the bit number from above

	/* If the first interesting byte was the terminator rather than
	   the character, return 0 instead of the computed address.  */
	cmoveq	t1, $31, v0	# E : Two mapping slots, latency = 2
	nop
	nop
	ret			# L0 :

	END(strchr)

/* Both macros are defined outside this file (presumably by the glibc
   headers pulled in through <sysdep.h>).  Judging by its name and
   arguments, weak_alias makes `index' a weak alias of strchr;
   libc_hidden_builtin_def presumably provides the library-internal
   hidden definition of strchr -- confirm against the glibc headers.  */
weak_alias (strchr, index)
libc_hidden_builtin_def (strchr)