Blame sysdeps/ia64/memchr.S

Packit Service 82fcde
/* Optimized version of the standard memchr() function.
Packit Service 82fcde
   This file is part of the GNU C Library.
Packit Service 82fcde
   Copyright (C) 2000-2018 Free Software Foundation, Inc.
Packit Service 82fcde
   Contributed by Dan Pop <Dan.Pop@cern.ch>.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is free software; you can redistribute it and/or
Packit Service 82fcde
   modify it under the terms of the GNU Lesser General Public
Packit Service 82fcde
   License as published by the Free Software Foundation; either
Packit Service 82fcde
   version 2.1 of the License, or (at your option) any later version.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is distributed in the hope that it will be useful,
Packit Service 82fcde
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 82fcde
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service 82fcde
   Lesser General Public License for more details.
Packit Service 82fcde
Packit Service 82fcde
   You should have received a copy of the GNU Lesser General Public
Packit Service 82fcde
   License along with the GNU C Library; if not, see
Packit Service 82fcde
   <http://www.gnu.org/licenses/>.  */
Packit Service 82fcde
Packit Service 82fcde
/* Return: the address of the first occurrence of chr in str or NULL
Packit Service 82fcde
Packit Service 82fcde
   Inputs:
Packit Service 82fcde
	in0:	str
Packit Service 82fcde
	in1:	chr
Packit Service 82fcde
	in2:	byte count
Packit Service 82fcde
Packit Service 82fcde
   This implementation assumes little endian mode.  For big endian mode,
Packit Service 82fcde
   the instruction czx1.r should be replaced by czx1.l.
Packit Service 82fcde
Packit Service 82fcde
   The algorithm is fairly straightforward: search byte by byte until we
Packit Service 82fcde
   get to a word aligned address, then search word by word as much as
Packit Service 82fcde
   possible; the remaining few bytes are searched one at a time.
Packit Service 82fcde
Packit Service 82fcde
   The word by word search is performed by xor-ing the word with a word
Packit Service 82fcde
   containing chr in every byte.  If there is a hit, the result will
Packit Service 82fcde
   contain a zero byte in the corresponding position.  The presence and
Packit Service 82fcde
   position of that zero byte is detected with a czx instruction.
Packit Service 82fcde
Packit Service 82fcde
   All the loops in this function could have had the internal branch removed
Packit Service 82fcde
   if br.ctop and br.cloop could be predicated :-(.  */
Packit Service 82fcde
Packit Service 82fcde
#include <sysdep.h>
Packit Service 82fcde
#undef ret
Packit Service 82fcde
Packit Service 82fcde
#define saved_pr	r15	/* copy of pr taken on entry, restored at .foundit */
Packit Service 82fcde
#define saved_lc	r16	/* copy of ar.lc taken on entry, restored at .foundit */
Packit Service 82fcde
#define	chr		r17	/* search byte: low 8 bits of in1, zero-extended */
Packit Service 82fcde
#define len		r18	/* number of bytes still to be searched */
Packit Service 82fcde
#define last		r20	/* end bound: str + byte count, clamped to -1 on wrap-around */
Packit Service 82fcde
#define val		r21	/* byte loaded by the byte-at-a-time loops .l1 and .l3 */
Packit Service 82fcde
#define tmp		r24	/* scratch: str % 8 on entry, loaded word in the recovery loop */
Packit Service 82fcde
#define chrx8		r25	/* chr replicated into all 8 bytes of a word */
Packit Service 82fcde
#define loopcnt		r30	/* iteration count staged for ar.lc */
Packit Service 82fcde
Packit Service 82fcde
#define str		in0	/* first argument: start of the area to search */
Packit Service 82fcde
Packit Service 82fcde
/* __memchr: search the in2 bytes starting at str (in0) for the low 8 bits
   of in1.  ret0 is the running pointer and, on exit, holds either the
   address of the match or 0.  Phases, by label:
     .l1        byte loop that brings ret0 up to an 8-byte boundary
     .l2        software-pipelined loop testing 8 bytes per iteration
     .srchfew   byte loop for short searches (count < 16) and for the tail
     .recovery  non-speculative word loop entered when chk.s fails in .l2  */
ENTRY(__memchr)
Packit Service 82fcde
	.prologue
Packit Service 82fcde
	alloc r2 = ar.pfs, 3, 0, 29, 32	// 3 inputs, 0 locals, 29 outputs, 32 rotating
Packit Service 82fcde
#include "softpipe.h"
Packit Service 82fcde
	.rotr	value[MEMLAT+1], addr[MEMLAT+3], aux[2], poschr[2]	// rotating register names used by .l2
Packit Service 82fcde
	.rotp	p[MEMLAT+3]	// rotating stage predicates used by .l2
Packit Service 82fcde
	.save ar.lc, saved_lc
Packit Service 82fcde
	mov	saved_lc = ar.lc	// save the loop counter
Packit Service 82fcde
	.save pr, saved_pr
Packit Service 82fcde
	mov	saved_pr = pr		// save the predicates
Packit Service 82fcde
	.body
Packit Service 82fcde
	mov	ret0 = str		// ret0 = running pointer
Packit Service 82fcde
	add	last = str, in2		// end bound: str + byte count
Packit Service 82fcde
	;;
Packit Service 82fcde
	cmp.ltu	p6, p0 = last, str	// did str + count wrap around?
Packit Service 82fcde
	;;
Packit Service 82fcde
(p6)	mov	last = -1		// if so, clamp the bound to the highest address
Packit Service 82fcde
	and	tmp = 7, str		// tmp = str % 8
Packit Service 82fcde
	cmp.ne	p7, p0 = r0, r0		// clear p7
Packit Service 82fcde
	extr.u	chr = in1, 0, 8		// chr = (unsigned char) in1
Packit Service 82fcde
	mov	len = in2		// len = bytes left to search
Packit Service 82fcde
	cmp.gtu	p6, p0 = 16, in2	// use a simple loop for short
Packit Service 82fcde
(p6)	br.cond.spnt .srchfew ;;	// searches
Packit Service 82fcde
	sub	loopcnt = 8, tmp	// loopcnt = 8 - tmp
Packit Service 82fcde
	cmp.eq	p6, p0 = tmp, r0	// str already 8-byte aligned?
Packit Service 82fcde
(p6)	br.cond.sptk	.str_aligned;;
Packit Service 82fcde
	sub	len = len, loopcnt	// bytes left once the alignment loop is done
Packit Service 82fcde
	adds	loopcnt = -1, loopcnt;;	// br.cloop runs the body ar.lc + 1 times
Packit Service 82fcde
	mov	ar.lc = loopcnt
Packit Service 82fcde
.l1:
Packit Service 82fcde
	ld1	val = [ret0], 1		// load one byte, post-increment ret0
Packit Service 82fcde
	;;
Packit Service 82fcde
	cmp.eq	p6, p0 = val, chr	// p6 = byte matches
Packit Service 82fcde
(p6)	br.cond.spnt	.foundit
Packit Service 82fcde
	br.cloop.sptk	.l1 ;;
Packit Service 82fcde
.str_aligned:
Packit Service 82fcde
	cmp.ne	p6, p0 = r0, r0		// clear p6
Packit Service 82fcde
	shr.u	loopcnt = len, 3	// loopcnt = len / 8
Packit Service 82fcde
	and	len = 7, len ;;		// remaining len = len & 7
Packit Service 82fcde
	adds	loopcnt = -1, loopcnt	// br.ctop counts down from ar.lc, hence the -1
Packit Service 82fcde
	mov	ar.ec = MEMLAT + 3	// epilogue count = number of stage predicates
Packit Service 82fcde
	mux1	chrx8 = chr, @brcst ;;	// get a word full of chr
Packit Service 82fcde
	mov	ar.lc = loopcnt
Packit Service 82fcde
	mov	pr.rot = 1 << 16 ;;	// set p16 (the first stage predicate), clear the other rotating ones
Packit Service 82fcde
.l2:
Packit Service 82fcde
(p[0])		mov	addr[0] = ret0	// remember the address of the word being loaded
Packit Service 82fcde
(p[0])		ld8.s	value[0] = [ret0], 8	 // speculative load
Packit Service 82fcde
(p[MEMLAT])	chk.s	value[MEMLAT], .recovery // check and recovery
Packit Service 82fcde
(p[MEMLAT])	xor	aux[0] = value[MEMLAT], chrx8	// a byte equal to chr becomes zero
Packit Service 82fcde
(p[MEMLAT+1])	czx1.r	poschr[0] = aux[1]	// index of the first zero byte, 8 if there is none
Packit Service 82fcde
(p[MEMLAT+2])	cmp.ne	p7, p0 = 8, poschr[1]	// p7 = this word contains chr
Packit Service 82fcde
(p7)		br.cond.dpnt .foundit
Packit Service 82fcde
		br.ctop.dptk .l2
Packit Service 82fcde
.srchfew:
Packit Service 82fcde
	adds	loopcnt = -1, len	// br.cloop runs the body ar.lc + 1 times
Packit Service 82fcde
	cmp.eq	p6, p0 = len, r0	// nothing left to search?
Packit Service 82fcde
(p6)	br.cond.spnt .notfound ;;
Packit Service 82fcde
	mov	ar.lc = loopcnt
Packit Service 82fcde
.l3:
Packit Service 82fcde
	ld1	val = [ret0], 1		// load one byte, post-increment ret0
Packit Service 82fcde
	;;
Packit Service 82fcde
	cmp.eq	p6, p0 = val, chr	// p6 = byte matches
Packit Service 82fcde
(p6)	br.cond.dpnt	.foundit
Packit Service 82fcde
	br.cloop.sptk	.l3 ;;
Packit Service 82fcde
.notfound:
Packit Service 82fcde
	cmp.ne	p6, p0 = r0, r0	// clear p6 (p7 was already 0 when we got here)
Packit Service 82fcde
	mov	ret0 = r0 ;;	// return NULL
Packit Service 82fcde
.foundit:
Packit Service 82fcde
	.pred.rel "mutex" p6, p7
Packit Service 82fcde
(p6)	adds	ret0 = -1, ret0			   // if we got here from l1 or l3
Packit Service 82fcde
(p7)	add	ret0 = addr[MEMLAT+2], poschr[1]   // if we got here from l2
Packit Service 82fcde
	mov	pr = saved_pr, -1	// restore the predicates saved on entry
Packit Service 82fcde
	mov	ar.lc = saved_lc	// restore the loop counter saved on entry
Packit Service 82fcde
	br.ret.sptk.many b0
Packit Service 82fcde
Packit Service 82fcde
.recovery:
Packit Service 82fcde
#if MEMLAT != 6
Packit Service 82fcde
# error "MEMLAT must be 6!"
Packit Service 82fcde
#endif
Packit Service 82fcde
// Each stage predicate that is still set stands for one ld8.s whose
// post-increment has already been applied, so step ret0 back 8 bytes per
// active stage; presumably this leaves ret0 on the first unchecked word.
(p[MEMLAT-6])	add	ret0 = -8, ret0;;
Packit Service 82fcde
(p[MEMLAT-5])	add	ret0 = -8, ret0;;
Packit Service 82fcde
(p[MEMLAT-4])	add	ret0 = -8, ret0;;
Packit Service 82fcde
(p[MEMLAT-3])	add	ret0 = -8, ret0;;
Packit Service 82fcde
(p[MEMLAT-2])	add	ret0 = -8, ret0;;
Packit Service 82fcde
(p[MEMLAT-1])	add	ret0 = -8, ret0;;
Packit Service 82fcde
(p[MEMLAT])	add	ret0 = -8, ret0;;
Packit Service 82fcde
(p[MEMLAT+1])	add	ret0 = -8, ret0;;
Packit Service 82fcde
(p[MEMLAT+2])	add	ret0 = -8, ret0;;
Packit Service 82fcde
// Non-speculative word loop: one ld8 per iteration, no pipelining.
// NOTE(review): the end bound is compared only when a word contains chr;
// a word without chr advances ret0 by 8 with no bound check - confirm
// that this is intended.
.l4:
Packit Service 82fcde
	mov     addr[MEMLAT+2] = ret0	// .foundit reads the word address from here
Packit Service 82fcde
	ld8	tmp = [ret0];;		// load the first unchecked 8byte
Packit Service 82fcde
	xor	aux[1] = tmp, chrx8;;	// a byte equal to chr becomes zero
Packit Service 82fcde
	czx1.r	poschr[1] = aux[1];;	// index of the first zero byte, 8 if there is none
Packit Service 82fcde
	cmp.ne	p7, p0 = 8, poschr[1];;	// p7 = this word contains chr
Packit Service 82fcde
(p7)	add	ret0 = addr[MEMLAT+2], poschr[1];;	// candidate match address
Packit Service 82fcde
(p7)	cmp.geu	p6, p7 = ret0, last	// don't go over the last byte
Packit Service 82fcde
(p6)	br.cond.spnt	.notfound;;
Packit Service 82fcde
(p7)	br.cond.spnt	.foundit;;
Packit Service 82fcde
	adds	ret0 = 8, ret0		// load the next unchecked 8byte
Packit Service 82fcde
	br.sptk	.l4;;
Packit Service 82fcde
Packit Service 82fcde
END(__memchr)
Packit Service 82fcde
Packit Service 82fcde
weak_alias (__memchr, memchr)
Packit Service 82fcde
libc_hidden_builtin_def (memchr)