/* Optimized strchr implementation for PowerPC.
   Copyright (C) 1997-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* See strlen.S for comments on how this works.  */

/* char * [r3] strchr (const char *s [r3] , int c [r4] )

   Return a pointer to the first byte of S that equals the low byte of C,
   or NULL (0) if the terminating zero byte is reached first.  Searching
   for a zero byte returns a pointer to the terminator.

   Method: the string is read one word at a time from word-aligned
   addresses.  For each word X two tests are made with the identity
   (v - 0x01010101) & ~v & 0x80808080, which is non-zero when V holds a
   zero byte: once with V = X (end of string) and once with
   V = X ^ cccccccc (a byte equal to c).  Bytes of the first word that
   precede S are forced to 0xff so they can satisfy neither test.

   The loop is software pipelined: the "match" test of a word is only
   completed after the following word has been loaded, so on the
   L(foundit) exit rSTR already points one word past the word that holds
   the match.

   Registers: r0 and r4-r12 are used as scratch, CR0 is modified, and no
   memory is written by the instructions in this file.  */

ENTRY (strchr)

/* Note the deliberate aliasing below: rCLZB reuses rCHR, rTMP4 reuses
   rIGN and rTMP5 reuses rMASK.  Each alias is first written only after
   the last read of the register it shares.  */
#define rTMP1	r0
#define rRTN	r3	/* outgoing result */
#define rSTR	r8	/* current word pointer */
#define rCHR	r4	/* byte we're looking for, spread over the whole word */
#define rWORD	r5	/* the current word */
#define rCLZB	rCHR	/* leading zero byte count */
#define rFEFE	r6	/* constant 0xfefefeff (-0x01010101) */
#define r7F7F	r7	/* constant 0x7f7f7f7f */
#define rTMP2	r9
#define rIGN	r10	/* number of bits we should ignore in the first word */
#define rMASK	r11	/* mask with the bits to ignore set to 0 */
#define rTMP3	r12
#define rTMP4	rIGN
#define rTMP5	rMASK


	rlwimi	rCHR, rCHR, 8, 16, 23	/* Copy low byte of c into byte 1...  */
	li	rMASK, -1
	rlwimi	rCHR, rCHR, 16, 0, 15	/* ...then low half into high half.  */
	rlwinm	rIGN, rRTN, 3, 27, 28	/* rIGN = (s & 3) * 8.  */
	lis	rFEFE, -0x101
	lis	r7F7F, 0x7f7f
	clrrwi	rSTR, rRTN, 2		/* rSTR = s rounded down to a word.  */
	addi	rFEFE, rFEFE, -0x101	/* rFEFE = 0xfefefeff.  */
	addi	r7F7F, r7F7F, 0x7f7f	/* r7F7F = 0x7f7f7f7f.  */
/* Test the first (partial?) word.  */
	lwz	rWORD, 0(rSTR)
/* Clear the bits of rMASK that cover the bytes located before s: those
   are the low-order bytes on little-endian, the high-order bytes on
   big-endian.  */
#ifdef __LITTLE_ENDIAN__
	slw	rMASK, rMASK, rIGN
#else
	srw	rMASK, rMASK, rIGN
#endif
	orc	rWORD, rWORD, rMASK	/* Ignored bytes := 0xff (never zero).  */
	add	rTMP1, rFEFE, rWORD
	nor	rTMP2, r7F7F, rWORD
	and.	rTMP4, rTMP1, rTMP2	/* CR0: does the word hold a zero byte?  */
	xor	rTMP3, rCHR, rWORD
	orc	rTMP3, rTMP3, rMASK	/* Ignored bytes can never match c,
					   even when c is 0xff.  */
	b	L(loopentry)

/* The loop.  */

L(loop):
	lwzu	rWORD, 4(rSTR)		/* Load next word; rSTR += 4.  */
/* Finish the match test of the PREVIOUS word (rTMP1/rTMP2 were set up
   from its rTMP3 at L(loopentry)).  */
	and.	rTMP5, rTMP1, rTMP2
/* Test for 0.	*/
	add	rTMP1, rFEFE, rWORD /* x - 0x01010101.  */
	nor	rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080.  */
/* Previous word matched c and is known to hold no zero byte.  rTMP3 and
   rTMP5 still describe that word; rSTR is already one word ahead.  */
	bne	L(foundit)
	and.	rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080.  */
/* Start test for the bytes we're looking for.  */
	xor	rTMP3, rCHR, rWORD
L(loopentry):
	add	rTMP1, rFEFE, rTMP3
	nor	rTMP2, r7F7F, rTMP3
/* CR0 still holds the result of the zero-byte test (and. rTMP4): keep
   looping while the current word has no zero byte.  */
	beq	L(loop)

/* There is a zero byte in the word, but may also be a matching byte (either
   before or after the zero byte).  In fact, we may be looking for a
   zero byte, in which case we return a match.  */
	and.	rTMP5, rTMP1, rTMP2
	li	rRTN, 0			/* Preload the NULL result.  */
	beqlr				/* No match in the last word: NULL.  */
/* At this point:
   rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
   rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
   But there may be false matches in the next most significant byte from
   a true match due to carries.  This means we need to recalculate the
   matches using a longer method for big-endian.  */
#ifdef __LITTLE_ENDIAN__
/* (v - 1) & ~v is a mask of the bits below the lowest set bit of v.
   The lowest set bit marks the first true hit in memory order, so the
   false hits above it do not matter here.  */
	addi	rTMP1, rTMP5, -1
	andc	rTMP1, rTMP1, rTMP5	/* Bits below the first match of c.  */
	cntlzw	rCLZB, rTMP1
	addi	rTMP2, rTMP4, -1
	andc	rTMP2, rTMP2, rTMP4	/* Bits below the first zero byte.  */
	cmplw	rTMP1, rTMP2
	bgtlr				/* Zero byte comes first: NULL.  */
/* The mask has 8*k + 7 one bits for a match in byte k, so
   (32 - 7) - cntlzw == 8*k.  */
	subfic	rCLZB, rCLZB, 32-7
#else
/* I think we could reduce this by two instructions by keeping the "nor"
   results from the loop for reuse here.  See strlen.S tail.  Similarly
   one instruction could be pruned from L(foundit).  */
/* Exact, carry-free recomputation:
   ~(((v & 0x7f7f7f7f) + 0x7f7f7f7f) | v | 0x7f7f7f7f) has 0x80 in
   precisely the bytes of v that are zero.  It is evaluated for
   v = rWORD (zero bytes, result in rWORD) and v = rTMP3 (matches of c,
   result in rTMP2).  rFEFE is used as a scratch register from here on.  */
	and	rFEFE, r7F7F, rWORD
	or	rTMP5, r7F7F, rWORD
	and	rTMP1, r7F7F, rTMP3
	or	rTMP4, r7F7F, rTMP3
	add	rFEFE, rFEFE, r7F7F
	add	rTMP1, rTMP1, r7F7F
	nor	rWORD, rTMP5, rFEFE
	nor	rTMP2, rTMP4, rTMP1
	cntlzw	rCLZB, rTMP2		/* 8 * index of the first match.  */
	cmplw	rWORD, rTMP2
	bgtlr				/* Zero byte comes first: NULL.  */
#endif
	srwi	rCLZB, rCLZB, 3		/* Bit offset -> byte offset.  */
	add	rRTN, rSTR, rCLZB
	blr

/* A match was found in the word before the one rSTR now points at, and
   that word holds no zero byte.  */
L(foundit):
#ifdef __LITTLE_ENDIAN__
	addi	rTMP1, rTMP5, -1
	andc	rTMP1, rTMP1, rTMP5	/* Bits below the first match of c.  */
	cntlzw	rCLZB, rTMP1
/* (32 - 7 - 32) - cntlzw == 8*k - 32; the arithmetic shift then gives
   the byte offset k - 4 relative to the already advanced rSTR.  */
	subfic	rCLZB, rCLZB, 32-7-32
	srawi	rCLZB, rCLZB, 3
#else
/* Same exact recomputation of the match bytes as in the tail above.  */
	and	rTMP1, r7F7F, rTMP3
	or	rTMP4, r7F7F, rTMP3
	add	rTMP1, rTMP1, r7F7F
	nor	rTMP2, rTMP4, rTMP1
	cntlzw	rCLZB, rTMP2
	subi	rSTR, rSTR, 4		/* Step back to the matching word.  */
	srwi	rCLZB, rCLZB, 3
#endif
	add	rRTN, rSTR, rCLZB
	blr
END (strchr)

/* Also provide the name `index' for this routine, and emit the
   hidden-definition hook for strchr; both macros are defined outside
   this file.  */
weak_alias (strchr, index)
libc_hidden_builtin_def (strchr)