Blame sysdeps/powerpc/powerpc64/power8/strrchr.S

Packit 6c4009
/* Optimized strrchr implementation for PowerPC64/POWER8 using the cmpb insn
   and vector insns.
Packit 6c4009
   Copyright (C) 2017-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <sysdep.h>
Packit 6c4009
Packit 6c4009
/* char *[r3] strrchr (char *s [r3], int c [r4])  */
Packit 6c4009
/* TODO: change these to the actual instructions when the minimum required
Packit 6c4009
   binutils allows it.  */
Packit 6c4009
/* Each macro below assembles one instruction by hand: it ORs the
   register operands, shifted into their operand fields, into the base
   opcode word and emits the result with .long.  The v-register and
   r-register arguments therefore have to evaluate to plain register
   numbers (presumably provided by <sysdep.h>).  */
/* MTVRD(v,r): move GPR r into vector register v.  */
#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
Packit 6c4009
/* MFVRD(r,v): move a doubleword of vector register v into GPR r.  */
#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
Packit 6c4009
/* VBPERMQ(t,a,b): vector bit permute quadword; gathers into t the bits
   of a selected by the bit indices held in the bytes of b.  */
#define VBPERMQ(t,a,b)  .long (0x1000054c \
Packit 6c4009
				| ((t)<<(32-11)) \
Packit 6c4009
				| ((a)<<(32-16)) \
Packit 6c4009
				| ((b)<<(32-21)) )
Packit 6c4009
/* VCLZD(r,v): count leading zeros of each doubleword of v into r.  */
#define VCLZD(r,v) .long (0x100007c2 | ((r)<<(32-11)) | ((v)<<(32-21)))
Packit 6c4009
/* VPOPCNTD(r,v): population count of each doubleword of v into r.  */
#define VPOPCNTD(r,v) .long (0x100007c3 | ((r)<<(32-11)) | ((v)<<(32-21)))
Packit 6c4009
/* VADDUQM(t,a,b): t = a + b as one unsigned 128-bit value (modulo).  */
#define VADDUQM(t,a,b)  .long (0x10000100 \
Packit 6c4009
				| ((t)<<(32-11)) \
Packit 6c4009
				| ((a)<<(32-16)) \
Packit 6c4009
				| ((b)<<(32-21)) )
Packit 6c4009
#ifdef __LITTLE_ENDIAN__
Packit 6c4009
/* Find the match position from v6 and place result in r6.
   v6 holds a vcmpequb result for one quadword and v10 the vbpermq
   selector.  The selected bits are moved to r7 and r6 is set to
   15 minus the number of leading zeros in r7; the callers add r6 to
   the quadword's address in r8.  Overwrites v6 and r7.  */
Packit 6c4009
# define CALCULATE_MATCH() \
Packit 6c4009
	VBPERMQ(v6, v6, v10); \
Packit 6c4009
	vsldoi	v6, v6, v6, 6; \
Packit 6c4009
	MFVRD(r7, v6); \
Packit 6c4009
	cntlzd	r6, r7; \
Packit 6c4009
	subfic	r6, r6, 15;
Packit 6c4009
/*
Packit 6c4009
 * Find the first null position to mask bytes after null.
Packit 6c4009
 * (reg): vcmpequb result: v2 for 1st qw v3 for 2nd qw.
Packit 6c4009
 * Result placed at v2.
 *
 * Sequence: (reg - 1) & ~reg, computed on the full 128 bits, isolates
 * the bits below the lowest set bit of reg; VPOPCNTD then counts them
 * per doubleword.  One count byte is splatted and compared with v9;
 * unless every byte compares equal, v2 is additionally shifted with
 * vslo before vsumsws reduces it.
 * Inputs besides reg: v0 must be all zeros and v9 the per-byte 0x40
 * constant that the call sites set up beforehand.  Overwrites v11 and
 * may overwrite v9.  Uses the local numeric label 1.
Packit 6c4009
 */
Packit 6c4009
# define FIND_NULL_POS(reg) \
Packit 6c4009
	vspltisb	v11, -1; \
Packit 6c4009
	VADDUQM(v11, reg, v11); \
Packit 6c4009
	vandc	v11, v11, reg; \
Packit 6c4009
	VPOPCNTD(v2, v11); \
Packit 6c4009
	vspltb	v11, v2, 15; \
Packit 6c4009
	vcmpequb.	v11, v11, v9; \
Packit 6c4009
	blt	cr6, 1f; \
Packit 6c4009
	vsldoi	v9, v0, v9, 1; \
Packit 6c4009
	vslo	v2, v2, v9; \
Packit 6c4009
1: \
Packit 6c4009
	vsumsws	v2, v2, v0;
Packit 6c4009
#else
Packit 6c4009
/* Big-endian CALCULATE_MATCH: same contract as the little-endian one
   (input v6/v10, result in r6, overwrites v6 and r7), but r6 is
   15 minus the number of trailing zeros in r7, obtained as
   popcount ((r7 - 1) & ~r7).  */
# define CALCULATE_MATCH() \
Packit 6c4009
	VBPERMQ(v6, v6, v10); \
Packit 6c4009
	MFVRD(r7, v6); \
Packit 6c4009
	addi	r6, r7, -1; \
Packit 6c4009
	andc	r6, r6, r7; \
Packit 6c4009
	popcntd	r6, r6; \
Packit 6c4009
	subfic	r6, r6, 15;
Packit 6c4009
/* Big-endian FIND_NULL_POS: same contract as the little-endian one
   (reg is the vcmpequb result, result in v2, needs v0 == 0 and the
   0x40 constant in v9, overwrites v11 and may overwrite v9), but it
   counts leading zeros per doubleword with VCLZD, splats byte 7 of
   the counts and shifts with vsro.  */
# define FIND_NULL_POS(reg) \
Packit 6c4009
	VCLZD(v2, reg); \
Packit 6c4009
	vspltb	v11, v2, 7; \
Packit 6c4009
	vcmpequb.	v11, v11, v9; \
Packit 6c4009
	blt	cr6, 1f; \
Packit 6c4009
	vsldoi	v9, v0, v9, 1; \
Packit 6c4009
	vsro	v2, v2, v9; \
Packit 6c4009
1: \
Packit 6c4009
	vsumsws	v2, v2, v0;
Packit 6c4009
#endif	/* !__LITTLE_ENDIAN__  */
Packit 6c4009
Packit 6c4009
/* The symbol name can be overridden by defining STRRCHR before this
   point; it defaults to strrchr.  */
#ifndef STRRCHR
Packit 6c4009
# define STRRCHR strrchr
Packit 6c4009
#endif
Packit 6c4009
/* The POWER8 instructions used below are emitted as raw .long words
   by the macros defined earlier in this file, which is presumably why
   requesting power7 is sufficient here.  */
	.machine  power7
Packit 6c4009
/* Entry: r3 = s, r4 = c.  Returns in r3.
   Register roles on the c != 0 path, as set up below:
     r8      s rounded down to a doubleword boundary (current address)
     r12     doubleword loaded from r8
     r9      result so far; stays 0 until a c byte is recorded
     r0      zero, compared against with cmpb to find null bytes
     r6      (s & 7) * 8, the number of bits preceding s in r12
     r4      c replicated into all eight bytes
     r10/r11 cmpb masks for c bytes / null bytes.  */
ENTRY_TOCLESS (STRRCHR)
Packit 6c4009
	CALL_MCOUNT 2
Packit 6c4009
	dcbt	0,r3
Packit 6c4009
	clrrdi	r8,r3,3	      /* Align the address to doubleword boundary.  */
Packit 6c4009
	cmpdi	cr7,r4,0
Packit 6c4009
	ld	r12,0(r8)     /* Load doubleword from memory.  */
Packit 6c4009
	li	r9,0	      /* Used to store last occurrence.  */
Packit 6c4009
	li	r0,0	      /* Doubleword with null chars to use
Packit 6c4009
				 with cmpb.  */
Packit 6c4009
Packit 6c4009
	rlwinm	r6,r3,3,26,28 /* Calculate padding: r6 = (s & 7) * 8.  */
Packit 6c4009
Packit 6c4009
	beq	cr7,L(null_match)
Packit 6c4009
Packit 6c4009
	/* Replicate byte to doubleword.  */
Packit 6c4009
	insrdi	r4,r4,8,48
Packit 6c4009
	insrdi	r4,r4,16,32
Packit 6c4009
	insrdi	r4,r4,32,0
Packit 6c4009
Packit 6c4009
	/* r4 is changed now.  Only the low byte of c survives the
Packit 6c4009
	   replication, so if c had just upper bits set r4 is now zero:
	   check for null again.  */
Packit 6c4009
	cmpdi	cr7,r4,0
Packit 6c4009
	beq	cr7,L(null_match)
Packit 6c4009
	/* Now r4 has a doubleword of c bytes and r0 has
Packit 6c4009
	   a doubleword of null bytes.  */
Packit 6c4009
Packit 6c4009
	cmpb	r10,r12,r4     /* Compare each byte against c byte.  */
Packit 6c4009
	cmpb	r11,r12,r0     /* Compare each byte against null byte.  */
Packit 6c4009
Packit 6c4009
	/* Move the doublewords left and right to discard the bits that are
Packit 6c4009
	   not part of the string and bring them back as zeros.  r6 is the
	   shift count computed above.  */
Packit 6c4009
#ifdef __LITTLE_ENDIAN__
Packit 6c4009
	srd	r10,r10,r6
Packit 6c4009
	srd	r11,r11,r6
Packit 6c4009
	sld	r10,r10,r6
Packit 6c4009
	sld	r11,r11,r6
Packit 6c4009
#else
Packit 6c4009
	sld	r10,r10,r6
Packit 6c4009
	sld	r11,r11,r6
Packit 6c4009
	srd	r10,r10,r6
Packit 6c4009
	srd	r11,r11,r6
Packit 6c4009
#endif
Packit 6c4009
	or	r5,r10,r11    /* OR the results to speed things up.  */
Packit 6c4009
	cmpdi	cr7,r5,0      /* If r5 == 0, no c or null bytes
Packit 6c4009
				 have been found.  */
Packit 6c4009
	bne	cr7,L(done)
Packit 6c4009
/* Reached by fall-through after the first doubleword, and again from
   L(done) whenever a doubleword contained c but no null byte.  */
L(align):
Packit 6c4009
	andi.	r12, r8, 15
Packit 6c4009
Packit 6c4009
	/* Is r8 on a 16-byte boundary?  If it is not, the next doubleword
Packit 6c4009
	   (r8 + 8) is, so go straight to the main loop.  Otherwise check
	   the second doubleword of the current 16-byte pair first.  */
Packit 6c4009
Packit 6c4009
	bne	cr0, L(loop)
Packit 6c4009
Packit 6c4009
	/* Handle WORD2 of pair.  */
Packit 6c4009
	ldu	r12,8(r8)
Packit 6c4009
	cmpb	r10,r12,r4
Packit 6c4009
	cmpb	r11,r12,r0
Packit 6c4009
	or	r5,r10,r11
Packit 6c4009
	cmpdi	cr7,r5,0
Packit 6c4009
	bne	cr7,L(done)
Packit 6c4009
	b	L(loop)	      /* We branch here (rather than falling through)
Packit 6c4009
				 to skip the nops due to heavy alignment
Packit 6c4009
				 of the loop below.  */
Packit 6c4009
	.p2align  5
Packit 6c4009
L(loop):
Packit 6c4009
	/* Load two doublewords, compare and merge in a
Packit 6c4009
	   single register for speed.  This is an attempt
Packit 6c4009
	   to speed up the null-checking process for bigger strings.
	   The ldu leaves r8 pointing at the second doubleword.  */
Packit 6c4009
	ld	r12,8(r8)
Packit 6c4009
	ldu	r7,16(r8)
Packit 6c4009
	cmpb	r10,r12,r4
Packit 6c4009
	cmpb	r11,r12,r0
Packit 6c4009
	cmpb	r6,r7,r4
Packit 6c4009
	cmpb	r7,r7,r0
Packit 6c4009
	or	r12,r10,r11
Packit 6c4009
	or	r5,r6,r7
Packit 6c4009
	or	r5,r12,r5
Packit 6c4009
	cmpdi	cr7,r5,0
Packit 6c4009
	beq	cr7,L(vector)
Packit 6c4009
Packit 6c4009
	/* OK, one (or both) of the doublewords contains a c/null byte.  Check
Packit 6c4009
	   the first doubleword and decrement the address in case the first
Packit 6c4009
	   doubleword really contains a c/null byte.  */
Packit 6c4009
	cmpdi	cr6,r12,0
Packit 6c4009
	addi	r8,r8,-8
Packit 6c4009
	bne	cr6,L(done)
Packit 6c4009
Packit 6c4009
	/* The c/null byte must be in the second doubleword.  Adjust the
Packit 6c4009
	   address again and move the results of cmpb to r10/r11 so we can
Packit 6c4009
	   calculate the pointer.  */
Packit 6c4009
Packit 6c4009
	mr	r10,r6
Packit 6c4009
	mr	r11,r7
Packit 6c4009
	addi	r8,r8,8
Packit 6c4009
Packit 6c4009
	/* r10/r11 have the output of the cmpb instructions, that is,
Packit 6c4009
	   0xff in the same position as the c/null byte in the original
Packit 6c4009
	   doubleword from the string.  Use that to calculate the pointer.
	   r8 is the address of that doubleword.  */
Packit 6c4009
Packit 6c4009
L(done):
Packit 6c4009
	/* If r11 is non-zero (a null byte was seen), find the first position
Packit 6c4009
	   of 0xff in r11 and fill r10 with 0 from that position, so that c
	   bytes located after the terminator are ignored.  */
Packit 6c4009
	cmpdi	cr7,r11,0
Packit 6c4009
	beq	cr7,L(no_null)
Packit 6c4009
#ifdef __LITTLE_ENDIAN__
Packit 6c4009
	/* r0 = number of trailing zero bits in r11.  */
	addi	r3,r11,-1
Packit 6c4009
	andc	r3,r3,r11
Packit 6c4009
	popcntd r0,r3
Packit 6c4009
#else
Packit 6c4009
	cntlzd	r0,r11		/* r0 = number of leading zero bits in r11.  */
Packit 6c4009
#endif
Packit 6c4009
	/* Turn the bit count into a shift amount and build, from all
	   ones, the mask that keeps the part of r10 preceding the null.  */
	subfic	r0,r0,63
Packit 6c4009
	li	r6,-1
Packit 6c4009
#ifdef __LITTLE_ENDIAN__
Packit 6c4009
	srd	r0,r6,r0
Packit 6c4009
#else
Packit 6c4009
	sld	r0,r6,r0
Packit 6c4009
#endif
Packit 6c4009
	and	r10,r0,r10
Packit 6c4009
L(no_null):
Packit 6c4009
	/* Locate the last c byte left in r10 and bail out to L(no_match)
	   when none of them precedes the null byte.  */
#ifdef __LITTLE_ENDIAN__
Packit 6c4009
	cntlzd	r0,r10		/* Count leading zeros before c matches.  */
Packit 6c4009
	/* r3 = bits below the first c match, r10 = bits below the first
	   null; an unsigned r3 > r10 means no c byte comes first.  */
	addi	r3,r10,-1
Packit 6c4009
	andc	r3,r3,r10
Packit 6c4009
	addi	r10,r11,-1
Packit 6c4009
	andc	r10,r10,r11
Packit 6c4009
	cmpld	cr7,r3,r10
Packit 6c4009
	bgt	cr7,L(no_match)
Packit 6c4009
#else
Packit 6c4009
	addi	r3,r10,-1	/* Count trailing zeros before c matches.  */
Packit 6c4009
	andc	r3,r3,r10
Packit 6c4009
	popcntd	r0,r3
Packit 6c4009
	cmpld	cr7,r11,r10	/* Null mask above every remaining c match?  */
Packit 6c4009
	bgt	cr7,L(no_match)
Packit 6c4009
#endif
Packit 6c4009
	srdi	r0,r0,3		/* Convert the zero-bit count to bytes.  */
Packit 6c4009
	subfic	r0,r0,7		/* Byte offset of the last c within the doubleword.  */
Packit 6c4009
	add	r9,r8,r0      /* Record the address of the matching c byte;
Packit 6c4009
				 r9 stays null in case c was not found.  */
Packit 6c4009
	li	r0,0	      /* r0 was used as scratch; make it zero again.  */
Packit 6c4009
	cmpdi	cr7,r11,0     /* If r11 == 0, no null's have been found.  */
Packit 6c4009
	beq	cr7,L(align)  /* No terminator yet: keep scanning.  */
Packit 6c4009
Packit 6c4009
	.align	4
Packit 6c4009
	/* Common exit: return the last recorded match, or null if none.  */
L(no_match):
Packit 6c4009
	mr	r3,r9
Packit 6c4009
	blr
Packit 6c4009
Packit 6c4009
/* Check the first 32B in GPR's and move to vectorized loop.
   Entered from L(loop) when neither doubleword held c or null.  */
Packit 6c4009
	.p2align  5
Packit 6c4009
L(vector):
Packit 6c4009
	addi	r3, r8, 8	/* r3 = address of the next unread byte.  */
Packit 6c4009
	/* Make sure 32B aligned; otherwise do another L(loop) round.  */
Packit 6c4009
	andi.	r10, r3, 31
Packit 6c4009
	bne	cr0, L(loop)
Packit 6c4009
	vspltisb	v0, 0	/* v0 = all zeros.  */
Packit 6c4009
	/* Precompute vbpermq constant: each byte of the lvsl result is
	   shifted left by 3, i.e. multiplied by 8.  r0 is zero here.  */
Packit 6c4009
	vspltisb	v10, 3
Packit 6c4009
	lvsl	v11, r0, r0
Packit 6c4009
	vslb	v10, v11, v10
Packit 6c4009
	MTVRD(v1, r4)
Packit 6c4009
	li	r5, 16		/* Offset of the second quadword.  */
Packit 6c4009
	vspltb	v1, v1, 7	/* v1 = c in every byte.  */
Packit 6c4009
	/* Compare 32 bytes in each loop.
	   v2/v3 = null compare of 1st/2nd quadword, v6/v7 = c compare.
	   After a recording vcmpequb. against v0, blt cr6 is taken when
	   every byte compared equal, i.e. the tested vector is all zeros.  */
Packit 6c4009
L(continue):
Packit 6c4009
	lvx	v4, 0, r3
Packit 6c4009
	lvx	v5, r3, r5
Packit 6c4009
	vcmpequb	v2, v0, v4
Packit 6c4009
	vcmpequb	v3, v0, v5
Packit 6c4009
	vcmpequb	v6, v1, v4
Packit 6c4009
	vcmpequb	v7, v1, v5
Packit 6c4009
	vor	v8, v2, v3
Packit 6c4009
	vor	v9, v6, v7
Packit 6c4009
	vor	v11, v8, v9
Packit 6c4009
	vcmpequb.	v11, v0, v11
Packit 6c4009
	addi	r3, r3, 32
Packit 6c4009
	blt	cr6, L(continue)	/* Neither c nor null in these 32B.  */
Packit 6c4009
	vcmpequb.	v8, v0, v8
Packit 6c4009
	blt	cr6, L(match)		/* No null, so only c was found.  */
Packit 6c4009
Packit 6c4009
	/* One (or both) of the quadwords contains a null byte, possibly
	   together with c bytes.  r3 already points past the 32B block.  */
Packit 6c4009
	vspltisb	v8, 2
Packit 6c4009
	vspltisb	v9, 5
Packit 6c4009
	/* Precompute values used for comparison.  */
Packit 6c4009
	vsl	v9, v8, v9	/* v9 = 0x4040404040404040.  */
Packit 6c4009
	vaddubm	v8, v9, v9
Packit 6c4009
	vsldoi	v8, v0, v8, 1	/* v8 = 0x80.  */
Packit 6c4009
Packit 6c4009
	/* Check if null is in second qw (v2 all zeros).  */
Packit 6c4009
	vcmpequb.	v11, v0, v2
Packit 6c4009
	blt	cr6, L(secondqw)
Packit 6c4009
Packit 6c4009
	/* Null found in first qw.  */
Packit 6c4009
	addi	r8, r3, -32	/* r8 = address of the first quadword.  */
Packit 6c4009
	/* Calculate the null position.  */
Packit 6c4009
	FIND_NULL_POS(v2)
Packit 6c4009
	/* Check if null is in the first byte.  */
Packit 6c4009
	vcmpequb.	v11, v0, v2
Packit 6c4009
	blt	cr6, L(no_match)
Packit 6c4009
	vsububm	v2, v8, v2	/* Shift count: 0x80 minus the null position.  */
Packit 6c4009
	/* Mask unwanted bytes after null.  */
Packit 6c4009
#ifdef __LITTLE_ENDIAN__
Packit 6c4009
	vslo	v6, v6, v2
Packit 6c4009
	vsro	v6, v6, v2
Packit 6c4009
#else
Packit 6c4009
	vsro	v6, v6, v2
Packit 6c4009
	vslo	v6, v6, v2
Packit 6c4009
#endif
Packit 6c4009
	vcmpequb.	v11, v0, v6
Packit 6c4009
	blt	cr6, L(no_match)	/* No c before the null: return r9.  */
Packit 6c4009
	/* Found a match before null.  */
Packit 6c4009
	CALCULATE_MATCH()
Packit 6c4009
	add	r3, r8, r6	/* Quadword address plus match offset.  */
Packit 6c4009
	blr
Packit 6c4009
Packit 6c4009
/* The null byte is in the second quadword.  v3 is its null compare
   result, v6/v7 are the c compare results of the first/second quadword
   and r3 points just past the 32-byte block.  */
L(secondqw):
Packit 6c4009
	addi	r8, r3, -16	/* r8 = address of the second quadword.  */
Packit 6c4009
	FIND_NULL_POS(v3)
Packit 6c4009
	vcmpequb.	v11, v0, v2
Packit 6c4009
	blt	cr6, L(no_match1)	/* Position 0: nothing usable in this qw.  */
Packit 6c4009
	vsububm	v2, v8, v2
Packit 6c4009
	/* Mask unwanted bytes after null.  */
Packit 6c4009
#ifdef __LITTLE_ENDIAN__
Packit 6c4009
	vslo	v7, v7, v2
Packit 6c4009
	vsro	v7, v7, v2
Packit 6c4009
#else
Packit 6c4009
	vsro	v7, v7, v2
Packit 6c4009
	vslo	v7, v7, v2
Packit 6c4009
#endif
Packit 6c4009
	vcmpequb.	v11, v0, v7
Packit 6c4009
	blt	cr6, L(no_match1)	/* No c before the null in this qw.  */
Packit 6c4009
	/* A c byte precedes the null in the second quadword: use v7 as the
	   match vector.  The +16 here cancels the -16 applied below, so r8
	   ends up at the second quadword.  */
	addi	r8, r8, 16
Packit 6c4009
	vor	v6, v0, v7
Packit 6c4009
L(no_match1):
Packit 6c4009
	/* When branched to directly, r8 steps back to the first quadword
	   and v6 still holds that quadword's c compare result.  */
	addi	r8, r8, -16
Packit 6c4009
	vcmpequb.	v11, v0, v6
Packit 6c4009
	blt	cr6, L(no_match)	/* No c in this block: return r9.  */
Packit 6c4009
	/* Found a match before null.  */
Packit 6c4009
	CALCULATE_MATCH()
Packit 6c4009
	add	r3, r8, r6	/* Quadword address plus match offset.  */
Packit 6c4009
	blr
Packit 6c4009
Packit 6c4009
/* The 32-byte block contains c but no null byte.  Record the address
   of its last c byte in r9 and resume the vector loop.  r3 already
   points past the block.  */
L(match):
Packit 6c4009
	/* One (or both) of the quadwords contains a match.  */
Packit 6c4009
	mr	r8, r3
Packit 6c4009
	vcmpequb.	v8, v0, v7
Packit 6c4009
	blt	cr6, L(firstqw)	/* v7 all zeros: only the first qw matched.  */
Packit 6c4009
	/* Match found in second qw.  */
Packit 6c4009
	addi	r8, r8, 16
Packit 6c4009
	vor	v6, v0, v7	/* CALCULATE_MATCH works on v6.  */
Packit 6c4009
L(firstqw):
Packit 6c4009
	addi	r8, r8, -32	/* r8 = address of the quadword held in v6.  */
Packit 6c4009
	CALCULATE_MATCH()
Packit 6c4009
	add	r9, r8, r6      /* Record the address of this match.  */
Packit 6c4009
	b	L(continue)
Packit 6c4009
/* We are here because strrchr was called with a null byte.
   Only the terminator has to be located: r8 is the current doubleword
   address, r12 the first doubleword, r0 zero and r6 the padding bit
   count, all set up at function entry.  */
Packit 6c4009
	.align	4
Packit 6c4009
L(null_match):
Packit 6c4009
	/* r0 has a doubleword of null bytes.  */
Packit 6c4009
Packit 6c4009
	cmpb	r5,r12,r0     /* Compare each byte against null bytes.  */
Packit 6c4009
Packit 6c4009
	/* Move the doublewords left and right to discard the bits that are
Packit 6c4009
	   not part of the string and bring them back as zeros.  */
Packit 6c4009
#ifdef __LITTLE_ENDIAN__
Packit 6c4009
	srd	r5,r5,r6
Packit 6c4009
	sld	r5,r5,r6
Packit 6c4009
#else
Packit 6c4009
	sld	r5,r5,r6
Packit 6c4009
	srd	r5,r5,r6
Packit 6c4009
#endif
Packit 6c4009
	cmpdi	cr7,r5,0      /* If r5 == 0, no null bytes
Packit 6c4009
				 have been found.  */
Packit 6c4009
	bne	cr7,L(done_null)
Packit 6c4009
Packit 6c4009
	andi.	r12, r8, 15
Packit 6c4009
Packit 6c4009
	/* Is r8 on a quadword boundary?  If it is not, the next doubleword
Packit 6c4009
	   (r8 + 8) is, so skip to the main loop.  Otherwise check the
	   second doubleword of the current pair first.  */
Packit 6c4009
Packit 6c4009
	bne	cr0, L(loop_null)
Packit 6c4009
Packit 6c4009
	/* Handle WORD2 of pair.  */
Packit 6c4009
	ldu	r12,8(r8)
Packit 6c4009
	cmpb	r5,r12,r0
Packit 6c4009
	cmpdi	cr7,r5,0
Packit 6c4009
	bne	cr7,L(done_null)
Packit 6c4009
	b	L(loop_null)  /* We branch here (rather than falling through)
Packit 6c4009
				 to skip the nops due to heavy alignment
Packit 6c4009
				 of the loop below.  */
Packit 6c4009
Packit 6c4009
	/* Main loop to look for the end of the string.  Since it's a
Packit 6c4009
	   small loop (< 8 instructions), align it to 32-bytes.  */
Packit 6c4009
	.p2align  5
Packit 6c4009
L(loop_null):
Packit 6c4009
	/* Load two doublewords, compare and merge in a
Packit 6c4009
	   single register for speed.  This is an attempt
Packit 6c4009
	   to speed up the null-checking process for bigger strings.
	   The ldu leaves r8 pointing at the second doubleword.  */
Packit 6c4009
	ld	r12,8(r8)
Packit 6c4009
	ldu	r11,16(r8)
Packit 6c4009
	cmpb	r5,r12,r0
Packit 6c4009
	cmpb	r10,r11,r0
Packit 6c4009
	or	r6,r5,r10
Packit 6c4009
	cmpdi	cr7,r6,0
Packit 6c4009
	beq	cr7,L(vector1)
Packit 6c4009
Packit 6c4009
	/* OK, one (or both) of the doublewords contains a null byte.  Check
Packit 6c4009
	   the first doubleword and decrement the address in case the first
Packit 6c4009
	   doubleword really contains a null byte.  */
Packit 6c4009
Packit 6c4009
	cmpdi	cr6,r5,0
Packit 6c4009
	addi	r8,r8,-8
Packit 6c4009
	bne	cr6,L(done_null)
Packit 6c4009
Packit 6c4009
	/* The null byte must be in the second doubleword.  Adjust the address
Packit 6c4009
	   again and move the result of cmpb to r5 so we can calculate the
Packit 6c4009
	   pointer.  */
Packit 6c4009
Packit 6c4009
	mr	r5,r10
Packit 6c4009
	addi	r8,r8,8
Packit 6c4009
Packit 6c4009
	/* r5 has the output of the cmpb instruction, that is, it contains
Packit 6c4009
	   0xff in the same position as the null byte in the original
Packit 6c4009
	   doubleword from the string.  Use that to calculate the pointer.  */
Packit 6c4009
L(done_null):
Packit 6c4009
#ifdef __LITTLE_ENDIAN__
Packit 6c4009
	/* Count trailing zeros before the match.  */
	addi	r0,r5,-1
Packit 6c4009
	andc	r0,r0,r5
Packit 6c4009
	popcntd	r0,r0
Packit 6c4009
#else
Packit 6c4009
	cntlzd	r0,r5	      /* Count leading zeros before the match.  */
Packit 6c4009
#endif
Packit 6c4009
	srdi	r0,r0,3	      /* Convert the zero-bit count to bytes.  */
Packit 6c4009
	add	r3,r8,r0      /* Return address of the matching null byte.  */
Packit 6c4009
	blr
Packit 6c4009
/* Check the first 32B in GPR's and move to vectorized loop.
   Null-only variant, entered from L(loop_null) when neither doubleword
   held a null byte.  */
Packit 6c4009
	.p2align  5
Packit 6c4009
L(vector1):
Packit 6c4009
	addi	r3, r8, 8	/* r3 = address of the next unread byte.  */
Packit 6c4009
	/* Make sure 32B aligned; otherwise do another L(loop_null) round.  */
Packit 6c4009
	andi.	r10, r3, 31
Packit 6c4009
	bne	cr0, L(loop_null)
Packit 6c4009
	vspltisb	v0, 0	/* v0 = all zeros.  */
Packit 6c4009
	/* Precompute vbpermq constant: each byte of the lvsl result is
	   shifted left by 3, i.e. multiplied by 8.  r0 is still zero on
	   this path.  */
Packit 6c4009
	vspltisb	v10, 3
Packit 6c4009
	lvsl	v11, r0, r0
Packit 6c4009
	vslb	v10, v11, v10
Packit 6c4009
	li	r5, 16		/* Offset of the second quadword.  */
Packit 6c4009
	/* Compare 32 bytes in each loop.  v2/v3 = null compare of the
	   1st/2nd quadword.  blt cr6 after the recording vcmpequb. is taken
	   when v8 is all zeros, i.e. no null byte in the block.  */
Packit 6c4009
L(continue1):
Packit 6c4009
	lvx	v4, 0, r3
Packit 6c4009
	lvx	v5, r3, r5
Packit 6c4009
	vcmpequb	v2, v0, v4
Packit 6c4009
	vcmpequb	v3, v0, v5
Packit 6c4009
	vor	v8, v2, v3
Packit 6c4009
	vcmpequb.	v11, v0, v8
Packit 6c4009
	addi	r3, r3, 32
Packit 6c4009
	blt	cr6, L(continue1)
Packit 6c4009
	addi	r3, r3, -32	/* Back to the start of the block with the null.  */
Packit 6c4009
	/* Collapse each quadword's compare result with the v10 selector.  */
	VBPERMQ(v2, v2, v10)
Packit 6c4009
	VBPERMQ(v3, v3, v10)
Packit 6c4009
	/* Shift each component into its correct position for merging.  */
Packit 6c4009
#ifdef __LITTLE_ENDIAN__
Packit 6c4009
	vsldoi	v3, v3, v3, 2
Packit 6c4009
#else
Packit 6c4009
	vsldoi	v2, v2, v2, 6
Packit 6c4009
	vsldoi	v3, v3, v3, 4
Packit 6c4009
#endif
Packit 6c4009
	/* Merge the results and move to a GPR.  */
Packit 6c4009
	vor	v4, v3, v2
Packit 6c4009
	MFVRD(r5, v4)
Packit 6c4009
#ifdef __LITTLE_ENDIAN__
Packit 6c4009
	/* Count trailing zeros before the match.  */
	addi	r6, r5, -1
Packit 6c4009
	andc	r6, r6, r5
Packit 6c4009
	popcntd	r6, r6
Packit 6c4009
#else
Packit 6c4009
	cntlzd	r6, r5  /* Count leading zeros before the match.  */
Packit 6c4009
#endif
Packit 6c4009
	add	r3, r3, r6      /* Return the block address plus that count.  */
Packit 6c4009
	blr
Packit 6c4009
END_GEN_TB (STRRCHR, TB_TOCLESS)
Packit 6c4009
/* rindex is provided as a weak alias of strrchr.  */
weak_alias (strrchr, rindex)
Packit 6c4009
libc_hidden_builtin_def (strrchr)