/* strrchr: find the last instance of a character in a string.

   Copyright (C) 2014-2018 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

/* Arguments and results.  srcin and result share x0: the string
   pointer is consumed before the result is written.  */
#define srcin		x0	/* In: pointer to the string.  */
#define chrin		w1	/* In: character to search for.  */

#define result		x0	/* Out: last match, or NULL.  */

/* General-purpose scratch registers.  */
#define src		x2	/* Hunk pointer, post-incremented by 32.  */
#define tmp1		x3	/* Misalignment of srcin / shift count.  */
#define wtmp2		w4	/* Magic lane-identification constant.  */
#define tmp3		x5	/* Padding mask, later the clz count.  */
#define src_match	x6	/* src value saved with the latest match.  */
#define src_offset	x7	/* Character syndrome of that hunk.  */
#define const_m1	x8	/* All-ones constant.  */
#define tmp4		x9	/* Mask of syndrome bits up to the NUL.  */
#define nul_match	x10	/* NUL syndrome of the current hunk.  */
#define chr_match	x11	/* Character syndrome of the current hunk.  */

/* Vector registers.  */
#define vrepchr		v0	/* Search byte replicated to all lanes.  */
#define vdata1		v1	/* Low 16 bytes of the current hunk.  */
#define vdata2		v2	/* High 16 bytes of the current hunk.  */
#define vhas_nul1	v3	/* NUL compare result for vdata1.  */
#define vhas_nul2	v4	/* NUL compare result for vdata2.  */
#define vhas_chr1	v5	/* Character compare result for vdata1.  */
#define vhas_chr2	v6	/* Character compare result for vdata2.  */
#define vrepmask_0	v7	/* 0x80200802 per word: NUL syndrome bits.  */
#define vrepmask_c	v16	/* 0x40100401 per word: char syndrome bits.  */
#define vend1		v17	/* Folded NUL compare, loop exit test.  */
#define vend2		v18	/* Currently unused.  */

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string, the lowest set NUL bit marks the end of
   the string, and a count_leading_zeros() operation on the character
   bits at or below it identifies the last matching byte.  */

/* char *strrchr (const char *s, int c)

   In:    srcin (x0) = s, chrin (w1) = c.  Only the low byte of c is
	  compared against the string.
   Out:   result (x0) = address of the last byte of s equal to that
	  byte, or NULL if there is none.  The terminating NUL takes
	  part in the search, so looking for NUL returns its address.
   Uses:  x2-x11, v0-v7, v16, v17 and the condition flags.  No stack
	  is used.

   The string is read in 32-byte aligned hunks, so bytes before the
   start of s and after its terminator may be loaded; they are masked
   out of the syndromes before any decision is taken.  */
ENTRY(strrchr)
	/* DELOUSE is supplied by sysdep.h; presumably it sanitises pointer
	   argument 0 for ILP32 -- confirm against sysdep.h.  */
	DELOUSE (0)
	/* c is an int and therefore lives in w1 only; the upper half of x1
	   is unspecified, so test the 32-bit view.  */
	cbz	chrin, L(null_search)
	/* Magic constant 0x40100401 to allow us to identify which lane
	   matches the requested byte.  Magic constant 0x80200802 used
	   similarly for NUL termination.  */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
	dup	vrepmask_c.4s, wtmp2
	mov	src_offset, #0		/* No match recorded yet.  */
	ands	tmp1, srcin, #31	/* tmp1 = misalignment, sets Z.  */
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	L(aligned)

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b	// 256->128
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b	// 128->64
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b	// 128->64
	mov	nul_match, vhas_nul1.d[0]
	/* Shift count is -2 * misalignment; the register shift uses it
	   modulo 64, leaving the low 2 * misalignment bits set in tmp3.  */
	lsl	tmp1, tmp1, #1
	mov	const_m1, #~0
	mov	chr_match, vhas_chr1.d[0]
	lsr	tmp3, const_m1, tmp1

	bic	nul_match, nul_match, tmp3	// Mask padding bits.
	bic	chr_match, chr_match, tmp3	// Mask padding bits.
	cbnz	nul_match, L(tail)

L(loop):
	/* The previous hunk held no NUL.  If it held a match, remember the
	   (already advanced) hunk pointer and the match syndrome.  */
	cmp	chr_match, #0
	csel	src_match, src, src_match, ne
	csel	src_offset, chr_match, src_offset, ne
L(aligned):
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Only "is there any NUL" is needed to decide whether to loop, so
	   the NUL compare is folded without applying vrepmask_0.  */
	addp	vend1.16b, vhas_nul1.16b, vhas_nul2.16b	// 256->128
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vend1.16b, vend1.16b, vend1.16b	// 128->64
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b	// 128->64
	mov	nul_match, vend1.d[0]
	mov	chr_match, vhas_chr1.d[0]
	cbz	nul_match, L(loop)

	/* A NUL is present in this hunk: now build the exact NUL syndrome
	   so that its position can be determined.  */
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b
	mov	nul_match, vhas_nul1.d[0]

L(tail):
	/* Work out exactly where the string ends.  tmp4 gets every bit up
	   to and including the lowest set bit of nul_match, which discards
	   matches that lie beyond the terminator.  */
	sub	tmp4, nul_match, #1
	eor	tmp4, tmp4, nul_match
	ands	chr_match, chr_match, tmp4
	/* And pick the values corresponding to the last match.  */
	csel	src_match, src, src_match, ne
	csel	src_offset, chr_match, src_offset, ne

	/* Count down from the top of the syndrome to find the last match.  */
	clz	tmp3, src_offset
	/* Src_match points beyond the word containing the match, so we can
	   simply subtract half the bit-offset into the syndrome.  Because
	   we are counting down, we need to go back one more character.  */
	add	tmp3, tmp3, #2
	sub	result, src_match, tmp3, lsr #1
	/* But if the syndrome shows no match was found, then return NULL.  */
	cmp	src_offset, #0
	csel	result, result, xzr, ne

	ret
L(null_search):
	/* Searching for NUL: tail-call __strchrnul with the arguments still
	   in x0/w1.  It is defined elsewhere and is expected to return the
	   address of the terminator in this case.  */
	b	__strchrnul

END(strrchr)
weak_alias (strrchr, rindex)
libc_hidden_builtin_def (strrchr)