/* strchrnul - find a character or nul in a string

   Copyright (C) 2014-2018 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

/* Arguments and results.  */
/* srcin is the incoming string pointer; chrin is the character to
   look for (only its low byte is replicated into vrepchr).  result
   shares x0 with srcin.  */
#define srcin		x0
#define chrin		w1

#define result		x0

/* Locals and temporaries.  */

/* src walks the string in 32-byte aligned hunks; tmp1, wtmp2 and tmp3
   are integer scratch registers.  */
#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5

/* vrepchr holds the search character in every byte lane; vdata1 and
   vdata2 hold the 32 bytes just loaded; vhas_nul* / vhas_chr* hold the
   per-byte compare results; vrepmask holds the replicated lane mask
   0x40100401; vend1 holds the reduced compare result / syndrome.  */
#define vrepchr		v0
#define vdata1		v1
#define vdata2		v2
#define vhas_nul1	v3
#define vhas_nul2	v4
#define vhas_chr1	v5
#define vhas_chr2	v6
#define vrepmask	v7
#define vend1		v16

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character or nul.  Since the
   bits in the syndrome reflect exactly the order in which things occur
   in the original string a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination.  */

/* __strchrnul: return the address of the first byte that equals the
   low 8 bits of chrin, or of the terminating nul if that comes first.

   In:	 srcin (x0) = start of the string, chrin (w1) = character.
   Out:	 result (x0) = address of the matching byte or of the nul.
   Uses: x2, x3, w4, x5, v0-v7, v16 and the condition flags; the stack
	 is not touched.

   Every load is 32 bytes wide from a 32-byte aligned address, so bytes
   that lie outside the string but inside the same aligned hunk are
   read as well.  */

ENTRY (__strchrnul)
	/* DELOUSE is provided by sysdep.h; presumably it sanitises pointer
	   argument 0 on ILP32 targets -- confirm against that header.  */
	DELOUSE (0)
	/* Magic constant 0x40100401 to allow us to identify which lane
	   matches the termination condition.  From least to most
	   significant byte it is 01 04 10 40, so byte i of every 4-byte
	   group owns bit 2 * (i % 4).  */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin	/* Low byte of chrin in every lane.  */
	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
	dup	vrepmask.4s, wtmp2
	ands	tmp1, srcin, #31	/* tmp1 = offset of srcin in its hunk.  */
	b.eq	L(loop)			/* Already aligned: no padding to mask.  */

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1		/* tmp1 = -offset.  */
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* A byte terminates the search if it is either nul or chrin.  */
	orr	vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b
	orr	vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b
	/* Reduce each 0xff match byte to the single bit owned by its lane.  */
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	lsl	tmp1, tmp1, #1		/* tmp1 = -2 * offset.  */
	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	mov	tmp3, #~0
	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64
	/* A register-specified shift uses the amount modulo 64, so this
	   shifts by 64 - 2 * offset and leaves the low 2 * offset bits
	   set: exactly the syndrome bits of the bytes before srcin.  */
	lsr	tmp1, tmp3, tmp1

	mov	tmp3, vend1.2d[0]	/* tmp3 = syndrome of the first hunk.  */
	bic	tmp1, tmp3, tmp1	// Mask padding bits.
	cbnz	tmp1, L(tail)		/* Match inside the first hunk.  */

L(loop):
	/* Post-indexed load: src already points at the next hunk once the
	   data has been fetched.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Use a fast check for the termination condition.  */
	orr	vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b
	orr	vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
	orr	vend1.16b, vhas_chr1.16b, vhas_chr2.16b
	/* Fold the two 64-bit halves into one value that can be tested
	   in a general register; zero means no byte in the hunk matched.  */
	addp	vend1.2d, vend1.2d, vend1.2d
	mov	tmp1, vend1.2d[0]
	cbz	tmp1, L(loop)

	/* Termination condition found.  Now need to establish exactly why
	   we terminated.  */
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b		// 256->128
	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64

	mov	tmp1, vend1.2d[0]	/* tmp1 = syndrome of this hunk.  */
L(tail):
	/* Reached from both paths with a non-zero syndrome in tmp1.  */
	/* Count the trailing zeros, by bit reversing...  */
	rbit	tmp1, tmp1
	/* Re-bias source.  The load that produced this syndrome advanced
	   src by 32, so step back to the start of that hunk.  */
	sub	src, src, #32
	clz	tmp1, tmp1	/* ... and counting the leading zeros.  */
	/* tmp1 is twice the offset into the fragment.  */
	add	result, src, tmp1, lsr #1
	ret

END(__strchrnul)
weak_alias (__strchrnul, strchrnul)