Blame sysdeps/aarch64/strnlen.S

Packit Service 82fcde
/* strnlen - calculate the length of a string with limit.
Packit Service 82fcde
Packit Service 82fcde
   Copyright (C) 2013-2018 Free Software Foundation, Inc.
Packit Service 82fcde
Packit Service 82fcde
   This file is part of the GNU C Library.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is free software; you can redistribute it and/or
Packit Service 82fcde
   modify it under the terms of the GNU Lesser General Public
Packit Service 82fcde
   License as published by the Free Software Foundation; either
Packit Service 82fcde
   version 2.1 of the License, or (at your option) any later version.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is distributed in the hope that it will be useful,
Packit Service 82fcde
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 82fcde
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service 82fcde
   Lesser General Public License for more details.
Packit Service 82fcde
Packit Service 82fcde
   You should have received a copy of the GNU Lesser General Public
Packit Service 82fcde
   License along with the GNU C Library.  If not, see
Packit Service 82fcde
   <http://www.gnu.org/licenses/>.  */
Packit Service 82fcde
Packit Service 82fcde
#include <sysdep.h>
Packit Service 82fcde
Packit Service 82fcde
/* Assumptions:
Packit Service 82fcde
 *
Packit Service 82fcde
 * ARMv8-a, AArch64
Packit Service 82fcde
 */
Packit Service 82fcde
Packit Service 82fcde
/* Arguments and results.  */
Packit Service 82fcde
/* srcin and len both name x0: the result is written over the incoming
   string pointer once the pointer is no longer needed.  */
#define srcin		x0
Packit Service 82fcde
#define len		x0
Packit Service 82fcde
/* Maximum number of bytes to examine.  */
#define limit		x1
Packit Service 82fcde
Packit Service 82fcde
/* Locals and temporaries.  */
Packit Service 82fcde
/* Read pointer: srcin rounded down to a 16-byte boundary, advanced by
   16 on every load.  */
#define src		x2
Packit Service 82fcde
/* The two Dwords fetched by each 16-byte load.  */
#define data1		x3
Packit Service 82fcde
#define data2		x4
Packit Service 82fcde
/* Masked copy of data2, used only on the misaligned entry path.  */
#define data2a		x5
Packit Service 82fcde
/* NUL-detection syndromes for data1 and data2; non-zero iff the
   corresponding Dword contains a zero byte.  */
#define has_nul1	x6
Packit Service 82fcde
#define has_nul2	x7
Packit Service 82fcde
/* Scratch registers.  */
#define tmp1		x8
Packit Service 82fcde
#define tmp2		x9
Packit Service 82fcde
#define tmp3		x10
Packit Service 82fcde
#define tmp4		x11
Packit Service 82fcde
/* Holds the constant REP8_01 for the whole routine.  */
#define zeroones	x12
Packit Service 82fcde
/* Bit position of the first NUL within its Dword (result of clz).  */
#define pos		x13
Packit Service 82fcde
/* Number of 16-byte Qwords still to be examined, minus one.  */
#define limit_wd	x14
Packit Service 82fcde
Packit Service 82fcde
/* 64-bit constants made of one byte value repeated eight times.
   REP8_01 and REP8_7f are used by the NUL detection; REP8_80 is not
   referenced anywhere in the visible part of this file.  */
#define REP8_01 0x0101010101010101
Packit Service 82fcde
#define REP8_7f 0x7f7f7f7f7f7f7f7f
Packit Service 82fcde
#define REP8_80 0x8080808080808080
Packit Service 82fcde
Packit Service 82fcde
/* __strnlen: length of a string, capped at a limit.

   In:   srcin (x0) = address of the first byte of the string
	 limit (x1) = maximum number of bytes to examine
   Out:  len (x0)   = offset from srcin of the first NUL byte, or limit
		      if no NUL byte lies within the first limit bytes

   The instructions below write only x0, the temporaries x2-x14 and
   the condition flags, and make no stack accesses (the ENTRY,
   DELOUSE and RET macros are defined elsewhere and are not covered
   by this statement).

   Data is fetched with 16-byte loads from addresses rounded down to
   a 16-byte boundary, so bytes in front of srcin, or behind the limit
   within the same 16-byte block, may be read.  Bytes in front of
   srcin are forced to 0xff before NUL detection; a NUL found behind
   the limit is neutralised by the final MIN (len, limit).

   NOTE(review): the arguments 6 and 9 of ENTRY_ALIGN_AND_PAD
   presumably request 2^6-byte alignment and 9 instructions of
   padding so that L(loop) starts on a cache-line boundary -- confirm
   against the macro definition.  */
ENTRY_ALIGN_AND_PAD (__strnlen, 6, 9)
Packit Service 82fcde
	/* NOTE(review): DELOUSE is defined outside this file.  x2 is not
	   an argument of this routine and is overwritten by the bic
	   below before it is ever read -- confirm DELOUSE (2) is
	   intended.  */
	DELOUSE (0)
Packit Service 82fcde
	DELOUSE (1)
Packit Service 82fcde
	DELOUSE (2)
Packit Service 82fcde
	/* A limit of zero returns zero without touching memory.  */
	cbz	limit, L(hit_limit)
Packit Service 82fcde
	mov	zeroones, #REP8_01
Packit Service 82fcde
	/* src = srcin rounded down to a 16-byte boundary;
	   tmp1 = offset of srcin inside that block (Z set if aligned).  */
	bic	src, srcin, #15
Packit Service 82fcde
	ands	tmp1, srcin, #15
Packit Service 82fcde
	b.ne	L(misaligned)
Packit Service 82fcde
	/* Calculate the number of full and partial Qwords -1.  */
Packit Service 82fcde
	sub	limit_wd, limit, #1	/* Limit != 0, so no underflow.  */
Packit Service 82fcde
	lsr	limit_wd, limit_wd, #4	/* Convert to Qwords.  */
Packit Service 82fcde
Packit Service 82fcde
	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
Packit Service 82fcde
	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
Packit Service 82fcde
	   can be done in parallel across the entire word.  */
Packit Service 82fcde
	/* The inner loop deals with two Dwords at a time.  This has a
Packit Service 82fcde
	   slightly higher start-up cost, but we should win quite quickly,
Packit Service 82fcde
	   especially on cores with a high number of issue slots per
Packit Service 82fcde
	   cycle, as we get much better parallelism out of the operations.  */
Packit Service 82fcde
Packit Service 82fcde
	/* Start of critical section -- keep to one 64Byte cache line.  */
Packit Service 82fcde
L(loop):
Packit Service 82fcde
	/* Post-indexed load: src already points at the next Qword
	   afterwards.  */
	ldp	data1, data2, [src], #16
Packit Service 82fcde
L(realigned):
Packit Service 82fcde
	/* has_nulN = (dataN - 0x01..01) & ~(dataN | 0x7f..7f).  */
	sub	tmp1, data1, zeroones
Packit Service 82fcde
	orr	tmp2, data1, #REP8_7f
Packit Service 82fcde
	sub	tmp3, data2, zeroones
Packit Service 82fcde
	orr	tmp4, data2, #REP8_7f
Packit Service 82fcde
	bic	has_nul1, tmp1, tmp2
Packit Service 82fcde
	bic	has_nul2, tmp3, tmp4
Packit Service 82fcde
	/* Count this Qword; the result goes negative once the last
	   Qword allowed by the limit has been examined.  */
	subs	limit_wd, limit_wd, #1
Packit Service 82fcde
	orr	tmp1, has_nul1, has_nul2
Packit Service 82fcde
	/* Loop again only if Qwords remain (PL) and no NUL was seen
	   (tmp1 == 0).  When PL fails, ccmp forces the flags below so
	   that b.eq falls through.  */
	ccmp	tmp1, #0, #0, pl	/* NZCV = 0000  */
Packit Service 82fcde
	b.eq	L(loop)
Packit Service 82fcde
	/* End of critical section -- keep to one 64Byte cache line.  */
Packit Service 82fcde
Packit Service 82fcde
	/* The loop ends either on a NUL or on the limit; recompute the
	   combined syndrome to tell the two apart.  */
	orr	tmp1, has_nul1, has_nul2
Packit Service 82fcde
	cbz	tmp1, L(hit_limit)	/* No null in final Qword.  */
Packit Service 82fcde
Packit Service 82fcde
	/* We know there's a null in the final Qword.  The easiest thing
Packit Service 82fcde
	   to do now is work out the length of the string and return
Packit Service 82fcde
	   MIN (len, limit).  */
Packit Service 82fcde
Packit Service 82fcde
	/* src points just past the final Qword, so len starts as the
	   offset of the end of that Qword and is stepped back below to
	   the start of the Dword holding the NUL.  */
	sub	len, src, srcin
Packit Service 82fcde
	cbz	has_nul1, L(nul_in_data2)
Packit Service 82fcde
#ifdef __AARCH64EB__
Packit Service 82fcde
	mov	data2, data1
Packit Service 82fcde
#endif
Packit Service 82fcde
	/* NUL is in data1: step back one extra Dword and let the shared
	   tail below operate on data1's syndrome.  */
	sub	len, len, #8
Packit Service 82fcde
	mov	has_nul2, has_nul1
Packit Service 82fcde
L(nul_in_data2):
Packit Service 82fcde
#ifdef __AARCH64EB__
Packit Service 82fcde
	/* For big-endian, carry propagation (if the final byte in the
Packit Service 82fcde
	   string is 0x01) means we cannot use has_nul directly.  The
Packit Service 82fcde
	   easiest way to get the correct byte is to byte-swap the data
Packit Service 82fcde
	   and calculate the syndrome a second time.  */
Packit Service 82fcde
	rev	data2, data2
Packit Service 82fcde
	sub	tmp1, data2, zeroones
Packit Service 82fcde
	orr	tmp2, data2, #REP8_7f
Packit Service 82fcde
	bic	has_nul2, tmp1, tmp2
Packit Service 82fcde
#endif
Packit Service 82fcde
	sub	len, len, #8
Packit Service 82fcde
	/* Byte-reverse so the lowest-addressed byte is most significant;
	   clz then counts the bits in front of the first marked byte.  */
	rev	has_nul2, has_nul2
Packit Service 82fcde
	clz	pos, has_nul2
Packit Service 82fcde
	add	len, len, pos, lsr #3		/* Bits to bytes.  */
Packit Service 82fcde
	/* Unsigned compare: a NUL found beyond the limit within the
	   final Qword must not be reported.  */
	cmp	len, limit
Packit Service 82fcde
	csel	len, len, limit, ls		/* Return the lower value.  */
Packit Service 82fcde
	RET
Packit Service 82fcde
Packit Service 82fcde
L(misaligned):
Packit Service 82fcde
	/* Deal with a partial first word.
Packit Service 82fcde
	   We're doing two things in parallel here;
Packit Service 82fcde
	   1) Calculate the number of words (but avoiding overflow if
Packit Service 82fcde
	      limit is near ULONG_MAX) - to do this we need to work out
Packit Service 82fcde
	      limit + tmp1 - 1 as a 65-bit value before shifting it;
Packit Service 82fcde
	   2) Load and mask the initial data words - we force the bytes
Packit Service 82fcde
	      before the ones we are interested in to 0xff - this ensures
Packit Service 82fcde
	      early bytes will not hit any zero detection.  */
Packit Service 82fcde
	sub	limit_wd, limit, #1
Packit Service 82fcde
	neg	tmp4, tmp1
Packit Service 82fcde
	/* The flags set here are consumed by the csinv/csel pair at the
	   end of this block; no instruction in between writes the
	   flags.  */
	cmp	tmp1, #8
Packit Service 82fcde
Packit Service 82fcde
	/* Split limit - 1 into its low four bits (tmp3) and its Qword
	   count (limit_wd) so the misalignment can be added without
	   overflowing 64 bits.  */
	and	tmp3, limit_wd, #15
Packit Service 82fcde
	lsr	limit_wd, limit_wd, #4
Packit Service 82fcde
	mov	tmp2, #~0
Packit Service 82fcde
Packit Service 82fcde
	ldp	data1, data2, [src], #16
Packit Service 82fcde
	lsl	tmp4, tmp4, #3		/* Bytes beyond alignment -> bits.  */
Packit Service 82fcde
	add	tmp3, tmp3, tmp1
Packit Service 82fcde
Packit Service 82fcde
#ifdef __AARCH64EB__
Packit Service 82fcde
	/* Big-endian.  Early bytes are at MSB.  */
Packit Service 82fcde
	lsl	tmp2, tmp2, tmp4	/* Shift count is tmp4 mod 64.  */
Packit Service 82fcde
#else
Packit Service 82fcde
	/* Little-endian.  Early bytes are at LSB.  */
Packit Service 82fcde
	lsr	tmp2, tmp2, tmp4	/* Shift count is tmp4 mod 64.  */
Packit Service 82fcde
#endif
Packit Service 82fcde
	/* Add the carry out of the low four bits to the Qword count.  */
	add	limit_wd, limit_wd, tmp3, lsr #4
Packit Service 82fcde
Packit Service 82fcde
	/* Prepare both candidates: the mask applied to data1 and the
	   mask applied to data2.  */
	orr	data1, data1, tmp2
Packit Service 82fcde
	orr	data2a, data2, tmp2
Packit Service 82fcde
Packit Service 82fcde
	/* tmp1 <= 8: the string starts in data1, so keep the masked
	   data1 and the untouched data2.
	   tmp1 >  8: the string starts in data2, so data1 becomes all
	   ones (~xzr) and data2 takes the masked copy.  */
	csinv	data1, data1, xzr, le
Packit Service 82fcde
	csel	data2, data2, data2a, le
Packit Service 82fcde
	b	L(realigned)
Packit Service 82fcde
Packit Service 82fcde
	/* Reached when limit is zero or when limit bytes were examined
	   without finding a NUL: the result is limit itself.  */
L(hit_limit):
Packit Service 82fcde
	mov	len, limit
Packit Service 82fcde
	RET
Packit Service 82fcde
END (__strnlen)
Packit Service 82fcde
/* Symbol bookkeeping for the routine above.  The three macros are
   defined outside this file.
   NOTE(review): presumably weak_alias publishes __strnlen under the
   name strnlen as a weak symbol and libc_hidden_def marks each name
   for library-internal binding -- confirm against the macro
   definitions.  */
libc_hidden_def (__strnlen)
Packit Service 82fcde
weak_alias (__strnlen, strnlen)
Packit Service 82fcde
libc_hidden_def (strnlen)