/* memcmp - compare memory

   Copyright (C) 2013-2018 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

/* Parameters and result.  src1 and src2 are the two buffers being
   compared and limit is the byte count; memcmp updates all three in
   place as the comparison proceeds.  result is the 32-bit view of the
   register that also holds src1, so it is only written once src1 is no
   longer needed.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables.  data1 and data2 hold the words most recently
   loaded from src1 and src2.  The "w" names are the 32-bit views of the
   same registers and the "h" names receive the second word of a 16-byte
   pair load.  tmp1 is scratch for the alignment fix-up; tmp2 is not
   referenced by the memcmp code in this file.  */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
#define tmp1		x7
#define tmp2		x8

/* memcmp: compare the first LIMIT bytes of the buffers at SRC1 and SRC2.

   In:   src1, src2 = buffer addresses, limit = byte count.
   Out:  result = 0 when all bytes match; otherwise its sign gives the
	 order of the first differing byte pair, compared as unsigned
	 bytes (-1 or 1 from L(return), the byte difference from
	 L(byte_loop)).
   Uses data1, data1h, data2, data2h and tmp1 as scratch and modifies
   src1, src2, limit and the condition flags.

   Strategy by length:
     0..7     optional 4-byte compare, then a byte loop (L(less8)).
     8..16    first 8 bytes, then the last 8 bytes (may overlap).
     17..32   first 16 bytes, then the last 16 bytes (may overlap).
     33..128  16 bytes per iteration, then the last 16 bytes.
     > 128    as above, with src1 rounded down to a 16-byte boundary
	      before the loop.

   ENTRY_ALIGN, DELOUSE, L, END, weak_alias and libc_hidden_builtin_def
   are macros defined outside this file (presumably via sysdep.h); their
   expansion is not visible here.  */
ENTRY_ALIGN (memcmp, 6)
	DELOUSE (0)
	DELOUSE (1)
	DELOUSE (2)

	/* Fewer than 8 bytes: limit becomes [-8..-1] for L(less8).  */
	subs	limit, limit, 8
	b.lo	L(less8)

	/* Compare the first 8 bytes.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* limit = length - 16.  The condition is a signed one; it agrees
	   with the unsigned test for any length below 2^63.  */
	subs	limit, limit, 8
	b.gt	L(more16)

	/* 8..16 bytes: limit is in [-8..0], so this reloads the final 8
	   bytes of each buffer, overlapping bytes already compared.  */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
	/* Compare bytes 8..15.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings.  */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes.
	   limit is length - 32 here, hence the comparison against 96.  */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 and adjust src2 with bytes not yet done.  tmp1 is at
	   most 15 and both pointers are already 16 bytes past their start,
	   so stepping back only revisits bytes that compared equal.  */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Loop performing 16 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if <= 16 bytes left to do or if the data is not equal.
	   The ccmp chain leaves EQ only when limit is still above zero and
	   both word pairs match; any other case forces NZCV to 0 (NE).  */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	ccmp	data1, data2, 0, hi
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

	/* Work out why the loop ended.  L(return) compares data1 with
	   data2, so the high words are moved there before testing them.  */
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	b.ne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  limit is zero or
	   negative here, so adding it moves both pointers to 16 bytes
	   before the end of their buffers.  */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/* Compare data bytes and set return value to 0, -1 or 1.  On
	   little-endian the words are byte-reversed first so that the byte
	   at the lowest address becomes the most significant one and an
	   unsigned compare yields the memcmp ordering.  */
L(return):
#ifndef __AARCH64EB__
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
	/* Also entered from L(less4) with Z set, which yields 0.  */
L(ret_eq):
	cset	result, ne
	cneg	result, result, lo
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
L(less8):
	/* Carry clear after the add means fewer than 4 bytes to compare.  */
	adds	limit, limit, 4
	b.lo	L(less4)
	/* The 32-bit loads clear the upper halves of data1 and data2, so
	   the 64-bit compare at L(return) still orders them correctly.  */
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	/* Rebias limit to [-4..-1] for the add at L(less4).  */
	sub	limit, limit, 4
L(less4):
	/* limit becomes the number of bytes left (0..3).  */
	adds	limit, limit, 4
	b.eq	L(ret_eq)
	/* Continue while bytes remain and the last pair matched.  When the
	   count reaches zero the ccmp forces NE, ending the loop with the
	   last pair still in data1w and data2w.  */
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.eq	L(byte_loop)
	sub	result, data1w, data2w
	ret

END (memcmp)
/* bcmp is exported as an alias of memcmp.  */
#undef bcmp
weak_alias (memcmp, bcmp)
libc_hidden_builtin_def (memcmp)