Blame sysdeps/x86_64/strchr.S

Packit 6c4009
/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
   For AMD x86-64.
   Copyright (C) 2009-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <sysdep.h>
Packit 6c4009
Packit 6c4009
	.text
Packit 6c4009
/* strchr (str, ch): %rdi = str, %esi = ch (only the low byte, %sil, is
   compared).  Returns in %rax the address of the first byte of str that
   equals ch, or 0 if the terminating NUL byte is reached first.  If ch
   itself is 0 the address of the terminating NUL is returned.  When
   AS_STRCHRNUL is defined the final "is it really ch?" check is left
   out, so the address of the terminating NUL is returned instead of 0.

   Registers written: %rax, %rcx, %rdx, %rdi, %r8, %r9, %xmm0-%xmm6 and
   the flags.  The stack is not used.

   Throughout, a "mask" has one bit per input byte, set when that byte
   equals ch or is NUL.  */
ENTRY (strchr)
Packit 6c4009
	/* Two things are interleaved up to the jg below:
	   - the low byte of ch is broadcast into all 16 bytes of %xmm1
	     (movd, punpcklbw, punpcklwd, pshufd $0);
	   - %eax = str & 4095 is compared with 4032 = 4096 - 64.  A larger
	     offset means the 64 bytes read starting at str would extend
	     past a 4096-byte boundary, so that case goes to
	     L(cross_page).  */
	movd	%esi, %xmm1
Packit 6c4009
	movl	%edi, %eax
Packit 6c4009
	andl	$4095, %eax
Packit 6c4009
	punpcklbw %xmm1, %xmm1
Packit 6c4009
	cmpl	$4032, %eax
Packit 6c4009
	punpcklwd %xmm1, %xmm1
Packit 6c4009
	pshufd	$0, %xmm1, %xmm1
Packit 6c4009
	jg	L(cross_page)
Packit 6c4009
	/* Test bytes 0..15 using an unaligned load.  %xmm3 is zeroed here
	   and serves as the NUL pattern until the last chunk of
	   L(next_48_bytes).  %eax receives the mask for these 16 bytes.  */
	movdqu	(%rdi), %xmm0
Packit 6c4009
	pxor	%xmm3, %xmm3
Packit 6c4009
	movdqa	%xmm0, %xmm4
Packit 6c4009
	pcmpeqb	%xmm1, %xmm0
Packit 6c4009
	pcmpeqb	%xmm3, %xmm4
Packit 6c4009
	por	%xmm4, %xmm0
Packit 6c4009
	pmovmskb %xmm0, %eax
Packit 6c4009
	test	%eax, %eax
Packit 6c4009
	je	L(next_48_bytes)
Packit 6c4009
	/* Lowest set bit = offset of the first byte that is ch or NUL.  */
	bsf	%eax, %eax
Packit 6c4009
#ifdef AS_STRCHRNUL
Packit 6c4009
	leaq	(%rdi,%rax), %rax
Packit 6c4009
#else
Packit 6c4009
	/* %rax = address of that byte.  If the byte is not ch (so the hit
	   was the terminating NUL), cmovne replaces the result with the
	   0 held in %rdx.  */
	movl	$0, %edx
Packit 6c4009
	leaq	(%rdi,%rax), %rax
Packit 6c4009
	cmpb	%sil, (%rax)
Packit 6c4009
	cmovne	%rdx, %rax
Packit 6c4009
#endif
Packit 6c4009
	ret
Packit 6c4009
Packit 6c4009
	.p2align 3
Packit 6c4009
	L(next_48_bytes):
Packit 6c4009
	/* Bytes 0..15 contained neither ch nor NUL.  Test bytes 16..63 in
	   three unaligned 16-byte chunks and assemble a 64-bit mask in
	   %rax in which bit i stands for byte str + i: the chunk at offset
	   16 goes to bits 16..31, offset 32 to bits 32..47, offset 48 to
	   bits 48..63.  Bits 0..15 stay clear.  */
	movdqu	16(%rdi), %xmm0
Packit 6c4009
	movdqa	%xmm0, %xmm4
Packit 6c4009
	pcmpeqb	%xmm1, %xmm0
Packit 6c4009
	pcmpeqb	%xmm3, %xmm4
Packit 6c4009
	por	%xmm4, %xmm0
Packit 6c4009
	pmovmskb %xmm0, %ecx
Packit 6c4009
	movdqu	32(%rdi), %xmm0
Packit 6c4009
	movdqa	%xmm0, %xmm4
Packit 6c4009
	pcmpeqb	%xmm1, %xmm0
Packit 6c4009
	salq	$16, %rcx
Packit 6c4009
	pcmpeqb	%xmm3, %xmm4
Packit 6c4009
	por	%xmm4, %xmm0
Packit 6c4009
	pmovmskb %xmm0, %eax
Packit 6c4009
	movdqu	48(%rdi), %xmm0
Packit 6c4009
	/* Last chunk: the zero in %xmm3 is used up as the destination of
	   the NUL compare, so no copy into %xmm4 is needed.  */
	pcmpeqb	%xmm0, %xmm3
Packit 6c4009
	salq	$32, %rax
Packit 6c4009
	pcmpeqb	%xmm1, %xmm0
Packit 6c4009
	orq	%rcx, %rax
Packit 6c4009
	por	%xmm3, %xmm0
Packit 6c4009
	pmovmskb %xmm0, %ecx
Packit 6c4009
	salq	$48, %rcx
Packit 6c4009
	orq	%rcx, %rax
Packit 6c4009
	testq	%rax, %rax
Packit 6c4009
	jne	L(return)
Packit 6c4009
L(loop_start):
Packit 6c4009
	/* We use this alignment to force the loop to be aligned to 8 but
	   not 16 bytes.  This gives better scheduling on AMD processors.
	   NOTE(review): this relies on the two instructions between the
	   .p2align 4 and the .p2align 3 below occupying 8 bytes in total;
	   confirm against the assembled output.  */
	.p2align 4
Packit 6c4009
	/* %xmm6 = 0 is the NUL pattern for the loop.  %rdi is rounded down
	   to a multiple of 64; the loop adds 64 before loading, so the
	   first block it examines is the aligned 64-byte block after the
	   one containing the incoming %rdi.  Every byte before that block
	   was already tested by the code that reaches this label.  */
	pxor	%xmm6, %xmm6
Packit 6c4009
	andq	$-64, %rdi
Packit 6c4009
	.p2align 3
Packit 6c4009
L(loop64):
Packit 6c4009
	/* Main loop, 64 aligned bytes per iteration.  For each 16-byte
	   chunk x the code forms pminub (x ^ ch, x): a result byte is 0
	   exactly when the input byte equals ch or is NUL.  The four
	   results are folded into %xmm5 with pminub so that a single
	   pcmpeqb/pmovmskb pair tests the whole block.  The aligned
	   movdqa loads and memory operands are valid because %rdi is a
	   multiple of 64 here.  */
	addq	$64, %rdi
Packit 6c4009
	movdqa	(%rdi), %xmm5
Packit 6c4009
	movdqa	16(%rdi), %xmm2
Packit 6c4009
	movdqa	32(%rdi), %xmm3
Packit 6c4009
	pxor	%xmm1, %xmm5
Packit 6c4009
	movdqa	48(%rdi), %xmm4
Packit 6c4009
	pxor	%xmm1, %xmm2
Packit 6c4009
	pxor	%xmm1, %xmm3
Packit 6c4009
	pminub	(%rdi), %xmm5
Packit 6c4009
	pxor	%xmm1, %xmm4
Packit 6c4009
	pminub	16(%rdi), %xmm2
Packit 6c4009
	pminub	32(%rdi), %xmm3
Packit 6c4009
	pminub	%xmm2, %xmm5
Packit 6c4009
	pminub	48(%rdi), %xmm4
Packit 6c4009
	pminub	%xmm3, %xmm5
Packit 6c4009
	pminub	%xmm4, %xmm5
Packit 6c4009
	pcmpeqb %xmm6, %xmm5
Packit 6c4009
	pmovmskb %xmm5, %eax
Packit 6c4009
Packit 6c4009
	testl	%eax, %eax
Packit 6c4009
	je	L(loop64)
Packit 6c4009
Packit 6c4009
	/* This block contains ch or NUL.  %xmm2..%xmm4 still hold the
	   per-chunk minima for offsets 16, 32 and 48, so comparing them
	   with zero yields their masks.  %xmm5 was overwritten by the
	   folded result, so the chunk at offset 0 is reloaded and tested
	   with explicit compares against ch (%xmm1) and zero (%xmm6).  */
	movdqa	(%rdi), %xmm5
Packit 6c4009
	movdqa	%xmm5, %xmm0
Packit 6c4009
	pcmpeqb	%xmm1, %xmm5
Packit 6c4009
	pcmpeqb	%xmm6, %xmm0
Packit 6c4009
	por	%xmm0, %xmm5
Packit 6c4009
	pcmpeqb %xmm6, %xmm2
Packit 6c4009
	pcmpeqb %xmm6, %xmm3
Packit 6c4009
	pcmpeqb %xmm6, %xmm4
Packit 6c4009
Packit 6c4009
	/* Merge the four 16-bit masks into %rax (chunk at offset 16*k in
	   bits 16*k .. 16*k+15) and fall through to L(return).  */
	pmovmskb %xmm5, %ecx
Packit 6c4009
	pmovmskb %xmm2, %eax
Packit 6c4009
	salq	$16, %rax
Packit 6c4009
	pmovmskb %xmm3, %r8d
Packit 6c4009
	pmovmskb %xmm4, %edx
Packit 6c4009
	salq	$32, %r8
Packit 6c4009
	orq	%r8, %rax
Packit 6c4009
	orq	%rcx, %rax
Packit 6c4009
	salq	$48, %rdx
Packit 6c4009
	orq	%rdx, %rax
Packit 6c4009
	.p2align 3
Packit 6c4009
L(return):
Packit 6c4009
	/* Common exit.  On entry %rax is a nonzero 64-bit mask whose bit i
	   stands for byte %rdi + i; bsfq picks the first such byte.  */
	bsfq	%rax, %rax
Packit 6c4009
#ifdef AS_STRCHRNUL
Packit 6c4009
	leaq	(%rdi,%rax), %rax
Packit 6c4009
#else
Packit 6c4009
	/* Same tail as the 16-byte fast path: return 0 unless the byte
	   found is ch.  */
	movl	$0, %edx
Packit 6c4009
	leaq	(%rdi,%rax), %rax
Packit 6c4009
	cmpb	%sil, (%rax)
Packit 6c4009
	cmovne	%rdx, %rax
Packit 6c4009
#endif
Packit 6c4009
	ret
Packit 6c4009
	.p2align 4
Packit 6c4009
Packit 6c4009
L(cross_page):
Packit 6c4009
	/* The page offset of str is above 4032.  Rather than reading 64
	   bytes from str, read the 64-byte aligned block that contains
	   str (%rdx = str & -64) with aligned loads; an aligned 64-byte
	   block cannot straddle a 4096-byte boundary.  The 64-bit mask
	   for the whole block is built in %rax in the same layout as in
	   L(next_48_bytes), with %xmm2 as the NUL pattern.  */
	movq	%rdi, %rdx
Packit 6c4009
	pxor	%xmm2, %xmm2
Packit 6c4009
	andq	$-64, %rdx
Packit 6c4009
	/* %xmm0 = copy of the broadcast ch; it becomes the destination of
	   the ch compare for the last chunk.  */
	movdqa	%xmm1, %xmm0
Packit 6c4009
	movdqa	(%rdx), %xmm3
Packit 6c4009
	movdqa	%xmm3, %xmm4
Packit 6c4009
	pcmpeqb	%xmm1, %xmm3
Packit 6c4009
	pcmpeqb	%xmm2, %xmm4
Packit 6c4009
	por	%xmm4, %xmm3
Packit 6c4009
	pmovmskb %xmm3, %r8d
Packit 6c4009
	movdqa	16(%rdx), %xmm3
Packit 6c4009
	movdqa	%xmm3, %xmm4
Packit 6c4009
	pcmpeqb	%xmm1, %xmm3
Packit 6c4009
	pcmpeqb	%xmm2, %xmm4
Packit 6c4009
	por	%xmm4, %xmm3
Packit 6c4009
	pmovmskb %xmm3, %eax
Packit 6c4009
	movdqa	32(%rdx), %xmm3
Packit 6c4009
	movdqa	%xmm3, %xmm4
Packit 6c4009
	pcmpeqb	%xmm1, %xmm3
Packit 6c4009
	salq	$16, %rax
Packit 6c4009
	pcmpeqb	%xmm2, %xmm4
Packit 6c4009
	por	%xmm4, %xmm3
Packit 6c4009
	pmovmskb %xmm3, %r9d
Packit 6c4009
	movdqa	48(%rdx), %xmm3
Packit 6c4009
	pcmpeqb	%xmm3, %xmm2
Packit 6c4009
	salq	$32, %r9
Packit 6c4009
	pcmpeqb	%xmm3, %xmm0
Packit 6c4009
	orq	%r9, %rax
Packit 6c4009
	orq	%r8, %rax
Packit 6c4009
	por	%xmm2, %xmm0
Packit 6c4009
	pmovmskb %xmm0, %ecx
Packit 6c4009
	salq	$48, %rcx
Packit 6c4009
	orq	%rcx, %rax
Packit 6c4009
	/* %cl = str - %rdx, which is less than 64.  Shifting the mask right
	   by that amount drops the bits for bytes located before str, so
	   bit i once more stands for byte str + i, as L(return)
	   expects.  */
	movl	%edi, %ecx
Packit 6c4009
	subb	%dl, %cl
Packit 6c4009
	shrq	%cl, %rax
Packit 6c4009
	testq	%rax, %rax
Packit 6c4009
	jne	L(return)
Packit 6c4009
	/* Nothing found up to the end of this aligned block: continue with
	   the aligned 64-byte loop, which starts at the next block.  */
	jmp	L(loop_start)
Packit 6c4009
Packit 6c4009
END (strchr)
Packit 6c4009
Packit 6c4009
/* Only when this file is assembled as strchr itself (AS_STRCHRNUL not
   defined): make `index' available as a weak alias of strchr and emit
   the hidden-builtin definition for strchr.  Both macros are defined
   outside this file -- presumably by the glibc symbol-handling headers
   pulled in through <sysdep.h>; confirm there for their exact
   effect.  */
#ifndef AS_STRCHRNUL
Packit 6c4009
weak_alias (strchr, index)
Packit 6c4009
libc_hidden_builtin_def (strchr)
Packit 6c4009
#endif