Blame sysdeps/x86_64/multiarch/strchr-avx2.S

/* strchr/strchrnul optimized with AVX2.
   Copyright (C) 2017-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#if IS_IN (libc)
Packit 6c4009
Packit 6c4009
# include <sysdep.h>
Packit 6c4009
Packit 6c4009
/* Name of the entry point.  __strchr_avx2 is used unless STRCHR has
   already been defined before this point.  */
# ifndef STRCHR
#  define STRCHR	__strchr_avx2
# endif

/* Element width.  With USE_AS_WCSCHR the search works on 32-bit
   elements (dword broadcast/compare, CHAR taken from %esi); otherwise
   it works on bytes (byte broadcast/compare, CHAR taken from %sil).  */
# ifdef USE_AS_WCSCHR
#  define VPBROADCAST	vpbroadcastd
#  define VPCMPEQ	vpcmpeqd
#  define CHAR_REG	esi
# else
#  define VPBROADCAST	vpbroadcastb
#  define VPCMPEQ	vpcmpeqb
#  define CHAR_REG	sil
# endif

/* Instruction issued before every return; plain vzeroupper unless
   VZEROUPPER has already been defined.  */
# ifndef VZEROUPPER
#  define VZEROUPPER	vzeroupper
# endif

/* Bytes covered by one YMM load.  */
# define VEC_SIZE 32
Packit 6c4009
Packit 6c4009
	.section .text.avx,"ax",@progbits
/* STRCHR (S in %rdi, CHAR in %esi) -> %rax

   Scan the null-terminated string S for the first element equal to
   CHAR, VEC_SIZE bytes at a time.  Every vector is compared against
   both CHAR and zero, so the scan stops at whichever comes first.

   Result:
     USE_AS_STRCHRNUL defined:  address of the first CHAR or of the
				terminating null.
     otherwise:			address of the first CHAR, or NULL when
				the element that stopped the scan is not
				CHAR (i.e. the terminator was reached
				first).

   With USE_AS_WCSCHR the compares are 32 bits wide.  vpmovmskb then
   sets four mask bits per matching element, so tzcnt still yields the
   byte offset of the first matching element.

   Register roles:
     %ymm0	CHAR broadcast to every element.
     %ymm9	all zeros, used to detect the terminator.
     %ymm1-%ymm8 scratch (loaded data and compare results).
     %eax	match mask, then the result.
     %ecx	misalignment of S; only read in L(cross_page_boundary).
     %rdx	zero, selected as the NULL result.
   Only caller-saved registers are written and the stack is untouched.  */
ENTRY (STRCHR)
	movl	%edi, %ecx
	/* Broadcast CHAR to YMM0 and clear YMM9.  */
	vmovd	%esi, %xmm0
	vpxor	%xmm9, %xmm9, %xmm9
	VPBROADCAST %xmm0, %ymm0
	/* Conservative boundary check: if S lies more than VEC_SIZE bytes
	   into its 2 * VEC_SIZE-aligned block, an unaligned VEC_SIZE load
	   would run past the end of that block (and therefore possibly
	   past a page boundary), so take the aligned-load path instead.  */
	andl	$(2 * VEC_SIZE - 1), %ecx
	cmpl	$VEC_SIZE, %ecx
	ja	L(cross_page_boundary)

	/* Check the first VEC_SIZE bytes.  Search for both CHAR and the
	   null byte.  */
	vmovdqu	(%rdi), %ymm8
	VPCMPEQ %ymm8, %ymm0, %ymm1
	VPCMPEQ %ymm8, %ymm9, %ymm2
	vpor	%ymm1, %ymm2, %ymm1
	vpmovmskb %ymm1, %eax
	testl	%eax, %eax
	jnz	L(first_vec_x0)

	/* Move to the next VEC_SIZE-aligned address so the loads below
	   can be aligned.  Bytes between that address and S + VEC_SIZE
	   are scanned a second time; they are known to hold no match.  */
	addq	$VEC_SIZE, %rdi
	andq	$-VEC_SIZE, %rdi

	jmp	L(more_4x_vec)

	.p2align 4
L(cross_page_boundary):
	/* %ecx = offset of S inside its VEC_SIZE-aligned vector;
	   %rdi = start of that vector.  */
	andl	$(VEC_SIZE - 1), %ecx
	andq	$-VEC_SIZE, %rdi
	vmovdqu	(%rdi), %ymm8
	VPCMPEQ %ymm8, %ymm0, %ymm1
	VPCMPEQ %ymm8, %ymm9, %ymm2
	vpor	%ymm1, %ymm2, %ymm1
	vpmovmskb %ymm1, %eax
	/* Remove the leading bytes, which precede S.  The shift is
	   arithmetic, but any sign-fill bits land above the shifted copy
	   of bit 31, so neither the zero test nor the lowest set bit is
	   affected.  */
	sarl	%cl, %eax
	testl	%eax, %eax
	jz	L(aligned_more)
	/* Found CHAR or the null byte.  Add back the bytes shifted out so
	   the offset is relative to the aligned %rdi.  */
	tzcntl	%eax, %eax
	addq	%rcx, %rax
# ifdef USE_AS_STRCHRNUL
	addq	%rdi, %rax
# else
	/* Return NULL unless the element found is CHAR itself.  */
	xorl	%edx, %edx
	leaq	(%rdi, %rax), %rax
	cmp	(%rax), %CHAR_REG
	cmovne	%rdx, %rax
# endif
	VZEROUPPER
	ret

	.p2align 4
L(aligned_more):
	addq	$VEC_SIZE, %rdi

L(more_4x_vec):
	/* Check the first 4 * VEC_SIZE.  Only one VEC_SIZE at a time
	   since data is only aligned to VEC_SIZE.  */
	vmovdqa	(%rdi), %ymm8
	VPCMPEQ %ymm8, %ymm0, %ymm1
	VPCMPEQ %ymm8, %ymm9, %ymm2
	vpor	%ymm1, %ymm2, %ymm1
	vpmovmskb %ymm1, %eax
	testl	%eax, %eax
	jnz	L(first_vec_x0)

	vmovdqa	VEC_SIZE(%rdi), %ymm8
	VPCMPEQ %ymm8, %ymm0, %ymm1
	VPCMPEQ %ymm8, %ymm9, %ymm2
	vpor	%ymm1, %ymm2, %ymm1
	vpmovmskb %ymm1, %eax
	testl	%eax, %eax
	jnz	L(first_vec_x1)

	vmovdqa	(VEC_SIZE * 2)(%rdi), %ymm8
	VPCMPEQ %ymm8, %ymm0, %ymm1
	VPCMPEQ %ymm8, %ymm9, %ymm2
	vpor	%ymm1, %ymm2, %ymm1
	vpmovmskb %ymm1, %eax
	testl	%eax, %eax
	jnz	L(first_vec_x2)

	vmovdqa	(VEC_SIZE * 3)(%rdi), %ymm8
	VPCMPEQ %ymm8, %ymm0, %ymm1
	VPCMPEQ %ymm8, %ymm9, %ymm2
	vpor	%ymm1, %ymm2, %ymm1
	vpmovmskb %ymm1, %eax
	testl	%eax, %eax
	jnz	L(first_vec_x3)

	/* Step past the four vectors just checked, then round down to a
	   4 * VEC_SIZE boundary for the main loop.  Rounding down may
	   revisit up to three of those vectors; they hold no match, so
	   the loop simply passes over them.  */
	addq	$(VEC_SIZE * 4), %rdi
	andq	$-(4 * VEC_SIZE), %rdi

	.p2align 4
L(loop_4x_vec):
	/* Compare 4 * VEC at a time forward.  */
	vmovdqa	(%rdi), %ymm5
	vmovdqa	VEC_SIZE(%rdi), %ymm6
	vmovdqa	(VEC_SIZE * 2)(%rdi), %ymm7
	vmovdqa	(VEC_SIZE * 3)(%rdi), %ymm8

	/* YMM1-YMM4: elements equal to CHAR in vectors 0-3.  */
	VPCMPEQ %ymm5, %ymm0, %ymm1
	VPCMPEQ %ymm6, %ymm0, %ymm2
	VPCMPEQ %ymm7, %ymm0, %ymm3
	VPCMPEQ %ymm8, %ymm0, %ymm4

	/* YMM5-YMM8: elements equal to zero in vectors 0-3.  */
	VPCMPEQ %ymm5, %ymm9, %ymm5
	VPCMPEQ %ymm6, %ymm9, %ymm6
	VPCMPEQ %ymm7, %ymm9, %ymm7
	VPCMPEQ %ymm8, %ymm9, %ymm8

	/* YMM1-YMM4: CHAR-or-null per vector; kept for L(4x_vec_end).  */
	vpor	%ymm1, %ymm5, %ymm1
	vpor	%ymm2, %ymm6, %ymm2
	vpor	%ymm3, %ymm7, %ymm3
	vpor	%ymm4, %ymm8, %ymm4

	/* Fold the four results so one mask test covers all of them.  */
	vpor	%ymm1, %ymm2, %ymm5
	vpor	%ymm3, %ymm4, %ymm6

	vpor	%ymm5, %ymm6, %ymm5

	vpmovmskb %ymm5, %eax
	testl	%eax, %eax
	jnz	L(4x_vec_end)

	addq	$(VEC_SIZE * 4), %rdi

	jmp	L(loop_4x_vec)

	.p2align 4
L(first_vec_x0):
	/* Found CHAR or the null byte in the vector at %rdi.  */
	tzcntl	%eax, %eax
# ifdef USE_AS_STRCHRNUL
	addq	%rdi, %rax
# else
	/* Return NULL unless the element found is CHAR itself.  */
	xorl	%edx, %edx
	leaq	(%rdi, %rax), %rax
	cmp	(%rax), %CHAR_REG
	cmovne	%rdx, %rax
# endif
	VZEROUPPER
	ret

	.p2align 4
L(first_vec_x1):
	/* Found CHAR or the null byte in the vector at %rdi + VEC_SIZE.  */
	tzcntl	%eax, %eax
# ifdef USE_AS_STRCHRNUL
	addq	$VEC_SIZE, %rax
	addq	%rdi, %rax
# else
	xorl	%edx, %edx
	leaq	VEC_SIZE(%rdi, %rax), %rax
	cmp	(%rax), %CHAR_REG
	cmovne	%rdx, %rax
# endif
	VZEROUPPER
	ret

	.p2align 4
L(first_vec_x2):
	/* Found CHAR or the null byte in the vector at
	   %rdi + 2 * VEC_SIZE.  */
	tzcntl	%eax, %eax
# ifdef USE_AS_STRCHRNUL
	addq	$(VEC_SIZE * 2), %rax
	addq	%rdi, %rax
# else
	xorl	%edx, %edx
	leaq	(VEC_SIZE * 2)(%rdi, %rax), %rax
	cmp	(%rax), %CHAR_REG
	cmovne	%rdx, %rax
# endif
	VZEROUPPER
	ret

	.p2align 4
L(4x_vec_end):
	/* The combined mask was non-zero; find the first of the four
	   vectors that contributed to it.  */
	vpmovmskb %ymm1, %eax
	testl	%eax, %eax
	jnz	L(first_vec_x0)
	vpmovmskb %ymm2, %eax
	testl	%eax, %eax
	jnz	L(first_vec_x1)
	vpmovmskb %ymm3, %eax
	testl	%eax, %eax
	jnz	L(first_vec_x2)
	/* Vectors 0-2 are clear, so the hit is in vector 3: no test is
	   needed before falling through.  */
	vpmovmskb %ymm4, %eax
L(first_vec_x3):
	/* Found CHAR or the null byte in the vector at
	   %rdi + 3 * VEC_SIZE.  */
	tzcntl	%eax, %eax
# ifdef USE_AS_STRCHRNUL
	addq	$(VEC_SIZE * 3), %rax
	addq	%rdi, %rax
# else
	xorl	%edx, %edx
	leaq	(VEC_SIZE * 3)(%rdi, %rax), %rax
	cmp	(%rax), %CHAR_REG
	cmovne	%rdx, %rax
# endif
	VZEROUPPER
	ret

END (STRCHR)
Packit 6c4009
#endif