/* strrchr/wcsrchr optimized with AVX2.
   Copyright (C) 2017-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#if IS_IN (libc)
# include <sysdep.h>

/* Entry-point name.  Only defined here when the including file has not
   already chosen one.  */
# ifndef STRRCHR
#  define STRRCHR	__strrchr_avx2
# endif

/* Select the element width: 32-bit elements when built as wcsrchr
   (USE_AS_WCSRCHR defined), single bytes otherwise.  */
# ifdef USE_AS_WCSRCHR
#  define VPBROADCAST	vpbroadcastd
#  define VPCMPEQ	vpcmpeqd
# else
#  define VPBROADCAST	vpbroadcastb
#  define VPCMPEQ	vpcmpeqb
# endif

/* Instruction issued in front of every `ret' in this file; may be
   overridden by the including file.  */
# ifndef VZEROUPPER
#  define VZEROUPPER	vzeroupper
# endif

/* Size in bytes of one YMM vector.  */
# define VEC_SIZE	32

	.section .text.avx,"ax",@progbits
/* Return a pointer to the last occurrence of CHAR in the nul-terminated
   string, or a null pointer when CHAR does not occur before the
   terminator.  The terminator itself is matched when CHAR is nul.
   Elements are 32 bits wide when USE_AS_WCSRCHR is defined, bytes
   otherwise.

   In:	  %rdi = string, %esi = CHAR.
   Out:	  %rax = address of the last match, or 0.
   Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, %ymm0-%ymm4, flags.
   Uses no stack and makes no calls.

   Register roles once the main loop is reached:
     %ymm0  all zeroes; compared against the data to find the nul CHAR.
     %ymm4  CHAR broadcast to every element.
     %rdi   address one vector past the data being examined.
     %edx   match mask of the most recent vector that contained CHAR
	    and no nul CHAR; zero when no match has been seen yet.
     %rsi   value %rdi had when %edx was recorded, so the remembered
	    match lives at %rsi - VEC_SIZE + bsr (%edx).  */
ENTRY (STRRCHR)
	movd	%esi, %xmm4
	movl	%edi, %ecx
	/* Broadcast CHAR to YMM4.  */
	VPBROADCAST %xmm4, %ymm4
	vpxor	%ymm0, %ymm0, %ymm0

	/* Check if we may cross page boundary with one vector load.  The
	   test is conservative: it looks at the offset inside a
	   2 * VEC_SIZE window and takes the slow path whenever an
	   unaligned VEC_SIZE load could run past the end of that
	   window.  */
	andl	$(2 * VEC_SIZE - 1), %ecx
	cmpl	$VEC_SIZE, %ecx
	ja	L(cros_page_boundary)

	/* Unaligned load of the first VEC_SIZE bytes of the string.
	   %ecx = nul mask, %eax = CHAR mask.  */
	vmovdqu	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %ecx
	vpmovmskb %ymm3, %eax
	addq	$VEC_SIZE, %rdi

	testl	%eax, %eax
	jnz	L(first_vec)

	/* No CHAR; if the string ends here there is nothing to return.  */
	testl	%ecx, %ecx
	jnz	L(return_null)

	/* Round down so the loop can use aligned loads (it may re-examine
	   some bytes of the first vector), and record that no match has
	   been seen yet.  */
	andq	$-VEC_SIZE, %rdi
	xorl	%edx, %edx
	jmp	L(aligned_loop)

	.p2align 4
L(first_vec):
	/* Check if there is a nul CHAR.  */
	testl	%ecx, %ecx
	jnz	L(char_and_nul_in_first_vec)

	/* Remember the match and keep searching.  %rsi keeps the
	   unaligned end address that the mask in %edx is relative to;
	   only %rdi is rounded down for the aligned loop.  */
	movl	%eax, %edx
	movq	%rdi, %rsi
	andq	$-VEC_SIZE, %rdi
	jmp	L(aligned_loop)

	.p2align 4
L(cros_page_boundary):
	/* Load the aligned vector that contains the start of the string
	   and shift both masks right by the misalignment (%cl) so that
	   bit 0 corresponds to the first CHAR of the string and bytes in
	   front of it are discarded.  %edx = nul mask, %eax = CHAR mask.  */
	andl	$(VEC_SIZE - 1), %ecx
	andq	$-VEC_SIZE, %rdi
	vmovdqa	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %edx
	vpmovmskb %ymm3, %eax
	shrl	%cl, %edx
	shrl	%cl, %eax
	addq	$VEC_SIZE, %rdi

	/* Check if there is a CHAR.  */
	testl	%eax, %eax
	jnz	L(found_char)

	testl	%edx, %edx
	jnz	L(return_null)

	/* %edx is known to be zero here, which is exactly the "no match
	   remembered" state the loop expects.  */
	jmp	L(aligned_loop)

	.p2align 4
L(found_char):
	testl	%edx, %edx
	jnz	L(char_and_nul)

	/* Remember the match and keep searching.  The mask was shifted by
	   the misalignment, so add it (%rcx) to the recorded address to
	   compensate.  */
	movl	%eax, %edx
	leaq	(%rdi, %rcx), %rsi

	.p2align 4
L(aligned_loop):
	/* Four aligned vectors per iteration.  For each one the CHAR mask
	   is OR-ed into the nul mask just to learn whether the vector
	   holds anything of interest; on exit %eax is still the CHAR mask
	   but %ecx is no longer the pure nul mask.  */
	vmovdqa	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	addq	$VEC_SIZE, %rdi
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %ecx
	vpmovmskb %ymm3, %eax
	orl	%eax, %ecx
	jnz	L(char_nor_null)

	vmovdqa	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	addq	$VEC_SIZE, %rdi
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %ecx
	vpmovmskb %ymm3, %eax
	orl	%eax, %ecx
	jnz	L(char_nor_null)

	vmovdqa	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	addq	$VEC_SIZE, %rdi
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %ecx
	vpmovmskb %ymm3, %eax
	orl	%eax, %ecx
	jnz	L(char_nor_null)

	vmovdqa	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	addq	$VEC_SIZE, %rdi
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %ecx
	vpmovmskb %ymm3, %eax
	orl	%eax, %ecx
	jz	L(aligned_loop)

	.p2align 4
L(char_nor_null):
	/* Find a CHAR or a nul CHAR in a loop.  */
	testl	%eax, %eax
	jnz	L(match)
L(return_value):
	/* The string ended without a usable CHAR in the current vector:
	   fall back to the remembered match, if there is one.  */
	testl	%edx, %edx
	jz	L(return_null)
	movl	%edx, %eax
	movq	%rsi, %rdi

# ifdef USE_AS_WCSRCHR
	/* Keep the first bit for each matching CHAR for bsr.  */
	andl	$0x11111111, %eax
# endif
	/* The highest set bit is the last match; %rdi points one vector
	   past the data the mask describes.  */
	bsrl	%eax, %eax
	leaq	-VEC_SIZE(%rdi, %rax), %rax
	VZEROUPPER
	ret

	.p2align 4
L(match):
	/* Find a CHAR.  Check if there is a nul CHAR.  The nul mask is
	   re-read from %ymm2 because the loop OR-ed the CHAR mask into
	   %ecx.  */
	vpmovmskb %ymm2, %ecx
	testl	%ecx, %ecx
	jnz	L(find_nul)

	/* Remember the match and keep searching.  */
	movl	%eax, %edx
	movq	%rdi, %rsi
	jmp	L(aligned_loop)

	.p2align 4
L(find_nul):
# ifdef USE_AS_WCSRCHR
	/* Keep the first bit for each matching CHAR for bsr.  */
	andl	$0x11111111, %ecx
	andl	$0x11111111, %eax
# endif
	/* Mask out any matching bits after the nul CHAR.
	   (%ecx - 1) ^ %ecx has every bit set up to and including the
	   lowest set bit of %ecx, i.e. up to the first nul CHAR.  */
	movl	%ecx, %r8d
	subl	$1, %r8d
	xorl	%ecx, %r8d
	andl	%r8d, %eax
	testl	%eax, %eax
	/* If there is no CHAR here, return the remembered one.  */
	jz	L(return_value)
	bsrl	%eax, %eax
	leaq	-VEC_SIZE(%rdi, %rax), %rax
	VZEROUPPER
	ret

	.p2align 4
L(char_and_nul):
	/* Find both a CHAR and a nul CHAR.  Reached from the cross-page
	   path only: add the misalignment (%rcx) to %rdi to match the
	   shifted masks, then move the nul mask from %edx into %ecx, where
	   the shared code below expects it.  */
	addq	%rcx, %rdi
	movl	%edx, %ecx
L(char_and_nul_in_first_vec):
# ifdef USE_AS_WCSRCHR
	/* Keep the first bit for each matching CHAR for bsr.  */
	andl	$0x11111111, %ecx
	andl	$0x11111111, %eax
# endif
	/* Mask out any matching bits after the nul CHAR.
	   (%ecx - 1) ^ %ecx has every bit set up to and including the
	   lowest set bit of %ecx, i.e. up to the first nul CHAR.  */
	movl	%ecx, %r8d
	subl	$1, %r8d
	xorl	%ecx, %r8d
	andl	%r8d, %eax
	testl	%eax, %eax
	/* Return null pointer if the nul CHAR comes first.  */
	jz	L(return_null)
	bsrl	%eax, %eax
	leaq	-VEC_SIZE(%rdi, %rax), %rax
	VZEROUPPER
	ret

	.p2align 4
L(return_null):
	xorl	%eax, %eax
	VZEROUPPER
	ret

END (STRRCHR)
#endif