Blame sysdeps/x86_64/multiarch/strlen-avx2.S

Packit Service 82fcde
/* strlen/strnlen/wcslen/wcsnlen optimized with AVX2.
Packit Service 82fcde
   Copyright (C) 2017-2018 Free Software Foundation, Inc.
Packit Service 82fcde
   This file is part of the GNU C Library.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is free software; you can redistribute it and/or
Packit Service 82fcde
   modify it under the terms of the GNU Lesser General Public
Packit Service 82fcde
   License as published by the Free Software Foundation; either
Packit Service 82fcde
   version 2.1 of the License, or (at your option) any later version.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is distributed in the hope that it will be useful,
Packit Service 82fcde
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 82fcde
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service 82fcde
   Lesser General Public License for more details.
Packit Service 82fcde
Packit Service 82fcde
   You should have received a copy of the GNU Lesser General Public
Packit Service 82fcde
   License along with the GNU C Library; if not, see
Packit Service 82fcde
   <http://www.gnu.org/licenses/>.  */
Packit Service 82fcde
Packit Service 82fcde
#if IS_IN (libc)
Packit Service 82fcde
Packit Service 82fcde
# include <sysdep.h>
Packit Service 82fcde
Packit Service 82fcde
/* Name of the function emitted by this file.  It may be defined before
   this point to emit the same body under a different symbol.  */
# ifndef STRLEN
Packit Service 82fcde
#  define STRLEN	__strlen_avx2
Packit Service 82fcde
# endif
Packit Service 82fcde
Packit Service 82fcde
/* Element width used when searching for the terminator: 32-bit
   elements when USE_AS_WCSLEN is defined, single bytes otherwise.
   VPCMPEQ is the packed equality compare and VPMINU the packed
   unsigned minimum for that element width.  */
# ifdef USE_AS_WCSLEN
Packit Service 82fcde
#  define VPCMPEQ	vpcmpeqd
Packit Service 82fcde
#  define VPMINU	vpminud
Packit Service 82fcde
# else
Packit Service 82fcde
#  define VPCMPEQ	vpcmpeqb
Packit Service 82fcde
#  define VPMINU	vpminub
Packit Service 82fcde
# endif
Packit Service 82fcde
Packit Service 82fcde
/* Instruction issued before every return that follows YMM use.  It
   can be overridden by defining VZEROUPPER before this point.  */
# ifndef VZEROUPPER
Packit Service 82fcde
#  define VZEROUPPER	vzeroupper
Packit Service 82fcde
# endif
Packit Service 82fcde
Packit Service 82fcde
/* Size in bytes of one YMM vector.  */
# define VEC_SIZE 32
Packit Service 82fcde
Packit Service 82fcde
	.section .text.avx,"ax",@progbits
Packit Service 82fcde
/* STRLEN (s [, maxlen])

   In:   %rdi = s, the string to measure.
	 %rsi = maxlen, in elements (USE_AS_STRNLEN builds only).
   Out:  %rax = length in elements (bytes, or 32-bit wide characters
	 when USE_AS_WCSLEN is defined), capped at maxlen in
	 USE_AS_STRNLEN builds.

   Register roles for the whole routine:
	 %rdx  = original value of s, used to turn a match address
		 into a length.
	 %rdi  = current (aligned) read position.
	 %rsi  = remaining limit in bytes (USE_AS_STRNLEN only).
	 %r8   = limit in bytes, returned by L(max) (USE_AS_STRNLEN only).
	 %ymm0 = all-zero vector compared against the data.
	 %rax, %rcx and %ymm1-%ymm6 are scratch.  */
ENTRY (STRLEN)
Packit Service 82fcde
# ifdef USE_AS_STRNLEN
Packit Service 82fcde
#  ifdef __ILP32__
	/* On x32 size_t is 32 bits wide and the upper half of %rsi is
	   not guaranteed to be zero.  Zero-extend it before any 64-bit
	   use of the length.  */
	movl	%esi, %esi
#  endif
	/* Check for zero length.  */
Packit Service 82fcde
	testq	%rsi, %rsi
Packit Service 82fcde
	jz	L(zero)
Packit Service 82fcde
#  ifdef USE_AS_WCSLEN
Packit Service 82fcde
	/* Convert the limit from wide characters to bytes.  If
	   maxlen * 4 does not fit in 64 bits, saturate the byte limit
	   at the largest multiple of 4 instead of letting the shift
	   wrap to a small value.  A limit that large cannot be reached
	   before a terminator is found in valid memory, so the
	   saturated value is never expected to be returned by L(max).
	   LEA does not modify flags, so the ZF produced by SHR is
	   still valid for CMOVNZ.  */
	movq	%rsi, %rcx
	movq	$-4, %rax
	shrq	$62, %rcx
	leaq	(, %rsi, 4), %rsi
	cmovnz	%rax, %rsi
Packit Service 82fcde
#  endif
Packit Service 82fcde
	/* Keep the byte limit for the paths that return it.  */
	movq	%rsi, %r8
Packit Service 82fcde
# endif
Packit Service 82fcde
	movl	%edi, %ecx
Packit Service 82fcde
	/* Remember the start of the string.  */
	movq	%rdi, %rdx
Packit Service 82fcde
	/* %ymm0 = 0; VEX-encoded XMM write also clears the upper lanes.  */
	vpxor	%xmm0, %xmm0, %xmm0
Packit Service 82fcde
Packit Service 82fcde
	/* Check if we may cross page boundary with one vector load.  */
Packit Service 82fcde
	andl	$(2 * VEC_SIZE - 1), %ecx
Packit Service 82fcde
	cmpl	$VEC_SIZE, %ecx
Packit Service 82fcde
	ja	L(cros_page_boundary)
Packit Service 82fcde
Packit Service 82fcde
	/* Check the first VEC_SIZE bytes.  */
Packit Service 82fcde
	VPCMPEQ (%rdi), %ymm0, %ymm1
Packit Service 82fcde
	vpmovmskb %ymm1, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
Packit Service 82fcde
# ifdef USE_AS_STRNLEN
Packit Service 82fcde
	jnz	L(first_vec_x0_check)
Packit Service 82fcde
	/* Adjust length and check the end of data.  */
Packit Service 82fcde
	subq	$VEC_SIZE, %rsi
Packit Service 82fcde
	jbe	L(max)
Packit Service 82fcde
# else
Packit Service 82fcde
	jnz	L(first_vec_x0)
Packit Service 82fcde
# endif
Packit Service 82fcde
Packit Service 82fcde
	/* Align data for aligned loads in the loop.  */
Packit Service 82fcde
	addq	$VEC_SIZE, %rdi
Packit Service 82fcde
	/* %ecx = offset of the start within its VEC_SIZE block.  */
	andl	$(VEC_SIZE - 1), %ecx
Packit Service 82fcde
	andq	$-VEC_SIZE, %rdi
Packit Service 82fcde
Packit Service 82fcde
# ifdef USE_AS_STRNLEN
Packit Service 82fcde
	/* Adjust length.  */
Packit Service 82fcde
	/* Rounding %rdi down re-covers %rcx bytes that were already
	   checked, so give them back to the remaining count.  */
	addq	%rcx, %rsi
Packit Service 82fcde
Packit Service 82fcde
	subq	$(VEC_SIZE * 4), %rsi
Packit Service 82fcde
	jbe	L(last_4x_vec_or_less)
Packit Service 82fcde
# endif
Packit Service 82fcde
	jmp	L(more_4x_vec)
Packit Service 82fcde
Packit Service 82fcde
	.p2align 4
Packit Service 82fcde
/* Entered when the start offset within a 2 * VEC_SIZE window is above
   VEC_SIZE.  Instead of an unaligned load from the start, load the
   VEC_SIZE-aligned vector that contains the start and ignore the
   bytes in front of it.  */
L(cros_page_boundary):
Packit Service 82fcde
	/* %ecx = offset of the start inside its aligned vector.  */
	andl	$(VEC_SIZE - 1), %ecx
Packit Service 82fcde
	andq	$-VEC_SIZE, %rdi
Packit Service 82fcde
	VPCMPEQ (%rdi), %ymm0, %ymm1
Packit Service 82fcde
	vpmovmskb %ymm1, %eax
Packit Service 82fcde
	/* Remove the leading bytes.  */
Packit Service 82fcde
	/* The shift is arithmetic, so bit 31 is replicated into the
	   vacated high bits.  The copies sit above the shifted bit 31,
	   so the lowest set bit (the one tzcnt reports) is unaffected.  */
	sarl	%cl, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
	jz	L(aligned_more)
Packit Service 82fcde
	/* %eax = byte index of the terminator relative to the start.  */
	tzcntl	%eax, %eax
Packit Service 82fcde
# ifdef USE_AS_STRNLEN
Packit Service 82fcde
	/* Check the end of data.  */
Packit Service 82fcde
	cmpq	%rax, %rsi
Packit Service 82fcde
	jbe	L(max)
Packit Service 82fcde
# endif
Packit Service 82fcde
	/* Length = (aligned base + start offset + index) - start.  */
	addq	%rdi, %rax
Packit Service 82fcde
	addq	%rcx, %rax
Packit Service 82fcde
	subq	%rdx, %rax
Packit Service 82fcde
# ifdef USE_AS_WCSLEN
Packit Service 82fcde
	/* Convert the byte count to a count of 4-byte elements.  */
	shrq	$2, %rax
Packit Service 82fcde
# endif
Packit Service 82fcde
	VZEROUPPER
Packit Service 82fcde
	ret
Packit Service 82fcde
Packit Service 82fcde
	.p2align 4
Packit Service 82fcde
/* Reached from L(cros_page_boundary) when the aligned vector holding
   the start contains no terminator at or after the start.  %rdi is
   the aligned address of that vector and %rcx the start offset in it.
   Falls through into L(more_4x_vec).  */
L(aligned_more):
Packit Service 82fcde
# ifdef USE_AS_STRNLEN
Packit Service 82fcde
        /* "rcx" is less than VEC_SIZE.  Calculate "rsi + rcx - VEC_SIZE"
Packit Service 82fcde
	    with "rsi - (VEC_SIZE - rcx)" instead of "(rsi + rcx) - VEC_SIZE"
Packit Service 82fcde
	    to avoid possible addition overflow.  */
Packit Service 82fcde
	negq	%rcx
Packit Service 82fcde
	addq	$VEC_SIZE, %rcx
Packit Service 82fcde
Packit Service 82fcde
	/* Check the end of data.  */
Packit Service 82fcde
	/* %rcx is now the number of bytes of the string that the first
	   vector covered; take them off the remaining count.  */
	subq	%rcx, %rsi
Packit Service 82fcde
	jbe	L(max)
Packit Service 82fcde
# endif
Packit Service 82fcde
Packit Service 82fcde
	/* Advance to the next aligned vector.  */
	addq	$VEC_SIZE, %rdi
Packit Service 82fcde
Packit Service 82fcde
# ifdef USE_AS_STRNLEN
Packit Service 82fcde
	/* Pre-charge four vectors; if the limit ends within them use the
	   length-checked tail code instead.  */
	subq	$(VEC_SIZE * 4), %rsi
Packit Service 82fcde
	jbe	L(last_4x_vec_or_less)
Packit Service 82fcde
# endif
Packit Service 82fcde
Packit Service 82fcde
/* %rdi is VEC_SIZE-aligned.  In USE_AS_STRNLEN builds every path
   reaching this label has already subtracted 4 * VEC_SIZE from %rsi
   and found the result above zero, so the four vectors below are
   checked without comparing against the limit.  */
L(more_4x_vec):
Packit Service 82fcde
	/* Check the first 4 * VEC_SIZE.  Only one VEC_SIZE at a time
Packit Service 82fcde
	   since data is only aligned to VEC_SIZE.  */
Packit Service 82fcde
	VPCMPEQ (%rdi), %ymm0, %ymm1
Packit Service 82fcde
	vpmovmskb %ymm1, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
	jnz	L(first_vec_x0)
Packit Service 82fcde
Packit Service 82fcde
	VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
Packit Service 82fcde
	vpmovmskb %ymm1, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
	jnz	L(first_vec_x1)
Packit Service 82fcde
Packit Service 82fcde
	VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
Packit Service 82fcde
	vpmovmskb %ymm1, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
	jnz	L(first_vec_x2)
Packit Service 82fcde
Packit Service 82fcde
	VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
Packit Service 82fcde
	vpmovmskb %ymm1, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
	jnz	L(first_vec_x3)
Packit Service 82fcde
Packit Service 82fcde
	addq	$(VEC_SIZE * 4), %rdi
Packit Service 82fcde
Packit Service 82fcde
# ifdef USE_AS_STRNLEN
Packit Service 82fcde
	/* Pre-charge the next four vectors before entering the loop.  */
	subq	$(VEC_SIZE * 4), %rsi
Packit Service 82fcde
	jbe	L(last_4x_vec_or_less)
Packit Service 82fcde
# endif
Packit Service 82fcde
Packit Service 82fcde
	/* Align data to 4 * VEC_SIZE.  */
Packit Service 82fcde
	/* %rcx = number of bytes %rdi is moved back by the rounding.
	   Those bytes were checked above and hold no terminator, so
	   scanning them again in the loop cannot produce a match.  */
	movq	%rdi, %rcx
Packit Service 82fcde
	andl	$(4 * VEC_SIZE - 1), %ecx
Packit Service 82fcde
	andq	$-(4 * VEC_SIZE), %rdi
Packit Service 82fcde
Packit Service 82fcde
# ifdef USE_AS_STRNLEN
Packit Service 82fcde
	/* Adjust length.  */
Packit Service 82fcde
	addq	%rcx, %rsi
Packit Service 82fcde
# endif
Packit Service 82fcde
Packit Service 82fcde
	.p2align 4
Packit Service 82fcde
/* Main loop.  %rdi is aligned to 4 * VEC_SIZE, which is what makes the
   aligned vmovdqa loads below valid.  */
L(loop_4x_vec):
Packit Service 82fcde
	/* Compare 4 * VEC at a time forward.  */
Packit Service 82fcde
	vmovdqa (%rdi), %ymm1
Packit Service 82fcde
	vmovdqa	VEC_SIZE(%rdi), %ymm2
Packit Service 82fcde
	vmovdqa	(VEC_SIZE * 2)(%rdi), %ymm3
Packit Service 82fcde
	vmovdqa	(VEC_SIZE * 3)(%rdi), %ymm4
Packit Service 82fcde
	/* Fold the four vectors with an unsigned minimum: an element of
	   the result is zero exactly when that element is zero in at
	   least one of the inputs, so one compare covers all four.  */
	VPMINU	%ymm1, %ymm2, %ymm5
Packit Service 82fcde
	VPMINU	%ymm3, %ymm4, %ymm6
Packit Service 82fcde
	VPMINU	%ymm5, %ymm6, %ymm5
Packit Service 82fcde
Packit Service 82fcde
	VPCMPEQ	%ymm5, %ymm0, %ymm5
Packit Service 82fcde
	vpmovmskb %ymm5, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
	/* %ymm1-%ymm4 still hold the loaded data for L(4x_vec_end).  */
	jnz	L(4x_vec_end)
Packit Service 82fcde
Packit Service 82fcde
	addq	$(VEC_SIZE * 4), %rdi
Packit Service 82fcde
Packit Service 82fcde
# ifndef USE_AS_STRNLEN
Packit Service 82fcde
	jmp	L(loop_4x_vec)
Packit Service 82fcde
# else
Packit Service 82fcde
	/* Continue while more than zero bytes remain after charging the
	   next four vectors; otherwise fall into the tail code.  */
	subq	$(VEC_SIZE * 4), %rsi
Packit Service 82fcde
	ja	L(loop_4x_vec)
Packit Service 82fcde
Packit Service 82fcde
/* USE_AS_STRNLEN tail.  On entry %rsi is the remaining byte count
   minus 4 * VEC_SIZE, left by a "subq $(VEC_SIZE * 4), %rsi" whose
   result was zero or borrowed.  The 32-bit arithmetic with signed
   branches below works on the low half of that value.  */
L(last_4x_vec_or_less):
Packit Service 82fcde
	/* Less than 4 * VEC and aligned to VEC_SIZE.  */
Packit Service 82fcde
	/* %esi = remaining - 2 * VEC_SIZE; not positive means at most
	   two vectors are left.  */
	addl	$(VEC_SIZE * 2), %esi
Packit Service 82fcde
	jle	L(last_2x_vec)
Packit Service 82fcde
Packit Service 82fcde
	/* More than two vectors remain, so the first two are entirely
	   inside the limit and need no length check.  */
	VPCMPEQ (%rdi), %ymm0, %ymm1
Packit Service 82fcde
	vpmovmskb %ymm1, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
	jnz	L(first_vec_x0)
Packit Service 82fcde
Packit Service 82fcde
	VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
Packit Service 82fcde
	vpmovmskb %ymm1, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
	jnz	L(first_vec_x1)
Packit Service 82fcde
Packit Service 82fcde
	VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
Packit Service 82fcde
	vpmovmskb %ymm1, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
Packit Service 82fcde
	/* %esi counts the bytes left from the third vector onwards; the
	   _check variant compares the match index against it.  */
	jnz	L(first_vec_x2_check)
Packit Service 82fcde
	subl	$VEC_SIZE, %esi
Packit Service 82fcde
	jle	L(max)
Packit Service 82fcde
Packit Service 82fcde
	VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
Packit Service 82fcde
	vpmovmskb %ymm1, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
Packit Service 82fcde
	jnz	L(first_vec_x3_check)
Packit Service 82fcde
	/* No terminator within the limit: return the saved limit.  */
	movq	%r8, %rax
Packit Service 82fcde
#  ifdef USE_AS_WCSLEN
Packit Service 82fcde
	/* Convert the byte count to a count of 4-byte elements.  */
	shrq	$2, %rax
Packit Service 82fcde
#  endif
Packit Service 82fcde
	VZEROUPPER
Packit Service 82fcde
	ret
Packit Service 82fcde
Packit Service 82fcde
	.p2align 4
Packit Service 82fcde
/* USE_AS_STRNLEN tail for at most 2 * VEC_SIZE remaining bytes.
   Entered from L(last_4x_vec_or_less) with %esi holding the remaining
   count minus 2 * VEC_SIZE.  */
L(last_2x_vec):
Packit Service 82fcde
	/* Undo the bias: %esi = remaining bytes from %rdi.  */
	addl	$(VEC_SIZE * 2), %esi
Packit Service 82fcde
	VPCMPEQ (%rdi), %ymm0, %ymm1
Packit Service 82fcde
	vpmovmskb %ymm1, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
Packit Service 82fcde
	jnz	L(first_vec_x0_check)
Packit Service 82fcde
	/* %esi = bytes left from the second vector onwards.  */
	subl	$VEC_SIZE, %esi
Packit Service 82fcde
	jle	L(max)
Packit Service 82fcde
Packit Service 82fcde
	VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
Packit Service 82fcde
	vpmovmskb %ymm1, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
	jnz	L(first_vec_x1_check)
Packit Service 82fcde
	/* No terminator within the limit: return the saved limit.  */
	movq	%r8, %rax
Packit Service 82fcde
#  ifdef USE_AS_WCSLEN
Packit Service 82fcde
	/* Convert the byte count to a count of 4-byte elements.  */
	shrq	$2, %rax
Packit Service 82fcde
#  endif
Packit Service 82fcde
	VZEROUPPER
Packit Service 82fcde
	ret
Packit Service 82fcde
Packit Service 82fcde
	.p2align 4
Packit Service 82fcde
/* USE_AS_STRNLEN: a terminator was found in the vector at %rdi.
   %eax is its match mask and %rsi the number of bytes of that vector
   that are still inside the limit.  Returns the limit via L(max) when
   the match lies at or beyond it.  */
L(first_vec_x0_check):
Packit Service 82fcde
	/* %eax = byte index of the first match in the vector.  */
	tzcntl	%eax, %eax
Packit Service 82fcde
	/* Check the end of data.  */
Packit Service 82fcde
	cmpq	%rax, %rsi
Packit Service 82fcde
	jbe	L(max)
Packit Service 82fcde
	/* Length = address of the match - start of the string.  */
	addq	%rdi, %rax
Packit Service 82fcde
	subq	%rdx, %rax
Packit Service 82fcde
#  ifdef USE_AS_WCSLEN
Packit Service 82fcde
	/* Convert the byte count to a count of 4-byte elements.  */
	shrq	$2, %rax
Packit Service 82fcde
#  endif
Packit Service 82fcde
	VZEROUPPER
Packit Service 82fcde
	ret
Packit Service 82fcde
Packit Service 82fcde
	.p2align 4
Packit Service 82fcde
/* USE_AS_STRNLEN: a terminator was found in the vector at
   %rdi + VEC_SIZE.  %eax is its match mask and %rsi the number of
   bytes of that vector that are still inside the limit.  */
L(first_vec_x1_check):
Packit Service 82fcde
	/* %eax = byte index of the first match in the vector.  */
	tzcntl	%eax, %eax
Packit Service 82fcde
	/* Check the end of data.  */
Packit Service 82fcde
	cmpq	%rax, %rsi
Packit Service 82fcde
	jbe	L(max)
Packit Service 82fcde
	/* Length = (%rdi + VEC_SIZE + index) - start of the string.  */
	addq	$VEC_SIZE, %rax
Packit Service 82fcde
	addq	%rdi, %rax
Packit Service 82fcde
	subq	%rdx, %rax
Packit Service 82fcde
#  ifdef USE_AS_WCSLEN
Packit Service 82fcde
	/* Convert the byte count to a count of 4-byte elements.  */
	shrq	$2, %rax
Packit Service 82fcde
#  endif
Packit Service 82fcde
	VZEROUPPER
Packit Service 82fcde
	ret
Packit Service 82fcde
Packit Service 82fcde
	.p2align 4
Packit Service 82fcde
/* USE_AS_STRNLEN: a terminator was found in the vector at
   %rdi + 2 * VEC_SIZE.  %eax is its match mask and %rsi the number of
   bytes of that vector that are still inside the limit.  */
L(first_vec_x2_check):
Packit Service 82fcde
	/* %eax = byte index of the first match in the vector.  */
	tzcntl	%eax, %eax
Packit Service 82fcde
	/* Check the end of data.  */
Packit Service 82fcde
	cmpq	%rax, %rsi
Packit Service 82fcde
	jbe	L(max)
Packit Service 82fcde
	/* Length = (%rdi + 2 * VEC_SIZE + index) - start of the string.  */
	addq	$(VEC_SIZE * 2), %rax
Packit Service 82fcde
	addq	%rdi, %rax
Packit Service 82fcde
	subq	%rdx, %rax
Packit Service 82fcde
#  ifdef USE_AS_WCSLEN
Packit Service 82fcde
	/* Convert the byte count to a count of 4-byte elements.  */
	shrq	$2, %rax
Packit Service 82fcde
#  endif
Packit Service 82fcde
	VZEROUPPER
Packit Service 82fcde
	ret
Packit Service 82fcde
Packit Service 82fcde
	.p2align 4
Packit Service 82fcde
/* USE_AS_STRNLEN: a terminator was found in the vector at
   %rdi + 3 * VEC_SIZE.  %eax is its match mask and %rsi the number of
   bytes of that vector that are still inside the limit.  */
L(first_vec_x3_check):
Packit Service 82fcde
	/* %eax = byte index of the first match in the vector.  */
	tzcntl	%eax, %eax
Packit Service 82fcde
	/* Check the end of data.  */
Packit Service 82fcde
	cmpq	%rax, %rsi
Packit Service 82fcde
	jbe	L(max)
Packit Service 82fcde
	/* Length = (%rdi + 3 * VEC_SIZE + index) - start of the string.  */
	addq	$(VEC_SIZE * 3), %rax
Packit Service 82fcde
	addq	%rdi, %rax
Packit Service 82fcde
	subq	%rdx, %rax
Packit Service 82fcde
#  ifdef USE_AS_WCSLEN
Packit Service 82fcde
	/* Convert the byte count to a count of 4-byte elements.  */
	shrq	$2, %rax
Packit Service 82fcde
#  endif
Packit Service 82fcde
	VZEROUPPER
Packit Service 82fcde
	ret
Packit Service 82fcde
Packit Service 82fcde
	.p2align 4
Packit Service 82fcde
/* USE_AS_STRNLEN: no terminator lies within the limit.  Return the
   limit that was saved in %r8 (in bytes) at function entry.  */
L(max):
Packit Service 82fcde
	movq	%r8, %rax
Packit Service 82fcde
#  ifdef USE_AS_WCSLEN
Packit Service 82fcde
	/* Convert the byte count to a count of 4-byte elements.  */
	shrq	$2, %rax
Packit Service 82fcde
#  endif
Packit Service 82fcde
	VZEROUPPER
Packit Service 82fcde
	ret
Packit Service 82fcde
Packit Service 82fcde
	.p2align 4
Packit Service 82fcde
/* USE_AS_STRNLEN: the limit is zero, so return 0.  This label is only
   branched to from the entry check, before any vector register has
   been written, which is why no VZEROUPPER is issued here.  */
L(zero):
Packit Service 82fcde
	xorl	%eax, %eax
Packit Service 82fcde
	ret
Packit Service 82fcde
# endif
Packit Service 82fcde
Packit Service 82fcde
	.p2align 4
Packit Service 82fcde
/* A terminator was found in the vector at %rdi; %eax is its non-zero
   match mask.  No limit check is made here.  */
L(first_vec_x0):
Packit Service 82fcde
	/* %eax = byte index of the first match in the vector.  */
	tzcntl	%eax, %eax
Packit Service 82fcde
	/* Length = address of the match - start of the string.  */
	addq	%rdi, %rax
Packit Service 82fcde
	subq	%rdx, %rax
Packit Service 82fcde
# ifdef USE_AS_WCSLEN
Packit Service 82fcde
	/* Convert the byte count to a count of 4-byte elements.  */
	shrq	$2, %rax
Packit Service 82fcde
# endif
Packit Service 82fcde
	VZEROUPPER
Packit Service 82fcde
	ret
Packit Service 82fcde
Packit Service 82fcde
	.p2align 4
Packit Service 82fcde
/* A terminator was found in the vector at %rdi + VEC_SIZE; %eax is
   its non-zero match mask.  No limit check is made here.  */
L(first_vec_x1):
Packit Service 82fcde
	/* %eax = byte index of the first match in the vector.  */
	tzcntl	%eax, %eax
Packit Service 82fcde
	/* Length = (%rdi + VEC_SIZE + index) - start of the string.  */
	addq	$VEC_SIZE, %rax
Packit Service 82fcde
	addq	%rdi, %rax
Packit Service 82fcde
	subq	%rdx, %rax
Packit Service 82fcde
# ifdef USE_AS_WCSLEN
Packit Service 82fcde
	/* Convert the byte count to a count of 4-byte elements.  */
	shrq	$2, %rax
Packit Service 82fcde
# endif
Packit Service 82fcde
	VZEROUPPER
Packit Service 82fcde
	ret
Packit Service 82fcde
Packit Service 82fcde
	.p2align 4
Packit Service 82fcde
/* A terminator was found in the vector at %rdi + 2 * VEC_SIZE; %eax
   is its non-zero match mask.  No limit check is made here.  */
L(first_vec_x2):
Packit Service 82fcde
	/* %eax = byte index of the first match in the vector.  */
	tzcntl	%eax, %eax
Packit Service 82fcde
	/* Length = (%rdi + 2 * VEC_SIZE + index) - start of the string.  */
	addq	$(VEC_SIZE * 2), %rax
Packit Service 82fcde
	addq	%rdi, %rax
Packit Service 82fcde
	subq	%rdx, %rax
Packit Service 82fcde
# ifdef USE_AS_WCSLEN
Packit Service 82fcde
	/* Convert the byte count to a count of 4-byte elements.  */
	shrq	$2, %rax
Packit Service 82fcde
# endif
Packit Service 82fcde
	VZEROUPPER
Packit Service 82fcde
	ret
Packit Service 82fcde
Packit Service 82fcde
	.p2align 4
Packit Service 82fcde
/* The main loop found a zero element somewhere in the four vectors
   still held in %ymm1-%ymm4 (loaded from %rdi onwards).  Test them in
   address order to find which one holds the first terminator.  */
L(4x_vec_end):
Packit Service 82fcde
	VPCMPEQ	%ymm1, %ymm0, %ymm1
Packit Service 82fcde
	vpmovmskb %ymm1, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
	jnz	L(first_vec_x0)
Packit Service 82fcde
	VPCMPEQ %ymm2, %ymm0, %ymm2
Packit Service 82fcde
	vpmovmskb %ymm2, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
	jnz	L(first_vec_x1)
Packit Service 82fcde
	VPCMPEQ %ymm3, %ymm0, %ymm3
Packit Service 82fcde
	vpmovmskb %ymm3, %eax
Packit Service 82fcde
	testl	%eax, %eax
Packit Service 82fcde
	jnz	L(first_vec_x2)
Packit Service 82fcde
	/* The first three vectors are clean, so the zero element must
	   be in the fourth; its mask needs no test before falling
	   through.  */
	VPCMPEQ %ymm4, %ymm0, %ymm4
Packit Service 82fcde
	vpmovmskb %ymm4, %eax
Packit Service 82fcde
/* Also a branch target: a terminator was found in the vector at
   %rdi + 3 * VEC_SIZE and %eax is its non-zero match mask.  */
L(first_vec_x3):
Packit Service 82fcde
	/* %eax = byte index of the first match in the vector.  */
	tzcntl	%eax, %eax
Packit Service 82fcde
	/* Length = (%rdi + 3 * VEC_SIZE + index) - start of the string.  */
	addq	$(VEC_SIZE * 3), %rax
Packit Service 82fcde
	addq	%rdi, %rax
Packit Service 82fcde
	subq	%rdx, %rax
Packit Service 82fcde
# ifdef USE_AS_WCSLEN
Packit Service 82fcde
	/* Convert the byte count to a count of 4-byte elements.  */
	shrq	$2, %rax
Packit Service 82fcde
# endif
Packit Service 82fcde
	VZEROUPPER
Packit Service 82fcde
	ret
Packit Service 82fcde
Packit Service 82fcde
END (STRLEN)
Packit Service 82fcde
#endif