Blame sysdeps/i386/i686/multiarch/wcsrchr-sse2.S

Packit 6c4009
/* wcsrchr with SSE2, without using bsf instructions.
   Copyright (C) 2011-2018 Free Software Foundation, Inc.
   Contributed by Intel Corporation.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#if IS_IN (libc)
Packit 6c4009
# include <sysdep.h>
Packit 6c4009
/* Unwind-info helpers for 4-byte pushes and pops.  The cfi_* macros are
   not defined in this file (presumably they come from <sysdep.h> and
   wrap the assembler's .cfi_* directives -- confirm there).  */

/* Annotation only, no instruction: the CFA offset grew by 4 and REG is
   now saved at offset 0 from the stack pointer.  */
# define CFI_PUSH(REG)	\
Packit 6c4009
	cfi_adjust_cfa_offset (4);	\
Packit 6c4009
	cfi_rel_offset (REG, 0)
Packit 6c4009
Packit 6c4009
/* Annotation only, no instruction: the CFA offset shrank by 4 and REG
   holds its original value again.  */
# define CFI_POP(REG)	\
Packit 6c4009
	cfi_adjust_cfa_offset (-4);	\
Packit 6c4009
	cfi_restore (REG)
Packit 6c4009
Packit 6c4009
/* Real push/pop of REG followed by the matching annotation.  */
# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
Packit 6c4009
# define POP(REG)	popl REG; CFI_POP (REG)
Packit 6c4009
Packit 6c4009
/* Offset from %esp to the first stack argument once ENTRANCE has run:
   4 bytes of return address plus the 4 bytes of %edi that ENTRANCE
   pushes.  */
# define PARMS	8
Packit 6c4009
/* Function prologue: save %edi, which the body uses as scan pointer.  */
# define ENTRANCE	PUSH (%edi);
Packit 6c4009
/* Function epilogue: restore %edi and return.  The CFI_PUSH after the
   ret emits no instruction; it puts the unwind state back to "%edi is
   pushed" for whatever code follows this RETURN in the file.  */
# define RETURN	POP (%edi); ret; CFI_PUSH (%edi);
Packit 6c4009
/* Stack offsets of the two arguments: STR1 is loaded as the string
   pointer and STR2 as the 32-bit value to search for.  */
# define STR1	PARMS
Packit 6c4009
# define STR2	STR1+4
Packit 6c4009
Packit 6c4009
	atom_text_section
Packit 6c4009
/* __wcsrchr_sse2: find the last occurrence of a 32-bit wide character
   in a zero-terminated wide string.

   In:   STR1(%esp) = string pointer, STR2(%esp) = value to search for
	 (both read from the stack after ENTRANCE).
   Out:  %eax = address of the last matching element, or 0 if none.

   Register roles used throughout the function:
     %edi   scan pointer; always 16 bytes past the block just examined.
     %xmm1  search value replicated into all four 32-bit lanes.
     %xmm2  zero while no terminator has been seen; after a compare it
	    holds the "element == 0" result for the current block.
     %eax   pmovmskb mask of matching bytes in the current block.
     %ecx   pmovmskb mask of zero bytes (in the main loop: zero | match).
     %edx   match mask of the most recent block that contained a match
	    (0 means no match saved yet).
     %esi   the %edi value that belongs to the mask saved in %edx.
   pmovmskb yields one bit per byte, so each 4-byte element occupies
   four consecutive mask bits.  %esi is pushed only on the paths that
   enter L(loop).  */
ENTRY (__wcsrchr_sse2)
Packit 6c4009
Packit 6c4009
	ENTRANCE
Packit 6c4009
	mov	STR1(%esp), %ecx
Packit 6c4009
	movd	STR2(%esp), %xmm1
Packit 6c4009
Packit 6c4009
	mov	%ecx, %edi
Packit 6c4009
/* Two punpckldq steps turn [c,0,0,0] into [c,c,0,0] and then
   [c,c,c,c]; the pxor in between clears %xmm2 for the zero compare.  */
	punpckldq %xmm1, %xmm1
Packit 6c4009
	pxor	%xmm2, %xmm2
Packit 6c4009
	punpckldq %xmm1, %xmm1
Packit 6c4009
Packit 6c4009
/* ECX = offset of the string start within its 64-byte-aligned region.
   Above 48, a 16-byte load from the start would extend past that
   64-byte boundary, so the aligned-load path is taken instead.  */
Packit 6c4009
	and	$63, %ecx
Packit 6c4009
	cmp	$48, %ecx
Packit 6c4009
	ja	L(crosscache)
Packit 6c4009
Packit 6c4009
/* Probe the first 16 bytes with an unaligned load.  */
Packit 6c4009
	movdqu	(%edi), %xmm0
Packit 6c4009
	pcmpeqd	%xmm0, %xmm2
Packit 6c4009
	pcmpeqd	%xmm1, %xmm0
Packit 6c4009
/* Find where NULL is.  */
Packit 6c4009
	pmovmskb %xmm2, %ecx
Packit 6c4009
/* Check if there is a match.  */
Packit 6c4009
	pmovmskb %xmm0, %eax
Packit 6c4009
	add	$16, %edi
Packit 6c4009
Packit 6c4009
	test	%eax, %eax
Packit 6c4009
	jnz	L(unaligned_match1)
Packit 6c4009
Packit 6c4009
/* No match in the first 16 bytes; if the terminator is there too the
   answer is NULL.  */
	test	%ecx, %ecx
Packit 6c4009
	jnz	L(return_null)
Packit 6c4009
Packit 6c4009
/* Round the scan pointer down to a 16-byte boundary so the loop can
   use aligned loads; this may re-examine bytes already probed.  */
	and	$-16, %edi
Packit 6c4009
Packit 6c4009
	PUSH	(%esi)
Packit 6c4009
Packit 6c4009
/* EDX = 0: no match saved so far.  */
	xor	%edx, %edx
Packit 6c4009
	jmp	L(loop)
Packit 6c4009
Packit 6c4009
/* Annotation only: the code that follows is entered without %esi
   pushed.  */
	CFI_POP	(%esi)
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* The unaligned first probe found at least one match (EAX != 0).
   ECX is the zero mask of the same 16 bytes, EDI already points 16
   bytes past the start of the string.  */
L(unaligned_match1):
Packit 6c4009
/* Terminator in the same 16 bytes: resolve match vs. terminator
   positions without entering the loop.  */
	test	%ecx, %ecx
Packit 6c4009
	jnz	L(prolog_find_zero_1)
Packit 6c4009
Packit 6c4009
	PUSH	(%esi)
Packit 6c4009
Packit 6c4009
/* Save current match: mask in EDX, matching EDI in ESI.  EDI is then
   rounded down to a 16-byte boundary for the aligned loads in the
   loop.  */
Packit 6c4009
	mov	%eax, %edx
Packit 6c4009
	mov	%edi, %esi
Packit 6c4009
	and	$-16, %edi
Packit 6c4009
	jmp	L(loop)
Packit 6c4009
Packit 6c4009
/* Annotation only: the code that follows is entered without %esi
   pushed.  */
	CFI_POP	(%esi)
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* First probe when the string starts more than 48 bytes into its
   64-byte region: load the enclosing aligned 16-byte block and discard
   the mask bits that belong to bytes before the string.  */
L(crosscache):
Packit 6c4009
/* Handle unaligned string.  ECX = offset of the string start inside
   the 16-byte block, EDI = address of that block.  */
Packit 6c4009
	and	$15, %ecx
Packit 6c4009
	and	$-16, %edi
Packit 6c4009
/* %xmm3 takes the zero-compare result here, so %xmm2 stays all zero
   for the loop.  */
	pxor	%xmm3, %xmm3
Packit 6c4009
	movdqa	(%edi), %xmm0
Packit 6c4009
	pcmpeqd	%xmm0, %xmm3
Packit 6c4009
	pcmpeqd	%xmm1, %xmm0
Packit 6c4009
/* Find where NULL is.  */
Packit 6c4009
	pmovmskb %xmm3, %edx
Packit 6c4009
/* Check if there is a match.  */
Packit 6c4009
	pmovmskb %xmm0, %eax
Packit 6c4009
/* Remove the leading bytes: after the shifts, bit 0 of each mask
   corresponds to the first byte of the string.  */
Packit 6c4009
	shr	%cl, %edx
Packit 6c4009
	shr	%cl, %eax
Packit 6c4009
	add	$16, %edi
Packit 6c4009
Packit 6c4009
	test	%eax, %eax
Packit 6c4009
	jnz	L(unaligned_match)
Packit 6c4009
Packit 6c4009
/* No match; a terminator in this block means the result is NULL.  */
	test	%edx, %edx
Packit 6c4009
	jnz	L(return_null)
Packit 6c4009
Packit 6c4009
	PUSH	(%esi)
Packit 6c4009
Packit 6c4009
/* EDX = 0: no match saved so far.  EDI is already the next aligned
   block.  */
	xor	%edx, %edx
Packit 6c4009
	jmp	L(loop)
Packit 6c4009
Packit 6c4009
/* Annotation only: the code that follows is entered without %esi
   pushed.  */
	CFI_POP	(%esi)
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* The aligned first probe from L(crosscache) found a match.  EAX and
   EDX are the match and zero masks already shifted right by ECX (the
   start offset inside the block); EDI is the block address plus 16.  */
L(unaligned_match):
Packit 6c4009
/* Terminator in the same block: resolve it without entering the
   loop.  */
	test	%edx, %edx
Packit 6c4009
	jnz	L(prolog_find_zero)
Packit 6c4009
Packit 6c4009
	PUSH	(%esi)
Packit 6c4009
Packit 6c4009
/* Save the match.  ESI is biased by ECX because the saved mask was
   shifted by ECX: (ESI - 16 + bit index) is then the byte address.
   There is no jump after this; execution continues with the code that
   follows.  */
	mov	%eax, %edx
Packit 6c4009
	lea	(%edi, %ecx), %esi
Packit 6c4009
Packit 6c4009
/* Loop start on aligned string.  Four identical steps per iteration,
   each handling one aligned 16-byte block:
     - load the block (into %xmm0/%xmm3/%xmm4/%xmm5 in turn),
     - compare it against %xmm2, which is still all zero because no
       earlier step saw a terminator, leaving the zero mask in %xmm2,
     - advance EDI by 16,
     - compare the block against the search value in %xmm1,
     - ECX = zero mask | match mask, EAX = match mask.
   A step leaves for L(matches) as soon as ECX is non-zero.  */
Packit 6c4009
	.p2align 4
Packit 6c4009
L(loop):
Packit 6c4009
	movdqa	(%edi), %xmm0
Packit 6c4009
	pcmpeqd	%xmm0, %xmm2
Packit 6c4009
	add	$16, %edi
Packit 6c4009
	pcmpeqd	%xmm1, %xmm0
Packit 6c4009
	pmovmskb %xmm2, %ecx
Packit 6c4009
	pmovmskb %xmm0, %eax
Packit 6c4009
	or	%eax, %ecx
Packit 6c4009
	jnz	L(matches)
Packit 6c4009
Packit 6c4009
/* Second block.  */
	movdqa	(%edi), %xmm3
Packit 6c4009
	pcmpeqd	%xmm3, %xmm2
Packit 6c4009
	add	$16, %edi
Packit 6c4009
	pcmpeqd	%xmm1, %xmm3
Packit 6c4009
	pmovmskb %xmm2, %ecx
Packit 6c4009
	pmovmskb %xmm3, %eax
Packit 6c4009
	or	%eax, %ecx
Packit 6c4009
	jnz	L(matches)
Packit 6c4009
Packit 6c4009
/* Third block.  */
	movdqa	(%edi), %xmm4
Packit 6c4009
	pcmpeqd	%xmm4, %xmm2
Packit 6c4009
	add	$16, %edi
Packit 6c4009
	pcmpeqd	%xmm1, %xmm4
Packit 6c4009
	pmovmskb %xmm2, %ecx
Packit 6c4009
	pmovmskb %xmm4, %eax
Packit 6c4009
	or	%eax, %ecx
Packit 6c4009
	jnz	L(matches)
Packit 6c4009
Packit 6c4009
/* Fourth block.  Nothing found here either: start the next
   iteration.  Otherwise continue with the code that follows.  */
	movdqa	(%edi), %xmm5
Packit 6c4009
	pcmpeqd	%xmm5, %xmm2
Packit 6c4009
	add	$16, %edi
Packit 6c4009
	pcmpeqd	%xmm1, %xmm5
Packit 6c4009
	pmovmskb %xmm2, %ecx
Packit 6c4009
	pmovmskb %xmm5, %eax
Packit 6c4009
	or	%eax, %ecx
Packit 6c4009
	jz	L(loop)
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* A loop step saw a match, a terminator, or both in the block that
   ends at EDI.  EAX = match mask.  */
L(matches):
Packit 6c4009
	test	%eax, %eax
Packit 6c4009
	jnz	L(match)
Packit 6c4009
/* No usable match in the final block: answer from the saved match
   (EDX mask, ESI pointer), or NULL when EDX is 0.  */
L(return_value):
Packit 6c4009
	test	%edx, %edx
Packit 6c4009
	jz	L(return_null_1)
Packit 6c4009
	mov	%edx, %eax
Packit 6c4009
	mov	%esi, %edi
Packit 6c4009
Packit 6c4009
	POP	(%esi)
Packit 6c4009
Packit 6c4009
/* Pick the highest matching element of the saved block.  Mask bits
   8-15 (AH) are the third and fourth element, bits 4-7 the second,
   bits 0-3 the first, which lives at EDI - 16.  */
	test	%ah, %ah
Packit 6c4009
	jnz	L(match_third_or_fourth_wchar)
Packit 6c4009
	test	$15 << 4, %al
Packit 6c4009
	jnz	L(match_second_wchar)
Packit 6c4009
	lea	-16(%edi), %eax
Packit 6c4009
	RETURN
Packit 6c4009
Packit 6c4009
/* Annotation only: the code that follows is entered with %esi still
   pushed.  */
	CFI_PUSH	(%esi)
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* Return NULL from a path that has %esi pushed (reached from
   L(return_value) when no match was saved).  */
L(return_null_1):
Packit 6c4009
	POP	(%esi)
Packit 6c4009
Packit 6c4009
	xor	%eax, %eax
Packit 6c4009
	RETURN
Packit 6c4009
Packit 6c4009
/* Annotation only: the code that follows is entered with %esi still
   pushed.  */
	CFI_PUSH	(%esi)
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* The current block contains at least one match (EAX != 0).  */
L(match):
Packit 6c4009
/* ECX was OR-ed with the match mask in the loop, so fetch the pure
   zero mask from %xmm2 again.  */
	pmovmskb %xmm2, %ecx
Packit 6c4009
	test	%ecx, %ecx
Packit 6c4009
	jnz	L(find_zero)
Packit 6c4009
/* No terminator in this block: save match info (mask in EDX, block
   end pointer in ESI), replacing any older saved match, and keep
   scanning.  */
Packit 6c4009
	mov	%eax, %edx
Packit 6c4009
	mov	%edi, %esi
Packit 6c4009
	jmp	L(loop)
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* The final block (ending at EDI) holds both a match (EAX) and the
   terminator (ECX).  Locate the first zero element so that matches
   lying behind it can be discarded.  */
L(find_zero):
Packit 6c4009
/* CL == 0: no zero in the first two elements.  */
	test	%cl, %cl
Packit 6c4009
	jz	L(find_zero_in_third_or_fourth_wchar)
Packit 6c4009
/* Low nibble of CL == 0: the first element is not zero, so the
   second one is.  */
	test	$15, %cl
Packit 6c4009
	jz	L(find_zero_in_second_wchar)
Packit 6c4009
/* The first element is the terminator.  Only a match at that very
   position (mask bit 0) still counts; otherwise fall back to the
   saved match.  */
	and	$1, %eax
Packit 6c4009
	jz	L(return_value)
Packit 6c4009
Packit 6c4009
	POP	(%esi)
Packit 6c4009
Packit 6c4009
/* The first element of the block is at EDI - 16.  */
	lea	-16(%edi), %eax
Packit 6c4009
	RETURN
Packit 6c4009
Packit 6c4009
/* Annotation only: the code that follows is entered with %esi still
   pushed.  */
	CFI_PUSH	(%esi)
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* The terminator is the second element of the final block.  */
L(find_zero_in_second_wchar):
Packit 6c4009
/* GNU as gives << higher precedence than -, so the immediate is
   (1 << 5) - 1 = 0x1f: keep the match bits of the first element
   (bits 0-3) plus bit 4, the lowest bit of the second element.  No
   bits left means no valid match here, so use the saved one.  */
	and	$1 << 5 - 1, %eax
Packit 6c4009
	jz	L(return_value)
Packit 6c4009
Packit 6c4009
	POP	(%esi)
Packit 6c4009
Packit 6c4009
/* Bit 4 set: the second element itself matches; otherwise the match
   is the first element at EDI - 16.  */
	test	$15 << 4, %al
Packit 6c4009
	jnz	L(match_second_wchar)
Packit 6c4009
	lea	-16(%edi), %eax
Packit 6c4009
	RETURN
Packit 6c4009
Packit 6c4009
/* Annotation only: the code that follows is entered with %esi still
   pushed.  */
	CFI_PUSH	(%esi)
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* The first two elements of the final block are non-zero; the
   terminator is the third or the fourth element.  */
L(find_zero_in_third_or_fourth_wchar):
Packit 6c4009
/* Low nibble of CH == 0: the third element is not zero.  */
	test	$15, %ch
Packit 6c4009
	jz	L(find_zero_in_fourth_wchar)
Packit 6c4009
/* Terminator is the third element.  GNU as gives << higher precedence
   than -, so the immediate is (1 << 9) - 1 = 0x1ff: keep the match
   bits of elements one and two plus bit 8, the lowest bit of the third
   element.  No bits left: use the saved match.  */
	and	$1 << 9 - 1, %eax
Packit 6c4009
	jz	L(return_value)
Packit 6c4009
Packit 6c4009
	POP	(%esi)
Packit 6c4009
Packit 6c4009
/* Return the highest remaining match: third, second, or first
   element (the latter at EDI - 16).  */
	test	%ah, %ah
Packit 6c4009
	jnz	L(match_third_wchar)
Packit 6c4009
	test	$15 << 4, %al
Packit 6c4009
	jnz	L(match_second_wchar)
Packit 6c4009
	lea	-16(%edi), %eax
Packit 6c4009
	RETURN
Packit 6c4009
Packit 6c4009
/* Annotation only: the code that follows is entered with %esi still
   pushed.  */
	CFI_PUSH	(%esi)
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* The terminator is the fourth (last) element of the final block, so
   every bit of the match mask in EAX refers to an element at or before
   the terminator and all of them are valid.  Entered with %esi pushed;
   EAX is known to be non-zero on this path.  */
L(find_zero_in_fourth_wchar):
Packit 6c4009
Packit 6c4009
	POP	(%esi)
Packit 6c4009
Packit 6c4009
/* Return the highest matching element: AH covers the third and fourth
   element, bits 4-7 of AL the second, otherwise it is the first
   element at EDI - 16.  */
	test	%ah, %ah
Packit 6c4009
	jnz	L(match_third_or_fourth_wchar)
Packit 6c4009
	test	$15 << 4, %al
Packit 6c4009
	jnz	L(match_second_wchar)
Packit 6c4009
	lea	-16(%edi), %eax
Packit 6c4009
	RETURN
/* No CFI_PUSH (%esi) after this RETURN: every label from here to END
   is entered with only %edi on the stack, which is exactly the unwind
   state RETURN leaves behind.  An extra CFI_PUSH would make the CFA
   offset 4 bytes too large for the remainder of the function.  */
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* Shared return stubs.  EDI points 16 bytes past the start of the
   block that contains the result, so the four elements of that block
   are at EDI - 16, - 12, - 8 and - 4.  All stubs are entered after
   %esi has been popped or on paths that never pushed it.  */

/* Result is the second element.  */
L(match_second_wchar):
Packit 6c4009
	lea	-12(%edi), %eax
Packit 6c4009
	RETURN
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* AH != 0: the match mask has bits for the third and/or fourth
   element.  The high nibble of AH belongs to the fourth element, which
   wins when present.  */
L(match_third_or_fourth_wchar):
Packit 6c4009
	test	$15 << 4, %ah
Packit 6c4009
	jnz	L(match_fourth_wchar)
Packit 6c4009
	lea	-8(%edi), %eax
Packit 6c4009
	RETURN
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* Result is the third element.  */
L(match_third_wchar):
Packit 6c4009
	lea	-8(%edi), %eax
Packit 6c4009
	RETURN
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* Result is the fourth element.  */
L(match_fourth_wchar):
Packit 6c4009
	lea	-4(%edi), %eax
Packit 6c4009
	RETURN
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* No match anywhere in the string: return 0.  Used by the paths that
   did not push %esi.  */
L(return_null):
Packit 6c4009
	xor	%eax, %eax
Packit 6c4009
	RETURN
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* Prologue counterpart of L(find_zero): the very first probe already
   contains both a match and the terminator.  Nothing has been saved
   and %esi was never pushed, so "no valid match" means NULL.

   Entry from L(unaligned_match): EAX/EDX are the match/zero masks
   shifted right by ECX, the start offset inside the aligned block.
   Bias EDI by that offset so that (EDI - 16 + bit index) is the byte
   address, and move the zero mask into ECX for the shared code.  */
L(prolog_find_zero):
Packit 6c4009
	add	%ecx, %edi
Packit 6c4009
	mov     %edx, %ecx
Packit 6c4009
/* Entry from L(unaligned_match1): EAX = match mask, ECX = zero mask,
   EDI = string start + 16.  */
L(prolog_find_zero_1):
Packit 6c4009
/* CL == 0: no zero in the first two elements.  */
	test	%cl, %cl
Packit 6c4009
	jz	L(prolog_find_zero_in_third_or_fourth_wchar)
Packit 6c4009
/* Low nibble of CL == 0: the first element is not zero, so the
   second one is.  */
	test	$15, %cl
Packit 6c4009
	jz	L(prolog_find_zero_in_second_wchar)
Packit 6c4009
/* The first element is the terminator; only a match at that very
   position (mask bit 0) counts.  */
	and	$1, %eax
Packit 6c4009
	jz	L(return_null)
Packit 6c4009
Packit 6c4009
	lea	-16(%edi), %eax
Packit 6c4009
	RETURN
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* The terminator is the second element.  */
L(prolog_find_zero_in_second_wchar):
Packit 6c4009
/* GNU as gives << higher precedence than -, so the immediate is
   (1 << 5) - 1 = 0x1f: first element plus the lowest bit of the
   second.  */
	and	$1 << 5 - 1, %eax
Packit 6c4009
	jz	L(return_null)
Packit 6c4009
Packit 6c4009
	test	$15 << 4, %al
Packit 6c4009
	jnz	L(match_second_wchar)
Packit 6c4009
	lea	-16(%edi), %eax
Packit 6c4009
	RETURN
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* The terminator is the third or the fourth element.  */
L(prolog_find_zero_in_third_or_fourth_wchar):
Packit 6c4009
/* Low nibble of CH == 0: the third element is not zero.  */
	test	$15, %ch
Packit 6c4009
	jz	L(prolog_find_zero_in_fourth_wchar)
Packit 6c4009
/* Terminator is the third element.  The immediate is
   (1 << 9) - 1 = 0x1ff: elements one and two plus the lowest bit of
   the third.  */
	and	$1 << 9 - 1, %eax
Packit 6c4009
	jz	L(return_null)
Packit 6c4009
Packit 6c4009
/* Return the highest remaining match.  */
	test	%ah, %ah
Packit 6c4009
	jnz	L(match_third_wchar)
Packit 6c4009
	test	$15 << 4, %al
Packit 6c4009
	jnz	L(match_second_wchar)
Packit 6c4009
	lea	-16(%edi), %eax
Packit 6c4009
	RETURN
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* The terminator is the fourth element, so every match bit is valid
   (EAX is non-zero on this path); return the highest one.  */
L(prolog_find_zero_in_fourth_wchar):
Packit 6c4009
	test	%ah, %ah
Packit 6c4009
	jnz	L(match_third_or_fourth_wchar)
Packit 6c4009
	test	$15 << 4, %al
Packit 6c4009
	jnz	L(match_second_wchar)
Packit 6c4009
	lea	-16(%edi), %eax
Packit 6c4009
	RETURN
Packit 6c4009
Packit 6c4009
END (__wcsrchr_sse2)
Packit 6c4009
#endif