/* wcsrchr with SSE2
   Copyright (C) 2011-2018 Free Software Foundation, Inc.
   Contributed by Intel Corporation.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* wchar_t *wcsrchr (const wchar_t *s, wchar_t c)

   Return the address of the last wide character in S that equals C,
   or NULL when there is none.  The terminating null is compared as
   well, so searching for L'\0' returns the address of the terminator.

   ABI:      System V AMD64, AT&T syntax.
   In:       %rdi = s, %esi = c.
   Out:      %rax = result pointer or 0.
   Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, %xmm0-%xmm5, flags.
   Stack:    not used.

   The string is examined 16 bytes (four wide characters) at a time.
   PCMPEQD sets all four bytes of a matching lane and PMOVMSKB collects
   one bit per byte, so every 16-bit mask carries one nibble per wide
   character:

     bits  0-3   wide character 0  (block + 0)
     bits  4-7   wide character 1  (block + 4)
     bits  8-11  wide character 2  (block + 8)
     bits 12-15  wide character 3  (block + 12)

   Register roles once scanning has started:

     %xmm1  C replicated into all four lanes.
     %xmm2  all zero whenever a block is about to be compared; after
	    the compare it holds the null mask of that block.
     %rdi   address just past the block examined last.
     %r8    match mask of the most recent block that contained a match
	    and no null (0 while no such block has been seen).
     %rsi   address just past the block described by %r8.

   NOTE(review): the nibble decoding after the byte-granular shift in
   L(crosscache) presumes S is 4-byte aligned, which is the natural
   alignment of wchar_t -- confirm no caller passes a misaligned S.

   Masks written as `1 << N - 1' rely on the GNU assembler giving `<<'
   a higher precedence than `-', i.e. they mean (1 << N) - 1.  */

	.text
ENTRY (wcsrchr)

	/* Replicate C into every lane of %xmm1.  Only the low dword of
	   the source survives the two unpack steps.  */
	movd	%esi, %xmm1
	mov	%rdi, %rcx
	punpckldq %xmm1, %xmm1
	pxor	%xmm2, %xmm2
	punpckldq %xmm1, %xmm1
	/* Offset of S inside its 64-byte line.  Above 48, a 16-byte
	   unaligned load would run past the end of the line, so take
	   the aligned-load path instead.  */
	and	$63, %rcx
	cmp	$48, %rcx
	ja	L(crosscache)

	/* First 16 bytes, loaded unaligned:
	   %rcx = null mask, %rax = match mask.  */
	movdqu	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rcx
	pmovmskb %xmm0, %rax
	add	$16, %rdi

	test	%rax, %rax
	jnz	L(unaligned_match1)

	/* No match; a null here means the string holds no match.  */
	test	%rcx, %rcx
	jnz	L(return_null)

	/* Continue with aligned blocks.  Rounding down may rescan part
	   of the bytes just examined, which is harmless.  */
	and	$-16, %rdi
	xor	%r8d, %r8d
	jmp	L(loop)

	.p2align 4
L(unaligned_match1):
	/* Match in the first (unaligned) block.  */
	test	%rcx, %rcx
	jnz	L(prolog_find_zero_1)

	/* No null yet: remember the match mask and the address just
	   past the unaligned block, then continue with aligned blocks.  */
	mov	%rax, %r8
	mov	%rdi, %rsi
	and	$-16, %rdi
	jmp	L(loop)

	.p2align 4
L(crosscache):
	/* Load the aligned block that contains S and shift the masks
	   right by the byte offset of S within it, so that bit 0
	   corresponds to S itself.  %xmm3 is used for the null compare
	   so that %xmm2 stays zero for L(loop).  */
	and	$15, %rcx
	and	$-16, %rdi
	pxor	%xmm3, %xmm3
	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm3
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm3, %rdx
	pmovmskb %xmm0, %rax
	shr	%cl, %rdx
	shr	%cl, %rax
	add	$16, %rdi

	test	%rax, %rax
	jnz	L(unaligned_match)

	test	%rdx, %rdx
	jnz	L(return_null)

	xor	%r8d, %r8d
	jmp	L(loop)

	.p2align 4
L(unaligned_match):
	test	%rdx, %rdx
	jnz	L(prolog_find_zero)

	/* The shifted mask is relative to S, so the matching "past the
	   block" address is the aligned end plus the offset of S.  */
	mov	%rax, %r8
	lea	(%rdi, %rcx), %rsi

/* Loop start on aligned string.  */
	/* Four blocks per iteration.  %xmm2 is zero on entry to every
	   step: a non-zero null mask always leaves through L(matches).
	   After each step %rax is the match mask and %rcx is
	   (match mask | null mask).  */
	.p2align 4
L(loop):
	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rcx
	pmovmskb %xmm0, %rax
	or	%rax, %rcx
	jnz	L(matches)

	movdqa	(%rdi), %xmm3
	pcmpeqd	%xmm3, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm3
	pmovmskb %xmm2, %rcx
	pmovmskb %xmm3, %rax
	or	%rax, %rcx
	jnz	L(matches)

	movdqa	(%rdi), %xmm4
	pcmpeqd	%xmm4, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm4
	pmovmskb %xmm2, %rcx
	pmovmskb %xmm4, %rax
	or	%rax, %rcx
	jnz	L(matches)

	movdqa	(%rdi), %xmm5
	pcmpeqd	%xmm5, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm5
	pmovmskb %xmm2, %rcx
	pmovmskb %xmm5, %rax
	or	%rax, %rcx
	jz	L(loop)

	.p2align 4
L(matches):
	/* The current block holds a match, a null, or both.  */
	test	%rax, %rax
	jnz	L(match)
L(return_value):
	/* Nothing usable in the current block: fall back to the match
	   recorded in %r8/%rsi, or return NULL if there is none.  */
	test	%r8, %r8
	jz	L(return_null)
	mov	%r8, %rax
	mov	%rsi, %rdi

	/* Pick the highest matching wide character; %rdi points just
	   past the block the mask in %rax belongs to.  */
	test	$15 << 4, %ah
	jnz	L(match_fourth_wchar)
	test	%ah, %ah
	jnz	L(match_third_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%rdi), %rax
	ret

	.p2align 4
L(match):
	/* Match in the current block.  Re-read the null mask, because
	   %rcx was merged with the match mask in the loop.  */
	pmovmskb %xmm2, %rcx
	test	%rcx, %rcx
	jnz	L(find_zero)
	/* No null: this block becomes the newest candidate.  %xmm2 is
	   zero again, as L(loop) requires.  */
	mov	%rax, %r8
	mov	%rdi, %rsi
	jmp	L(loop)

	.p2align 4
L(find_zero):
	/* Match and null in the same block.  Locate the first null,
	   then drop every match bit beyond it.  The lowest bit of the
	   null's own nibble is kept so that C == L'\0' still matches
	   the terminator.  */
	test	$15, %cl
	jnz	L(find_zero_in_first_wchar)
	test	%cl, %cl
	jnz	L(find_zero_in_second_wchar)
	test	$15, %ch
	jnz	L(find_zero_in_third_wchar)

	/* Null is the fourth wide character: keep bits 0-12.  */
	and	$1 << 13 - 1, %rax
	jz	L(return_value)

	test	$15 << 4, %ah
	jnz	L(match_fourth_wchar)
	test	%ah, %ah
	jnz	L(match_third_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%rdi), %rax
	ret

	.p2align 4
L(find_zero_in_first_wchar):
	/* Null is the first wide character: only a match at that very
	   position counts.  */
	test	$1, %rax
	jz	L(return_value)
	lea	-16(%rdi), %rax
	ret

	.p2align 4
L(find_zero_in_second_wchar):
	/* Null is the second wide character: keep bits 0-4.  */
	and	$1 << 5 - 1, %rax
	jz	L(return_value)

	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%rdi), %rax
	ret

	.p2align 4
L(find_zero_in_third_wchar):
	/* Null is the third wide character: keep bits 0-8.  */
	and	$1 << 9 - 1, %rax
	jz	L(return_value)

	test	%ah, %ah
	jnz	L(match_third_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%rdi), %rax
	ret

	/* Same as L(find_zero), but for the very first block: there is
	   no earlier candidate, so "no match before the null" returns
	   NULL directly.  */
	.p2align 4
L(prolog_find_zero):
	/* Entered from L(crosscache): make %rdi relative to S rather
	   than to the aligned block, and move the null mask to %rcx.  */
	add	%rcx, %rdi
	mov	%rdx, %rcx
L(prolog_find_zero_1):
	test	$15, %cl
	jnz	L(prolog_find_zero_in_first_wchar)
	test	%cl, %cl
	jnz	L(prolog_find_zero_in_second_wchar)
	test	$15, %ch
	jnz	L(prolog_find_zero_in_third_wchar)

	/* Null is the fourth wide character: keep bits 0-12.  */
	and	$1 << 13 - 1, %rax
	jz	L(return_null)

	test	$15 << 4, %ah
	jnz	L(match_fourth_wchar)
	test	%ah, %ah
	jnz	L(match_third_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%rdi), %rax
	ret

	.p2align 4
L(prolog_find_zero_in_first_wchar):
	test	$1, %rax
	jz	L(return_null)
	lea	-16(%rdi), %rax
	ret

	.p2align 4
L(prolog_find_zero_in_second_wchar):
	/* Keep bits 0-4.  */
	and	$1 << 5 - 1, %rax
	jz	L(return_null)

	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%rdi), %rax
	ret

	.p2align 4
L(prolog_find_zero_in_third_wchar):
	/* Keep bits 0-8.  */
	and	$1 << 9 - 1, %rax
	jz	L(return_null)

	test	%ah, %ah
	jnz	L(match_third_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%rdi), %rax
	ret

	/* Shared exits.  %rdi points just past the 16-byte block that
	   contains the result.  */
	.p2align 4
L(match_second_wchar):
	lea	-12(%rdi), %rax
	ret

	.p2align 4
L(match_third_wchar):
	lea	-8(%rdi), %rax
	ret

	.p2align 4
L(match_fourth_wchar):
	lea	-4(%rdi), %rax
	ret

	.p2align 4
L(return_null):
	/* A 32-bit XOR clears the whole of %rax.  */
	xor	%eax, %eax
	ret

END (wcsrchr)