/* Optimized wcslen for x86-64 with SSE2.
   Copyright (C) 2011-2018 Free Software Foundation, Inc.
   Contributed by Intel Corporation.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>

	.text
/* size_t __wcslen (const wchar_t *s)

   Returns the number of 4-byte wide characters that precede the
   terminating zero character of S.

   ABI:      System V AMD64, leaf function, no stack usage.
   In:       %rdi = s
   Out:      %rax = length in characters
   Clobbers: %rax, %rcx, %rdx, %xmm0-%xmm3, %xmm6, flags

   Register roles once the vector code is reached:
     %rax  scan cursor; at every jump to L(exit) it points 16 bytes
	   past the start of the vector that contains the terminator.
     %rcx  bias subtracted from %rax in L(exit); normally s + 16, so
	   that %rax - %rcx is the byte offset of that vector from s.
     %edx  pmovmskb byte mask of the most recent comparison.
     %xmm0-%xmm3  all-zero comparison references (see below).

   NOTE: the vector comparisons treat every aligned 4-byte lane of a
   16-byte aligned load as one character, so the vector path relies on
   S being 4-byte aligned.  */
ENTRY (__wcslen)
	/* Test the first eight characters one at a time.  Each compare
	   only touches memory after all earlier characters were found
	   to be non-zero.  */
	cmpl	$0, (%rdi)
	jz	L(exit_tail0)
	cmpl	$0, 4(%rdi)
	jz	L(exit_tail1)
	cmpl	$0, 8(%rdi)
	jz	L(exit_tail2)
	cmpl	$0, 12(%rdi)
	jz	L(exit_tail3)
	cmpl	$0, 16(%rdi)
	jz	L(exit_tail4)
	cmpl	$0, 20(%rdi)
	jz	L(exit_tail5)
	cmpl	$0, 24(%rdi)
	jz	L(exit_tail6)
	cmpl	$0, 28(%rdi)
	jz	L(exit_tail7)

	pxor	%xmm0, %xmm0

	/* %rax = (s + 32) rounded down to a 16-byte boundary.  This is at
	   most s + 32, so every byte below it was already checked by the
	   scalar compares above.  From here on only aligned 16-byte loads
	   are used; such a load cannot straddle a page boundary.  */
	lea	32(%rdi), %rax
	lea	16(%rdi), %rcx
	and	$-16, %rax

	/* Twelve unrolled single-vector checks.  pcmpeqd against a zero
	   register yields an all-ones lane for every zero character.  If
	   no lane matches the result is all-zero again, so on the
	   fall-through path the register is still a valid zero reference
	   and can be reused without another pxor.  The lea between test
	   and jnz advances the cursor without disturbing the flags.  */
	pcmpeqd	(%rax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	/* Round the cursor down to a 64-byte boundary for the main loop.
	   This can step back by at most 48 bytes, all of which lie inside
	   the twelve vectors that were just found to be free of zero
	   characters.  */
	and	$-0x40, %rax

	.p2align 4
L(aligned_64_loop):
	/* Scan 64 bytes per iteration.  %xmm3 is all-zero on entry.  */
	movaps	(%rax), %xmm0
	movaps	16(%rax), %xmm1
	movaps	32(%rax), %xmm2
	movaps	48(%rax), %xmm6

	/* Fold the four vectors into %xmm2 with a byte-wise minimum;
	   %xmm1 and %xmm6 are only sources and keep their loaded values.
	   A zero character in any vector produces a zero lane in the
	   result.  The converse does not hold: bytes taken from different
	   non-zero characters can combine into a zero lane, so a hit here
	   is only a candidate and is verified below.  */
	pminub	%xmm1, %xmm0
	pminub	%xmm6, %xmm2
	pminub	%xmm0, %xmm2
	pcmpeqd	%xmm3, %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	64(%rax), %rax
	jz	L(aligned_64_loop)

	/* Candidate hit: test the four vectors individually, in address
	   order.  %rax already points past the whole 64-byte group, so
	   %rcx is adjusted before each jump such that %rax - %rcx is still
	   the byte offset from s of the vector being tested.  As in the
	   unrolled checks, %xmm3 is all-zero again after every test that
	   falls through.  */

	/* First vector, at -64(%rax): bias is s + 64.  */
	pcmpeqd	-64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	48(%rcx), %rcx
	jnz	L(exit)

	/* Second vector, still held in %xmm1: bias is s + 48.  */
	pcmpeqd	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%rcx), %rcx
	jnz	L(exit)

	/* Third vector, at -32(%rax): bias is s + 32.  */
	pcmpeqd	-32(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%rcx), %rcx
	jnz	L(exit)

	/* Fourth vector, still held in %xmm6: bias is back to s + 16.  */
	pcmpeqd	%xmm6, %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%rcx), %rcx
	jnz	L(exit)

	/* The byte-wise minimum gave a false positive.  %rcx is s + 16
	   and %xmm3 is all-zero again, so simply continue scanning.  */
	jmp	L(aligned_64_loop)

	.p2align 4
L(exit):
	/* %edx is the non-zero pmovmskb mask of a pcmpeqd result.  A
	   matching lane sets all four of its mask bits, so the lowest set
	   bit is at position 4 * (index of the first zero character in
	   the vector), i.e. the byte offset of that character inside the
	   vector.  Both pmovmskb and bsf write the full 32-bit register,
	   so the upper half of %rdx is zero.
	   length = ((%rax - %rcx) + byte offset in vector) / 4.  */
	bsf	%edx, %edx
	sub	%rcx, %rax
	add	%rdx, %rax
	shr	$2, %rax
	ret

	/* Returns for a terminator found among the first eight
	   characters.  Writing %eax zero-extends into %rax.  */
	.p2align 4
L(exit_tail0):
	xor	%eax, %eax
	ret

	.p2align 4
L(exit_tail1):
	mov	$1, %eax
	ret

	.p2align 4
L(exit_tail2):
	mov	$2, %eax
	ret

	.p2align 4
L(exit_tail3):
	mov	$3, %eax
	ret

	.p2align 4
L(exit_tail4):
	mov	$4, %eax
	ret

	.p2align 4
L(exit_tail5):
	mov	$5, %eax
	ret

	.p2align 4
L(exit_tail6):
	mov	$6, %eax
	ret

	.p2align 4
L(exit_tail7):
	mov	$7, %eax
	ret

END (__wcslen)

weak_alias(__wcslen, wcslen)