Blame sysdeps/x86_64/memcmp.S

Packit 6c4009
/* memcmp with SSE2
Packit 6c4009
   Copyright (C) 2009-2018 Free Software Foundation, Inc.
Packit 6c4009
   Contributed by Intel Corporation.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <sysdep.h>
Packit 6c4009
Packit 6c4009
	.text
Packit 6c4009
/* int memcmp (const void *s1, const void *s2, size_t n)

   ABI:  System V AMD64 (also built for x32).
   In:   %rdi = s1, %rsi = s2, %rdx = n.
   Out:  %eax = 0 if the first n bytes are equal, otherwise the
	 difference (byte of s1) - (byte of s2) at the first mismatch,
	 both bytes taken as unsigned.
   Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r10, %r11,
	     %xmm0, %xmm1, flags.

   Register roles once the n <= 1 cases have been dispatched:
     %rdi  current position in s1
     %rsi  s2 - s1, so (%rdi, %rsi) addresses s2 at the same offset
     %r10  bytes left (small path) or an aligned loop bound (big path)
     %r11  s1 + n, i.e. one past the last byte of s1 (big path)

   Lengths and addresses are unsigned quantities, so every branch on
   them uses the unsigned condition codes (jbe/jae).  With the signed
   forms a length with the top bit set was treated as "<= 1" and only
   a single byte was compared.  */
ENTRY (memcmp)
#ifdef __ILP32__
	/* x32: size_t is 32 bits wide and the upper half of %rdx is not
	   guaranteed to be zero; writing %edx zero-extends it.  */
	movl	%edx, %edx
#endif
	test	%rdx, %rdx
	jz	L(finz)
	cmpq	$1, %rdx
	jbe	L(finr1b)
	/* From here on %rsi holds the distance s2 - s1.  */
	subq	%rdi, %rsi
	movq	%rdx, %r10
	cmpq	$32, %r10
	jae	L(gt32)
	/* Handle small chunks and last block of less than 32 bytes.
	   %r10 is the byte count (1..31).  Each set bit of %r10 selects
	   one power-of-two sized compare; a step that consumes the last
	   remaining bytes jumps straight to the result computation.  */
L(small):
	testq	$1, %r10
	jz	L(s2b)
	movzbl	(%rdi),	%eax
	movzbl	(%rdi, %rsi), %edx
	subq	$1, %r10
	je	L(finz1)
	addq	$1, %rdi
	subl	%edx, %eax
	jnz	L(exit)
L(s2b):
	testq	$2, %r10
	jz	L(s4b)
	movzwl	(%rdi),	%eax
	movzwl	(%rdi, %rsi), %edx
	subq	$2, %r10
	je	L(fin2_7)
	addq	$2, %rdi
	cmpl	%edx, %eax
	jnz	L(fin2_7)
L(s4b):
	testq	$4, %r10
	jz	L(s8b)
	movl	(%rdi),	%eax
	movl	(%rdi, %rsi), %edx
	subq	$4, %r10
	je	L(fin2_7)
	addq	$4, %rdi
	cmpl	%edx, %eax
	jnz	L(fin2_7)
L(s8b):
	testq	$8, %r10
	jz	L(s16b)
	movq	(%rdi),	%rax
	movq	(%rdi, %rsi), %rdx
	subq	$8, %r10
	je	L(fin2_7)
	addq	$8, %rdi
	cmpq	%rdx, %rax
	jnz	L(fin2_7)
L(s16b):
	/* Only reached with exactly 16 bytes left.  */
	movdqu	(%rdi), %xmm1
	movdqu	(%rdi, %rsi), %xmm0
	pcmpeqb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	xorl	%eax, %eax
	/* The mask is 0xffff iff all 16 bytes matched.  Otherwise the
	   lowest set bit of (mask - 0xffff) is the index of the first
	   mismatching byte.  */
	subl	$0xffff, %edx
	jz	L(finz)
	bsfl	%edx, %ecx
	leaq	(%rdi, %rcx), %rcx
	movzbl	(%rcx), %eax
	movzbl	(%rsi, %rcx), %edx
	jmp	L(finz1)

	.p2align 4,, 4
	/* n == 1.  Reached before %rsi was turned into a distance, so it
	   is still the s2 pointer here.  */
L(finr1b):
	movzbl	(%rdi), %eax
	movzbl	(%rsi), %edx
	/* Result from two zero-extended bytes in %eax / %edx.  */
L(finz1):
	subl	%edx, %eax
L(exit):
	ret

	.p2align 4,, 4
	/* %rax / %rdx hold zero-extended 2-, 4- or 8-byte little-endian
	   loads from s1 / s2.  Isolate the lowest differing byte of each
	   and return their difference.  */
L(fin2_7):
	cmpq	%rdx, %rax
	jz	L(finz)
	movq	%rax, %r11
	subq	%rdx, %r11
	/* The lowest set bit of a - b is the lowest bit where a and b
	   differ.  Round its index down to a multiple of 8 to get the
	   shift that brings the differing byte into the low byte.  */
	bsfq	%r11, %rcx
	sarq	$3, %rcx
	salq	$3, %rcx
	sarq	%cl, %rax
	movzbl	%al, %eax
	sarq	%cl, %rdx
	movzbl	%dl, %edx
	subl	%edx, %eax
	ret

	.p2align 4,, 4
L(finz):
	xorl	%eax, %eax
	ret

	/* For blocks bigger than 32 bytes
	   1. Advance one of the addr pointer to be 16B aligned.
	   2. Treat the case of both addr pointers aligned to 16B
	      separately to avoid movdqu.
	   3. Handle any blocks of greater than 64 consecutive bytes with
	      unrolling to reduce branches.
	   4. At least one addr pointer is 16B aligned, use memory version
	      of pcmpeqb.
	*/
	.p2align 4,, 4
L(gt32):
	/* %r11 = s1 + n (end of s1), %r8 = misalignment of s1.  */
	movq	%rdx, %r11
	addq	%rdi, %r11
	movq	%rdi, %r8

	andq	$15, %r8
	jz	L(16am)
	/* Both pointers may be misaligned.  */
	movdqu	(%rdi),	%xmm1
	movdqu	(%rdi, %rsi), %xmm0
	pcmpeqb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	/* Round %rdi up to the next 16-byte boundary; the bytes skipped
	   were covered by the unaligned compare above.  */
	neg	%r8
	leaq	16(%rdi, %r8), %rdi
L(16am):
	/* Handle two 16B aligned pointers separately.  %rdi is aligned
	   here, so s2 is aligned too iff the distance in %rsi is a
	   multiple of 16.  */
	testq	$15, %rsi
	jz	L(ATR)
	/* Only s1 is aligned: bring %rdi to a 32-byte boundary.  */
	testq	$16, %rdi
	jz	L(A32)
	movdqu	(%rdi, %rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi
L(A32):
	/* %r10 = end of s1 rounded down to 32 bytes.  */
	movq	%r11, %r10
	andq	$-32, %r10
	cmpq	%r10, %rdi
	jae	L(mt16)
	/* Pre-unroll to be ready for unrolled 64B loop.  */
	testq	$32, %rdi
	jz	L(A64)
	movdqu	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi

	movdqu	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi

L(A64):
	/* %r10 = end of s1 rounded down to 64 bytes.  */
	movq	%r11, %r10
	andq	$-64, %r10
	cmpq	%r10, %rdi
	jae	L(mt32)

	/* 64 bytes per iteration, s2 accessed unaligned.  */
L(A64main):
	movdqu	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi

	movdqu	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi

	movdqu	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi

	movdqu	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi

	cmpq	%rdi, %r10
	jne	L(A64main)

L(mt32):
	/* Fewer than 64 bytes left: handle one 32-byte block if any.  */
	movq	%r11, %r10
	andq	$-32, %r10
	cmpq	%r10, %rdi
	jae	L(mt16)

L(A32main):
	movdqu	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi

	movdqu	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi

	cmpq	%rdi, %r10
	jne	L(A32main)
L(mt16):
	/* Fewer than 32 bytes left: finish on the small path with
	   %r10 = remaining byte count.  */
	subq	%rdi, %r11
	je	L(finz)
	movq	%r11, %r10
	jmp	L(small)

	.p2align 4,, 4
	/* A 16-byte block at %rdi differs.  %edx is (mask - 0xffff), whose
	   lowest set bit is the index of the first mismatching byte.  */
L(neq):
	bsfl	%edx, %ecx
	movzbl	(%rdi, %rcx), %eax
	addq	%rdi, %rsi
	movzbl	(%rsi,%rcx), %edx
	jmp	L(finz1)

	.p2align 4,, 4
	/* Both pointers are 16-byte aligned: same scheme as above, but
	   s2 is loaded with movdqa.  */
L(ATR):
	movq	%r11, %r10
	andq	$-32, %r10
	cmpq	%r10, %rdi
	jae	L(mt16)
	testq	$16, %rdi
	jz	L(ATR32)

	movdqa	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi
	cmpq	%rdi, %r10
	je	L(mt16)

L(ATR32):
	movq	%r11, %r10
	andq	$-64, %r10
	testq	$32, %rdi
	jz	L(ATR64)

	movdqa	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi

	movdqa	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi

L(ATR64):
	cmpq	%rdi, %r10
	je	L(mt32)

	/* 64 bytes per iteration, both operands aligned.  */
L(ATR64main):
	movdqa	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi

	movdqa	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi

	movdqa	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi

	movdqa	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi
	cmpq	%rdi, %r10
	jne	L(ATR64main)

	movq	%r11, %r10
	andq	$-32, %r10
	cmpq	%r10, %rdi
	jae	L(mt16)

L(ATR32res):
	movdqa	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi

	movdqa	(%rdi,%rsi), %xmm0
	pcmpeqb	(%rdi), %xmm0
	pmovmskb %xmm0, %edx
	subl	$0xffff, %edx
	jnz	L(neq)
	addq	$16, %rdi

	cmpq	%r10, %rdi
	jne	L(ATR32res)

	/* Tail of fewer than 32 bytes, as at L(mt16).  */
	subq	%rdi, %r11
	je	L(finz)
	movq	%r11, %r10
	jmp	L(small)
	/* Align to 16byte to improve instruction fetch.  */
	.p2align 4,, 4
END(memcmp)
Packit 6c4009
Packit 6c4009
/* Drop any preprocessor definition of bcmp so that the name is used
   literally in the alias below.  */
#undef bcmp
Packit 6c4009
/* NOTE(review): weak_alias is defined outside this file; presumably it
   exports bcmp as a weak alias of the memcmp symbol -- confirm.  */
weak_alias (memcmp, bcmp)
Packit 6c4009
/* NOTE(review): libc_hidden_builtin_def is defined outside this file;
   presumably it provides the hidden-visibility definition of memcmp
   used for calls from inside libc -- confirm.  */
libc_hidden_builtin_def (memcmp)