/* strcmp with unaligned loads
   Copyright (C) 2013-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

#include "sysdep.h"

/* __strcmp_sse2_unaligned: compare two NUL-terminated byte strings.

   In:    %rdi = s1, %rsi = s2.
   Out:   %eax = s1[i] - s2[i], both bytes zero-extended, where i is the
	  first index at which the strings differ or at which s1 holds
	  its terminating NUL.
   Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, %r10, %xmm0-%xmm7,
	  flags.  No stack is used.

   Technique used by every vector step below: pcmpeqb produces 0xff in
   each byte where the two strings agree; pminub with the s1 bytes then
   leaves a zero byte exactly where the strings differ or where s1 is
   NUL.  Comparing that result against zero and taking pmovmskb gives a
   bit mask of "stop" positions, and bsf picks the first one.  pmovmskb
   writes a 32-bit register, so the upper half of the 64-bit register
   is zero before the masks are shifted and merged.  */

ENTRY ( __strcmp_sse2_unaligned)
	/* If the OR of both page offsets is at most 4032 (= 4096 - 64),
	   each individual offset is too, so 64 bytes can be loaded from
	   either pointer without leaving its 4096-byte page.  Otherwise
	   compare the first 64 bytes one at a time.  The masked value is
	   in [0, 4095], so the signed jg behaves like an unsigned test.  */
	movl	%edi, %eax
	xorl	%edx, %edx		/* Byte index used by L(cross_page).  */
	pxor	%xmm7, %xmm7		/* xmm7 = 0 for the main loop.  */
	orl	%esi, %eax
	andl	$4095, %eax
	cmpl	$4032, %eax
	jg	L(cross_page)

	/* Bytes 0..15.  */
	movdqu	(%rdi), %xmm1
	movdqu	(%rsi), %xmm0
	pcmpeqb	%xmm1, %xmm0
	pminub	%xmm1, %xmm0
	pxor	%xmm1, %xmm1		/* xmm1 = 0; reused by L(next_48_bytes).  */
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %eax
	testq	%rax, %rax
	je	L(next_48_bytes)

	/* %rax is a non-zero stop mask; bit i refers to byte i counted from
	   the original %rdi/%rsi.  */
L(return):
	bsfq	%rax, %rdx
	movzbl	(%rdi, %rdx), %eax
	movzbl	(%rsi, %rdx), %edx
	subl	%edx, %eax
	ret

	.p2align 4
L(next_48_bytes):
	/* Bytes 16..63.  The three 16-bit masks are shifted to bits 16, 32
	   and 48 of %rax so that L(return) can index from the string
	   starts.  xmm1 is still zero here.  */
	movdqu	16(%rdi), %xmm6
	movdqu	16(%rsi), %xmm3
	movdqu	32(%rdi), %xmm5
	pcmpeqb	%xmm6, %xmm3
	movdqu	32(%rsi), %xmm2
	pminub	%xmm6, %xmm3
	pcmpeqb	%xmm1, %xmm3
	movdqu	48(%rdi), %xmm4
	pcmpeqb	%xmm5, %xmm2
	pmovmskb %xmm3, %edx
	movdqu	48(%rsi), %xmm0
	pminub	%xmm5, %xmm2
	pcmpeqb	%xmm1, %xmm2
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm2, %eax
	salq	$16, %rdx
	pminub	%xmm4, %xmm0
	pcmpeqb	%xmm1, %xmm0
	salq	$32, %rax
	orq	%rdx, %rax
	pmovmskb %xmm0, %ecx
	movq	%rcx, %rdx
	salq	$48, %rdx
	orq	%rdx, %rax		/* Sets ZF for the branch below.  */
	jne	L(return)

	/* The first 64 bytes held no stop position (reached by falling
	   through from above or from the byte loop at L(cross_page)).
	   Set up the main loop:
	     %rax = s1 rounded up to the next 64-byte boundary strictly
		    past the original s1,
	     %rdx = the position in s2 at the same distance from its start,
	     %rsi = (4096 - (%rdx & 4095)) >> 6, i.e. how many 64-byte
		    blocks can be loaded from %rdx before a load would run
		    past the end of the page %rdx is in.
	   The original %rdi/%rsi are not needed after this point.  */
L(main_loop_header):
	leaq	64(%rdi), %rdx
	movl	$4096, %ecx
	andq	$-64, %rdx
	subq	%rdi, %rdx
	leaq	(%rdi, %rdx), %rax
	addq	%rsi, %rdx
	movq	%rdx, %rsi
	andl	$4095, %esi
	subq	%rsi, %rcx
	shrq	$6, %rcx
	movq	%rcx, %rsi
	jmp	L(loop_start)

	.p2align 4
L(loop):
	addq	$64, %rax
	addq	$64, %rdx
L(loop_start):
	/* leaq does not modify flags, so the je below tests the counter
	   value from before the decrement.  */
	testq	%rsi, %rsi
	leaq	-1(%rsi), %rsi
	je	L(loop_cross_page)
L(back_to_loop):
	/* Check 64 bytes: aligned loads from s1 (%rax), unaligned loads
	   from s2 (%rdx).  The four per-vector results are folded with
	   pminub into xmm0 so that one test covers the whole block;
	   xmm1, xmm5 and xmm6 keep the results for bytes 16..31, 32..47
	   and 48..63.  */
	movdqu	(%rdx), %xmm0
	movdqu	16(%rdx), %xmm1
	movdqa	(%rax), %xmm2
	movdqa	16(%rax), %xmm3
	pcmpeqb	%xmm2, %xmm0
	movdqu	32(%rdx), %xmm5
	pcmpeqb	%xmm3, %xmm1
	pminub	%xmm2, %xmm0
	movdqu	48(%rdx), %xmm6
	pminub	%xmm3, %xmm1
	movdqa	32(%rax), %xmm2
	pminub	%xmm1, %xmm0
	movdqa	48(%rax), %xmm3
	pcmpeqb	%xmm2, %xmm5
	pcmpeqb	%xmm3, %xmm6
	pminub	%xmm2, %xmm5
	pminub	%xmm3, %xmm6
	pminub	%xmm5, %xmm0
	pminub	%xmm6, %xmm0
	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm0, %ecx
	testl	%ecx, %ecx
	je	L(loop)

	/* A stop position lies somewhere in this block.  xmm0 was
	   overwritten by the fold, so the result for bytes 0..15 is
	   recomputed; then a 64-bit mask is built in %rcx to locate the
	   first stop byte.  */
	pcmpeqb	%xmm7, %xmm5
	movdqu	(%rdx), %xmm0
	pcmpeqb	%xmm7, %xmm1
	movdqa	(%rax), %xmm2
	pcmpeqb	%xmm2, %xmm0
	pminub	%xmm2, %xmm0
	pcmpeqb	%xmm7, %xmm6
	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm1, %ecx
	pmovmskb %xmm5, %r8d
	pmovmskb %xmm0, %edi
	salq	$16, %rcx
	salq	$32, %r8
	pmovmskb %xmm6, %esi
	orq	%r8, %rcx
	orq	%rdi, %rcx
	salq	$48, %rsi
	orq	%rsi, %rcx
	bsfq	%rcx, %rcx
	movzbl	(%rax, %rcx), %eax
	movzbl	(%rdx, %rcx), %edx
	subl	%edx, %eax
	ret

	.p2align 4
L(loop_cross_page):
	/* The block counter reached zero.  With %r9 = %rdx & 63 and
	   %r10 = -%r9, compare the 64-byte-aligned block of s2 that
	   contains %rdx (aligned loads) against the s1 bytes the same
	   distance back from %rax (unaligned loads).  */
	xor	%r10, %r10
	movq	%rdx, %r9
	and	$63, %r9
	subq	%r9, %r10

	movdqa	(%rdx, %r10), %xmm0
	movdqa	16(%rdx, %r10), %xmm1
	movdqu	(%rax, %r10), %xmm2
	movdqu	16(%rax, %r10), %xmm3
	pcmpeqb	%xmm2, %xmm0
	movdqa	32(%rdx, %r10), %xmm5
	pcmpeqb	%xmm3, %xmm1
	pminub	%xmm2, %xmm0
	movdqa	48(%rdx, %r10), %xmm6
	pminub	%xmm3, %xmm1
	movdqu	32(%rax, %r10), %xmm2
	movdqu	48(%rax, %r10), %xmm3
	pcmpeqb	%xmm2, %xmm5
	pcmpeqb	%xmm3, %xmm6
	pminub	%xmm2, %xmm5
	pminub	%xmm3, %xmm6

	pcmpeqb	%xmm7, %xmm0
	pcmpeqb	%xmm7, %xmm1
	pcmpeqb	%xmm7, %xmm5
	pcmpeqb	%xmm7, %xmm6

	pmovmskb %xmm1, %ecx
	pmovmskb %xmm5, %r8d
	pmovmskb %xmm0, %edi
	salq	$16, %rcx
	salq	$32, %r8
	pmovmskb %xmm6, %esi
	orq	%r8, %rdi
	orq	%rcx, %rdi
	salq	$48, %rsi
	orq	%rsi, %rdi
	movq	%r9, %rcx
	movq	$63, %rsi		/* Reload the block counter.  */
	/* Drop the low %r9 mask bits, which belong to bytes in front of
	   %rax/%rdx; afterwards bit 0 corresponds to %rax/%rdx.  */
	shrq	%cl, %rdi
	test	%rdi, %rdi
	je	L(back_to_loop)
	bsfq	%rdi, %rcx
	movzbl	(%rax, %rcx), %eax
	movzbl	(%rdx, %rcx), %edx
	subl	%edx, %eax
	ret

	/* Byte-at-a-time comparison of the first 64 bytes, entered at
	   L(cross_page) with %rdx = 0 when the entry check could not rule
	   out a page crossing.  %al = s1[%rdx], %cl = s2[%rdx].  Once 64
	   bytes have matched without a NUL, continue in the vector loop.  */
	.p2align 4
L(cross_page_loop):
	cmpb	%cl, %al
	jne	L(different)
	addq	$1, %rdx
	cmpq	$64, %rdx
	je	L(main_loop_header)
L(cross_page):
	movzbl	(%rdi, %rdx), %eax
	movzbl	(%rsi, %rdx), %ecx
	testb	%al, %al
	jne	L(cross_page_loop)
	xorl	%eax, %eax		/* s1 ended: result is 0 - s2[%rdx].  */
L(different):
	subl	%ecx, %eax
	ret
END (__strcmp_sse2_unaligned)

#endif