/* wcscpy with SSSE3
   Copyright (C) 2011-2018 Free Software Foundation, Inc.
   Contributed by Intel Corporation.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#if IS_IN (libc)
# include <sysdep.h>

/* Unwind bookkeeping for a 4-byte push of REG: move the CFA by 4 and
   record REG as saved at offset 0 from the new stack top.  The cfi_*
   helpers are not defined in this file (presumably sysdep.h wrappers
   for the matching .cfi_* directives).  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Unwind bookkeeping for a 4-byte pop of REG: the inverse of CFI_PUSH.  */
# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* pushl/popl paired with the matching unwind annotation.  */
# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
# define POP(REG)	popl REG; CFI_POP (REG)

/* Offset from %esp of the first stack argument at function entry.  */
# define PARMS	4
/* Restore %edi and return.  The trailing CFI_PUSH re-declares %edi as
   saved for the instructions that follow the ret in the file, which
   are reached by jumps taken while %edi is still on the stack.  */
# define RETURN	POP (%edi); ret; CFI_PUSH (%edi)
/* STR1 is loaded into %edx (store cursor) and STR2 into %ecx (load
   cursor) at entry.  LEN is not referenced anywhere in this file.  */
# define STR1	PARMS
# define STR2	STR1+4
# define LEN	STR2+4

	atom_text_section

/* __wcscpy_ssse3: wcscpy with SSSE3.
   In:    STR1(%esp) = destination, STR2(%esp) = source (stack args).
   Out:   %eax = destination pointer as passed in.
   Elements are 32-bit; the terminator is a zero dword and is copied.

   Register roles for the whole function:
     %edx   destination cursor
     %ecx   source cursor
     %edi   original destination, kept for the return value (pushed)
     %esi   byte offset applied to both cursors on exit (pushed)
     %eax   scratch and pmovmskb result
     %xmm0  zero comparand; pcmpeqd writes its result into it, so it
	    is still all-zero whenever no terminator was found.  */
ENTRY (__wcscpy_ssse3)
	mov	STR1(%esp), %edx
	mov	STR2(%esp), %ecx

	/* Test the first four elements one by one; the ExitTail paths
	   run before anything has been pushed.  The explicit `l' suffix
	   fixes the operand size, which a memory/immediate pair alone
	   does not determine.  */
	cmpl	$0, (%ecx)
	jz	L(ExitTail4)
	cmpl	$0, 4(%ecx)
	jz	L(ExitTail8)
	cmpl	$0, 8(%ecx)
	jz	L(ExitTail12)
	cmpl	$0, 12(%ecx)
	jz	L(ExitTail16)

	PUSH	(%edi)
	mov	%edx, %edi
	PUSH	(%esi)
	/* %esi = first 16-byte boundary strictly above the source.  */
	lea	16(%ecx), %esi

	and	$-16, %esi

	/* Look for a zero dword in that aligned block and, meanwhile,
	   copy the 16 bytes already known to hold no terminator.  */
	pxor	%xmm0, %xmm0
	pcmpeqd	(%esi), %xmm0
	movdqu	(%ecx), %xmm1
	movdqu	%xmm1, (%edx)

	pmovmskb %xmm0, %eax
	/* %esi becomes the distance from the source to the aligned block.  */
	sub	%ecx, %esi

	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	/* Round the destination up to the next 16-byte boundary and
	   advance the source by the same number of bytes.  */
	mov	%edx, %eax
	lea	16(%edx), %edx
	and	$-16, %edx
	sub	%edx, %eax

	sub	%eax, %ecx
	mov	%ecx, %eax
	and	$0xf, %eax
	/* mov leaves the flags alone, so the jz below still tests the
	   result of the `and' above; an xor here would destroy them.  */
	mov	$0, %esi

	/* Dispatch on the source offset within its 16-byte block.  Any
	   value other than 0, 4 or 8 goes to Shl12 (assumes the source
	   is 4-byte aligned -- NOTE(review): confirm with callers).  */
	jz	L(Align16Both)
	cmp	$4, %eax
	je	L(Shl4)
	cmp	$8, %eax
	je	L(Shl8)
	jmp	L(Shl12)

/* Source and destination are both 16-byte aligned and %esi is 0.
   Six unrolled steps follow: load the next block, store the previous
   one, test the new one for a zero dword.  %esi grows by 16 per step
   so that CopyFrom1To16Bytes can add it to both cursors and land on
   the block holding the terminator.  */
L(Align16Both):
	movaps	(%ecx), %xmm1
	movaps	16(%ecx), %xmm2
	movaps	%xmm1, (%edx)
	pcmpeqd	%xmm2, %xmm0
	pmovmskb %xmm0, %eax
	lea	16(%esi), %esi

	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	movaps	16(%ecx, %esi), %xmm3
	movaps	%xmm2, (%edx, %esi)
	pcmpeqd	%xmm3, %xmm0
	pmovmskb %xmm0, %eax
	lea	16(%esi), %esi

	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	movaps	16(%ecx, %esi), %xmm4
	movaps	%xmm3, (%edx, %esi)
	pcmpeqd	%xmm4, %xmm0
	pmovmskb %xmm0, %eax
	lea	16(%esi), %esi

	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	movaps	16(%ecx, %esi), %xmm1
	movaps	%xmm4, (%edx, %esi)
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm0, %eax
	lea	16(%esi), %esi

	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	movaps	16(%ecx, %esi), %xmm2
	movaps	%xmm1, (%edx, %esi)
	pcmpeqd	%xmm2, %xmm0
	pmovmskb %xmm0, %eax
	lea	16(%esi), %esi

	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	movaps	16(%ecx, %esi), %xmm3
	movaps	%xmm2, (%edx, %esi)
	pcmpeqd	%xmm3, %xmm0
	pmovmskb %xmm0, %eax
	lea	16(%esi), %esi

	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	/* Store the last tested block, then round the source down to a
	   64-byte boundary and move the destination by the same delta.  */
	movaps	%xmm3, (%edx, %esi)
	mov	%ecx, %eax
	lea	16(%ecx, %esi), %ecx
	and	$-0x40, %ecx
	sub	%ecx, %eax
	sub	%eax, %edx

	/* Both cursors are advanced before the test inside the loop, so
	   the exit offset of the first block of an iteration is -64.  */
	mov	$-0x40, %esi

/* Main loop: 64 bytes per iteration.  The four blocks are merged with
   pminub and tested once.  pminub is a byte-wise minimum, so the
   merged value can contain a zero dword that no single block holds;
   Aligned64Leave therefore re-tests every block on its own.  */
L(Aligned64Loop):
	movaps	(%ecx), %xmm2
	movaps	32(%ecx), %xmm3
	movaps	%xmm2, %xmm4
	movaps	16(%ecx), %xmm5
	movaps	%xmm3, %xmm6
	movaps	48(%ecx), %xmm7
	pminub	%xmm5, %xmm2
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	lea	64(%edx), %edx
	pcmpeqd	%xmm0, %xmm3
	lea	64(%ecx), %ecx
	pmovmskb %xmm3, %eax

	test	%eax, %eax
	jnz	L(Aligned64Leave)
	movaps	%xmm4, -64(%edx)
	movaps	%xmm5, -48(%edx)
	movaps	%xmm6, -32(%edx)
	movaps	%xmm7, -16(%edx)
	jmp	L(Aligned64Loop)

/* The merged test fired.  Check the four blocks in order, storing each
   block that turns out to be clean and stepping %esi by 16.  The lea
   between test and jnz does not modify the flags.  */
L(Aligned64Leave):
	pcmpeqd	%xmm4, %xmm0
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqd	%xmm5, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm4, -64(%edx)
	test	%eax, %eax
	lea	16(%esi), %esi
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqd	%xmm6, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm5, -48(%edx)
	test	%eax, %eax
	lea	16(%esi), %esi
	jnz	L(CopyFrom1To16Bytes)

	movaps	%xmm6, -32(%edx)
	pcmpeqd	%xmm7, %xmm0
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	lea	16(%esi), %esi
	jnz	L(CopyFrom1To16Bytes)

	/* No block holds a zero dword after all: reset the exit offset,
	   store the fourth block and go back to the main loop.  */
	mov	$-0x40, %esi
	movaps	%xmm7, -16(%edx)
	jmp	L(Aligned64Loop)

	.p2align 4
/* Destination is 16-byte aligned and the source sits 4 bytes past a
   16-byte boundary.  Aligned blocks are loaded from the source and
   palignr $4 joins each consecutive pair into the 16 bytes that
   belong at the destination.  %xmm1 and %xmm3 alternate as the
   "previous block" register across the unrolled steps.  */
L(Shl4):
	movaps	-4(%ecx), %xmm1
	movaps	12(%ecx), %xmm2
/* Also entered from the 64-byte loop below when its merged test fires;
   at that point %xmm1 and %xmm2 hold the same two blocks as here.  */
L(Shl4Start):
	pcmpeqd	%xmm2, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm2, %xmm3

	test	%eax, %eax
	jnz	L(Shl4LoopExit)

	palignr	$4, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	28(%ecx), %xmm2

	pcmpeqd	%xmm2, %xmm0
	lea	16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea	16(%ecx), %ecx
	movaps	%xmm2, %xmm1

	test	%eax, %eax
	jnz	L(Shl4LoopExit)

	palignr	$4, %xmm3, %xmm2
	movaps	%xmm2, (%edx)
	movaps	28(%ecx), %xmm2

	pcmpeqd	%xmm2, %xmm0
	lea	16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea	16(%ecx), %ecx
	movaps	%xmm2, %xmm3

	test	%eax, %eax
	jnz	L(Shl4LoopExit)

	palignr	$4, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	28(%ecx), %xmm2

	pcmpeqd	%xmm2, %xmm0
	lea	16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea	16(%ecx), %ecx

	test	%eax, %eax
	jnz	L(Shl4LoopExit)

	palignr	$4, %xmm3, %xmm2
	movaps	%xmm2, (%edx)
	lea	28(%ecx), %ecx
	lea	16(%edx), %edx

	/* Round the aligned load address down to a 64-byte boundary,
	   restore the 4-byte skew (12 - 16 = -4 relative to it) and
	   pull the destination back by the same number of bytes.  */
	mov	%ecx, %eax
	and	$-0x40, %ecx
	sub	%ecx, %eax
	lea	-12(%ecx), %ecx
	sub	%eax, %edx

	movaps	-4(%ecx), %xmm1

/* 64 bytes per iteration.  The four loaded blocks are merged with
   pminub and tested once; on a hit control returns to Shl4Start,
   which re-tests block by block from the same cursor position.  */
L(Shl4LoopStart):
	movaps	12(%ecx), %xmm2
	movaps	28(%ecx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	44(%ecx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	60(%ecx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqd	%xmm0, %xmm7
	pmovmskb %xmm7, %eax
	/* Keep an unshifted copy of the last block; it becomes %xmm1
	   (the previous block) of the next iteration.  */
	movaps	%xmm5, %xmm7
	palignr	$4, %xmm4, %xmm5
	test	%eax, %eax
	palignr	$4, %xmm3, %xmm4
	jnz	L(Shl4Start)

	palignr	$4, %xmm2, %xmm3
	lea	64(%ecx), %ecx
	palignr	$4, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%edx)
	movaps	%xmm4, 32(%edx)
	movaps	%xmm3, 16(%edx)
	movaps	%xmm2, (%edx)
	lea	64(%edx), %edx
	jmp	L(Shl4LoopStart)

/* The block at 12(%ecx) holds the terminator.  Copy the 12 bytes in
   front of it, step both cursors onto that block and decode %eax the
   same way CopyFrom1To16Bytes does.  %xmm0 is only scratch from here.  */
L(Shl4LoopExit):
	movlpd	(%ecx), %xmm0
	movl	8(%ecx), %esi
	movlpd	%xmm0, (%edx)
	movl	%esi, 8(%edx)
	POP	(%esi)
	add	$12, %edx
	add	$12, %ecx
	test	%al, %al
	jz	L(ExitHigh)
	test	$0x01, %al
	jnz	L(Exit4)
	/* Zero dword is the second one: copy 8 bytes.  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	%edi, %eax
	RETURN

	/* The code below is reached with %esi still pushed.  */
	CFI_PUSH	(%esi)

	.p2align 4
/* Destination is 16-byte aligned and the source sits 8 bytes past a
   16-byte boundary.  Aligned blocks are loaded from the source and
   palignr $8 joins each consecutive pair into the 16 bytes that
   belong at the destination.  %xmm1 and %xmm3 alternate as the
   "previous block" register across the unrolled steps.  */
L(Shl8):
	movaps	-8(%ecx), %xmm1
	movaps	8(%ecx), %xmm2
/* Also entered from the 64-byte loop below when its merged test fires;
   at that point %xmm1 and %xmm2 hold the same two blocks as here.  */
L(Shl8Start):
	pcmpeqd	%xmm2, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm2, %xmm3

	test	%eax, %eax
	jnz	L(Shl8LoopExit)

	palignr	$8, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	24(%ecx), %xmm2

	pcmpeqd	%xmm2, %xmm0
	lea	16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea	16(%ecx), %ecx
	movaps	%xmm2, %xmm1

	test	%eax, %eax
	jnz	L(Shl8LoopExit)

	palignr	$8, %xmm3, %xmm2
	movaps	%xmm2, (%edx)
	movaps	24(%ecx), %xmm2

	pcmpeqd	%xmm2, %xmm0
	lea	16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea	16(%ecx), %ecx
	movaps	%xmm2, %xmm3

	test	%eax, %eax
	jnz	L(Shl8LoopExit)

	palignr	$8, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	24(%ecx), %xmm2

	pcmpeqd	%xmm2, %xmm0
	lea	16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea	16(%ecx), %ecx

	test	%eax, %eax
	jnz	L(Shl8LoopExit)

	palignr	$8, %xmm3, %xmm2
	movaps	%xmm2, (%edx)
	lea	24(%ecx), %ecx
	lea	16(%edx), %edx

	/* Round the aligned load address down to a 64-byte boundary,
	   restore the 8-byte skew and pull the destination back by the
	   same number of bytes.  */
	mov	%ecx, %eax
	and	$-0x40, %ecx
	sub	%ecx, %eax
	lea	-8(%ecx), %ecx
	sub	%eax, %edx

	movaps	-8(%ecx), %xmm1

/* 64 bytes per iteration.  The four loaded blocks are merged with
   pminub and tested once; on a hit control returns to Shl8Start,
   which re-tests block by block from the same cursor position.  */
L(Shl8LoopStart):
	movaps	8(%ecx), %xmm2
	movaps	24(%ecx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	40(%ecx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	56(%ecx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqd	%xmm0, %xmm7
	pmovmskb %xmm7, %eax
	/* Keep an unshifted copy of the last block; it becomes %xmm1
	   (the previous block) of the next iteration.  */
	movaps	%xmm5, %xmm7
	palignr	$8, %xmm4, %xmm5
	test	%eax, %eax
	palignr	$8, %xmm3, %xmm4
	jnz	L(Shl8Start)

	palignr	$8, %xmm2, %xmm3
	lea	64(%ecx), %ecx
	palignr	$8, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%edx)
	movaps	%xmm4, 32(%edx)
	movaps	%xmm3, 16(%edx)
	movaps	%xmm2, (%edx)
	lea	64(%edx), %edx
	jmp	L(Shl8LoopStart)

/* The block at 8(%ecx) holds the terminator.  Copy the 8 bytes in
   front of it, step both cursors onto that block and decode %eax the
   same way CopyFrom1To16Bytes does.  %xmm0 is only scratch from here.  */
L(Shl8LoopExit):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	POP	(%esi)
	add	$8, %edx
	add	$8, %ecx
	test	%al, %al
	jz	L(ExitHigh)
	test	$0x01, %al
	jnz	L(Exit4)
	/* Zero dword is the second one: copy 8 bytes.  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	%edi, %eax
	RETURN

	/* The code below is reached with %esi still pushed.  */
	CFI_PUSH	(%esi)

	.p2align 4
/* Destination is 16-byte aligned and the source sits 12 bytes past a
   16-byte boundary.  Aligned blocks are loaded from the source and
   palignr $12 joins each consecutive pair into the 16 bytes that
   belong at the destination.  %xmm1 and %xmm3 alternate as the
   "previous block" register across the unrolled steps.  */
L(Shl12):
	movaps	-12(%ecx), %xmm1
	movaps	4(%ecx), %xmm2
/* Also entered from the 64-byte loop below when its merged test fires;
   at that point %xmm1 and %xmm2 hold the same two blocks as here.  */
L(Shl12Start):
	pcmpeqd	%xmm2, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm2, %xmm3

	test	%eax, %eax
	jnz	L(Shl12LoopExit)

	palignr	$12, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	20(%ecx), %xmm2

	pcmpeqd	%xmm2, %xmm0
	lea	16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea	16(%ecx), %ecx
	movaps	%xmm2, %xmm1

	test	%eax, %eax
	jnz	L(Shl12LoopExit)

	palignr	$12, %xmm3, %xmm2
	movaps	%xmm2, (%edx)
	movaps	20(%ecx), %xmm2

	pcmpeqd	%xmm2, %xmm0
	lea	16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea	16(%ecx), %ecx
	movaps	%xmm2, %xmm3

	test	%eax, %eax
	jnz	L(Shl12LoopExit)

	palignr	$12, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	20(%ecx), %xmm2

	pcmpeqd	%xmm2, %xmm0
	lea	16(%edx), %edx
	pmovmskb %xmm0, %eax
	lea	16(%ecx), %ecx

	test	%eax, %eax
	jnz	L(Shl12LoopExit)

	palignr	$12, %xmm3, %xmm2
	movaps	%xmm2, (%edx)
	lea	20(%ecx), %ecx
	lea	16(%edx), %edx

	/* Round the aligned load address down to a 64-byte boundary,
	   restore the 12-byte skew and pull the destination back by the
	   same number of bytes.  */
	mov	%ecx, %eax
	and	$-0x40, %ecx
	sub	%ecx, %eax
	lea	-4(%ecx), %ecx
	sub	%eax, %edx

	movaps	-12(%ecx), %xmm1

/* 64 bytes per iteration.  The four loaded blocks are merged with
   pminub and tested once; on a hit control returns to Shl12Start,
   which re-tests block by block from the same cursor position.  */
L(Shl12LoopStart):
	movaps	4(%ecx), %xmm2
	movaps	20(%ecx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	36(%ecx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	52(%ecx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqd	%xmm0, %xmm7
	pmovmskb %xmm7, %eax
	/* Keep an unshifted copy of the last block; it becomes %xmm1
	   (the previous block) of the next iteration.  */
	movaps	%xmm5, %xmm7
	palignr	$12, %xmm4, %xmm5
	test	%eax, %eax
	palignr	$12, %xmm3, %xmm4
	jnz	L(Shl12Start)

	palignr	$12, %xmm2, %xmm3
	lea	64(%ecx), %ecx
	palignr	$12, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%edx)
	movaps	%xmm4, 32(%edx)
	movaps	%xmm3, 16(%edx)
	movaps	%xmm2, (%edx)
	lea	64(%edx), %edx
	jmp	L(Shl12LoopStart)

/* The block at 4(%ecx) holds the terminator.  Copy the 4 bytes in
   front of it, then set %esi to 4 and fall through (past the alignment
   padding) into CopyFrom1To16Bytes, which adds %esi to both cursors.  */
L(Shl12LoopExit):
	movl	(%ecx), %esi
	movl	%esi, (%edx)
	mov	$4, %esi

	.p2align 4
/* Common exit.  In: %esi = byte offset from both cursors to the block
   that holds the terminator, %eax = pmovmskb of the pcmpeqd result for
   that block.  pcmpeqd sets all four bytes of a matching dword, so
   mask bits 0-3, 4-7, 8-11 and 12-15 stand for dwords 0, 1, 2 and 3.
   The copy length is chosen so that it ends with the first zero dword.  */
L(CopyFrom1To16Bytes):
	add	%esi, %edx
	add	%esi, %ecx

	POP	(%esi)
	/* %al == 0: the zero dword is dword 2 or 3.  */
	test	%al, %al
	jz	L(ExitHigh)
	/* Bit 0 set: dword 0 is zero; otherwise it is dword 1.  */
	test	$0x01, %al
	jnz	L(Exit4)
/* Copy 8 bytes and return the original destination.  */
L(Exit8):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	%edi, %eax
	RETURN

	.p2align 4
/* Bit 8 of the mask (bit 0 of %ah) set: dword 2 is zero.  */
L(ExitHigh):
	test	$0x01, %ah
	jnz	L(Exit12)
/* Copy 16 bytes and return the original destination.  */
L(Exit16):
	movdqu	(%ecx), %xmm0
	movdqu	%xmm0, (%edx)
	movl	%edi, %eax
	RETURN

	.p2align 4
/* Copy 4 bytes and return the original destination.  */
L(Exit4):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movl	%edi, %eax
	RETURN

	.p2align 4
/* Copy 12 bytes and return the original destination.  */
L(Exit12):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	8(%ecx), %eax
	movl	%eax, 8(%edx)
	movl	%edi, %eax
	RETURN

/* The ExitTail paths that follow run with nothing pushed.  */
CFI_POP	(%edi)

	.p2align 4
/* Short-string exits taken from the entry checks, before %edi and %esi
   are pushed: %edx is still the destination as passed in and %ecx the
   source.  Each path copies up to and including the zero dword and
   returns %edx in %eax with a plain ret.  */
L(ExitTail4):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movl	%edx, %eax
	ret

	.p2align 4
/* Zero dword is the second one: copy 8 bytes.  */
L(ExitTail8):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	%edx, %eax
	ret

	.p2align 4
/* Zero dword is the third one: copy 12 bytes.  */
L(ExitTail12):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	8(%ecx), %eax
	movl	%eax, 8(%edx)
	movl	%edx, %eax
	ret

	.p2align 4
/* Zero dword is the fourth one: copy 16 bytes.  */
L(ExitTail16):
	movdqu	(%ecx), %xmm0
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	ret

END (__wcscpy_ssse3)
#endif