Blame sysdeps/x86_64/strcat.S

Packit 6c4009
/* strcat(dest, src) -- Append SRC on the end of DEST.
   Optimized for x86-64.
   Copyright (C) 2002-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Andreas Jaeger <aj@suse.de>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <sysdep.h>
Packit 6c4009
#include "asm-syntax.h"
Packit 6c4009
Packit 6c4009
/* Will be removed when new strcpy implementation gets merged.  */
Packit 6c4009
Packit 6c4009
	.text
Packit 6c4009
/* char *strcat (char *dest, const char *src)

   Append the NUL-terminated string SRC (terminator included) to the
   end of the NUL-terminated string DEST and return DEST.

   In:		%rdi = DEST, %rsi = SRC.
   Out:		%rax = DEST (%rdi is never modified).
   Clobbers:	%rcx, %rdx, %rsi, %r8, %r9 and the flags.
   Stack:	not used.

   The function works in two steps:
     1. Find the terminating NUL of DEST (%rax is the scan pointer).
     2. Copy SRC there (%rsi = read pointer, %rdx = write pointer).

   Both steps first advance byte by byte until the pointer being read
   is 8-byte aligned and then process one aligned quadword per step,
   unrolled four times.  Because whole aligned quadwords are loaded,
   bytes that follow the terminating NUL inside the same aligned
   quadword may be read; they are never written.

   NUL detection: %r8 holds 0xfefefefefefefeff, which is
   -0x0101010101010101 modulo 2^64, so adding it subtracts one from
   every byte.  Every non-zero byte produces a carry into the next
   byte.  A missing carry (out of the top byte: CF clear; between
   bytes: checked with the xor/or/inc sequence) means that the word
   contains a NUL byte.  */
ENTRY (strcat)
	movq %rdi, %rcx		/* Copy of DEST for the alignment test.  */
	andl $7, %ecx		/* %ecx = DEST & 7; ZF set if aligned.  */
	movq %rdi, %rax		/* %rax scans DEST (flags are preserved).  */
	movq $0xfefefefefefefeff,%r8	/* Magic value, kept for both steps.  */

	/* First step: Find end of destination.  */
	jz 4f			/* aligned => start loop (ZF from andl) */

	neg %ecx		/* %ecx = 8 - (DEST & 7): number of bytes  */
	addl $8,%ecx		/* up to the next 8-byte boundary.  */
	/* Search the first bytes directly.  */
0:	cmpb $0x0,(%rax)	/* is byte NUL? */
	je 2f			/* yes => start copy */
	incq %rax		/* increment pointer */
	decl %ecx
	jnz 0b



	/* Now the scan pointer %rax is 8-byte aligned.  Scan DEST for
	   the NUL byte one quadword at a time.  */
	.p2align 4
4:
	/* First unroll.  */
	movq (%rax), %rcx	/* load the quadword (8 bytes) in question */
	addq $8,%rax		/* adjust pointer for next word */
	movq %r8, %rdx		/* magic value */
	addq %rcx, %rdx		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc 3f			/* no carry out => word holds a NUL */
	xorq %rcx, %rdx		/* (word+magic)^word */
	orq %r8, %rdx		/* set all non-carry bits */
	incq %rdx		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz 3f			/* found NUL => locate it */

	/* Second unroll.  */
	movq (%rax), %rcx	/* load the quadword (8 bytes) in question */
	addq $8,%rax		/* adjust pointer for next word */
	movq %r8, %rdx		/* magic value */
	addq %rcx, %rdx		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc 3f			/* no carry out => word holds a NUL */
	xorq %rcx, %rdx		/* (word+magic)^word */
	orq %r8, %rdx		/* set all non-carry bits */
	incq %rdx		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz 3f			/* found NUL => locate it */

	/* Third unroll.  */
	movq (%rax), %rcx	/* load the quadword (8 bytes) in question */
	addq $8,%rax		/* adjust pointer for next word */
	movq %r8, %rdx		/* magic value */
	addq %rcx, %rdx		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc 3f			/* no carry out => word holds a NUL */
	xorq %rcx, %rdx		/* (word+magic)^word */
	orq %r8, %rdx		/* set all non-carry bits */
	incq %rdx		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz 3f			/* found NUL => locate it */

	/* Fourth unroll.  */
	movq (%rax), %rcx	/* load the quadword (8 bytes) in question */
	addq $8,%rax		/* adjust pointer for next word */
	movq %r8, %rdx		/* magic value */
	addq %rcx, %rdx		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc 3f			/* no carry out => word holds a NUL */
	xorq %rcx, %rdx		/* (word+magic)^word */
	orq %r8, %rdx		/* set all non-carry bits */
	incq %rdx		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jz 4b			/* no NUL found => continue loop */

	/* The quadword in %rcx contains a NUL.  Find its position by
	   testing the bytes from lowest to highest address.  */
	.p2align 4		/* Align, it's a jump target.  */
3:	subq $8,%rax		/* correct pointer increment.  */

	testb %cl, %cl		/* is first byte NUL? */
	jz 2f			/* yes => start copy */
	incq %rax		/* increment pointer */

	testb %ch, %ch		/* is second byte NUL? */
	jz 2f			/* yes => start copy */
	incq %rax		/* increment pointer */

	testl $0x00ff0000, %ecx /* is third byte NUL? */
	jz 2f			/* yes => start copy */
	incq %rax		/* increment pointer */

	testl $0xff000000, %ecx /* is fourth byte NUL? */
	jz 2f			/* yes => start copy */
	incq %rax		/* increment pointer */

	shrq $32, %rcx		/* look at other half.  */

	testb %cl, %cl		/* is fifth byte NUL? */
	jz 2f			/* yes => start copy */
	incq %rax		/* increment pointer */

	testb %ch, %ch		/* is sixth byte NUL? */
	jz 2f			/* yes => start copy */
	incq %rax		/* increment pointer */

	testl $0xff0000, %ecx	/* is seventh byte NUL? */
	jz 2f			/* yes => start copy */
	incq %rax		/* no => the eighth byte is the NUL; point
				   at it and fall through.  */

2:
	/* Second step: Copy source to destination.  %rax points at the
	   terminating NUL of DEST.  */

	movq	%rsi, %rcx	/* Copy of SRC for the alignment test.  */
	andl	$7,%ecx		/* %ecx = SRC & 7; ZF set if aligned.  */
	movq	%rax, %rdx	/* %rdx = write pointer (flags preserved).  */
	jz	22f		/* aligned => start loop */

	neg	%ecx		/* %ecx = 8 - (SRC & 7): number of bytes  */
	addl	$8, %ecx	/* up to the next 8-byte boundary.  */
	/* Align the source pointer.  */
21:
	movb	(%rsi), %al	/* Fetch a byte */
	testb	%al, %al	/* Is it NUL? */
	movb	%al, (%rdx)	/* Store it (the NUL is stored as well) */
	jz	24f		/* If it was NUL, done! */
	incq	%rsi
	incq	%rdx
	decl	%ecx
	jnz	21b

	/* Now the source is aligned.  Unfortunately we cannot force
	   to have both source and destination aligned, so ignore the
	   alignment of the destination.  */
	.p2align 4
22:
	/* 1st unroll.  */
	movq	(%rsi), %rax	/* Read quadword (8 bytes).  */
	addq	$8, %rsi	/* Adjust pointer for next word.  */
	movq	%rax, %r9	/* Save a copy for NUL finding.  */
	addq	%r8, %r9	/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc	23f		/* no carry out => word holds a NUL */
	xorq	%rax, %r9	/* (word+magic)^word */
	orq	%r8, %r9	/* set all non-carry bits */
	incq	%r9		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */

	jnz	23f		/* found NUL => copy the tail bytewise */

	movq	%rax, (%rdx)	/* Write value to destination.  */
	addq	$8, %rdx	/* Adjust pointer.  */

	/* 2nd unroll.  */
	movq	(%rsi), %rax	/* Read quadword (8 bytes).  */
	addq	$8, %rsi	/* Adjust pointer for next word.  */
	movq	%rax, %r9	/* Save a copy for NUL finding.  */
	addq	%r8, %r9	/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc	23f		/* no carry out => word holds a NUL */
	xorq	%rax, %r9	/* (word+magic)^word */
	orq	%r8, %r9	/* set all non-carry bits */
	incq	%r9		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */

	jnz	23f		/* found NUL => copy the tail bytewise */

	movq	%rax, (%rdx)	/* Write value to destination.  */
	addq	$8, %rdx	/* Adjust pointer.  */

	/* 3rd unroll.  */
	movq	(%rsi), %rax	/* Read quadword (8 bytes).  */
	addq	$8, %rsi	/* Adjust pointer for next word.  */
	movq	%rax, %r9	/* Save a copy for NUL finding.  */
	addq	%r8, %r9	/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc	23f		/* no carry out => word holds a NUL */
	xorq	%rax, %r9	/* (word+magic)^word */
	orq	%r8, %r9	/* set all non-carry bits */
	incq	%r9		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */

	jnz	23f		/* found NUL => copy the tail bytewise */

	movq	%rax, (%rdx)	/* Write value to destination.  */
	addq	$8, %rdx	/* Adjust pointer.  */

	/* 4th unroll.  */
	movq	(%rsi), %rax	/* Read quadword (8 bytes).  */
	addq	$8, %rsi	/* Adjust pointer for next word.  */
	movq	%rax, %r9	/* Save a copy for NUL finding.  */
	addq	%r8, %r9	/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc	23f		/* no carry out => word holds a NUL */
	xorq	%rax, %r9	/* (word+magic)^word */
	orq	%r8, %r9	/* set all non-carry bits */
	incq	%r9		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */

	jnz	23f		/* found NUL => copy the tail bytewise */

	movq	%rax, (%rdx)	/* Write value to destination.  */
	addq	$8, %rdx	/* Adjust pointer.  */
	jmp	22b		/* Next iteration.  */

	/* Do the last few bytes.  %rax contains the value to write and
	   is known to hold a NUL byte, so this loop stops after at most
	   eight bytes and nothing is stored past the NUL.
	   The loop is unrolled twice.  */
	.p2align 4
23:
	movb	%al, (%rdx)	/* 1st byte.  */
	testb	%al, %al	/* Is it NUL?  */
	jz	24f		/* yes, finish.  */
	incq	%rdx		/* Increment destination.  */
	movb	%ah, (%rdx)	/* 2nd byte.  */
	testb	%ah, %ah	/* Is it NUL?  */
	jz	24f		/* yes, finish.  */
	incq	%rdx		/* Increment destination.  */
	shrq	$16, %rax	/* Shift...  */
	jmp	23b		/* and look at next two bytes in %rax.  */


24:
	movq	%rdi, %rax	/* Return the original destination pointer.  */
	retq
END (strcat)
Packit 6c4009
libc_hidden_builtin_def (strcat)