Blame sysdeps/i386/i586/strchr.S

Packit 6c4009
/* Find character CH in a NUL terminated string.
Packit 6c4009
   Highly optimized version for ix86, x>=5.
Packit 6c4009
   Copyright (C) 1995-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
   Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <sysdep.h>
Packit 6c4009
#include "asm-syntax.h"
Packit 6c4009
Packit 6c4009
/* This version is especially optimized for the i586 (and following?)
Packit 6c4009
   processors.  This is mainly done by using the two pipelines.  The
Packit 6c4009
   version optimized for i486 is weak in this aspect because to get
Packit 6c4009
   as much parallelism we have to execute some *more* instructions.
Packit 6c4009
Packit 6c4009
   The code below is structured to reflect the pairing of the instructions
Packit 6c4009
   as *I think* it is.  I have no processor data book to verify this.
Packit 6c4009
   If you find something you think is incorrect let me know.  */
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* The magic value which is used throughout the whole code.  Adding it
   to a dword produces a carry out of every byte (into bits 8, 16, 24
   and into the carry flag) as long as that byte and all lower bytes
   are non-zero; a missing carry therefore means the dword contains a
   zero byte.  */
#define magic 0xfefefeff

/* Offsets of the arguments from %esp once the prologue has run:
   4 bytes pushed by the caller's `call' plus 16 bytes for the four
   registers pushed below.  */
#define PARMS	4+16	/* space for 4 saved regs */
#define RTN	PARMS
#define STR	RTN
#define CHR	STR+4

/* char *strchr (const char *s, int c)

   Arguments are read from the stack (STR(%esp), CHR(%esp)); the result
   is returned in %eax: a pointer to the first byte of S equal to C, or
   0 if the terminating NUL is reached first.  If C is NUL the pointer
   to the terminator is returned.

   Register use:
     %eax	current position in the string / return value
     %edx	C replicated into all four bytes
     %ecx	dword (or single byte) currently being examined
     %ebx	that dword XOR %edx, i.e. bytes equal to C are zero
     %ebp, %edi	scratch for the NUL test (%edi first holds the
		misalignment of S)
     %esi	scratch for the C test
   %edi, %esi, %ebx and %ebp are saved on entry and restored at
   L(out).  */
	.text
ENTRY (strchr)

	/* Each push lowers %esp by 4, so the CFA offset relative to
	   %esp grows by 4.  */
	pushl %edi		/* Save callee-saved registers.  */
	cfi_adjust_cfa_offset (4)
	pushl %esi
	cfi_adjust_cfa_offset (4)

	pushl %ebx
	cfi_adjust_cfa_offset (4)
	pushl %ebp
	cfi_adjust_cfa_offset (4)

	movl STR(%esp), %eax
	movl CHR(%esp), %edx

	movl %eax, %edi		/* duplicate string pointer for later */
	cfi_rel_offset (edi, 12)
	xorl %ecx, %ecx		/* clear %ecx */

	/* At the moment %edx contains C.  What we need for the
	   algorithm is C in all bytes of the dword.  Avoid
	   operations on 16 bit words because these require a
	   prefix byte (and one more cycle).  */
	movb %dl, %dh		/* now it is 0|0|c|c */
	movb %dl, %cl		/* we construct the lower half in %ecx */

	shll $16, %edx		/* now %edx is c|c|0|0 */
	movb %cl, %ch		/* now %ecx is 0|0|c|c */

	orl %ecx, %edx		/* and finally c|c|c|c */
	andl $3, %edi		/* mask alignment bits */

	jz L(11)		/* alignment is 0 => start loop */

	/* The movb does not touch the flags, so the jp still sees the
	   parity of the andl result: non-zero with even parity means
	   the alignment is 3 and only one byte has to be checked
	   before the next dword boundary.  */
	movb %dl, %cl		/* 0 is needed below */
	jp L(0)			/* exactly two bits set */

	/* Alignment is 1 or 2: check two single bytes.  */
	xorb (%eax), %cl	/* is byte the one we are looking for? */
	jz L(out)		/* yes => return pointer */

	xorb %dl, %cl		/* undo the XOR: %cl = byte; is it NUL? */
	je L(3)			/* yes => return NULL */

	movb 1(%eax), %cl	/* load single byte */
	incl %eax

	cmpb %cl, %dl		/* is byte == C? */
	je L(out)		/* yes => return pointer */

	cmpb $0, %cl		/* is byte NUL? */
	je L(3)			/* yes => return NULL */

	incl %eax
	decl %edi

	jne L(11)		/* alignment was 2 => now aligned */

	/* Alignment was 1 (fall through) or 3 (via jp): one more byte.  */
L(0):	movb (%eax), %cl	/* load single byte */

	cmpb %cl, %dl		/* is byte == C? */
	je L(out)		/* yes => return pointer */

	cmpb $0, %cl		/* is byte NUL? */
	je L(3)			/* yes => return NULL */

	incl %eax		/* increment pointer */

	/* %esi, %ebx and %ebp are first modified from here on, so
	   their save slots are described only now.  */
	cfi_rel_offset (esi, 8)
	cfi_rel_offset (ebx, 4)
	cfi_rel_offset (ebp, 0)

	/* The following code is the preparation for the loop.  The
	   four instructions up to L(1) will not be executed in the loop
	   because the same code is found at the end of the loop, but
	   there it is executed in parallel with other instructions.  */
L(11):	movl (%eax), %ecx
	movl $magic, %ebp

	movl $magic, %edi
	addl %ecx, %ebp

	/* The main loop: it looks complex and indeed it is.  I would
	   love to say `it was hard to write, so it should be hard to
	   read' but I will give some more hints.  To fully understand
	   this code you should first take a look at the i486 version.
	   The basic algorithm is the same, but here the code is
	   organized in a way which permits to use both pipelines all
	   the time.

	   I tried to make it a bit more understandable by indenting
	   the code according to stage in the algorithm.  It goes as
	   follows:
		check for 0 in 1st word
			check for C in 1st word
					check for 0 in 2nd word
						check for C in 2nd word
		check for 0 in 3rd word
			check for C in 3rd word
					check for 0 in 4th word
						check for C in 4th word

	   Please note that doing the test for NUL before the test for
	   C allows us to overlap the test for 0 in the next word with
	   the test for C.

	   On entry to each `check for 0' stage: %ecx = word,
	   %ebp = word+magic, %edi = magic.  The pointer is advanced
	   before the branches, which is why L(4) and L(5) first
	   subtract 4 again.  */

L(1):	xorl %ecx, %ebp			/* (word+magic)^word */
	addl %ecx, %edi			/* add magic word */

	leal 4(%eax), %eax		/* increment pointer (flags kept) */
	jnc L(4)			/* no carry from addl => NUL in word */

		movl %ecx, %ebx		/* duplicate original word */
	orl $magic, %ebp		/* ((word+magic)^word)|magic */

	addl $1, %ebp			/* ((word+magic)^word)|magic == 0xffffffff? */
	jne L(4)				/* no => we found word with NUL */

		movl $magic, %esi	/* load magic value */
		xorl %edx, %ebx		/* clear bytes which are C */

					movl (%eax), %ecx
		addl %ebx, %esi		/* (word+magic) */

					movl $magic, %edi
		jnc L(5)		/* no carry from addl => C in word */

					movl %edi, %ebp
		xorl %ebx, %esi		/* (word+magic)^word */

					addl %ecx, %ebp
		orl $magic, %esi	/* ((word+magic)^word)|magic */

		addl $1, %esi		/* ((word+magic)^word)|magic==0xf..f?*/
		jne L(5)		/* no => we found word with C */

					xorl %ecx, %ebp
					addl %ecx, %edi

					leal 4(%eax), %eax
					jnc L(4)

						movl %ecx, %ebx
					orl $magic, %ebp

					addl $1, %ebp
					jne L(4)

						movl $magic, %esi
						xorl %edx, %ebx

	movl (%eax), %ecx
						addl %ebx, %esi

	movl $magic, %edi
						jnc L(5)

	movl %edi, %ebp
						xorl %ebx, %esi

	addl %ecx, %ebp
						orl $magic, %esi

						addl $1, %esi
						jne L(5)

	xorl %ecx, %ebp
	addl %ecx, %edi

	leal 4(%eax), %eax
	jnc L(4)

		movl %ecx, %ebx
	orl $magic, %ebp

	addl $1, %ebp
	jne L(4)

		movl $magic, %esi
		xorl %edx, %ebx

					movl (%eax), %ecx
		addl %ebx, %esi

					movl $magic, %edi
		jnc L(5)

					movl %edi, %ebp
		xorl %ebx, %esi

					addl %ecx, %ebp
		orl $magic, %esi

		addl $1, %esi
		jne L(5)

					xorl %ecx, %ebp
					addl %ecx, %edi

					leal 4(%eax), %eax
					jnc L(4)

						movl %ecx, %ebx
					orl $magic, %ebp

					addl $1, %ebp
					jne L(4)

						movl $magic, %esi
						xorl %edx, %ebx

	movl (%eax), %ecx
						addl %ebx, %esi

	movl $magic, %edi
						jnc L(5)

	movl %edi, %ebp
						xorl %ebx, %esi

	addl %ecx, %ebp
						orl $magic, %esi

						addl $1, %esi

						je L(1)	/* no C in 4th word => next round */

	/* We know there is no NUL byte but a C byte in the word.
	   %ebx contains NUL in this particular byte.  %eax already
	   points past the word.  */
L(5):	subl $4, %eax		/* adjust pointer */
	testb %bl, %bl		/* first byte == C? */

	jz L(out)		/* yes => return pointer */

	incl %eax		/* increment pointer */
	testb %bh, %bh		/* second byte == C? */

	jz L(out)		/* yes => return pointer */

	shrl $16, %ebx		/* make upper bytes accessible */
	incl %eax		/* increment pointer */

	cmpb $0, %bl		/* third byte == C */
	je L(out)		/* yes => return pointer */

	/* C is in this word and was not one of the first three
	   bytes, so it is the fourth one.  */
	incl %eax		/* increment pointer */

L(out):	popl %ebp		/* restore saved registers */
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebp)
	popl %ebx
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebx)

	popl %esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	popl %edi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	ret

	/* The code below is only reached by jumps taken while all four
	   registers are still on the stack, so re-establish the frame
	   description that the epilogue above has undone.  */
	cfi_adjust_cfa_offset (16)
	cfi_rel_offset (edi, 12)
	cfi_rel_offset (esi, 8)
	cfi_rel_offset (ebx, 4)
	cfi_rel_offset (ebp, 0)
	/* We know there is a NUL byte in the word.  But we have to test
	   whether there is a C byte before it in the word.  %ecx holds
	   the word and %eax already points past it.  */
L(4):	subl $4, %eax		/* adjust pointer */
	cmpb %dl, %cl		/* first byte == C? */

	je L(out)		/* yes => return pointer */

	cmpb $0, %cl		/* first byte == NUL? */
	je L(3)			/* yes => return NULL */

	incl %eax		/* increment pointer */

	cmpb %dl, %ch		/* second byte == C? */
	je L(out)		/* yes => return pointer */

	cmpb $0, %ch		/* second byte == NUL? */
	je L(3)			/* yes => return NULL */

	shrl $16, %ecx		/* make upper bytes accessible */
	incl %eax		/* increment pointer */

	cmpb %dl, %cl		/* third byte == C? */
	je L(out)		/* yes => return pointer */

	cmpb $0, %cl		/* third byte == NUL? */
	je L(3)			/* yes => return NULL */

	incl %eax		/* increment pointer */

	/* The test for the fourth byte is necessary!  The first three
	   bytes are neither C nor NUL, so the fourth byte is the NUL,
	   and C itself may be NUL.  */
	cmpb %dl, %ch		/* fourth byte == C? */
	je L(out)		/* yes => return pointer */

L(3):	xorl %eax, %eax		/* not found: return NULL */
	jmp L(out)
END (strchr)

/* NOTE(review): presumably `index' can be defined as a macro by an
   included header and is dropped here so that the alias below names a
   real symbol -- confirm.  weak_alias and libc_hidden_builtin_def are
   defined outside this file.  */
#undef index
weak_alias (strchr, index)
libc_hidden_builtin_def (strchr)