/* strlen -- Compute length of NUL terminated string.
   Highly optimized version for ix86, x>=5.
   Copyright (C) 1995-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

/* This version is especially optimized for the i586 (and following?)
   processors.  This is mainly done by using the two pipelines.  The
   version optimized for the i486 is weak in this respect because, to
   get as much parallelism as possible, we have to execute some *more*
   instructions.

   The code below is structured to reflect the pairing of the instructions
   as *I think* it is.  I have no processor data book to verify this.
   If you find something you think is incorrect, let me know.  */


/* The magic value used throughout the code for the word-at-a-time NUL
   test.  It is added to each word read from the string; its complement
   (~magic == 0x01010100) selects the "hole" bits that are examined
   after the addition.  */
#define magic 0xfefefeff

/* Offset from %esp of the first argument at function entry.  It is 4
   because no registers are pushed, so only the return address lies
   below the argument.  */
#define PARMS	4		/* no space for saved regs */
#define STR	PARMS

	.text
/* strlen: return in %eax the number of bytes that precede the first NUL
   byte of the string whose address is passed on the stack at STR(%esp).

   Register use (nothing is saved or restored):
     %eax  running pointer into the string, finally the length
     %ecx  the word currently being examined
     %edx  alignment bits in the prologue, scratch in the loop; it is
	   zero every time a word check starts

   The prologue tests single bytes until the pointer is a multiple of 4.
   The main loop then tests one aligned 4-byte word per step and is
   unrolled four times.  Blank lines separate the instruction pairs the
   code is scheduled in; do not reorder instructions.  */
ENTRY (strlen)

	movl STR(%esp), %eax
	movl $3, %edx		/* load mask (= 3) */

	andl %eax, %edx		/* %edx = address & 3; sets ZF and PF */

	jz L(1)			/* aligned => start loop */
	jp L(0)			/* even parity: both bits set (%edx == 3)
				   => only one byte before alignment */

	cmpb %dh, (%eax)	/* is byte NUL?  (%dh == 0 as %edx <= 3) */
	je L(2)			/* yes => return */

	incl %eax		/* increment pointer */
	cmpb %dh, (%eax)	/* is byte NUL? */

	je L(2)			/* yes => return */

	incl %eax		/* increment pointer */
	xorl $2, %edx		/* %edx was 2 => 0: pointer is aligned now;
				   %edx was 1 => 3: one more byte to check */

	jz L(1)

L(0):	cmpb %dh, (%eax)	/* is byte NUL? */
	je L(2)			/* yes => return */

	incl %eax		/* increment pointer */
	xorl %edx, %edx		/* We need %edx == 0 for later */

      /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
	 change any of the hole bits of LONGWORD.

	 1) Is this safe?  Will it catch all the zero bytes?
	 Suppose there is a byte with all zeros.  Any carry bits
	 propagating from its left will fall into the hole at its
	 least significant bit and stop.  Since there will be no
	 carry from its most significant bit, the LSB of the
	 byte to the left will be unchanged, and the zero will be
	 detected.

	 2) Is this worthwhile?  Will it ignore everything except
	 zero bytes?  Suppose every byte of LONGWORD has a bit set
	 somewhere.  There will be a carry into bit 8.  If bit 8
	 is set, this will carry into bit 16.  If bit 8 is clear,
	 one of bits 9-15 must be set, so there will be a carry
	 into bit 16.  Similarly, there will be a carry into bit
	 24.  If one of bits 24-31 is set, there will be a carry
	 into bit 32 (=carry flag), so all of the hole bits will
	 be changed.

	 The hole bits are bits 8, 16 and 24 (selected by ~magic) plus
	 the carry flag, which is tested separately with jnc.

	 Note: %edx == 0 in any case here.  */

L(1):
	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
	addl $4, %eax		/* adjust pointer for *next* word */

	subl %ecx, %edx		/* first step to complement word: -word */
	addl $magic, %ecx	/* add magic word; CF = carry out of bit 31 */

	decl %edx		/* %edx = ~word; decl leaves CF untouched */
	jnc L(3)		/* addl produced no carry => NUL in word */

	xorl %ecx, %edx		/* (word+magic) ^ ~word: a bit is set where
				   the addition left the bit unchanged */

	andl $~magic, %edx	/* keep only the hole bits */

	jne L(3)		/* a hole bit is unchanged => determine byte */


	/* Second, third and fourth copy of the word check above.  %edx is
	   zero again here because the andl result was zero.  */
	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
	addl $4, %eax		/* adjust pointer for *next* word */

	subl %ecx, %edx		/* first step to complement word: -word */
	addl $magic, %ecx	/* add magic word; CF = carry out of bit 31 */

	decl %edx		/* %edx = ~word; decl leaves CF untouched */
	jnc L(3)		/* addl produced no carry => NUL in word */

	xorl %ecx, %edx		/* (word+magic) ^ ~word */

	andl $~magic, %edx	/* keep only the hole bits */

	jne L(3)		/* a hole bit is unchanged => determine byte */


	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
	addl $4, %eax		/* adjust pointer for *next* word */

	subl %ecx, %edx		/* first step to complement word: -word */
	addl $magic, %ecx	/* add magic word; CF = carry out of bit 31 */

	decl %edx		/* %edx = ~word; decl leaves CF untouched */
	jnc L(3)		/* addl produced no carry => NUL in word */

	xorl %ecx, %edx		/* (word+magic) ^ ~word */

	andl $~magic, %edx	/* keep only the hole bits */

	jne L(3)		/* a hole bit is unchanged => determine byte */


	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
	addl $4, %eax		/* adjust pointer for *next* word */

	subl %ecx, %edx		/* first step to complement word: -word */
	addl $magic, %ecx	/* add magic word; CF = carry out of bit 31 */

	decl %edx		/* %edx = ~word; decl leaves CF untouched */
	jnc L(3)		/* addl produced no carry => NUL in word */

	xorl %ecx, %edx		/* (word+magic) ^ ~word */

	andl $~magic, %edx	/* keep only the hole bits */

	je L(1)			/* all hole bits changed => start loop again */


	/* The word in %ecx (still with magic added) contains a NUL byte and
	   %eax already points behind it.  Find the first NUL, lowest
	   address first.  */
L(3):	subl $4, %eax		/* correct too early pointer increment */
	subl $magic, %ecx	/* undo the addl: %ecx = original word */

	cmpb $0, %cl		/* lowest byte NUL? */
	jz L(2)			/* yes => return */

	incl %eax		/* increment pointer */
	testb %ch, %ch		/* second byte NUL? */

	jz L(2)			/* yes => return */

	shrl $16, %ecx		/* make upper bytes accessible */
	incl %eax		/* increment pointer */

	cmpb $0, %cl		/* is third byte NUL? */
	jz L(2)			/* yes => return */

	incl %eax		/* otherwise the NUL is the fourth byte */

L(2):	subl STR(%esp), %eax	/* now compute the length as difference
				   between start and terminating NUL
				   character */
	ret
END (strlen)
libc_hidden_builtin_def (strlen)