Blame string/strchr.c

Packit 6c4009
/* Copyright (C) 1991-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
   Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
Packit 6c4009
   with help from Dan Sahlin (dan@sics.se) and
Packit 6c4009
   bug fix and commentary by Jim Blandy (jimb@ai.mit.edu);
Packit 6c4009
   adaptation to strchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
Packit 6c4009
   and implemented by Roland McGrath (roland@ai.mit.edu).
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <string.h>
Packit 6c4009
#include <stdlib.h>
Packit 6c4009
Packit 6c4009
#undef strchr
Packit 6c4009
Packit 6c4009
#ifndef STRCHR
Packit 6c4009
# define STRCHR strchr
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
/* Find the first occurrence of C in S.  */
Packit 6c4009
char *
Packit 6c4009
STRCHR (const char *s, int c_in)
Packit 6c4009
{
Packit 6c4009
  const unsigned char *char_ptr;
Packit 6c4009
  const unsigned long int *longword_ptr;
Packit 6c4009
  unsigned long int longword, magic_bits, charmask;
Packit 6c4009
  unsigned char c;
Packit 6c4009
Packit 6c4009
  c = (unsigned char) c_in;
Packit 6c4009
Packit 6c4009
  /* Handle the first few characters by reading one character at a time.
Packit 6c4009
     Do this until CHAR_PTR is aligned on a longword boundary.  */
Packit 6c4009
  for (char_ptr = (const unsigned char *) s;
Packit 6c4009
       ((unsigned long int) char_ptr & (sizeof (longword) - 1)) != 0;
Packit 6c4009
       ++char_ptr)
Packit 6c4009
    if (*char_ptr == c)
Packit 6c4009
      return (void *) char_ptr;
Packit 6c4009
    else if (*char_ptr == '\0')
Packit 6c4009
      return NULL;
Packit 6c4009
Packit 6c4009
  /* All these elucidatory comments refer to 4-byte longwords,
Packit 6c4009
     but the theory applies equally well to 8-byte longwords.  */
Packit 6c4009
Packit 6c4009
  longword_ptr = (unsigned long int *) char_ptr;
Packit 6c4009
Packit 6c4009
  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
Packit 6c4009
     the "holes."  Note that there is a hole just to the left of
Packit 6c4009
     each byte, with an extra at the end:
Packit 6c4009
Packit 6c4009
     bits:  01111110 11111110 11111110 11111111
Packit 6c4009
     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
Packit 6c4009
Packit 6c4009
     The 1-bits make sure that carries propagate to the next 0-bit.
Packit 6c4009
     The 0-bits provide holes for carries to fall into.  */
Packit 6c4009
  magic_bits = -1;
Packit 6c4009
  magic_bits = magic_bits / 0xff * 0xfe << 1 >> 1 | 1;
Packit 6c4009
Packit 6c4009
  /* Set up a longword, each of whose bytes is C.  */
Packit 6c4009
  charmask = c | (c << 8);
Packit 6c4009
  charmask |= charmask << 16;
Packit 6c4009
  if (sizeof (longword) > 4)
Packit 6c4009
    /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
Packit 6c4009
    charmask |= (charmask << 16) << 16;
Packit 6c4009
  if (sizeof (longword) > 8)
Packit 6c4009
    abort ();
Packit 6c4009
Packit 6c4009
  /* Instead of the traditional loop which tests each character,
Packit 6c4009
     we will test a longword at a time.  The tricky part is testing
Packit 6c4009
     if *any of the four* bytes in the longword in question are zero.  */
Packit 6c4009
  for (;;)
Packit 6c4009
    {
Packit 6c4009
      /* We tentatively exit the loop if adding MAGIC_BITS to
Packit 6c4009
	 LONGWORD fails to change any of the hole bits of LONGWORD.
Packit 6c4009
Packit 6c4009
	 1) Is this safe?  Will it catch all the zero bytes?
Packit 6c4009
	 Suppose there is a byte with all zeros.  Any carry bits
Packit 6c4009
	 propagating from its left will fall into the hole at its
Packit 6c4009
	 least significant bit and stop.  Since there will be no
Packit 6c4009
	 carry from its most significant bit, the LSB of the
Packit 6c4009
	 byte to the left will be unchanged, and the zero will be
Packit 6c4009
	 detected.
Packit 6c4009
Packit 6c4009
	 2) Is this worthwhile?  Will it ignore everything except
Packit 6c4009
	 zero bytes?  Suppose every byte of LONGWORD has a bit set
Packit 6c4009
	 somewhere.  There will be a carry into bit 8.  If bit 8
Packit 6c4009
	 is set, this will carry into bit 16.  If bit 8 is clear,
Packit 6c4009
	 one of bits 9-15 must be set, so there will be a carry
Packit 6c4009
	 into bit 16.  Similarly, there will be a carry into bit
Packit 6c4009
	 24.  If one of bits 24-30 is set, there will be a carry
Packit 6c4009
	 into bit 31, so all of the hole bits will be changed.
Packit 6c4009
Packit 6c4009
	 The one misfire occurs when bits 24-30 are clear and bit
Packit 6c4009
	 31 is set; in this case, the hole at bit 31 is not
Packit 6c4009
	 changed.  If we had access to the processor carry flag,
Packit 6c4009
	 we could close this loophole by putting the fourth hole
Packit 6c4009
	 at bit 32!
Packit 6c4009
Packit 6c4009
	 So it ignores everything except 128's, when they're aligned
Packit 6c4009
	 properly.
Packit 6c4009
Packit 6c4009
	 3) But wait!  Aren't we looking for C as well as zero?
Packit 6c4009
	 Good point.  So what we do is XOR LONGWORD with a longword,
Packit 6c4009
	 each of whose bytes is C.  This turns each byte that is C
Packit 6c4009
	 into a zero.  */
Packit 6c4009
Packit 6c4009
      longword = *longword_ptr++;
Packit 6c4009
Packit 6c4009
      /* Add MAGIC_BITS to LONGWORD.  */
Packit 6c4009
      if ((((longword + magic_bits)
Packit 6c4009
Packit 6c4009
	    /* Set those bits that were unchanged by the addition.  */
Packit 6c4009
	    ^ ~longword)
Packit 6c4009
Packit 6c4009
	   /* Look at only the hole bits.  If any of the hole bits
Packit 6c4009
	      are unchanged, most likely one of the bytes was a
Packit 6c4009
	      zero.  */
Packit 6c4009
	   & ~magic_bits) != 0 ||
Packit 6c4009
Packit 6c4009
	  /* That caught zeroes.  Now test for C.  */
Packit 6c4009
	  ((((longword ^ charmask) + magic_bits) ^ ~(longword ^ charmask))
Packit 6c4009
	   & ~magic_bits) != 0)
Packit 6c4009
	{
Packit 6c4009
	  /* Which of the bytes was C or zero?
Packit 6c4009
	     If none of them were, it was a misfire; continue the search.  */
Packit 6c4009
Packit 6c4009
	  const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
Packit 6c4009
Packit 6c4009
	  if (*cp == c)
Packit 6c4009
	    return (char *) cp;
Packit 6c4009
	  else if (*cp == '\0')
Packit 6c4009
	    return NULL;
Packit 6c4009
	  if (*++cp == c)
Packit 6c4009
	    return (char *) cp;
Packit 6c4009
	  else if (*cp == '\0')
Packit 6c4009
	    return NULL;
Packit 6c4009
	  if (*++cp == c)
Packit 6c4009
	    return (char *) cp;
Packit 6c4009
	  else if (*cp == '\0')
Packit 6c4009
	    return NULL;
Packit 6c4009
	  if (*++cp == c)
Packit 6c4009
	    return (char *) cp;
Packit 6c4009
	  else if (*cp == '\0')
Packit 6c4009
	    return NULL;
Packit 6c4009
	  if (sizeof (longword) > 4)
Packit 6c4009
	    {
Packit 6c4009
	      if (*++cp == c)
Packit 6c4009
		return (char *) cp;
Packit 6c4009
	      else if (*cp == '\0')
Packit 6c4009
		return NULL;
Packit 6c4009
	      if (*++cp == c)
Packit 6c4009
		return (char *) cp;
Packit 6c4009
	      else if (*cp == '\0')
Packit 6c4009
		return NULL;
Packit 6c4009
	      if (*++cp == c)
Packit 6c4009
		return (char *) cp;
Packit 6c4009
	      else if (*cp == '\0')
Packit 6c4009
		return NULL;
Packit 6c4009
	      if (*++cp == c)
Packit 6c4009
		return (char *) cp;
Packit 6c4009
	      else if (*cp == '\0')
Packit 6c4009
		return NULL;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  return NULL;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
#ifdef weak_alias
Packit 6c4009
# undef index
Packit 6c4009
weak_alias (strchr, index)
Packit 6c4009
#endif
Packit 6c4009
libc_hidden_builtin_def (strchr)