Blame string/strnlen.c

Packit 6c4009
/* Find the length of STRING, but scan at most MAXLEN characters.
Packit 6c4009
   Copyright (C) 1991-2018 Free Software Foundation, Inc.
Packit 6c4009
   Contributed by Jakub Jelinek <jakub@redhat.com>.
Packit 6c4009
Packit 6c4009
   Based on strlen written by Torbjorn Granlund (tege@sics.se),
Packit 6c4009
   with help from Dan Sahlin (dan@sics.se);
Packit 6c4009
   commentary by Jim Blandy (jimb@ai.mit.edu).
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public License as
Packit 6c4009
   published by the Free Software Foundation; either version 2.1 of the
Packit 6c4009
   License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; see the file COPYING.LIB.  If
Packit 6c4009
   not, see <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <string.h>
Packit 6c4009
#include <stdlib.h>
Packit 6c4009
Packit 6c4009
/* Find the length of S, but scan at most MAXLEN characters.  If no
Packit 6c4009
   '\0' terminator is found in that many characters, return MAXLEN.  */
Packit 6c4009
Packit 6c4009
#ifdef STRNLEN
Packit 6c4009
# define __strnlen STRNLEN
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
size_t
Packit 6c4009
__strnlen (const char *str, size_t maxlen)
Packit 6c4009
{
Packit 6c4009
  const char *char_ptr, *end_ptr = str + maxlen;
Packit 6c4009
  const unsigned long int *longword_ptr;
Packit 6c4009
  unsigned long int longword, himagic, lomagic;
Packit 6c4009
Packit 6c4009
  if (maxlen == 0)
Packit 6c4009
    return 0;
Packit 6c4009
Packit 6c4009
  if (__glibc_unlikely (end_ptr < str))
Packit 6c4009
    end_ptr = (const char *) ~0UL;
Packit 6c4009
Packit 6c4009
  /* Handle the first few characters by reading one character at a time.
Packit 6c4009
     Do this until CHAR_PTR is aligned on a longword boundary.  */
Packit 6c4009
  for (char_ptr = str; ((unsigned long int) char_ptr
Packit 6c4009
			& (sizeof (longword) - 1)) != 0;
Packit 6c4009
       ++char_ptr)
Packit 6c4009
    if (*char_ptr == '\0')
Packit 6c4009
      {
Packit 6c4009
	if (char_ptr > end_ptr)
Packit 6c4009
	  char_ptr = end_ptr;
Packit 6c4009
	return char_ptr - str;
Packit 6c4009
      }
Packit 6c4009
Packit 6c4009
  /* All these elucidatory comments refer to 4-byte longwords,
Packit 6c4009
     but the theory applies equally well to 8-byte longwords.  */
Packit 6c4009
Packit 6c4009
  longword_ptr = (unsigned long int *) char_ptr;
Packit 6c4009
Packit 6c4009
  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
Packit 6c4009
     the "holes."  Note that there is a hole just to the left of
Packit 6c4009
     each byte, with an extra at the end:
Packit 6c4009
Packit 6c4009
     bits:  01111110 11111110 11111110 11111111
Packit 6c4009
     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
Packit 6c4009
Packit 6c4009
     The 1-bits make sure that carries propagate to the next 0-bit.
Packit 6c4009
     The 0-bits provide holes for carries to fall into.  */
Packit 6c4009
  himagic = 0x80808080L;
Packit 6c4009
  lomagic = 0x01010101L;
Packit 6c4009
  if (sizeof (longword) > 4)
Packit 6c4009
    {
Packit 6c4009
      /* 64-bit version of the magic.  */
Packit 6c4009
      /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
Packit 6c4009
      himagic = ((himagic << 16) << 16) | himagic;
Packit 6c4009
      lomagic = ((lomagic << 16) << 16) | lomagic;
Packit 6c4009
    }
Packit 6c4009
  if (sizeof (longword) > 8)
Packit 6c4009
    abort ();
Packit 6c4009
Packit 6c4009
  /* Instead of the traditional loop which tests each character,
Packit 6c4009
     we will test a longword at a time.  The tricky part is testing
Packit 6c4009
     if *any of the four* bytes in the longword in question are zero.  */
Packit 6c4009
  while (longword_ptr < (unsigned long int *) end_ptr)
Packit 6c4009
    {
Packit 6c4009
      /* We tentatively exit the loop if adding MAGIC_BITS to
Packit 6c4009
	 LONGWORD fails to change any of the hole bits of LONGWORD.
Packit 6c4009
Packit 6c4009
	 1) Is this safe?  Will it catch all the zero bytes?
Packit 6c4009
	 Suppose there is a byte with all zeros.  Any carry bits
Packit 6c4009
	 propagating from its left will fall into the hole at its
Packit 6c4009
	 least significant bit and stop.  Since there will be no
Packit 6c4009
	 carry from its most significant bit, the LSB of the
Packit 6c4009
	 byte to the left will be unchanged, and the zero will be
Packit 6c4009
	 detected.
Packit 6c4009
Packit 6c4009
	 2) Is this worthwhile?  Will it ignore everything except
Packit 6c4009
	 zero bytes?  Suppose every byte of LONGWORD has a bit set
Packit 6c4009
	 somewhere.  There will be a carry into bit 8.  If bit 8
Packit 6c4009
	 is set, this will carry into bit 16.  If bit 8 is clear,
Packit 6c4009
	 one of bits 9-15 must be set, so there will be a carry
Packit 6c4009
	 into bit 16.  Similarly, there will be a carry into bit
Packit 6c4009
	 24.  If one of bits 24-30 is set, there will be a carry
Packit 6c4009
	 into bit 31, so all of the hole bits will be changed.
Packit 6c4009
Packit 6c4009
	 The one misfire occurs when bits 24-30 are clear and bit
Packit 6c4009
	 31 is set; in this case, the hole at bit 31 is not
Packit 6c4009
	 changed.  If we had access to the processor carry flag,
Packit 6c4009
	 we could close this loophole by putting the fourth hole
Packit 6c4009
	 at bit 32!
Packit 6c4009
Packit 6c4009
	 So it ignores everything except 128's, when they're aligned
Packit 6c4009
	 properly.  */
Packit 6c4009
Packit 6c4009
      longword = *longword_ptr++;
Packit 6c4009
Packit 6c4009
      if ((longword - lomagic) & himagic)
Packit 6c4009
	{
Packit 6c4009
	  /* Which of the bytes was the zero?  If none of them were, it was
Packit 6c4009
	     a misfire; continue the search.  */
Packit 6c4009
Packit 6c4009
	  const char *cp = (const char *) (longword_ptr - 1);
Packit 6c4009
Packit 6c4009
	  char_ptr = cp;
Packit 6c4009
	  if (cp[0] == 0)
Packit 6c4009
	    break;
Packit 6c4009
	  char_ptr = cp + 1;
Packit 6c4009
	  if (cp[1] == 0)
Packit 6c4009
	    break;
Packit 6c4009
	  char_ptr = cp + 2;
Packit 6c4009
	  if (cp[2] == 0)
Packit 6c4009
	    break;
Packit 6c4009
	  char_ptr = cp + 3;
Packit 6c4009
	  if (cp[3] == 0)
Packit 6c4009
	    break;
Packit 6c4009
	  if (sizeof (longword) > 4)
Packit 6c4009
	    {
Packit 6c4009
	      char_ptr = cp + 4;
Packit 6c4009
	      if (cp[4] == 0)
Packit 6c4009
		break;
Packit 6c4009
	      char_ptr = cp + 5;
Packit 6c4009
	      if (cp[5] == 0)
Packit 6c4009
		break;
Packit 6c4009
	      char_ptr = cp + 6;
Packit 6c4009
	      if (cp[6] == 0)
Packit 6c4009
		break;
Packit 6c4009
	      char_ptr = cp + 7;
Packit 6c4009
	      if (cp[7] == 0)
Packit 6c4009
		break;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
      char_ptr = end_ptr;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (char_ptr > end_ptr)
Packit 6c4009
    char_ptr = end_ptr;
Packit 6c4009
  return char_ptr - str;
Packit 6c4009
}
Packit 6c4009
#ifndef STRNLEN
Packit 6c4009
libc_hidden_def (__strnlen)
Packit 6c4009
weak_alias (__strnlen, strnlen)
Packit 6c4009
#endif
Packit 6c4009
libc_hidden_def (strnlen)