Blame string/strstr.c

Packit 6c4009
/* Return the offset of one string within another.
Packit 6c4009
   Copyright (C) 1994-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#ifndef _LIBC
Packit 6c4009
# include <config.h>
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
#include <string.h>
Packit 6c4009
Packit 6c4009
#define RETURN_TYPE char *
Packit 6c4009
#define AVAILABLE(h, h_l, j, n_l)			\
Packit Bot a01981
  (((j) + (n_l) <= (h_l)) \
Packit Bot a01981
   || ((h_l) += __strnlen ((void*)((h) + (h_l)), (n_l) + 512), \
Packit Bot a01981
       (j) + (n_l) <= (h_l)))
Packit 6c4009
#include "str-two-way.h"
Packit 6c4009
Packit 6c4009
#undef strstr
Packit 6c4009
Packit 6c4009
#ifndef STRSTR
Packit 6c4009
#define STRSTR strstr
Packit 6c4009
#endif
Packit 6c4009
Packit Bot 247c9b
static inline char *
Packit Bot 247c9b
strstr2 (const unsigned char *hs, const unsigned char *ne)
Packit Service 8828c8
{
Packit Bot 247c9b
  uint32_t h1 = (ne[0] << 16) | ne[1];
Packit Bot 247c9b
  uint32_t h2 = 0;
Packit Bot 247c9b
  for (int c = hs[0]; h1 != h2 && c != 0; c = *++hs)
Packit Bot 247c9b
      h2 = (h2 << 16) | c;
Packit Bot 247c9b
  return h1 == h2 ? (char *)hs - 2 : NULL;
Packit Bot 247c9b
}
Packit Bot e50fe1
Packit Bot 247c9b
static inline char *
Packit Bot 247c9b
strstr3 (const unsigned char *hs, const unsigned char *ne)
Packit Bot 247c9b
{
Packit Bot 247c9b
  uint32_t h1 = ((uint32_t)ne[0] << 24) | (ne[1] << 16) | (ne[2] << 8);
Packit Bot 247c9b
  uint32_t h2 = 0;
Packit Bot 247c9b
  for (int c = hs[0]; h1 != h2 && c != 0; c = *++hs)
Packit Bot 247c9b
      h2 = (h2 | c) << 8;
Packit Bot 247c9b
  return h1 == h2 ? (char *)hs - 3 : NULL;
Packit Bot 247c9b
}
Packit Bot e50fe1
Packit Bot 247c9b
/* Hash character pairs so a small shift table can be used.  All bits of
Packit Bot 247c9b
   p[0] are included, but not all bits from p[-1].  So if two equal hashes
Packit Bot 247c9b
   match on p[-1], p[0] matches too.  Hash collisions are harmless and result
Packit Bot 247c9b
   in smaller shifts.  */
Packit Bot 247c9b
#define hash2(p) (((size_t)(p)[0] - ((size_t)(p)[-1] << 3)) % sizeof (shift))
Packit Bot 247c9b
Packit Bot 247c9b
/* Fast strstr algorithm with guaranteed linear-time performance.
Packit Bot 247c9b
   Small needles up to size 3 use a dedicated linear search.  Longer needles
Packit Bot 247c9b
   up to size 256 use a novel modified Horspool algorithm.  It hashes pairs
Packit Bot 247c9b
   of characters to quickly skip past mismatches.  The main search loop only
Packit Bot 247c9b
   exits if the last 2 characters match, avoiding unnecessary calls to memcmp
Packit Bot 247c9b
   and allowing for a larger skip if there is no match.  A self-adapting
Packit Bot 247c9b
   filtering check is used to quickly detect mismatches in long needles.
Packit Bot 247c9b
   By limiting the needle length to 256, the shift table can be reduced to 8
Packit Bot 247c9b
   bits per entry, lowering preprocessing overhead and minimizing cache effects.
Packit Bot 247c9b
   The limit also implies worst-case performance is linear.
Packit Bot 247c9b
   Needles larger than 256 characters use the linear-time Two-Way algorithm.  */
Packit Bot 247c9b
char *
Packit Bot 247c9b
STRSTR (const char *haystack, const char *needle)
Packit Bot 247c9b
{
Packit Bot 247c9b
  const unsigned char *hs = (const unsigned char *) haystack;
Packit Bot 247c9b
  const unsigned char *ne = (const unsigned char *) needle;
Packit Bot 247c9b
Packit Bot 247c9b
  /* Handle short needle special cases first.  */
Packit Bot 247c9b
  if (ne[0] == '\0')
Packit Bot 247c9b
    return (char *)hs;
Packit Bot 247c9b
  hs = (const unsigned char *)strchr ((const char*)hs, ne[0]);
Packit Bot 247c9b
  if (hs == NULL || ne[1] == '\0')
Packit Bot 247c9b
    return (char*)hs;
Packit Bot 247c9b
  if (ne[2] == '\0')
Packit Bot 247c9b
    return strstr2 (hs, ne);
Packit Bot 247c9b
  if (ne[3] == '\0')
Packit Bot 247c9b
    return strstr3 (hs, ne);
Packit Bot 247c9b
Packit Bot 247c9b
  /* Ensure haystack length is at least as long as needle length.
Packit Bot 247c9b
     Since a match may occur early on in a huge haystack, use strnlen
Packit Bot e50fe1
     and read ahead a few cachelines for improved performance.  */
Packit Bot 247c9b
  size_t ne_len = strlen ((const char*)ne);
Packit Bot 247c9b
  size_t hs_len = __strnlen ((const char*)hs, ne_len | 512);
Packit Bot 247c9b
  if (hs_len < ne_len)
Packit 6c4009
    return NULL;
Packit Bot e50fe1
Packit Bot 247c9b
  /* Check whether we have a match.  This improves performance since we
Packit Bot 247c9b
     avoid initialization overheads.  */
Packit Bot 247c9b
  if (memcmp (hs, ne, ne_len) == 0)
Packit Bot 247c9b
    return (char *) hs;
Packit Bot 247c9b
Packit Bot 247c9b
  /* Use Two-Way algorithm for very long needles.  */
Packit Bot 247c9b
  if (__glibc_unlikely (ne_len > 256))
Packit Bot 247c9b
    return two_way_long_needle (hs, hs_len, ne, ne_len);
Packit Bot 247c9b
Packit Bot 247c9b
  const unsigned char *end = hs + hs_len - ne_len;
Packit Bot 247c9b
  uint8_t shift[256];
Packit Bot 247c9b
  size_t tmp, shift1;
Packit Bot 247c9b
  size_t m1 = ne_len - 1;
Packit Bot 247c9b
  size_t offset = 0;
Packit Bot 247c9b
Packit Bot 247c9b
  /* Initialize bad character shift hash table.  */
Packit Bot 247c9b
  memset (shift, 0, sizeof (shift));
Packit Bot 247c9b
  for (int i = 1; i < m1; i++)
Packit Bot 247c9b
    shift[hash2 (ne + i)] = i;
Packit Bot 247c9b
  /* Shift1 is the amount we can skip after matching the hash of the
Packit Bot 247c9b
     needle end but not the full needle.  */
Packit Bot 247c9b
  shift1 = m1 - shift[hash2 (ne + m1)];
Packit Bot 247c9b
  shift[hash2 (ne + m1)] = m1;
Packit Bot 247c9b
Packit Bot 247c9b
  while (1)
Packit Bot 247c9b
    {
Packit Bot 247c9b
      if (__glibc_unlikely (hs > end))
Packit Bot 247c9b
	{
Packit Bot 247c9b
	  end += __strnlen ((const char*)end + m1 + 1, 2048);
Packit Bot 247c9b
	  if (hs > end)
Packit Bot 247c9b
	    return NULL;
Packit Bot 247c9b
	}
Packit Bot 247c9b
Packit Bot 247c9b
      /* Skip past character pairs not in the needle.  */
Packit Bot 247c9b
      do
Packit Bot 247c9b
	{
Packit Bot 247c9b
	  hs += m1;
Packit Bot 247c9b
	  tmp = shift[hash2 (hs)];
Packit Bot 247c9b
	}
Packit Bot 247c9b
      while (tmp == 0 && hs <= end);
Packit Bot 247c9b
Packit Bot 247c9b
      /* If the match is not at the end of the needle, shift to the end
Packit Bot 247c9b
	 and continue until we match the hash of the needle end.  */
Packit Bot 247c9b
      hs -= tmp;
Packit Bot 247c9b
      if (tmp < m1)
Packit Bot 247c9b
	continue;
Packit Bot 247c9b
Packit Bot 247c9b
      /* Hash of the last 2 characters matches.  If the needle is long,
Packit Bot 247c9b
	 try to quickly filter out mismatches.  */
Packit Bot 247c9b
      if (m1 < 15 || memcmp (hs + offset, ne + offset, 8) == 0)
Packit Bot 247c9b
	{
Packit Bot 247c9b
	  if (memcmp (hs, ne, m1) == 0)
Packit Bot 247c9b
	    return (void *) hs;
Packit Bot 247c9b
Packit Bot 247c9b
	  /* Adjust filter offset when it doesn't find the mismatch.  */
Packit Bot 247c9b
	  offset = (offset >= 8 ? offset : m1) - 8;
Packit Bot 247c9b
	}
Packit Bot 247c9b
Packit Bot 247c9b
      /* Skip based on matching the hash of the needle end.  */
Packit Bot 247c9b
      hs += shift1;
Packit Bot 247c9b
    }
Packit 6c4009
}
Packit 6c4009
libc_hidden_builtin_def (strstr)