Blame string/strstr.c

Packit 6c4009
/* Return the offset of one string within another.
Packit 6c4009
   Copyright (C) 1994-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#ifndef _LIBC
Packit 6c4009
# include <config.h>
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
#include <string.h>
Packit 6c4009
Packit 6c4009
#define RETURN_TYPE char *
Packit 6c4009
#define AVAILABLE(h, h_l, j, n_l)			\
Packit Service 623b56
  (((j) + (n_l) <= (h_l)) \
Packit Service 623b56
   || ((h_l) += __strnlen ((void*)((h) + (h_l)), (n_l) + 512), \
Packit Service 623b56
       (j) + (n_l) <= (h_l)))
Packit 6c4009
#include "str-two-way.h"
Packit 6c4009
Packit 6c4009
#undef strstr
Packit 6c4009
Packit 6c4009
#ifndef STRSTR
Packit 6c4009
#define STRSTR strstr
Packit 6c4009
#endif
Packit 6c4009
Packit Service 998090
static inline char *
Packit Service 998090
strstr2 (const unsigned char *hs, const unsigned char *ne)
Packit Service 8f37d2
{
Packit Service 998090
  uint32_t h1 = (ne[0] << 16) | ne[1];
Packit Service 998090
  uint32_t h2 = 0;
Packit Service 998090
  for (int c = hs[0]; h1 != h2 && c != 0; c = *++hs)
Packit Service 998090
      h2 = (h2 << 16) | c;
Packit Service 998090
  return h1 == h2 ? (char *)hs - 2 : NULL;
Packit Service 998090
}
Packit Service 5b66b0
Packit Service 998090
static inline char *
Packit Service 998090
strstr3 (const unsigned char *hs, const unsigned char *ne)
Packit Service 998090
{
Packit Service 998090
  uint32_t h1 = ((uint32_t)ne[0] << 24) | (ne[1] << 16) | (ne[2] << 8);
Packit Service 998090
  uint32_t h2 = 0;
Packit Service 998090
  for (int c = hs[0]; h1 != h2 && c != 0; c = *++hs)
Packit Service 998090
      h2 = (h2 | c) << 8;
Packit Service 998090
  return h1 == h2 ? (char *)hs - 3 : NULL;
Packit Service 998090
}
Packit Service 5b66b0
Packit Service 998090
/* Hash character pairs so a small shift table can be used.  All bits of
Packit Service 998090
   p[0] are included, but not all bits from p[-1].  So if two equal hashes
Packit Service 998090
   match on p[-1], p[0] matches too.  Hash collisions are harmless and result
Packit Service 998090
   in smaller shifts.  */
Packit Service 998090
#define hash2(p) (((size_t)(p)[0] - ((size_t)(p)[-1] << 3)) % sizeof (shift))
Packit Service 998090
Packit Service 998090
/* Fast strstr algorithm with guaranteed linear-time performance.
Packit Service 998090
   Small needles up to size 3 use a dedicated linear search.  Longer needles
Packit Service 998090
   up to size 256 use a novel modified Horspool algorithm.  It hashes pairs
Packit Service 998090
   of characters to quickly skip past mismatches.  The main search loop only
Packit Service 998090
   exits if the last 2 characters match, avoiding unnecessary calls to memcmp
Packit Service 998090
   and allowing for a larger skip if there is no match.  A self-adapting
Packit Service 998090
   filtering check is used to quickly detect mismatches in long needles.
Packit Service 998090
   By limiting the needle length to 256, the shift table can be reduced to 8
Packit Service 998090
   bits per entry, lowering preprocessing overhead and minimizing cache effects.
Packit Service 998090
   The limit also implies worst-case performance is linear.
Packit Service 998090
   Needles larger than 256 characters use the linear-time Two-Way algorithm.  */
Packit Service 998090
char *
Packit Service 998090
STRSTR (const char *haystack, const char *needle)
Packit Service 998090
{
Packit Service 998090
  const unsigned char *hs = (const unsigned char *) haystack;
Packit Service 998090
  const unsigned char *ne = (const unsigned char *) needle;
Packit Service 998090
Packit Service 998090
  /* Handle short needle special cases first.  */
Packit Service 998090
  if (ne[0] == '\0')
Packit Service 998090
    return (char *)hs;
Packit Service 998090
  hs = (const unsigned char *)strchr ((const char*)hs, ne[0]);
Packit Service 998090
  if (hs == NULL || ne[1] == '\0')
Packit Service 998090
    return (char*)hs;
Packit Service 998090
  if (ne[2] == '\0')
Packit Service 998090
    return strstr2 (hs, ne);
Packit Service 998090
  if (ne[3] == '\0')
Packit Service 998090
    return strstr3 (hs, ne);
Packit Service 998090
Packit Service 998090
  /* Ensure haystack length is at least as long as needle length.
Packit Service 998090
     Since a match may occur early on in a huge haystack, use strnlen
Packit Service 5b66b0
     and read ahead a few cachelines for improved performance.  */
Packit Service 998090
  size_t ne_len = strlen ((const char*)ne);
Packit Service 998090
  size_t hs_len = __strnlen ((const char*)hs, ne_len | 512);
Packit Service 998090
  if (hs_len < ne_len)
Packit 6c4009
    return NULL;
Packit Service 5b66b0
Packit Service 998090
  /* Check whether we have a match.  This improves performance since we
Packit Service 998090
     avoid initialization overheads.  */
Packit Service 998090
  if (memcmp (hs, ne, ne_len) == 0)
Packit Service 998090
    return (char *) hs;
Packit Service 998090
Packit Service 998090
  /* Use Two-Way algorithm for very long needles.  */
Packit Service 998090
  if (__glibc_unlikely (ne_len > 256))
Packit Service 998090
    return two_way_long_needle (hs, hs_len, ne, ne_len);
Packit Service 998090
Packit Service 998090
  const unsigned char *end = hs + hs_len - ne_len;
Packit Service 998090
  uint8_t shift[256];
Packit Service 998090
  size_t tmp, shift1;
Packit Service 998090
  size_t m1 = ne_len - 1;
Packit Service 998090
  size_t offset = 0;
Packit Service 998090
Packit Service 998090
  /* Initialize bad character shift hash table.  */
Packit Service 998090
  memset (shift, 0, sizeof (shift));
Packit Service 998090
  for (int i = 1; i < m1; i++)
Packit Service 998090
    shift[hash2 (ne + i)] = i;
Packit Service 998090
  /* Shift1 is the amount we can skip after matching the hash of the
Packit Service 998090
     needle end but not the full needle.  */
Packit Service 998090
  shift1 = m1 - shift[hash2 (ne + m1)];
Packit Service 998090
  shift[hash2 (ne + m1)] = m1;
Packit Service 998090
Packit Service 998090
  while (1)
Packit Service 998090
    {
Packit Service 998090
      if (__glibc_unlikely (hs > end))
Packit Service 998090
	{
Packit Service 998090
	  end += __strnlen ((const char*)end + m1 + 1, 2048);
Packit Service 998090
	  if (hs > end)
Packit Service 998090
	    return NULL;
Packit Service 998090
	}
Packit Service 998090
Packit Service 998090
      /* Skip past character pairs not in the needle.  */
Packit Service 998090
      do
Packit Service 998090
	{
Packit Service 998090
	  hs += m1;
Packit Service 998090
	  tmp = shift[hash2 (hs)];
Packit Service 998090
	}
Packit Service 998090
      while (tmp == 0 && hs <= end);
Packit Service 998090
Packit Service 998090
      /* If the match is not at the end of the needle, shift to the end
Packit Service 998090
	 and continue until we match the hash of the needle end.  */
Packit Service 998090
      hs -= tmp;
Packit Service 998090
      if (tmp < m1)
Packit Service 998090
	continue;
Packit Service 998090
Packit Service 998090
      /* Hash of the last 2 characters matches.  If the needle is long,
Packit Service 998090
	 try to quickly filter out mismatches.  */
Packit Service 998090
      if (m1 < 15 || memcmp (hs + offset, ne + offset, 8) == 0)
Packit Service 998090
	{
Packit Service 998090
	  if (memcmp (hs, ne, m1) == 0)
Packit Service 998090
	    return (void *) hs;
Packit Service 998090
Packit Service 998090
	  /* Adjust filter offset when it doesn't find the mismatch.  */
Packit Service 998090
	  offset = (offset >= 8 ? offset : m1) - 8;
Packit Service 998090
	}
Packit Service 998090
Packit Service 998090
      /* Skip based on matching the hash of the needle end.  */
Packit Service 998090
      hs += shift1;
Packit Service 998090
    }
Packit 6c4009
}
Packit 6c4009
libc_hidden_builtin_def (strstr)