Blame string/strstr.c

Packit 6c4009
/* Return the offset of one string within another.
Packit 6c4009
   Copyright (C) 1994-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#ifndef _LIBC
Packit 6c4009
# include <config.h>
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
#include <string.h>
Packit 6c4009
Packit 6c4009
#define RETURN_TYPE char *
Packit 6c4009
#define AVAILABLE(h, h_l, j, n_l)			\
Packit Service 7151ee
  (((j) + (n_l) <= (h_l)) \
Packit Service 7151ee
   || ((h_l) += __strnlen ((void*)((h) + (h_l)), (n_l) + 512), \
Packit Service 7151ee
       (j) + (n_l) <= (h_l)))
Packit 6c4009
#include "str-two-way.h"
Packit 6c4009
Packit 6c4009
#undef strstr
Packit 6c4009
Packit 6c4009
#ifndef STRSTR
Packit 6c4009
#define STRSTR strstr
Packit 6c4009
#endif
Packit 6c4009
Packit Service 0a8c19
static inline char *
Packit Service 0a8c19
strstr2 (const unsigned char *hs, const unsigned char *ne)
Packit Service 1a92a4
{
Packit Service 0a8c19
  uint32_t h1 = (ne[0] << 16) | ne[1];
Packit Service 0a8c19
  uint32_t h2 = 0;
Packit Service 0a8c19
  for (int c = hs[0]; h1 != h2 && c != 0; c = *++hs)
Packit Service 0a8c19
      h2 = (h2 << 16) | c;
Packit Service 0a8c19
  return h1 == h2 ? (char *)hs - 2 : NULL;
Packit Service 0a8c19
}
Packit Service b2d53b
Packit Service 0a8c19
static inline char *
Packit Service 0a8c19
strstr3 (const unsigned char *hs, const unsigned char *ne)
Packit Service 0a8c19
{
Packit Service 0a8c19
  uint32_t h1 = ((uint32_t)ne[0] << 24) | (ne[1] << 16) | (ne[2] << 8);
Packit Service 0a8c19
  uint32_t h2 = 0;
Packit Service 0a8c19
  for (int c = hs[0]; h1 != h2 && c != 0; c = *++hs)
Packit Service 0a8c19
      h2 = (h2 | c) << 8;
Packit Service 0a8c19
  return h1 == h2 ? (char *)hs - 3 : NULL;
Packit Service 0a8c19
}
Packit Service b2d53b
Packit Service 0a8c19
/* Hash character pairs so a small shift table can be used.  All bits of
Packit Service 0a8c19
   p[0] are included, but not all bits from p[-1].  So if two equal hashes
Packit Service 0a8c19
   match on p[-1], p[0] matches too.  Hash collisions are harmless and result
Packit Service 0a8c19
   in smaller shifts.  */
Packit Service 0a8c19
#define hash2(p) (((size_t)(p)[0] - ((size_t)(p)[-1] << 3)) % sizeof (shift))
Packit Service 0a8c19
Packit Service 0a8c19
/* Fast strstr algorithm with guaranteed linear-time performance.
Packit Service 0a8c19
   Small needles up to size 3 use a dedicated linear search.  Longer needles
Packit Service 0a8c19
   up to size 256 use a novel modified Horspool algorithm.  It hashes pairs
Packit Service 0a8c19
   of characters to quickly skip past mismatches.  The main search loop only
Packit Service 0a8c19
   exits if the last 2 characters match, avoiding unnecessary calls to memcmp
Packit Service 0a8c19
   and allowing for a larger skip if there is no match.  A self-adapting
Packit Service 0a8c19
   filtering check is used to quickly detect mismatches in long needles.
Packit Service 0a8c19
   By limiting the needle length to 256, the shift table can be reduced to 8
Packit Service 0a8c19
   bits per entry, lowering preprocessing overhead and minimizing cache effects.
Packit Service 0a8c19
   The limit also implies worst-case performance is linear.
Packit Service 0a8c19
   Needles larger than 256 characters use the linear-time Two-Way algorithm.  */
Packit Service 0a8c19
char *
Packit Service 0a8c19
STRSTR (const char *haystack, const char *needle)
Packit Service 0a8c19
{
Packit Service 0a8c19
  const unsigned char *hs = (const unsigned char *) haystack;
Packit Service 0a8c19
  const unsigned char *ne = (const unsigned char *) needle;
Packit Service 0a8c19
Packit Service 0a8c19
  /* Handle short needle special cases first.  */
Packit Service 0a8c19
  if (ne[0] == '\0')
Packit Service 0a8c19
    return (char *)hs;
Packit Service 0a8c19
  hs = (const unsigned char *)strchr ((const char*)hs, ne[0]);
Packit Service 0a8c19
  if (hs == NULL || ne[1] == '\0')
Packit Service 0a8c19
    return (char*)hs;
Packit Service 0a8c19
  if (ne[2] == '\0')
Packit Service 0a8c19
    return strstr2 (hs, ne);
Packit Service 0a8c19
  if (ne[3] == '\0')
Packit Service 0a8c19
    return strstr3 (hs, ne);
Packit Service 0a8c19
Packit Service 0a8c19
  /* Ensure haystack length is at least as long as needle length.
Packit Service 0a8c19
     Since a match may occur early on in a huge haystack, use strnlen
Packit Service b2d53b
     and read ahead a few cachelines for improved performance.  */
Packit Service 0a8c19
  size_t ne_len = strlen ((const char*)ne);
Packit Service 0a8c19
  size_t hs_len = __strnlen ((const char*)hs, ne_len | 512);
Packit Service 0a8c19
  if (hs_len < ne_len)
Packit 6c4009
    return NULL;
Packit Service b2d53b
Packit Service 0a8c19
  /* Check whether we have a match.  This improves performance since we
Packit Service 0a8c19
     avoid initialization overheads.  */
Packit Service 0a8c19
  if (memcmp (hs, ne, ne_len) == 0)
Packit Service 0a8c19
    return (char *) hs;
Packit Service 0a8c19
Packit Service 0a8c19
  /* Use Two-Way algorithm for very long needles.  */
Packit Service 0a8c19
  if (__glibc_unlikely (ne_len > 256))
Packit Service 0a8c19
    return two_way_long_needle (hs, hs_len, ne, ne_len);
Packit Service 0a8c19
Packit Service 0a8c19
  const unsigned char *end = hs + hs_len - ne_len;
Packit Service 0a8c19
  uint8_t shift[256];
Packit Service 0a8c19
  size_t tmp, shift1;
Packit Service 0a8c19
  size_t m1 = ne_len - 1;
Packit Service 0a8c19
  size_t offset = 0;
Packit Service 0a8c19
Packit Service 0a8c19
  /* Initialize bad character shift hash table.  */
Packit Service 0a8c19
  memset (shift, 0, sizeof (shift));
Packit Service 0a8c19
  for (int i = 1; i < m1; i++)
Packit Service 0a8c19
    shift[hash2 (ne + i)] = i;
Packit Service 0a8c19
  /* Shift1 is the amount we can skip after matching the hash of the
Packit Service 0a8c19
     needle end but not the full needle.  */
Packit Service 0a8c19
  shift1 = m1 - shift[hash2 (ne + m1)];
Packit Service 0a8c19
  shift[hash2 (ne + m1)] = m1;
Packit Service 0a8c19
Packit Service 0a8c19
  while (1)
Packit Service 0a8c19
    {
Packit Service 0a8c19
      if (__glibc_unlikely (hs > end))
Packit Service 0a8c19
	{
Packit Service 0a8c19
	  end += __strnlen ((const char*)end + m1 + 1, 2048);
Packit Service 0a8c19
	  if (hs > end)
Packit Service 0a8c19
	    return NULL;
Packit Service 0a8c19
	}
Packit Service 0a8c19
Packit Service 0a8c19
      /* Skip past character pairs not in the needle.  */
Packit Service 0a8c19
      do
Packit Service 0a8c19
	{
Packit Service 0a8c19
	  hs += m1;
Packit Service 0a8c19
	  tmp = shift[hash2 (hs)];
Packit Service 0a8c19
	}
Packit Service 0a8c19
      while (tmp == 0 && hs <= end);
Packit Service 0a8c19
Packit Service 0a8c19
      /* If the match is not at the end of the needle, shift to the end
Packit Service 0a8c19
	 and continue until we match the hash of the needle end.  */
Packit Service 0a8c19
      hs -= tmp;
Packit Service 0a8c19
      if (tmp < m1)
Packit Service 0a8c19
	continue;
Packit Service 0a8c19
Packit Service 0a8c19
      /* Hash of the last 2 characters matches.  If the needle is long,
Packit Service 0a8c19
	 try to quickly filter out mismatches.  */
Packit Service 0a8c19
      if (m1 < 15 || memcmp (hs + offset, ne + offset, 8) == 0)
Packit Service 0a8c19
	{
Packit Service 0a8c19
	  if (memcmp (hs, ne, m1) == 0)
Packit Service 0a8c19
	    return (void *) hs;
Packit Service 0a8c19
Packit Service 0a8c19
	  /* Adjust filter offset when it doesn't find the mismatch.  */
Packit Service 0a8c19
	  offset = (offset >= 8 ? offset : m1) - 8;
Packit Service 0a8c19
	}
Packit Service 0a8c19
Packit Service 0a8c19
      /* Skip based on matching the hash of the needle end.  */
Packit Service 0a8c19
      hs += shift1;
Packit Service 0a8c19
    }
Packit 6c4009
}
Packit 6c4009
libc_hidden_builtin_def (strstr)