Blame string/str-two-way.h

Packit Service 82fcde
/* Byte-wise substring search, using the Two-Way algorithm.
Packit Service 82fcde
   Copyright (C) 2008-2018 Free Software Foundation, Inc.
Packit Service 82fcde
   This file is part of the GNU C Library.
Packit Service 82fcde
   Written by Eric Blake <ebb9@byu.net>, 2008.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is free software; you can redistribute it and/or
Packit Service 82fcde
   modify it under the terms of the GNU Lesser General Public
Packit Service 82fcde
   License as published by the Free Software Foundation; either
Packit Service 82fcde
   version 2.1 of the License, or (at your option) any later version.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is distributed in the hope that it will be useful,
Packit Service 82fcde
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 82fcde
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service 82fcde
   Lesser General Public License for more details.
Packit Service 82fcde
Packit Service 82fcde
   You should have received a copy of the GNU Lesser General Public
Packit Service 82fcde
   License along with the GNU C Library; if not, see
Packit Service 82fcde
   <http://www.gnu.org/licenses/>.  */
Packit Service 82fcde
Packit Service 82fcde
/* Before including this file, you need to include <string.h> (and
Packit Service 82fcde
   <config.h> before that, if not part of libc), and define:
Packit Service 82fcde
     RETURN_TYPE             A macro that expands to the return type.
Packit Service 82fcde
     AVAILABLE(h, h_l, j, n_l)
Packit Service 82fcde
			     A macro that returns nonzero if there are
Packit Service 82fcde
			     at least N_L bytes left starting at H[J].
Packit Service 82fcde
			     H is 'unsigned char *', H_L, J, and N_L
Packit Service 82fcde
			     are 'size_t'; H_L is an lvalue.  For
Packit Service 82fcde
			     NUL-terminated searches, H_L can be
Packit Service 82fcde
			     modified each iteration to avoid having
Packit Service 82fcde
			     to compute the end of H up front.
Packit Service 82fcde
Packit Service 82fcde
  For case-insensitivity, you may optionally define:
Packit Service 82fcde
     CMP_FUNC(p1, p2, l)     A macro that returns 0 iff the first L
Packit Service 82fcde
			     characters of P1 and P2 are equal.
Packit Service 82fcde
     CANON_ELEMENT(c)        A macro that canonicalizes an element right after
Packit Service 82fcde
			     it has been fetched from one of the two strings.
Packit Service 82fcde
			     The argument is an 'unsigned char'; the result
Packit Service 82fcde
			     must be an 'unsigned char' as well.
Packit Service 82fcde
Packit Service 82fcde
  Other macros you may optionally define:
Packit Service 82fcde
     RET0_IF_0(a)            Documented below at default definition.
Packit Service 82fcde
     CHECK_EOL               Same.
Packit Service 82fcde
Packit Service 82fcde
  This file undefines the macros listed above, and defines
Packit Service 82fcde
  LONG_NEEDLE_THRESHOLD.
Packit Service 82fcde
*/
Packit Service 82fcde
Packit Service 82fcde
#include <limits.h>
Packit Service 82fcde
#include <stdint.h>
Packit Service 82fcde
#include <sys/param.h>                  /* Defines MAX.  */
Packit Service 82fcde
Packit Service 82fcde
/* We use the Two-Way string matching algorithm, which guarantees
Packit Service 82fcde
   linear complexity with constant space.  Additionally, for long
Packit Service 82fcde
   needles, we also use a bad character shift table similar to the
Packit Service 82fcde
   Boyer-Moore algorithm to achieve improved (potentially sub-linear)
Packit Service 82fcde
   performance.
Packit Service 82fcde
Packit Service 82fcde
   See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
Packit Service 82fcde
   and http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm
Packit Service 82fcde
*/
Packit Service 82fcde
Packit Service 82fcde
/* Point at which computing a bad-byte shift table is likely to be
Packit Service 82fcde
   worthwhile.  Small needles should not compute a table, since it
Packit Service 82fcde
   adds (1 << CHAR_BIT) + NEEDLE_LEN computations of preparation for a
Packit Service 82fcde
   speedup no greater than a factor of NEEDLE_LEN.  The larger the
Packit Service 82fcde
   needle, the better the potential performance gain.  On the other
Packit Service 82fcde
   hand, on non-POSIX systems with CHAR_BIT larger than eight, the
Packit Service 82fcde
   memory required for the table is prohibitive.  */
Packit Service 82fcde
#if CHAR_BIT < 10
Packit Service 82fcde
# define LONG_NEEDLE_THRESHOLD 32U
Packit Service 82fcde
#else
Packit Service 82fcde
# define LONG_NEEDLE_THRESHOLD SIZE_MAX
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
#ifndef CANON_ELEMENT
Packit Service 82fcde
# define CANON_ELEMENT(c) c
Packit Service 82fcde
#endif
Packit Service 82fcde
#ifndef CMP_FUNC
Packit Service 82fcde
# define CMP_FUNC memcmp
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
/* Check for end-of-line in strstr and strcasestr routines.
Packit Service 82fcde
   We piggy-back matching procedure for detecting EOL where possible,
Packit Service 82fcde
   and use AVAILABLE macro otherwise.  */
Packit Service 82fcde
#ifndef CHECK_EOL
Packit Service 82fcde
# define CHECK_EOL (0)
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
/* Return NULL if argument is '\0'.  */
Packit Service 82fcde
#ifndef RET0_IF_0
Packit Service 82fcde
# define RET0_IF_0(a) /* nothing */
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
/* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN.
Packit Service 82fcde
   Return the index of the first byte in the right half, and set
Packit Service 82fcde
   *PERIOD to the global period of the right half.
Packit Service 82fcde
Packit Service 82fcde
   The global period of a string is the smallest index (possibly its
Packit Service 82fcde
   length) at which all remaining bytes in the string are repetitions
Packit Service 82fcde
   of the prefix (the last repetition may be a subset of the prefix).
Packit Service 82fcde
Packit Service 82fcde
   When NEEDLE is factored into two halves, a local period is the
Packit Service 82fcde
   length of the smallest word that shares a suffix with the left half
Packit Service 82fcde
   and shares a prefix with the right half.  All factorizations of a
Packit Service 82fcde
   non-empty NEEDLE have a local period of at least 1 and no greater
Packit Service 82fcde
   than NEEDLE_LEN.
Packit Service 82fcde
Packit Service 82fcde
   A critical factorization has the property that the local period
Packit Service 82fcde
   equals the global period.  All strings have at least one critical
Packit Service 82fcde
   factorization with the left half smaller than the global period.
Packit Service 82fcde
Packit Service 82fcde
   Given an ordered alphabet, a critical factorization can be computed
Packit Service 82fcde
   in linear time, with 2 * NEEDLE_LEN comparisons, by computing the
Packit Service 82fcde
   larger of two ordered maximal suffixes.  The ordered maximal
Packit Service 82fcde
   suffixes are determined by lexicographic comparison of
Packit Service 82fcde
   periodicity.  */
Packit Service 82fcde
static size_t
Packit Service 82fcde
critical_factorization (const unsigned char *needle, size_t needle_len,
Packit Service 82fcde
			size_t *period)
Packit Service 82fcde
{
Packit Service 82fcde
  /* Index of last byte of left half, or SIZE_MAX.  */
Packit Service 82fcde
  size_t max_suffix, max_suffix_rev;
Packit Service 82fcde
  size_t j; /* Index into NEEDLE for current candidate suffix.  */
Packit Service 82fcde
  size_t k; /* Offset into current period.  */
Packit Service 82fcde
  size_t p; /* Intermediate period.  */
Packit Service 82fcde
  unsigned char a, b; /* Current comparison bytes.  */
Packit Service 82fcde
Packit Service 82fcde
  /* Invariants:
Packit Service 82fcde
     0 <= j < NEEDLE_LEN - 1
Packit Service 82fcde
     -1 <= max_suffix{,_rev} < j (treating SIZE_MAX as if it were signed)
Packit Service 82fcde
     min(max_suffix, max_suffix_rev) < global period of NEEDLE
Packit Service 82fcde
     1 <= p <= global period of NEEDLE
Packit Service 82fcde
     p == global period of the substring NEEDLE[max_suffix{,_rev}+1...j]
Packit Service 82fcde
     1 <= k <= p
Packit Service 82fcde
  */
Packit Service 82fcde
Packit Service 82fcde
  /* Perform lexicographic search.  */
Packit Service 82fcde
  max_suffix = SIZE_MAX;
Packit Service 82fcde
  j = 0;
Packit Service 82fcde
  k = p = 1;
Packit Service 82fcde
  while (j + k < needle_len)
Packit Service 82fcde
    {
Packit Service 82fcde
      a = CANON_ELEMENT (needle[j + k]);
Packit Service 82fcde
      b = CANON_ELEMENT (needle[max_suffix + k]);
Packit Service 82fcde
      if (a < b)
Packit Service 82fcde
	{
Packit Service 82fcde
	  /* Suffix is smaller, period is entire prefix so far.  */
Packit Service 82fcde
	  j += k;
Packit Service 82fcde
	  k = 1;
Packit Service 82fcde
	  p = j - max_suffix;
Packit Service 82fcde
	}
Packit Service 82fcde
      else if (a == b)
Packit Service 82fcde
	{
Packit Service 82fcde
	  /* Advance through repetition of the current period.  */
Packit Service 82fcde
	  if (k != p)
Packit Service 82fcde
	    ++k;
Packit Service 82fcde
	  else
Packit Service 82fcde
	    {
Packit Service 82fcde
	      j += p;
Packit Service 82fcde
	      k = 1;
Packit Service 82fcde
	    }
Packit Service 82fcde
	}
Packit Service 82fcde
      else /* b < a */
Packit Service 82fcde
	{
Packit Service 82fcde
	  /* Suffix is larger, start over from current location.  */
Packit Service 82fcde
	  max_suffix = j++;
Packit Service 82fcde
	  k = p = 1;
Packit Service 82fcde
	}
Packit Service 82fcde
    }
Packit Service 82fcde
  *period = p;
Packit Service 82fcde
Packit Service 82fcde
  /* Perform reverse lexicographic search.  */
Packit Service 82fcde
  max_suffix_rev = SIZE_MAX;
Packit Service 82fcde
  j = 0;
Packit Service 82fcde
  k = p = 1;
Packit Service 82fcde
  while (j + k < needle_len)
Packit Service 82fcde
    {
Packit Service 82fcde
      a = CANON_ELEMENT (needle[j + k]);
Packit Service 82fcde
      b = CANON_ELEMENT (needle[max_suffix_rev + k]);
Packit Service 82fcde
      if (b < a)
Packit Service 82fcde
	{
Packit Service 82fcde
	  /* Suffix is smaller, period is entire prefix so far.  */
Packit Service 82fcde
	  j += k;
Packit Service 82fcde
	  k = 1;
Packit Service 82fcde
	  p = j - max_suffix_rev;
Packit Service 82fcde
	}
Packit Service 82fcde
      else if (a == b)
Packit Service 82fcde
	{
Packit Service 82fcde
	  /* Advance through repetition of the current period.  */
Packit Service 82fcde
	  if (k != p)
Packit Service 82fcde
	    ++k;
Packit Service 82fcde
	  else
Packit Service 82fcde
	    {
Packit Service 82fcde
	      j += p;
Packit Service 82fcde
	      k = 1;
Packit Service 82fcde
	    }
Packit Service 82fcde
	}
Packit Service 82fcde
      else /* a < b */
Packit Service 82fcde
	{
Packit Service 82fcde
	  /* Suffix is larger, start over from current location.  */
Packit Service 82fcde
	  max_suffix_rev = j++;
Packit Service 82fcde
	  k = p = 1;
Packit Service 82fcde
	}
Packit Service 82fcde
    }
Packit Service 82fcde
Packit Service 82fcde
  /* Choose the longer suffix.  Return the first byte of the right
Packit Service 82fcde
     half, rather than the last byte of the left half.  */
Packit Service 82fcde
  if (max_suffix_rev + 1 < max_suffix + 1)
Packit Service 82fcde
    return max_suffix + 1;
Packit Service 82fcde
  *period = p;
Packit Service 82fcde
  return max_suffix_rev + 1;
Packit Service 82fcde
}
Packit Service 82fcde
Packit Service 82fcde
/* Return the first location of non-empty NEEDLE within HAYSTACK, or
Packit Service 82fcde
   NULL.  HAYSTACK_LEN is the minimum known length of HAYSTACK.  This
Packit Service 82fcde
   method is optimized for NEEDLE_LEN < LONG_NEEDLE_THRESHOLD.
Packit Service 82fcde
   Performance is guaranteed to be linear, with an initialization cost
Packit Service 82fcde
   of 2 * NEEDLE_LEN comparisons.
Packit Service 82fcde
Packit Service 82fcde
   If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at
Packit Service 82fcde
   most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching.
Packit Service 82fcde
   If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 *
Packit Service 82fcde
   HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching.  */
Packit Service 62e058
static inline RETURN_TYPE
Packit Service 82fcde
two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
Packit Service 82fcde
		      const unsigned char *needle, size_t needle_len)
Packit Service 82fcde
{
Packit Service 82fcde
  size_t i; /* Index into current byte of NEEDLE.  */
Packit Service 82fcde
  size_t j; /* Index into current window of HAYSTACK.  */
Packit Service 82fcde
  size_t period; /* The period of the right half of needle.  */
Packit Service 82fcde
  size_t suffix; /* The index of the right half of needle.  */
Packit Service 82fcde
Packit Service 82fcde
  /* Factor the needle into two halves, such that the left half is
Packit Service 82fcde
     smaller than the global period, and the right half is
Packit Service 82fcde
     periodic (with a period as large as NEEDLE_LEN - suffix).  */
Packit Service 82fcde
  suffix = critical_factorization (needle, needle_len, &period);
Packit Service 82fcde
Packit Service 82fcde
  /* Perform the search.  Each iteration compares the right half
Packit Service 82fcde
     first.  */
Packit Service 82fcde
  if (CMP_FUNC (needle, needle + period, suffix) == 0)
Packit Service 82fcde
    {
Packit Service 82fcde
      /* Entire needle is periodic; a mismatch can only advance by the
Packit Service 82fcde
	 period, so use memory to avoid rescanning known occurrences
Packit Service 82fcde
	 of the period.  */
Packit Service 82fcde
      size_t memory = 0;
Packit Service 82fcde
      j = 0;
Packit Service 82fcde
      while (AVAILABLE (haystack, haystack_len, j, needle_len))
Packit Service 82fcde
	{
Packit Service 82fcde
	  const unsigned char *pneedle;
Packit Service 82fcde
	  const unsigned char *phaystack;
Packit Service 82fcde
Packit Service 82fcde
	  /* Scan for matches in right half.  */
Packit Service 82fcde
	  i = MAX (suffix, memory);
Packit Service 82fcde
	  pneedle = &needle[i];
Packit Service 82fcde
	  phaystack = &haystack[i + j];
Packit Service 82fcde
	  while (i < needle_len && (CANON_ELEMENT (*pneedle++)
Packit Service 82fcde
				    == CANON_ELEMENT (*phaystack++)))
Packit Service 82fcde
	    ++i;
Packit Service 82fcde
	  if (needle_len <= i)
Packit Service 82fcde
	    {
Packit Service 82fcde
	      /* Scan for matches in left half.  */
Packit Service 82fcde
	      i = suffix - 1;
Packit Service 82fcde
	      pneedle = &needle[i];
Packit Service 82fcde
	      phaystack = &haystack[i + j];
Packit Service 82fcde
	      while (memory < i + 1 && (CANON_ELEMENT (*pneedle--)
Packit Service 82fcde
					== CANON_ELEMENT (*phaystack--)))
Packit Service 82fcde
		--i;
Packit Service 82fcde
	      if (i + 1 < memory + 1)
Packit Service 82fcde
		return (RETURN_TYPE) (haystack + j);
Packit Service 82fcde
	      /* No match, so remember how many repetitions of period
Packit Service 82fcde
		 on the right half were scanned.  */
Packit Service 82fcde
	      j += period;
Packit Service 82fcde
	      memory = needle_len - period;
Packit Service 82fcde
	    }
Packit Service 82fcde
	  else
Packit Service 82fcde
	    {
Packit Service 82fcde
	      j += i - suffix + 1;
Packit Service 82fcde
	      memory = 0;
Packit Service 82fcde
	    }
Packit Service 82fcde
	}
Packit Service 82fcde
    }
Packit Service 82fcde
  else
Packit Service 82fcde
    {
Packit Service 82fcde
      const unsigned char *phaystack;
Packit Service 82fcde
      /* The comparison always starts from needle[suffix], so cache it
Packit Service 82fcde
	 and use an optimized first-character loop.  */
Packit Service 82fcde
      unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]);
Packit Service 82fcde
Packit Service 82fcde
      /* The two halves of needle are distinct; no extra memory is
Packit Service 82fcde
	 required, and any mismatch results in a maximal shift.  */
Packit Service 82fcde
      period = MAX (suffix, needle_len - suffix) + 1;
Packit Service 82fcde
      j = 0;
Packit Service 82fcde
      while (AVAILABLE (haystack, haystack_len, j, needle_len))
Packit Service 82fcde
	{
Packit Service 82fcde
	  unsigned char haystack_char;
Packit Service 82fcde
	  const unsigned char *pneedle;
Packit Service 82fcde
Packit Service 82fcde
	  phaystack = &haystack[suffix + j];
Packit Service 82fcde
Packit Service 82fcde
#ifdef FASTSEARCH
Packit Service 82fcde
	  if (*phaystack++ != needle_suffix)
Packit Service 82fcde
	    {
Packit Service 82fcde
	      phaystack = FASTSEARCH (phaystack, needle_suffix,
Packit Service 82fcde
				      haystack_len - needle_len - j);
Packit Service 82fcde
	      if (phaystack == NULL)
Packit Service 82fcde
		goto ret0;
Packit Service 82fcde
	      j = phaystack - &haystack[suffix];
Packit Service 82fcde
	      phaystack++;
Packit Service 82fcde
	    }
Packit Service 82fcde
#else
Packit Service 82fcde
	  while (needle_suffix
Packit Service 82fcde
	      != (haystack_char = CANON_ELEMENT (*phaystack++)))
Packit Service 82fcde
	    {
Packit Service 82fcde
	      RET0_IF_0 (haystack_char);
Packit Service 82fcde
# if !CHECK_EOL
Packit Service 82fcde
	      ++j;
Packit Service 82fcde
	      if (!AVAILABLE (haystack, haystack_len, j, needle_len))
Packit Service 82fcde
		goto ret0;
Packit Service 82fcde
# endif
Packit Service 82fcde
	    }
Packit Service 82fcde
Packit Service 82fcde
# if CHECK_EOL
Packit Service 82fcde
	  /* Calculate J if it wasn't kept up-to-date in the first-character
Packit Service 82fcde
	     loop.  */
Packit Service 82fcde
	  j = phaystack - &haystack[suffix] - 1;
Packit Service 82fcde
# endif
Packit Service 82fcde
#endif
Packit Service 82fcde
	  /* Scan for matches in right half.  */
Packit Service 82fcde
	  i = suffix + 1;
Packit Service 82fcde
	  pneedle = &needle[i];
Packit Service 82fcde
	  while (i < needle_len)
Packit Service 82fcde
	    {
Packit Service 82fcde
	      if (CANON_ELEMENT (*pneedle++)
Packit Service 82fcde
		  != (haystack_char = CANON_ELEMENT (*phaystack++)))
Packit Service 82fcde
		{
Packit Service 82fcde
		  RET0_IF_0 (haystack_char);
Packit Service 82fcde
		  break;
Packit Service 82fcde
		}
Packit Service 82fcde
	      ++i;
Packit Service 82fcde
	    }
Packit Service 82fcde
#if CHECK_EOL
Packit Service 82fcde
	  /* Update minimal length of haystack.  */
Packit Service 82fcde
	  if (phaystack > haystack + haystack_len)
Packit Service 82fcde
	    haystack_len = phaystack - haystack;
Packit Service 82fcde
#endif
Packit Service 82fcde
	  if (needle_len <= i)
Packit Service 82fcde
	    {
Packit Service 82fcde
	      /* Scan for matches in left half.  */
Packit Service 82fcde
	      i = suffix - 1;
Packit Service 82fcde
	      pneedle = &needle[i];
Packit Service 82fcde
	      phaystack = &haystack[i + j];
Packit Service 82fcde
	      while (i != SIZE_MAX)
Packit Service 82fcde
		{
Packit Service 82fcde
		  if (CANON_ELEMENT (*pneedle--)
Packit Service 82fcde
		      != (haystack_char = CANON_ELEMENT (*phaystack--)))
Packit Service 82fcde
		    {
Packit Service 82fcde
		      RET0_IF_0 (haystack_char);
Packit Service 82fcde
		      break;
Packit Service 82fcde
		    }
Packit Service 82fcde
		  --i;
Packit Service 82fcde
		}
Packit Service 82fcde
	      if (i == SIZE_MAX)
Packit Service 82fcde
		return (RETURN_TYPE) (haystack + j);
Packit Service 82fcde
	      j += period;
Packit Service 82fcde
	    }
Packit Service 82fcde
	  else
Packit Service 82fcde
	    j += i - suffix + 1;
Packit Service 82fcde
	}
Packit Service 82fcde
    }
Packit Service 82fcde
 ret0: __attribute__ ((unused))
Packit Service 82fcde
  return NULL;
Packit Service 82fcde
}
Packit Service 82fcde
Packit Service 82fcde
/* Return the first location of non-empty NEEDLE within HAYSTACK, or
Packit Service 82fcde
   NULL.  HAYSTACK_LEN is the minimum known length of HAYSTACK.  This
Packit Service 82fcde
   method is optimized for LONG_NEEDLE_THRESHOLD <= NEEDLE_LEN.
Packit Service 82fcde
   Performance is guaranteed to be linear, with an initialization cost
Packit Service 82fcde
   of 3 * NEEDLE_LEN + (1 << CHAR_BIT) operations.
Packit Service 82fcde
Packit Service 82fcde
   If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at
Packit Service 82fcde
   most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching,
Packit Service 82fcde
   and sublinear performance O(HAYSTACK_LEN / NEEDLE_LEN) is possible.
Packit Service 82fcde
   If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 *
Packit Service 82fcde
   HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, and
Packit Service 62e058
   sublinear performance is not possible.
Packit Service 62e058
Packit Service 62e058
   Since this function is large and complex, block inlining to avoid
Packit Service 62e058
   slowing down the common case of small needles.  */
Packit Service 62e058
__attribute__((noinline)) static RETURN_TYPE
Packit Service 82fcde
two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
Packit Service 82fcde
		     const unsigned char *needle, size_t needle_len)
Packit Service 82fcde
{
Packit Service 82fcde
  size_t i; /* Index into current byte of NEEDLE.  */
Packit Service 82fcde
  size_t j; /* Index into current window of HAYSTACK.  */
Packit Service 82fcde
  size_t period; /* The period of the right half of needle.  */
Packit Service 82fcde
  size_t suffix; /* The index of the right half of needle.  */
Packit Service 82fcde
  size_t shift_table[1U << CHAR_BIT]; /* See below.  */
Packit Service 82fcde
Packit Service 82fcde
  /* Factor the needle into two halves, such that the left half is
Packit Service 82fcde
     smaller than the global period, and the right half is
Packit Service 82fcde
     periodic (with a period as large as NEEDLE_LEN - suffix).  */
Packit Service 82fcde
  suffix = critical_factorization (needle, needle_len, &period);
Packit Service 82fcde
Packit Service 82fcde
  /* Populate shift_table.  For each possible byte value c,
Packit Service 82fcde
     shift_table[c] is the distance from the last occurrence of c to
Packit Service 82fcde
     the end of NEEDLE, or NEEDLE_LEN if c is absent from the NEEDLE.
Packit Service 82fcde
     shift_table[NEEDLE[NEEDLE_LEN - 1]] contains the only 0.  */
Packit Service 82fcde
  for (i = 0; i < 1U << CHAR_BIT; i++)
Packit Service 82fcde
    shift_table[i] = needle_len;
Packit Service 82fcde
  for (i = 0; i < needle_len; i++)
Packit Service 82fcde
    shift_table[CANON_ELEMENT (needle[i])] = needle_len - i - 1;
Packit Service 82fcde
Packit Service 82fcde
  /* Perform the search.  Each iteration compares the right half
Packit Service 82fcde
     first.  */
Packit Service 82fcde
  if (CMP_FUNC (needle, needle + period, suffix) == 0)
Packit Service 82fcde
    {
Packit Service 82fcde
      /* Entire needle is periodic; a mismatch can only advance by the
Packit Service 82fcde
	 period, so use memory to avoid rescanning known occurrences
Packit Service 82fcde
	 of the period.  */
Packit Service 82fcde
      size_t memory = 0;
Packit Service 82fcde
      size_t shift;
Packit Service 82fcde
      j = 0;
Packit Service 82fcde
      while (AVAILABLE (haystack, haystack_len, j, needle_len))
Packit Service 82fcde
	{
Packit Service 82fcde
	  const unsigned char *pneedle;
Packit Service 82fcde
	  const unsigned char *phaystack;
Packit Service 82fcde
Packit Service 82fcde
	  /* Check the last byte first; if it does not match, then
Packit Service 82fcde
	     shift to the next possible match location.  */
Packit Service 82fcde
	  shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])];
Packit Service 82fcde
	  if (0 < shift)
Packit Service 82fcde
	    {
Packit Service 82fcde
	      if (memory && shift < period)
Packit Service 82fcde
		{
Packit Service 82fcde
		  /* Since needle is periodic, but the last period has
Packit Service 82fcde
		     a byte out of place, there can be no match until
Packit Service 82fcde
		     after the mismatch.  */
Packit Service 82fcde
		  shift = needle_len - period;
Packit Service 82fcde
		}
Packit Service 82fcde
	      memory = 0;
Packit Service 82fcde
	      j += shift;
Packit Service 82fcde
	      continue;
Packit Service 82fcde
	    }
Packit Service 82fcde
	  /* Scan for matches in right half.  The last byte has
Packit Service 82fcde
	     already been matched, by virtue of the shift table.  */
Packit Service 82fcde
	  i = MAX (suffix, memory);
Packit Service 82fcde
	  pneedle = &needle[i];
Packit Service 82fcde
	  phaystack = &haystack[i + j];
Packit Service 82fcde
	  while (i < needle_len - 1 && (CANON_ELEMENT (*pneedle++)
Packit Service 82fcde
					== CANON_ELEMENT (*phaystack++)))
Packit Service 82fcde
	    ++i;
Packit Service 82fcde
	  if (needle_len - 1 <= i)
Packit Service 82fcde
	    {
Packit Service 82fcde
	      /* Scan for matches in left half.  */
Packit Service 82fcde
	      i = suffix - 1;
Packit Service 82fcde
	      pneedle = &needle[i];
Packit Service 82fcde
	      phaystack = &haystack[i + j];
Packit Service 82fcde
	      while (memory < i + 1 && (CANON_ELEMENT (*pneedle--)
Packit Service 82fcde
					== CANON_ELEMENT (*phaystack--)))
Packit Service 82fcde
		--i;
Packit Service 82fcde
	      if (i + 1 < memory + 1)
Packit Service 82fcde
		return (RETURN_TYPE) (haystack + j);
Packit Service 82fcde
	      /* No match, so remember how many repetitions of period
Packit Service 82fcde
		 on the right half were scanned.  */
Packit Service 82fcde
	      j += period;
Packit Service 82fcde
	      memory = needle_len - period;
Packit Service 82fcde
	    }
Packit Service 82fcde
	  else
Packit Service 82fcde
	    {
Packit Service 82fcde
	      j += i - suffix + 1;
Packit Service 82fcde
	      memory = 0;
Packit Service 82fcde
	    }
Packit Service 82fcde
	}
Packit Service 82fcde
    }
Packit Service 82fcde
  else
Packit Service 82fcde
    {
Packit Service 82fcde
      /* The two halves of needle are distinct; no extra memory is
Packit Service 82fcde
	 required, and any mismatch results in a maximal shift.  */
Packit Service 82fcde
      size_t shift;
Packit Service 82fcde
      period = MAX (suffix, needle_len - suffix) + 1;
Packit Service 82fcde
      j = 0;
Packit Service 82fcde
      while (AVAILABLE (haystack, haystack_len, j, needle_len))
Packit Service 82fcde
	{
Packit Service 82fcde
	  const unsigned char *pneedle;
Packit Service 82fcde
	  const unsigned char *phaystack;
Packit Service 82fcde
Packit Service 82fcde
	  /* Check the last byte first; if it does not match, then
Packit Service 82fcde
	     shift to the next possible match location.  */
Packit Service 82fcde
	  shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])];
Packit Service 82fcde
	  if (0 < shift)
Packit Service 82fcde
	    {
Packit Service 82fcde
	      j += shift;
Packit Service 82fcde
	      continue;
Packit Service 82fcde
	    }
Packit Service 82fcde
	  /* Scan for matches in right half.  The last byte has
Packit Service 82fcde
	     already been matched, by virtue of the shift table.  */
Packit Service 82fcde
	  i = suffix;
Packit Service 82fcde
	  pneedle = &needle[i];
Packit Service 82fcde
	  phaystack = &haystack[i + j];
Packit Service 82fcde
	  while (i < needle_len - 1 && (CANON_ELEMENT (*pneedle++)
Packit Service 82fcde
					== CANON_ELEMENT (*phaystack++)))
Packit Service 82fcde
	    ++i;
Packit Service 82fcde
	  if (needle_len - 1 <= i)
Packit Service 82fcde
	    {
Packit Service 82fcde
	      /* Scan for matches in left half.  */
Packit Service 82fcde
	      i = suffix - 1;
Packit Service 82fcde
	      pneedle = &needle[i];
Packit Service 82fcde
	      phaystack = &haystack[i + j];
Packit Service 82fcde
	      while (i != SIZE_MAX && (CANON_ELEMENT (*pneedle--)
Packit Service 82fcde
				       == CANON_ELEMENT (*phaystack--)))
Packit Service 82fcde
		--i;
Packit Service 82fcde
	      if (i == SIZE_MAX)
Packit Service 82fcde
		return (RETURN_TYPE) (haystack + j);
Packit Service 82fcde
	      j += period;
Packit Service 82fcde
	    }
Packit Service 82fcde
	  else
Packit Service 82fcde
	    j += i - suffix + 1;
Packit Service 82fcde
	}
Packit Service 82fcde
    }
Packit Service 82fcde
  return NULL;
Packit Service 82fcde
}
Packit Service 82fcde
Packit Service 82fcde
#undef AVAILABLE
Packit Service 82fcde
#undef CANON_ELEMENT
Packit Service 82fcde
#undef CMP_FUNC
Packit Service 82fcde
#undef RET0_IF_0
Packit Service 82fcde
#undef RETURN_TYPE
Packit Service 82fcde
#undef CHECK_EOL