Blame string/str-two-way.h

Packit 6c4009
/* Byte-wise substring search, using the Two-Way algorithm.
Packit 6c4009
   Copyright (C) 2008-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
   Written by Eric Blake <ebb9@byu.net>, 2008.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
/* Before including this file, you need to include <string.h> (and
Packit 6c4009
   <config.h> before that, if not part of libc), and define:
Packit 6c4009
     RETURN_TYPE             A macro that expands to the return type.
Packit 6c4009
     AVAILABLE(h, h_l, j, n_l)
Packit 6c4009
			     A macro that returns nonzero if there are
Packit 6c4009
			     at least N_L bytes left starting at H[J].
Packit 6c4009
			     H is 'unsigned char *', H_L, J, and N_L
Packit 6c4009
			     are 'size_t'; H_L is an lvalue.  For
Packit 6c4009
			     NUL-terminated searches, H_L can be
Packit 6c4009
			     modified each iteration to avoid having
Packit 6c4009
			     to compute the end of H up front.
Packit 6c4009
Packit 6c4009
  For case-insensitivity, you may optionally define:
Packit 6c4009
     CMP_FUNC(p1, p2, l)     A macro that returns 0 iff the first L
Packit 6c4009
			     characters of P1 and P2 are equal.
Packit 6c4009
     CANON_ELEMENT(c)        A macro that canonicalizes an element right after
Packit 6c4009
			     it has been fetched from one of the two strings.
Packit 6c4009
			     The argument is an 'unsigned char'; the result
Packit 6c4009
			     must be an 'unsigned char' as well.
Packit 6c4009
Packit 6c4009
  Other macros you may optionally define:
Packit 6c4009
     RET0_IF_0(a)            Documented below at default definition.
Packit 6c4009
     CHECK_EOL               Same.
Packit 6c4009
Packit 6c4009
  This file undefines the macros listed above, and defines
Packit 6c4009
  LONG_NEEDLE_THRESHOLD.
Packit 6c4009
*/
Packit 6c4009
Packit 6c4009
#include <limits.h>
Packit 6c4009
#include <stdint.h>
Packit 6c4009
#include <sys/param.h>                  /* Defines MAX.  */
Packit 6c4009
Packit 6c4009
/* We use the Two-Way string matching algorithm, which guarantees
Packit 6c4009
   linear complexity with constant space.  Additionally, for long
Packit 6c4009
   needles, we also use a bad character shift table similar to the
Packit 6c4009
   Boyer-Moore algorithm to achieve improved (potentially sub-linear)
Packit 6c4009
   performance.
Packit 6c4009
Packit 6c4009
   See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
Packit 6c4009
   and http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm
Packit 6c4009
*/
Packit 6c4009
Packit 6c4009
/* Point at which computing a bad-byte shift table is likely to be
Packit 6c4009
   worthwhile.  Small needles should not compute a table, since it
Packit 6c4009
   adds (1 << CHAR_BIT) + NEEDLE_LEN computations of preparation for a
Packit 6c4009
   speedup no greater than a factor of NEEDLE_LEN.  The larger the
Packit 6c4009
   needle, the better the potential performance gain.  On the other
Packit 6c4009
   hand, on non-POSIX systems with CHAR_BIT larger than eight, the
Packit 6c4009
   memory required for the table is prohibitive.  */
Packit 6c4009
#if CHAR_BIT < 10
Packit 6c4009
# define LONG_NEEDLE_THRESHOLD 32U
Packit 6c4009
#else
Packit 6c4009
# define LONG_NEEDLE_THRESHOLD SIZE_MAX
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
#ifndef CANON_ELEMENT
Packit 6c4009
# define CANON_ELEMENT(c) c
Packit 6c4009
#endif
Packit 6c4009
#ifndef CMP_FUNC
Packit 6c4009
# define CMP_FUNC memcmp
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
/* Check for end-of-line in strstr and strcasestr routines.
Packit 6c4009
   We piggy-back matching procedure for detecting EOL where possible,
Packit 6c4009
   and use AVAILABLE macro otherwise.  */
Packit 6c4009
#ifndef CHECK_EOL
Packit 6c4009
# define CHECK_EOL (0)
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
/* Return NULL if argument is '\0'.  */
Packit 6c4009
#ifndef RET0_IF_0
Packit 6c4009
# define RET0_IF_0(a) /* nothing */
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
/* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN.
Packit 6c4009
   Return the index of the first byte in the right half, and set
Packit 6c4009
   *PERIOD to the global period of the right half.
Packit 6c4009
Packit 6c4009
   The global period of a string is the smallest index (possibly its
Packit 6c4009
   length) at which all remaining bytes in the string are repetitions
Packit 6c4009
   of the prefix (the last repetition may be a subset of the prefix).
Packit 6c4009
Packit 6c4009
   When NEEDLE is factored into two halves, a local period is the
Packit 6c4009
   length of the smallest word that shares a suffix with the left half
Packit 6c4009
   and shares a prefix with the right half.  All factorizations of a
Packit 6c4009
   non-empty NEEDLE have a local period of at least 1 and no greater
Packit 6c4009
   than NEEDLE_LEN.
Packit 6c4009
Packit 6c4009
   A critical factorization has the property that the local period
Packit 6c4009
   equals the global period.  All strings have at least one critical
Packit 6c4009
   factorization with the left half smaller than the global period.
Packit 6c4009
Packit 6c4009
   Given an ordered alphabet, a critical factorization can be computed
Packit 6c4009
   in linear time, with 2 * NEEDLE_LEN comparisons, by computing the
Packit 6c4009
   larger of two ordered maximal suffixes.  The ordered maximal
Packit 6c4009
   suffixes are determined by lexicographic comparison of
Packit 6c4009
   periodicity.  */
Packit 6c4009
static size_t
Packit 6c4009
critical_factorization (const unsigned char *needle, size_t needle_len,
Packit 6c4009
			size_t *period)
Packit 6c4009
{
Packit 6c4009
  /* Index of last byte of left half, or SIZE_MAX.  */
Packit 6c4009
  size_t max_suffix, max_suffix_rev;
Packit 6c4009
  size_t j; /* Index into NEEDLE for current candidate suffix.  */
Packit 6c4009
  size_t k; /* Offset into current period.  */
Packit 6c4009
  size_t p; /* Intermediate period.  */
Packit 6c4009
  unsigned char a, b; /* Current comparison bytes.  */
Packit 6c4009
Packit 6c4009
  /* Invariants:
Packit 6c4009
     0 <= j < NEEDLE_LEN - 1
Packit 6c4009
     -1 <= max_suffix{,_rev} < j (treating SIZE_MAX as if it were signed)
Packit 6c4009
     min(max_suffix, max_suffix_rev) < global period of NEEDLE
Packit 6c4009
     1 <= p <= global period of NEEDLE
Packit 6c4009
     p == global period of the substring NEEDLE[max_suffix{,_rev}+1...j]
Packit 6c4009
     1 <= k <= p
Packit 6c4009
  */
Packit 6c4009
Packit 6c4009
  /* Perform lexicographic search.  */
Packit 6c4009
  max_suffix = SIZE_MAX;
Packit 6c4009
  j = 0;
Packit 6c4009
  k = p = 1;
Packit 6c4009
  while (j + k < needle_len)
Packit 6c4009
    {
Packit 6c4009
      a = CANON_ELEMENT (needle[j + k]);
Packit 6c4009
      b = CANON_ELEMENT (needle[max_suffix + k]);
Packit 6c4009
      if (a < b)
Packit 6c4009
	{
Packit 6c4009
	  /* Suffix is smaller, period is entire prefix so far.  */
Packit 6c4009
	  j += k;
Packit 6c4009
	  k = 1;
Packit 6c4009
	  p = j - max_suffix;
Packit 6c4009
	}
Packit 6c4009
      else if (a == b)
Packit 6c4009
	{
Packit 6c4009
	  /* Advance through repetition of the current period.  */
Packit 6c4009
	  if (k != p)
Packit 6c4009
	    ++k;
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      j += p;
Packit 6c4009
	      k = 1;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
      else /* b < a */
Packit 6c4009
	{
Packit 6c4009
	  /* Suffix is larger, start over from current location.  */
Packit 6c4009
	  max_suffix = j++;
Packit 6c4009
	  k = p = 1;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
  *period = p;
Packit 6c4009
Packit 6c4009
  /* Perform reverse lexicographic search.  */
Packit 6c4009
  max_suffix_rev = SIZE_MAX;
Packit 6c4009
  j = 0;
Packit 6c4009
  k = p = 1;
Packit 6c4009
  while (j + k < needle_len)
Packit 6c4009
    {
Packit 6c4009
      a = CANON_ELEMENT (needle[j + k]);
Packit 6c4009
      b = CANON_ELEMENT (needle[max_suffix_rev + k]);
Packit 6c4009
      if (b < a)
Packit 6c4009
	{
Packit 6c4009
	  /* Suffix is smaller, period is entire prefix so far.  */
Packit 6c4009
	  j += k;
Packit 6c4009
	  k = 1;
Packit 6c4009
	  p = j - max_suffix_rev;
Packit 6c4009
	}
Packit 6c4009
      else if (a == b)
Packit 6c4009
	{
Packit 6c4009
	  /* Advance through repetition of the current period.  */
Packit 6c4009
	  if (k != p)
Packit 6c4009
	    ++k;
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      j += p;
Packit 6c4009
	      k = 1;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
      else /* a < b */
Packit 6c4009
	{
Packit 6c4009
	  /* Suffix is larger, start over from current location.  */
Packit 6c4009
	  max_suffix_rev = j++;
Packit 6c4009
	  k = p = 1;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Choose the longer suffix.  Return the first byte of the right
Packit 6c4009
     half, rather than the last byte of the left half.  */
Packit 6c4009
  if (max_suffix_rev + 1 < max_suffix + 1)
Packit 6c4009
    return max_suffix + 1;
Packit 6c4009
  *period = p;
Packit 6c4009
  return max_suffix_rev + 1;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Return the first location of non-empty NEEDLE within HAYSTACK, or
Packit 6c4009
   NULL.  HAYSTACK_LEN is the minimum known length of HAYSTACK.  This
Packit 6c4009
   method is optimized for NEEDLE_LEN < LONG_NEEDLE_THRESHOLD.
Packit 6c4009
   Performance is guaranteed to be linear, with an initialization cost
Packit 6c4009
   of 2 * NEEDLE_LEN comparisons.
Packit 6c4009
Packit 6c4009
   If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at
Packit 6c4009
   most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching.
Packit 6c4009
   If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 *
Packit 6c4009
   HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching.  */
Packit Service 8f37d2
static inline RETURN_TYPE
Packit 6c4009
two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
Packit 6c4009
		      const unsigned char *needle, size_t needle_len)
Packit 6c4009
{
Packit 6c4009
  size_t i; /* Index into current byte of NEEDLE.  */
Packit 6c4009
  size_t j; /* Index into current window of HAYSTACK.  */
Packit 6c4009
  size_t period; /* The period of the right half of needle.  */
Packit 6c4009
  size_t suffix; /* The index of the right half of needle.  */
Packit 6c4009
Packit 6c4009
  /* Factor the needle into two halves, such that the left half is
Packit 6c4009
     smaller than the global period, and the right half is
Packit 6c4009
     periodic (with a period as large as NEEDLE_LEN - suffix).  */
Packit 6c4009
  suffix = critical_factorization (needle, needle_len, &period);
Packit 6c4009
Packit 6c4009
  /* Perform the search.  Each iteration compares the right half
Packit 6c4009
     first.  */
Packit 6c4009
  if (CMP_FUNC (needle, needle + period, suffix) == 0)
Packit 6c4009
    {
Packit 6c4009
      /* Entire needle is periodic; a mismatch can only advance by the
Packit 6c4009
	 period, so use memory to avoid rescanning known occurrences
Packit 6c4009
	 of the period.  */
Packit 6c4009
      size_t memory = 0;
Packit 6c4009
      j = 0;
Packit 6c4009
      while (AVAILABLE (haystack, haystack_len, j, needle_len))
Packit 6c4009
	{
Packit 6c4009
	  const unsigned char *pneedle;
Packit 6c4009
	  const unsigned char *phaystack;
Packit 6c4009
Packit 6c4009
	  /* Scan for matches in right half.  */
Packit 6c4009
	  i = MAX (suffix, memory);
Packit 6c4009
	  pneedle = &needle[i];
Packit 6c4009
	  phaystack = &haystack[i + j];
Packit 6c4009
	  while (i < needle_len && (CANON_ELEMENT (*pneedle++)
Packit 6c4009
				    == CANON_ELEMENT (*phaystack++)))
Packit 6c4009
	    ++i;
Packit 6c4009
	  if (needle_len <= i)
Packit 6c4009
	    {
Packit 6c4009
	      /* Scan for matches in left half.  */
Packit 6c4009
	      i = suffix - 1;
Packit 6c4009
	      pneedle = &needle[i];
Packit 6c4009
	      phaystack = &haystack[i + j];
Packit 6c4009
	      while (memory < i + 1 && (CANON_ELEMENT (*pneedle--)
Packit 6c4009
					== CANON_ELEMENT (*phaystack--)))
Packit 6c4009
		--i;
Packit 6c4009
	      if (i + 1 < memory + 1)
Packit 6c4009
		return (RETURN_TYPE) (haystack + j);
Packit 6c4009
	      /* No match, so remember how many repetitions of period
Packit 6c4009
		 on the right half were scanned.  */
Packit 6c4009
	      j += period;
Packit 6c4009
	      memory = needle_len - period;
Packit 6c4009
	    }
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      j += i - suffix + 1;
Packit 6c4009
	      memory = 0;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      const unsigned char *phaystack;
Packit 6c4009
      /* The comparison always starts from needle[suffix], so cache it
Packit 6c4009
	 and use an optimized first-character loop.  */
Packit 6c4009
      unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]);
Packit 6c4009
Packit 6c4009
      /* The two halves of needle are distinct; no extra memory is
Packit 6c4009
	 required, and any mismatch results in a maximal shift.  */
Packit 6c4009
      period = MAX (suffix, needle_len - suffix) + 1;
Packit 6c4009
      j = 0;
Packit 6c4009
      while (AVAILABLE (haystack, haystack_len, j, needle_len))
Packit 6c4009
	{
Packit 6c4009
	  unsigned char haystack_char;
Packit 6c4009
	  const unsigned char *pneedle;
Packit 6c4009
Packit 6c4009
	  phaystack = &haystack[suffix + j];
Packit 6c4009
Packit 6c4009
#ifdef FASTSEARCH
Packit 6c4009
	  if (*phaystack++ != needle_suffix)
Packit 6c4009
	    {
Packit 6c4009
	      phaystack = FASTSEARCH (phaystack, needle_suffix,
Packit 6c4009
				      haystack_len - needle_len - j);
Packit 6c4009
	      if (phaystack == NULL)
Packit 6c4009
		goto ret0;
Packit 6c4009
	      j = phaystack - &haystack[suffix];
Packit 6c4009
	      phaystack++;
Packit 6c4009
	    }
Packit 6c4009
#else
Packit 6c4009
	  while (needle_suffix
Packit 6c4009
	      != (haystack_char = CANON_ELEMENT (*phaystack++)))
Packit 6c4009
	    {
Packit 6c4009
	      RET0_IF_0 (haystack_char);
Packit 6c4009
# if !CHECK_EOL
Packit 6c4009
	      ++j;
Packit 6c4009
	      if (!AVAILABLE (haystack, haystack_len, j, needle_len))
Packit 6c4009
		goto ret0;
Packit 6c4009
# endif
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
# if CHECK_EOL
Packit 6c4009
	  /* Calculate J if it wasn't kept up-to-date in the first-character
Packit 6c4009
	     loop.  */
Packit 6c4009
	  j = phaystack - &haystack[suffix] - 1;
Packit 6c4009
# endif
Packit 6c4009
#endif
Packit 6c4009
	  /* Scan for matches in right half.  */
Packit 6c4009
	  i = suffix + 1;
Packit 6c4009
	  pneedle = &needle[i];
Packit 6c4009
	  while (i < needle_len)
Packit 6c4009
	    {
Packit 6c4009
	      if (CANON_ELEMENT (*pneedle++)
Packit 6c4009
		  != (haystack_char = CANON_ELEMENT (*phaystack++)))
Packit 6c4009
		{
Packit 6c4009
		  RET0_IF_0 (haystack_char);
Packit 6c4009
		  break;
Packit 6c4009
		}
Packit 6c4009
	      ++i;
Packit 6c4009
	    }
Packit 6c4009
#if CHECK_EOL
Packit 6c4009
	  /* Update minimal length of haystack.  */
Packit 6c4009
	  if (phaystack > haystack + haystack_len)
Packit 6c4009
	    haystack_len = phaystack - haystack;
Packit 6c4009
#endif
Packit 6c4009
	  if (needle_len <= i)
Packit 6c4009
	    {
Packit 6c4009
	      /* Scan for matches in left half.  */
Packit 6c4009
	      i = suffix - 1;
Packit 6c4009
	      pneedle = &needle[i];
Packit 6c4009
	      phaystack = &haystack[i + j];
Packit 6c4009
	      while (i != SIZE_MAX)
Packit 6c4009
		{
Packit 6c4009
		  if (CANON_ELEMENT (*pneedle--)
Packit 6c4009
		      != (haystack_char = CANON_ELEMENT (*phaystack--)))
Packit 6c4009
		    {
Packit 6c4009
		      RET0_IF_0 (haystack_char);
Packit 6c4009
		      break;
Packit 6c4009
		    }
Packit 6c4009
		  --i;
Packit 6c4009
		}
Packit 6c4009
	      if (i == SIZE_MAX)
Packit 6c4009
		return (RETURN_TYPE) (haystack + j);
Packit 6c4009
	      j += period;
Packit 6c4009
	    }
Packit 6c4009
	  else
Packit 6c4009
	    j += i - suffix + 1;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
 ret0: __attribute__ ((unused))
Packit 6c4009
  return NULL;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Return the first location of non-empty NEEDLE within HAYSTACK, or
Packit 6c4009
   NULL.  HAYSTACK_LEN is the minimum known length of HAYSTACK.  This
Packit 6c4009
   method is optimized for LONG_NEEDLE_THRESHOLD <= NEEDLE_LEN.
Packit 6c4009
   Performance is guaranteed to be linear, with an initialization cost
Packit 6c4009
   of 3 * NEEDLE_LEN + (1 << CHAR_BIT) operations.
Packit 6c4009
Packit 6c4009
   If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at
Packit 6c4009
   most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching,
Packit 6c4009
   and sublinear performance O(HAYSTACK_LEN / NEEDLE_LEN) is possible.
Packit 6c4009
   If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 *
Packit 6c4009
   HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, and
Packit Service 8f37d2
   sublinear performance is not possible.
Packit Service 8f37d2
Packit Service 8f37d2
   Since this function is large and complex, block inlining to avoid
Packit Service 8f37d2
   slowing down the common case of small needles.  */
Packit Service 8f37d2
__attribute__((noinline)) static RETURN_TYPE
Packit 6c4009
two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
Packit 6c4009
		     const unsigned char *needle, size_t needle_len)
Packit 6c4009
{
Packit 6c4009
  size_t i; /* Index into current byte of NEEDLE.  */
Packit 6c4009
  size_t j; /* Index into current window of HAYSTACK.  */
Packit 6c4009
  size_t period; /* The period of the right half of needle.  */
Packit 6c4009
  size_t suffix; /* The index of the right half of needle.  */
Packit 6c4009
  size_t shift_table[1U << CHAR_BIT]; /* See below.  */
Packit 6c4009
Packit 6c4009
  /* Factor the needle into two halves, such that the left half is
Packit 6c4009
     smaller than the global period, and the right half is
Packit 6c4009
     periodic (with a period as large as NEEDLE_LEN - suffix).  */
Packit 6c4009
  suffix = critical_factorization (needle, needle_len, &period);
Packit 6c4009
Packit 6c4009
  /* Populate shift_table.  For each possible byte value c,
Packit 6c4009
     shift_table[c] is the distance from the last occurrence of c to
Packit 6c4009
     the end of NEEDLE, or NEEDLE_LEN if c is absent from the NEEDLE.
Packit 6c4009
     shift_table[NEEDLE[NEEDLE_LEN - 1]] contains the only 0.  */
Packit 6c4009
  for (i = 0; i < 1U << CHAR_BIT; i++)
Packit 6c4009
    shift_table[i] = needle_len;
Packit 6c4009
  for (i = 0; i < needle_len; i++)
Packit 6c4009
    shift_table[CANON_ELEMENT (needle[i])] = needle_len - i - 1;
Packit 6c4009
Packit 6c4009
  /* Perform the search.  Each iteration compares the right half
Packit 6c4009
     first.  */
Packit 6c4009
  if (CMP_FUNC (needle, needle + period, suffix) == 0)
Packit 6c4009
    {
Packit 6c4009
      /* Entire needle is periodic; a mismatch can only advance by the
Packit 6c4009
	 period, so use memory to avoid rescanning known occurrences
Packit 6c4009
	 of the period.  */
Packit 6c4009
      size_t memory = 0;
Packit 6c4009
      size_t shift;
Packit 6c4009
      j = 0;
Packit 6c4009
      while (AVAILABLE (haystack, haystack_len, j, needle_len))
Packit 6c4009
	{
Packit 6c4009
	  const unsigned char *pneedle;
Packit 6c4009
	  const unsigned char *phaystack;
Packit 6c4009
Packit 6c4009
	  /* Check the last byte first; if it does not match, then
Packit 6c4009
	     shift to the next possible match location.  */
Packit 6c4009
	  shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])];
Packit 6c4009
	  if (0 < shift)
Packit 6c4009
	    {
Packit 6c4009
	      if (memory && shift < period)
Packit 6c4009
		{
Packit 6c4009
		  /* Since needle is periodic, but the last period has
Packit 6c4009
		     a byte out of place, there can be no match until
Packit 6c4009
		     after the mismatch.  */
Packit 6c4009
		  shift = needle_len - period;
Packit 6c4009
		}
Packit 6c4009
	      memory = 0;
Packit 6c4009
	      j += shift;
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
	  /* Scan for matches in right half.  The last byte has
Packit 6c4009
	     already been matched, by virtue of the shift table.  */
Packit 6c4009
	  i = MAX (suffix, memory);
Packit 6c4009
	  pneedle = &needle[i];
Packit 6c4009
	  phaystack = &haystack[i + j];
Packit 6c4009
	  while (i < needle_len - 1 && (CANON_ELEMENT (*pneedle++)
Packit 6c4009
					== CANON_ELEMENT (*phaystack++)))
Packit 6c4009
	    ++i;
Packit 6c4009
	  if (needle_len - 1 <= i)
Packit 6c4009
	    {
Packit 6c4009
	      /* Scan for matches in left half.  */
Packit 6c4009
	      i = suffix - 1;
Packit 6c4009
	      pneedle = &needle[i];
Packit 6c4009
	      phaystack = &haystack[i + j];
Packit 6c4009
	      while (memory < i + 1 && (CANON_ELEMENT (*pneedle--)
Packit 6c4009
					== CANON_ELEMENT (*phaystack--)))
Packit 6c4009
		--i;
Packit 6c4009
	      if (i + 1 < memory + 1)
Packit 6c4009
		return (RETURN_TYPE) (haystack + j);
Packit 6c4009
	      /* No match, so remember how many repetitions of period
Packit 6c4009
		 on the right half were scanned.  */
Packit 6c4009
	      j += period;
Packit 6c4009
	      memory = needle_len - period;
Packit 6c4009
	    }
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      j += i - suffix + 1;
Packit 6c4009
	      memory = 0;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      /* The two halves of needle are distinct; no extra memory is
Packit 6c4009
	 required, and any mismatch results in a maximal shift.  */
Packit 6c4009
      size_t shift;
Packit 6c4009
      period = MAX (suffix, needle_len - suffix) + 1;
Packit 6c4009
      j = 0;
Packit 6c4009
      while (AVAILABLE (haystack, haystack_len, j, needle_len))
Packit 6c4009
	{
Packit 6c4009
	  const unsigned char *pneedle;
Packit 6c4009
	  const unsigned char *phaystack;
Packit 6c4009
Packit 6c4009
	  /* Check the last byte first; if it does not match, then
Packit 6c4009
	     shift to the next possible match location.  */
Packit 6c4009
	  shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])];
Packit 6c4009
	  if (0 < shift)
Packit 6c4009
	    {
Packit 6c4009
	      j += shift;
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
	  /* Scan for matches in right half.  The last byte has
Packit 6c4009
	     already been matched, by virtue of the shift table.  */
Packit 6c4009
	  i = suffix;
Packit 6c4009
	  pneedle = &needle[i];
Packit 6c4009
	  phaystack = &haystack[i + j];
Packit 6c4009
	  while (i < needle_len - 1 && (CANON_ELEMENT (*pneedle++)
Packit 6c4009
					== CANON_ELEMENT (*phaystack++)))
Packit 6c4009
	    ++i;
Packit 6c4009
	  if (needle_len - 1 <= i)
Packit 6c4009
	    {
Packit 6c4009
	      /* Scan for matches in left half.  */
Packit 6c4009
	      i = suffix - 1;
Packit 6c4009
	      pneedle = &needle[i];
Packit 6c4009
	      phaystack = &haystack[i + j];
Packit 6c4009
	      while (i != SIZE_MAX && (CANON_ELEMENT (*pneedle--)
Packit 6c4009
				       == CANON_ELEMENT (*phaystack--)))
Packit 6c4009
		--i;
Packit 6c4009
	      if (i == SIZE_MAX)
Packit 6c4009
		return (RETURN_TYPE) (haystack + j);
Packit 6c4009
	      j += period;
Packit 6c4009
	    }
Packit 6c4009
	  else
Packit 6c4009
	    j += i - suffix + 1;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
  return NULL;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
#undef AVAILABLE
Packit 6c4009
#undef CANON_ELEMENT
Packit 6c4009
#undef CMP_FUNC
Packit 6c4009
#undef RET0_IF_0
Packit 6c4009
#undef RETURN_TYPE
Packit 6c4009
#undef CHECK_EOL