Blame kwset.c

Packit Service fe3200
/*
Packit Service fe3200
 * This file has been copied from commit e7ac713d^ in the GNU grep git
Packit Service fe3200
 * repository. A few small changes have been made to adapt the code to
Packit Service fe3200
 * Git.
Packit Service fe3200
 */
Packit Service fe3200
Packit Service fe3200
/* kwset.c - search for any of a set of keywords.
Packit Service fe3200
   Copyright 1989, 1998, 2000, 2005 Free Software Foundation, Inc.
Packit Service fe3200
Packit Service fe3200
   This program is free software; you can redistribute it and/or modify
Packit Service fe3200
   it under the terms of the GNU General Public License as published by
Packit Service fe3200
   the Free Software Foundation; either version 2, or (at your option)
Packit Service fe3200
   any later version.
Packit Service fe3200
Packit Service fe3200
   This program is distributed in the hope that it will be useful,
Packit Service fe3200
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service fe3200
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit Service fe3200
   GNU General Public License for more details.
Packit Service fe3200
Packit Service fe3200
   You should have received a copy of the GNU General Public License
Packit Service fe3200
   along with this program; if not, see <http://www.gnu.org/licenses/>. */
Packit Service fe3200
Packit Service fe3200
/* Written August 1989 by Mike Haertel.
Packit Service fe3200
   The author may be reached (Email) at the address mike@ai.mit.edu,
Packit Service fe3200
   or (US mail) as Mike Haertel c/o Free Software Foundation. */
Packit Service fe3200
Packit Service fe3200
/* The algorithm implemented by these routines bears a startling resemblance
Packit Service fe3200
   to one discovered by Beate Commentz-Walter, although it is not identical.
Packit Service fe3200
   See "A String Matching Algorithm Fast on the Average," Technical Report,
Packit Service fe3200
   IBM-Germany, Scientific Center Heidelberg, Tiergartenstrasse 15, D-6900
Packit Service fe3200
   Heidelberg, Germany.  See also Aho, A.V., and M. Corasick, "Efficient
Packit Service fe3200
   String Matching:  An Aid to Bibliographic Search," CACM June 1975,
Packit Service fe3200
   Vol. 18, No. 6, which describes the failure function used below. */
Packit Service fe3200
Packit Service fe3200
#include "cache.h"
Packit Service fe3200
Packit Service fe3200
#include "kwset.h"
Packit Service fe3200
#include "compat/obstack.h"
Packit Service fe3200
Packit Service fe3200
#define NCHAR (UCHAR_MAX + 1)
Packit Service fe3200
/* adapter for `xmalloc()`, which takes `size_t`, not `long` */
Packit Service fe3200
static void *obstack_chunk_alloc(long size)
Packit Service fe3200
{
Packit Service fe3200
	if (size < 0)
Packit Service fe3200
		BUG("Cannot allocate a negative amount: %ld", size);
Packit Service fe3200
	return xmalloc(size);
Packit Service fe3200
}
Packit Service fe3200
#define obstack_chunk_free free
Packit Service fe3200
Packit Service fe3200
#define U(c) ((unsigned char) (c))
Packit Service fe3200
Packit Service fe3200
/* Balanced tree of edges and labels leaving a given trie node. */
Packit Service fe3200
struct tree
Packit Service fe3200
{
Packit Service fe3200
  struct tree *llink;		/* Left link; MUST be first field. */
Packit Service fe3200
  struct tree *rlink;		/* Right link (to larger labels). */
Packit Service fe3200
  struct trie *trie;		/* Trie node pointed to by this edge. */
Packit Service fe3200
  unsigned char label;		/* Label on this edge. */
Packit Service fe3200
  char balance;			/* Difference in depths of subtrees. */
Packit Service fe3200
};
Packit Service fe3200
Packit Service fe3200
/* Node of a trie representing a set of reversed keywords. */
Packit Service fe3200
struct trie
Packit Service fe3200
{
Packit Service fe3200
  unsigned int accepting;	/* Word index of accepted word, or zero. */
Packit Service fe3200
  struct tree *links;		/* Tree of edges leaving this node. */
Packit Service fe3200
  struct trie *parent;		/* Parent of this node. */
Packit Service fe3200
  struct trie *next;		/* List of all trie nodes in level order. */
Packit Service fe3200
  struct trie *fail;		/* Aho-Corasick failure function. */
Packit Service fe3200
  int depth;			/* Depth of this node from the root. */
Packit Service fe3200
  int shift;			/* Shift function for search failures. */
Packit Service fe3200
  int maxshift;			/* Max shift of self and descendants. */
Packit Service fe3200
};
Packit Service fe3200
Packit Service fe3200
/* Structure returned opaquely to the caller, containing everything. */
Packit Service fe3200
struct kwset
Packit Service fe3200
{
Packit Service fe3200
  struct obstack obstack;	/* Obstack for node allocation. */
Packit Service fe3200
  int words;			/* Number of words in the trie. */
Packit Service fe3200
  struct trie *trie;		/* The trie itself. */
Packit Service fe3200
  int mind;			/* Minimum depth of an accepting node. */
Packit Service fe3200
  int maxd;			/* Maximum depth of any node. */
Packit Service fe3200
  unsigned char delta[NCHAR];	/* Delta table for rapid search. */
Packit Service fe3200
  struct trie *next[NCHAR];	/* Table of children of the root. */
Packit Service fe3200
  char *target;			/* Target string if there's only one. */
Packit Service fe3200
  int mind2;			/* Used in Boyer-Moore search for one string. */
Packit Service fe3200
  unsigned char const *trans;  /* Character translation table. */
Packit Service fe3200
};
Packit Service fe3200
Packit Service fe3200
/* Allocate and initialize a keyword set object, returning an opaque
Packit Service fe3200
   pointer to it.  Return NULL if memory is not available. */
Packit Service fe3200
kwset_t
Packit Service fe3200
kwsalloc (unsigned char const *trans)
Packit Service fe3200
{
Packit Service fe3200
  struct kwset *kwset;
Packit Service fe3200
Packit Service fe3200
  kwset = (struct kwset *) xmalloc(sizeof (struct kwset));
Packit Service fe3200
Packit Service fe3200
  obstack_init(&kwset->obstack);
Packit Service fe3200
  kwset->words = 0;
Packit Service fe3200
  kwset->trie
Packit Service fe3200
    = (struct trie *) obstack_alloc(&kwset->obstack, sizeof (struct trie));
Packit Service fe3200
  if (!kwset->trie)
Packit Service fe3200
    {
Packit Service fe3200
      kwsfree((kwset_t) kwset);
Packit Service fe3200
      return NULL;
Packit Service fe3200
    }
Packit Service fe3200
  kwset->trie->accepting = 0;
Packit Service fe3200
  kwset->trie->links = NULL;
Packit Service fe3200
  kwset->trie->parent = NULL;
Packit Service fe3200
  kwset->trie->next = NULL;
Packit Service fe3200
  kwset->trie->fail = NULL;
Packit Service fe3200
  kwset->trie->depth = 0;
Packit Service fe3200
  kwset->trie->shift = 0;
Packit Service fe3200
  kwset->mind = INT_MAX;
Packit Service fe3200
  kwset->maxd = -1;
Packit Service fe3200
  kwset->target = NULL;
Packit Service fe3200
  kwset->trans = trans;
Packit Service fe3200
Packit Service fe3200
  return (kwset_t) kwset;
Packit Service fe3200
}
Packit Service fe3200
Packit Service fe3200
/* This upper bound is valid for CHAR_BIT >= 4 and
Packit Service fe3200
   exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }. */
Packit Service fe3200
#define DEPTH_SIZE (CHAR_BIT + CHAR_BIT/2)
Packit Service fe3200
Packit Service fe3200
/* Add the given string to the contents of the keyword set.  Return NULL
Packit Service fe3200
   for success, an error message otherwise. */
Packit Service fe3200
const char *
Packit Service fe3200
kwsincr (kwset_t kws, char const *text, size_t len)
Packit Service fe3200
{
Packit Service fe3200
  struct kwset *kwset;
Packit Service fe3200
  register struct trie *trie;
Packit Service fe3200
  register unsigned char label;
Packit Service fe3200
  register struct tree *link;
Packit Service fe3200
  register int depth;
Packit Service fe3200
  struct tree *links[DEPTH_SIZE];
Packit Service fe3200
  enum { L, R } dirs[DEPTH_SIZE];
Packit Service fe3200
  struct tree *t, *r, *l, *rl, *lr;
Packit Service fe3200
Packit Service fe3200
  kwset = (struct kwset *) kws;
Packit Service fe3200
  trie = kwset->trie;
Packit Service fe3200
  text += len;
Packit Service fe3200
Packit Service fe3200
  /* Descend the trie (built of reversed keywords) character-by-character,
Packit Service fe3200
     installing new nodes when necessary. */
Packit Service fe3200
  while (len--)
Packit Service fe3200
    {
Packit Service fe3200
      label = kwset->trans ? kwset->trans[U(*--text)] : *--text;
Packit Service fe3200
Packit Service fe3200
      /* Descend the tree of outgoing links for this trie node,
Packit Service fe3200
	 looking for the current character and keeping track
Packit Service fe3200
	 of the path followed. */
Packit Service fe3200
      link = trie->links;
Packit Service fe3200
      links[0] = (struct tree *) &trie->links;
Packit Service fe3200
      dirs[0] = L;
Packit Service fe3200
      depth = 1;
Packit Service fe3200
Packit Service fe3200
      while (link && label != link->label)
Packit Service fe3200
	{
Packit Service fe3200
	  links[depth] = link;
Packit Service fe3200
	  if (label < link->label)
Packit Service fe3200
	    dirs[depth++] = L, link = link->llink;
Packit Service fe3200
	  else
Packit Service fe3200
	    dirs[depth++] = R, link = link->rlink;
Packit Service fe3200
	}
Packit Service fe3200
Packit Service fe3200
      /* The current character doesn't have an outgoing link at
Packit Service fe3200
	 this trie node, so build a new trie node and install
Packit Service fe3200
	 a link in the current trie node's tree. */
Packit Service fe3200
      if (!link)
Packit Service fe3200
	{
Packit Service fe3200
	  link = (struct tree *) obstack_alloc(&kwset->obstack,
Packit Service fe3200
					       sizeof (struct tree));
Packit Service fe3200
	  if (!link)
Packit Service fe3200
	    return "memory exhausted";
Packit Service fe3200
	  link->llink = NULL;
Packit Service fe3200
	  link->rlink = NULL;
Packit Service fe3200
	  link->trie = (struct trie *) obstack_alloc(&kwset->obstack,
Packit Service fe3200
						     sizeof (struct trie));
Packit Service fe3200
	  if (!link->trie)
Packit Service fe3200
	    {
Packit Service fe3200
	      obstack_free(&kwset->obstack, link);
Packit Service fe3200
	      return "memory exhausted";
Packit Service fe3200
	    }
Packit Service fe3200
	  link->trie->accepting = 0;
Packit Service fe3200
	  link->trie->links = NULL;
Packit Service fe3200
	  link->trie->parent = trie;
Packit Service fe3200
	  link->trie->next = NULL;
Packit Service fe3200
	  link->trie->fail = NULL;
Packit Service fe3200
	  link->trie->depth = trie->depth + 1;
Packit Service fe3200
	  link->trie->shift = 0;
Packit Service fe3200
	  link->label = label;
Packit Service fe3200
	  link->balance = 0;
Packit Service fe3200
Packit Service fe3200
	  /* Install the new tree node in its parent. */
Packit Service fe3200
	  if (dirs[--depth] == L)
Packit Service fe3200
	    links[depth]->llink = link;
Packit Service fe3200
	  else
Packit Service fe3200
	    links[depth]->rlink = link;
Packit Service fe3200
Packit Service fe3200
	  /* Back up the tree fixing the balance flags. */
Packit Service fe3200
	  while (depth && !links[depth]->balance)
Packit Service fe3200
	    {
Packit Service fe3200
	      if (dirs[depth] == L)
Packit Service fe3200
		--links[depth]->balance;
Packit Service fe3200
	      else
Packit Service fe3200
		++links[depth]->balance;
Packit Service fe3200
	      --depth;
Packit Service fe3200
	    }
Packit Service fe3200
Packit Service fe3200
	  /* Rebalance the tree by pointer rotations if necessary. */
Packit Service fe3200
	  if (depth && ((dirs[depth] == L && --links[depth]->balance)
Packit Service fe3200
			|| (dirs[depth] == R && ++links[depth]->balance)))
Packit Service fe3200
	    {
Packit Service fe3200
	      switch (links[depth]->balance)
Packit Service fe3200
		{
Packit Service fe3200
		case (char) -2:
Packit Service fe3200
		  switch (dirs[depth + 1])
Packit Service fe3200
		    {
Packit Service fe3200
		    case L:
Packit Service fe3200
		      r = links[depth], t = r->llink, rl = t->rlink;
Packit Service fe3200
		      t->rlink = r, r->llink = rl;
Packit Service fe3200
		      t->balance = r->balance = 0;
Packit Service fe3200
		      break;
Packit Service fe3200
		    case R:
Packit Service fe3200
		      r = links[depth], l = r->llink, t = l->rlink;
Packit Service fe3200
		      rl = t->rlink, lr = t->llink;
Packit Service fe3200
		      t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
Packit Service fe3200
		      l->balance = t->balance != 1 ? 0 : -1;
Packit Service fe3200
		      r->balance = t->balance != (char) -1 ? 0 : 1;
Packit Service fe3200
		      t->balance = 0;
Packit Service fe3200
		      break;
Packit Service fe3200
		    default:
Packit Service fe3200
		      abort ();
Packit Service fe3200
		    }
Packit Service fe3200
		  break;
Packit Service fe3200
		case 2:
Packit Service fe3200
		  switch (dirs[depth + 1])
Packit Service fe3200
		    {
Packit Service fe3200
		    case R:
Packit Service fe3200
		      l = links[depth], t = l->rlink, lr = t->llink;
Packit Service fe3200
		      t->llink = l, l->rlink = lr;
Packit Service fe3200
		      t->balance = l->balance = 0;
Packit Service fe3200
		      break;
Packit Service fe3200
		    case L:
Packit Service fe3200
		      l = links[depth], r = l->rlink, t = r->llink;
Packit Service fe3200
		      lr = t->llink, rl = t->rlink;
Packit Service fe3200
		      t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
Packit Service fe3200
		      l->balance = t->balance != 1 ? 0 : -1;
Packit Service fe3200
		      r->balance = t->balance != (char) -1 ? 0 : 1;
Packit Service fe3200
		      t->balance = 0;
Packit Service fe3200
		      break;
Packit Service fe3200
		    default:
Packit Service fe3200
		      abort ();
Packit Service fe3200
		    }
Packit Service fe3200
		  break;
Packit Service fe3200
		default:
Packit Service fe3200
		  abort ();
Packit Service fe3200
		}
Packit Service fe3200
Packit Service fe3200
	      if (dirs[depth - 1] == L)
Packit Service fe3200
		links[depth - 1]->llink = t;
Packit Service fe3200
	      else
Packit Service fe3200
		links[depth - 1]->rlink = t;
Packit Service fe3200
	    }
Packit Service fe3200
	}
Packit Service fe3200
Packit Service fe3200
      trie = link->trie;
Packit Service fe3200
    }
Packit Service fe3200
Packit Service fe3200
  /* Mark the node we finally reached as accepting, encoding the
Packit Service fe3200
     index number of this word in the keyword set so far. */
Packit Service fe3200
  if (!trie->accepting)
Packit Service fe3200
    trie->accepting = 1 + 2 * kwset->words;
Packit Service fe3200
  ++kwset->words;
Packit Service fe3200
Packit Service fe3200
  /* Keep track of the longest and shortest string of the keyword set. */
Packit Service fe3200
  if (trie->depth < kwset->mind)
Packit Service fe3200
    kwset->mind = trie->depth;
Packit Service fe3200
  if (trie->depth > kwset->maxd)
Packit Service fe3200
    kwset->maxd = trie->depth;
Packit Service fe3200
Packit Service fe3200
  return NULL;
Packit Service fe3200
}
Packit Service fe3200
Packit Service fe3200
/* Enqueue the trie nodes referenced from the given tree in the
Packit Service fe3200
   given queue. */
Packit Service fe3200
static void
Packit Service fe3200
enqueue (struct tree *tree, struct trie **last)
Packit Service fe3200
{
Packit Service fe3200
  if (!tree)
Packit Service fe3200
    return;
Packit Service fe3200
  enqueue(tree->llink, last);
Packit Service fe3200
  enqueue(tree->rlink, last);
Packit Service fe3200
  (*last) = (*last)->next = tree->trie;
Packit Service fe3200
}
Packit Service fe3200
Packit Service fe3200
/* Compute the Aho-Corasick failure function for the trie nodes referenced
Packit Service fe3200
   from the given tree, given the failure function for their parent as
Packit Service fe3200
   well as a last resort failure node. */
Packit Service fe3200
static void
Packit Service fe3200
treefails (register struct tree const *tree, struct trie const *fail,
Packit Service fe3200
	   struct trie *recourse)
Packit Service fe3200
{
Packit Service fe3200
  register struct tree *link;
Packit Service fe3200
Packit Service fe3200
  if (!tree)
Packit Service fe3200
    return;
Packit Service fe3200
Packit Service fe3200
  treefails(tree->llink, fail, recourse);
Packit Service fe3200
  treefails(tree->rlink, fail, recourse);
Packit Service fe3200
Packit Service fe3200
  /* Find, in the chain of fails going back to the root, the first
Packit Service fe3200
     node that has a descendant on the current label. */
Packit Service fe3200
  while (fail)
Packit Service fe3200
    {
Packit Service fe3200
      link = fail->links;
Packit Service fe3200
      while (link && tree->label != link->label)
Packit Service fe3200
	if (tree->label < link->label)
Packit Service fe3200
	  link = link->llink;
Packit Service fe3200
	else
Packit Service fe3200
	  link = link->rlink;
Packit Service fe3200
      if (link)
Packit Service fe3200
	{
Packit Service fe3200
	  tree->trie->fail = link->trie;
Packit Service fe3200
	  return;
Packit Service fe3200
	}
Packit Service fe3200
      fail = fail->fail;
Packit Service fe3200
    }
Packit Service fe3200
Packit Service fe3200
  tree->trie->fail = recourse;
Packit Service fe3200
}
Packit Service fe3200
Packit Service fe3200
/* Set delta entries for the links of the given tree such that
Packit Service fe3200
   the preexisting delta value is larger than the current depth. */
Packit Service fe3200
static void
Packit Service fe3200
treedelta (register struct tree const *tree,
Packit Service fe3200
	   register unsigned int depth,
Packit Service fe3200
	   unsigned char delta[])
Packit Service fe3200
{
Packit Service fe3200
  if (!tree)
Packit Service fe3200
    return;
Packit Service fe3200
  treedelta(tree->llink, depth, delta);
Packit Service fe3200
  treedelta(tree->rlink, depth, delta);
Packit Service fe3200
  if (depth < delta[tree->label])
Packit Service fe3200
    delta[tree->label] = depth;
Packit Service fe3200
}
Packit Service fe3200
Packit Service fe3200
/* Return true if A has every label in B. */
Packit Service fe3200
static int
Packit Service fe3200
hasevery (register struct tree const *a, register struct tree const *b)
Packit Service fe3200
{
Packit Service fe3200
  if (!b)
Packit Service fe3200
    return 1;
Packit Service fe3200
  if (!hasevery(a, b->llink))
Packit Service fe3200
    return 0;
Packit Service fe3200
  if (!hasevery(a, b->rlink))
Packit Service fe3200
    return 0;
Packit Service fe3200
  while (a && b->label != a->label)
Packit Service fe3200
    if (b->label < a->label)
Packit Service fe3200
      a = a->llink;
Packit Service fe3200
    else
Packit Service fe3200
      a = a->rlink;
Packit Service fe3200
  return !!a;
Packit Service fe3200
}
Packit Service fe3200
Packit Service fe3200
/* Compute a vector, indexed by character code, of the trie nodes
Packit Service fe3200
   referenced from the given tree. */
Packit Service fe3200
static void
Packit Service fe3200
treenext (struct tree const *tree, struct trie *next[])
Packit Service fe3200
{
Packit Service fe3200
  if (!tree)
Packit Service fe3200
    return;
Packit Service fe3200
  treenext(tree->llink, next);
Packit Service fe3200
  treenext(tree->rlink, next);
Packit Service fe3200
  next[tree->label] = tree->trie;
Packit Service fe3200
}
Packit Service fe3200
Packit Service fe3200
/* Compute the shift for each trie node, as well as the delta
Packit Service fe3200
   table and next cache for the given keyword set. */
Packit Service fe3200
const char *
Packit Service fe3200
kwsprep (kwset_t kws)
Packit Service fe3200
{
Packit Service fe3200
  register struct kwset *kwset;
Packit Service fe3200
  register int i;
Packit Service fe3200
  register struct trie *curr;
Packit Service fe3200
  register unsigned char const *trans;
Packit Service fe3200
  unsigned char delta[NCHAR];
Packit Service fe3200
Packit Service fe3200
  kwset = (struct kwset *) kws;
Packit Service fe3200
Packit Service fe3200
  /* Initial values for the delta table; will be changed later.  The
Packit Service fe3200
     delta entry for a given character is the smallest depth of any
Packit Service fe3200
     node at which an outgoing edge is labeled by that character. */
Packit Service fe3200
  memset(delta, kwset->mind < UCHAR_MAX ? kwset->mind : UCHAR_MAX, NCHAR);
Packit Service fe3200
Packit Service fe3200
  /* Check if we can use the simple boyer-moore algorithm, instead
Packit Service fe3200
     of the hairy commentz-walter algorithm. */
Packit Service fe3200
  if (kwset->words == 1 && kwset->trans == NULL)
Packit Service fe3200
    {
Packit Service fe3200
      char c;
Packit Service fe3200
Packit Service fe3200
      /* Looking for just one string.  Extract it from the trie. */
Packit Service fe3200
      kwset->target = obstack_alloc(&kwset->obstack, kwset->mind);
Packit Service fe3200
      if (!kwset->target)
Packit Service fe3200
	return "memory exhausted";
Packit Service fe3200
      for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
Packit Service fe3200
	{
Packit Service fe3200
	  kwset->target[i] = curr->links->label;
Packit Service fe3200
	  curr = curr->links->trie;
Packit Service fe3200
	}
Packit Service fe3200
      /* Build the Boyer Moore delta.  Boy that's easy compared to CW. */
Packit Service fe3200
      for (i = 0; i < kwset->mind; ++i)
Packit Service fe3200
	delta[U(kwset->target[i])] = kwset->mind - (i + 1);
Packit Service fe3200
      /* Find the minimal delta2 shift that we might make after
Packit Service fe3200
	 a backwards match has failed. */
Packit Service fe3200
      c = kwset->target[kwset->mind - 1];
Packit Service fe3200
      for (i = kwset->mind - 2; i >= 0; --i)
Packit Service fe3200
	if (kwset->target[i] == c)
Packit Service fe3200
	  break;
Packit Service fe3200
      kwset->mind2 = kwset->mind - (i + 1);
Packit Service fe3200
    }
Packit Service fe3200
  else
Packit Service fe3200
    {
Packit Service fe3200
      register struct trie *fail;
Packit Service fe3200
      struct trie *last, *next[NCHAR];
Packit Service fe3200
Packit Service fe3200
      /* Traverse the nodes of the trie in level order, simultaneously
Packit Service fe3200
	 computing the delta table, failure function, and shift function. */
Packit Service fe3200
      for (curr = last = kwset->trie; curr; curr = curr->next)
Packit Service fe3200
	{
Packit Service fe3200
	  /* Enqueue the immediate descendants in the level order queue. */
Packit Service fe3200
	  enqueue(curr->links, &last);
Packit Service fe3200
Packit Service fe3200
	  curr->shift = kwset->mind;
Packit Service fe3200
	  curr->maxshift = kwset->mind;
Packit Service fe3200
Packit Service fe3200
	  /* Update the delta table for the descendants of this node. */
Packit Service fe3200
	  treedelta(curr->links, curr->depth, delta);
Packit Service fe3200
Packit Service fe3200
	  /* Compute the failure function for the descendants of this node. */
Packit Service fe3200
	  treefails(curr->links, curr->fail, kwset->trie);
Packit Service fe3200
Packit Service fe3200
	  /* Update the shifts at each node in the current node's chain
Packit Service fe3200
	     of fails back to the root. */
Packit Service fe3200
	  for (fail = curr->fail; fail; fail = fail->fail)
Packit Service fe3200
	    {
Packit Service fe3200
	      /* If the current node has some outgoing edge that the fail
Packit Service fe3200
		 doesn't, then the shift at the fail should be no larger
Packit Service fe3200
		 than the difference of their depths. */
Packit Service fe3200
	      if (!hasevery(fail->links, curr->links))
Packit Service fe3200
		if (curr->depth - fail->depth < fail->shift)
Packit Service fe3200
		  fail->shift = curr->depth - fail->depth;
Packit Service fe3200
Packit Service fe3200
	      /* If the current node is accepting then the shift at the
Packit Service fe3200
		 fail and its descendants should be no larger than the
Packit Service fe3200
		 difference of their depths. */
Packit Service fe3200
	      if (curr->accepting && fail->maxshift > curr->depth - fail->depth)
Packit Service fe3200
		fail->maxshift = curr->depth - fail->depth;
Packit Service fe3200
	    }
Packit Service fe3200
	}
Packit Service fe3200
Packit Service fe3200
      /* Traverse the trie in level order again, fixing up all nodes whose
Packit Service fe3200
	 shift exceeds their inherited maxshift. */
Packit Service fe3200
      for (curr = kwset->trie->next; curr; curr = curr->next)
Packit Service fe3200
	{
Packit Service fe3200
	  if (curr->maxshift > curr->parent->maxshift)
Packit Service fe3200
	    curr->maxshift = curr->parent->maxshift;
Packit Service fe3200
	  if (curr->shift > curr->maxshift)
Packit Service fe3200
	    curr->shift = curr->maxshift;
Packit Service fe3200
	}
Packit Service fe3200
Packit Service fe3200
      /* Create a vector, indexed by character code, of the outgoing links
Packit Service fe3200
	 from the root node. */
Packit Service fe3200
      for (i = 0; i < NCHAR; ++i)
Packit Service fe3200
	next[i] = NULL;
Packit Service fe3200
      treenext(kwset->trie->links, next);
Packit Service fe3200
Packit Service fe3200
      if ((trans = kwset->trans) != NULL)
Packit Service fe3200
	for (i = 0; i < NCHAR; ++i)
Packit Service fe3200
	  kwset->next[i] = next[U(trans[i])];
Packit Service fe3200
      else
Packit Service fe3200
	COPY_ARRAY(kwset->next, next, NCHAR);
Packit Service fe3200
    }
Packit Service fe3200
Packit Service fe3200
  /* Fix things up for any translation table. */
Packit Service fe3200
  if ((trans = kwset->trans) != NULL)
Packit Service fe3200
    for (i = 0; i < NCHAR; ++i)
Packit Service fe3200
      kwset->delta[i] = delta[U(trans[i])];
Packit Service fe3200
  else
Packit Service fe3200
    memcpy(kwset->delta, delta, NCHAR);
Packit Service fe3200
Packit Service fe3200
  return NULL;
Packit Service fe3200
}
Packit Service fe3200
Packit Service fe3200
/* Fast boyer-moore search. */
Packit Service fe3200
static size_t
Packit Service fe3200
bmexec (kwset_t kws, char const *text, size_t size)
Packit Service fe3200
{
Packit Service fe3200
  struct kwset const *kwset;
Packit Service fe3200
  register unsigned char const *d1;
Packit Service fe3200
  register char const *ep, *sp, *tp;
Packit Service fe3200
  register int d, gc, i, len, md2;
Packit Service fe3200
Packit Service fe3200
  kwset = (struct kwset const *) kws;
Packit Service fe3200
  len = kwset->mind;
Packit Service fe3200
Packit Service fe3200
  if (len == 0)
Packit Service fe3200
    return 0;
Packit Service fe3200
  if (len > size)
Packit Service fe3200
    return -1;
Packit Service fe3200
  if (len == 1)
Packit Service fe3200
    {
Packit Service fe3200
      tp = memchr (text, kwset->target[0], size);
Packit Service fe3200
      return tp ? tp - text : -1;
Packit Service fe3200
    }
Packit Service fe3200
Packit Service fe3200
  d1 = kwset->delta;
Packit Service fe3200
  sp = kwset->target + len;
Packit Service fe3200
  gc = U(sp[-2]);
Packit Service fe3200
  md2 = kwset->mind2;
Packit Service fe3200
  tp = text + len;
Packit Service fe3200
Packit Service fe3200
  /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
Packit Service fe3200
  if (size > 12 * len)
Packit Service fe3200
    /* 11 is not a bug, the initial offset happens only once. */
Packit Service fe3200
    for (ep = text + size - 11 * len;;)
Packit Service fe3200
      {
Packit Service fe3200
	while (tp <= ep)
Packit Service fe3200
	  {
Packit Service fe3200
	    d = d1[U(tp[-1])], tp += d;
Packit Service fe3200
	    d = d1[U(tp[-1])], tp += d;
Packit Service fe3200
	    if (d == 0)
Packit Service fe3200
	      goto found;
Packit Service fe3200
	    d = d1[U(tp[-1])], tp += d;
Packit Service fe3200
	    d = d1[U(tp[-1])], tp += d;
Packit Service fe3200
	    d = d1[U(tp[-1])], tp += d;
Packit Service fe3200
	    if (d == 0)
Packit Service fe3200
	      goto found;
Packit Service fe3200
	    d = d1[U(tp[-1])], tp += d;
Packit Service fe3200
	    d = d1[U(tp[-1])], tp += d;
Packit Service fe3200
	    d = d1[U(tp[-1])], tp += d;
Packit Service fe3200
	    if (d == 0)
Packit Service fe3200
	      goto found;
Packit Service fe3200
	    d = d1[U(tp[-1])], tp += d;
Packit Service fe3200
	    d = d1[U(tp[-1])], tp += d;
Packit Service fe3200
	  }
Packit Service fe3200
	break;
Packit Service fe3200
      found:
Packit Service fe3200
	if (U(tp[-2]) == gc)
Packit Service fe3200
	  {
Packit Service fe3200
	    for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
Packit Service fe3200
	      ;
Packit Service fe3200
	    if (i > len)
Packit Service fe3200
	      return tp - len - text;
Packit Service fe3200
	  }
Packit Service fe3200
	tp += md2;
Packit Service fe3200
      }
Packit Service fe3200
Packit Service fe3200
  /* Now we have only a few characters left to search.  We
Packit Service fe3200
     carefully avoid ever producing an out-of-bounds pointer. */
Packit Service fe3200
  ep = text + size;
Packit Service fe3200
  d = d1[U(tp[-1])];
Packit Service fe3200
  while (d <= ep - tp)
Packit Service fe3200
    {
Packit Service fe3200
      d = d1[U((tp += d)[-1])];
Packit Service fe3200
      if (d != 0)
Packit Service fe3200
	continue;
Packit Service fe3200
      if (U(tp[-2]) == gc)
Packit Service fe3200
	{
Packit Service fe3200
	  for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
Packit Service fe3200
	    ;
Packit Service fe3200
	  if (i > len)
Packit Service fe3200
	    return tp - len - text;
Packit Service fe3200
	}
Packit Service fe3200
      d = md2;
Packit Service fe3200
    }
Packit Service fe3200
Packit Service fe3200
  return -1;
Packit Service fe3200
}
Packit Service fe3200
Packit Service fe3200
/* Hairy multiple string search. */
Packit Service fe3200
static size_t
Packit Service fe3200
cwexec (kwset_t kws, char const *text, size_t len, struct kwsmatch *kwsmatch)
Packit Service fe3200
{
Packit Service fe3200
  struct kwset const *kwset;
Packit Service fe3200
  struct trie * const *next;
Packit Service fe3200
  struct trie const *trie;
Packit Service fe3200
  struct trie const *accept;
Packit Service fe3200
  char const *beg, *lim, *mch, *lmch;
Packit Service fe3200
  register unsigned char c;
Packit Service fe3200
  register unsigned char const *delta;
Packit Service fe3200
  register int d;
Packit Service fe3200
  register char const *end, *qlim;
Packit Service fe3200
  register struct tree const *tree;
Packit Service fe3200
  register unsigned char const *trans;
Packit Service fe3200
Packit Service fe3200
  accept = NULL;
Packit Service fe3200
Packit Service fe3200
  /* Initialize register copies and look for easy ways out. */
Packit Service fe3200
  kwset = (struct kwset *) kws;
Packit Service fe3200
  if (len < kwset->mind)
Packit Service fe3200
    return -1;
Packit Service fe3200
  next = kwset->next;
Packit Service fe3200
  delta = kwset->delta;
Packit Service fe3200
  trans = kwset->trans;
Packit Service fe3200
  lim = text + len;
Packit Service fe3200
  end = text;
Packit Service fe3200
  if ((d = kwset->mind) != 0)
Packit Service fe3200
    mch = NULL;
Packit Service fe3200
  else
Packit Service fe3200
    {
Packit Service fe3200
      mch = text, accept = kwset->trie;
Packit Service fe3200
      goto match;
Packit Service fe3200
    }
Packit Service fe3200
Packit Service fe3200
  if (len >= 4 * kwset->mind)
Packit Service fe3200
    qlim = lim - 4 * kwset->mind;
Packit Service fe3200
  else
Packit Service fe3200
    qlim = NULL;
Packit Service fe3200
Packit Service fe3200
  while (lim - end >= d)
Packit Service fe3200
    {
Packit Service fe3200
      if (qlim && end <= qlim)
Packit Service fe3200
	{
Packit Service fe3200
	  end += d - 1;
Packit Service fe3200
	  while ((d = delta[c = *end]) && end < qlim)
Packit Service fe3200
	    {
Packit Service fe3200
	      end += d;
Packit Service fe3200
	      end += delta[U(*end)];
Packit Service fe3200
	      end += delta[U(*end)];
Packit Service fe3200
	    }
Packit Service fe3200
	  ++end;
Packit Service fe3200
	}
Packit Service fe3200
      else
Packit Service fe3200
	d = delta[c = (end += d)[-1]];
Packit Service fe3200
      if (d)
Packit Service fe3200
	continue;
Packit Service fe3200
      beg = end - 1;
Packit Service fe3200
      trie = next[c];
Packit Service fe3200
      if (trie->accepting)
Packit Service fe3200
	{
Packit Service fe3200
	  mch = beg;
Packit Service fe3200
	  accept = trie;
Packit Service fe3200
	}
Packit Service fe3200
      d = trie->shift;
Packit Service fe3200
      while (beg > text)
Packit Service fe3200
	{
Packit Service fe3200
	  c = trans ? trans[U(*--beg)] : *--beg;
Packit Service fe3200
	  tree = trie->links;
Packit Service fe3200
	  while (tree && c != tree->label)
Packit Service fe3200
	    if (c < tree->label)
Packit Service fe3200
	      tree = tree->llink;
Packit Service fe3200
	    else
Packit Service fe3200
	      tree = tree->rlink;
Packit Service fe3200
	  if (tree)
Packit Service fe3200
	    {
Packit Service fe3200
	      trie = tree->trie;
Packit Service fe3200
	      if (trie->accepting)
Packit Service fe3200
		{
Packit Service fe3200
		  mch = beg;
Packit Service fe3200
		  accept = trie;
Packit Service fe3200
		}
Packit Service fe3200
	    }
Packit Service fe3200
	  else
Packit Service fe3200
	    break;
Packit Service fe3200
	  d = trie->shift;
Packit Service fe3200
	}
Packit Service fe3200
      if (mch)
Packit Service fe3200
	goto match;
Packit Service fe3200
    }
Packit Service fe3200
  return -1;
Packit Service fe3200
Packit Service fe3200
 match:
Packit Service fe3200
  /* Given a known match, find the longest possible match anchored
Packit Service fe3200
     at or before its starting point.  This is nearly a verbatim
Packit Service fe3200
     copy of the preceding main search loops. */
Packit Service fe3200
  if (lim - mch > kwset->maxd)
Packit Service fe3200
    lim = mch + kwset->maxd;
Packit Service fe3200
  lmch = NULL;
Packit Service fe3200
  d = 1;
Packit Service fe3200
  while (lim - end >= d)
Packit Service fe3200
    {
Packit Service fe3200
      if ((d = delta[c = (end += d)[-1]]) != 0)
Packit Service fe3200
	continue;
Packit Service fe3200
      beg = end - 1;
Packit Service fe3200
      if (!(trie = next[c]))
Packit Service fe3200
	{
Packit Service fe3200
	  d = 1;
Packit Service fe3200
	  continue;
Packit Service fe3200
	}
Packit Service fe3200
      if (trie->accepting && beg <= mch)
Packit Service fe3200
	{
Packit Service fe3200
	  lmch = beg;
Packit Service fe3200
	  accept = trie;
Packit Service fe3200
	}
Packit Service fe3200
      d = trie->shift;
Packit Service fe3200
      while (beg > text)
Packit Service fe3200
	{
Packit Service fe3200
	  c = trans ? trans[U(*--beg)] : *--beg;
Packit Service fe3200
	  tree = trie->links;
Packit Service fe3200
	  while (tree && c != tree->label)
Packit Service fe3200
	    if (c < tree->label)
Packit Service fe3200
	      tree = tree->llink;
Packit Service fe3200
	    else
Packit Service fe3200
	      tree = tree->rlink;
Packit Service fe3200
	  if (tree)
Packit Service fe3200
	    {
Packit Service fe3200
	      trie = tree->trie;
Packit Service fe3200
	      if (trie->accepting && beg <= mch)
Packit Service fe3200
		{
Packit Service fe3200
		  lmch = beg;
Packit Service fe3200
		  accept = trie;
Packit Service fe3200
		}
Packit Service fe3200
	    }
Packit Service fe3200
	  else
Packit Service fe3200
	    break;
Packit Service fe3200
	  d = trie->shift;
Packit Service fe3200
	}
Packit Service fe3200
      if (lmch)
Packit Service fe3200
	{
Packit Service fe3200
	  mch = lmch;
Packit Service fe3200
	  goto match;
Packit Service fe3200
	}
Packit Service fe3200
      if (!d)
Packit Service fe3200
	d = 1;
Packit Service fe3200
    }
Packit Service fe3200
Packit Service fe3200
  if (kwsmatch)
Packit Service fe3200
    {
Packit Service fe3200
      kwsmatch->index = accept->accepting / 2;
Packit Service fe3200
      kwsmatch->offset[0] = mch - text;
Packit Service fe3200
      kwsmatch->size[0] = accept->depth;
Packit Service fe3200
    }
Packit Service fe3200
  return mch - text;
Packit Service fe3200
}
Packit Service fe3200
Packit Service fe3200
/* Search through the given text for a match of any member of the
Packit Service fe3200
   given keyword set.  Return a pointer to the first character of
Packit Service fe3200
   the matching substring, or NULL if no match is found.  If FOUNDLEN
Packit Service fe3200
   is non-NULL store in the referenced location the length of the
Packit Service fe3200
   matching substring.  Similarly, if FOUNDIDX is non-NULL, store
Packit Service fe3200
   in the referenced location the index number of the particular
Packit Service fe3200
   keyword matched. */
Packit Service fe3200
size_t
Packit Service fe3200
kwsexec (kwset_t kws, char const *text, size_t size,
Packit Service fe3200
	 struct kwsmatch *kwsmatch)
Packit Service fe3200
{
Packit Service fe3200
  struct kwset const *kwset = (struct kwset *) kws;
Packit Service fe3200
  if (kwset->words == 1 && kwset->trans == NULL)
Packit Service fe3200
    {
Packit Service fe3200
      size_t ret = bmexec (kws, text, size);
Packit Service fe3200
      if (kwsmatch != NULL && ret != (size_t) -1)
Packit Service fe3200
	{
Packit Service fe3200
	  kwsmatch->index = 0;
Packit Service fe3200
	  kwsmatch->offset[0] = ret;
Packit Service fe3200
	  kwsmatch->size[0] = kwset->mind;
Packit Service fe3200
	}
Packit Service fe3200
      return ret;
Packit Service fe3200
    }
Packit Service fe3200
  else
Packit Service fe3200
    return cwexec(kws, text, size, kwsmatch);
Packit Service fe3200
}
Packit Service fe3200
Packit Service fe3200
/* Free the components of the given keyword set. */
Packit Service fe3200
void
Packit Service fe3200
kwsfree (kwset_t kws)
Packit Service fe3200
{
Packit Service fe3200
  struct kwset *kwset;
Packit Service fe3200
Packit Service fe3200
  kwset = (struct kwset *) kws;
Packit Service fe3200
  obstack_free(&kwset->obstack, NULL);
Packit Service fe3200
  free(kws);
Packit Service fe3200
}