Blob Blame History Raw
/* @(#)match.c	1.20 03/10/22 Copyright 1985, 1995-2003 J. Schilling */
#include <standard.h>
#include <patmatch.h>
/*
 *	Pattern matching functions
 *
 *	Copyright (c) 1985, 1995-2003 J. Schilling
 */
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; see the file COPYING.  If not, write to the Free Software
 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
/*
 *	The pattern matching functions below are based on the algorithm
 *	presented by Martin Richards in:
 *
 *	"A Compact Function for Regular Expression Pattern Matching",
 *	Software-Practice and Experience, Vol. 9, 527-534 (1979)
 *
 *	Several changes have been made to the original source which has been
 *	written in BCPL:
 *
 *	'/'	is replaced by	'!'		(to allow UNIX filenames)
 *	'(',')' are replaced by	'{', '}'
 *	'\''	is replaced by	'\\'		(UNIX compatible quote)
 *
 *	Character classes have been added to allow "[<character list>]"
 *	to be used.
 *	Start of line '^' and end of line '$' have been added.
 */

#ifdef	__LINE_MATCH
#define	opatmatch	opatlmatch
#define	patmatch	patlmatch
#endif

#define	ENDSTATE	(-1)
typedef	unsigned char	Uchar;

/*---------------------------------------------------------------------------
|
|	The Interpreter
|
+---------------------------------------------------------------------------*/


/*
 *	put adds a new state to the active list
 */
#define	 put(ret, state, sp, n)	{		\
	register int *lstate	= state;	\
	register int *lsp	= sp;		\
	register int ln		= n;		\
						\
	while (lstate < lsp) {			\
		if (*lstate++ == ln) {		\
			ret = lsp;		\
			lsp = 0;		\
			break;			\
		}				\
	}					\
	if (lsp) {				\
		*lstate++ = ln;			\
		ret = lstate;			\
	}					\
}


/*
 *	match a character in class
 */
#define	in_class(found, pat, c)	{			\
	register const Uchar	*lpat	= pat;		\
	register int		lc	= c;		\
	int	lo_bound;				\
	int	hi_bound;				\
							\
	found = FALSE;					\
							\
	if (*lpat == NOT) {				\
		lpat++;					\
		found = TRUE;				\
	}						\
	while (*lpat != RCLASS) {			\
		if (*lpat == QUOTE)			\
			lpat++;				\
		lo_bound = *lpat++;			\
		if (*lpat == RANGE) {			\
			lpat++;				\
			if (*lpat == QUOTE)		\
				lpat++;			\
			hi_bound = *lpat++;		\
		} else {				\
			hi_bound = lo_bound;		\
		}					\
		if (lo_bound <= lc && lc <= hi_bound) {	\
			found = !found;			\
			break;				\
		}					\
	}						\
}

/*
 *	opatmatch - the old external interpreter interface.
 *
 *	Trys to match a string beginning at offset
 *	against the compiled pattern.
 */
EXPORT Uchar
*opatmatch(pat, aux, str, soff, slen, alt)
	const Uchar	*pat;
	const int	*aux;
	const Uchar	*str;
	int		soff;
	int		slen;
	int		alt;
{
	int		state[MAXPAT];

	return (patmatch(pat, aux, str, soff, slen, alt, state));
}

/*
 *	patmatch - the external interpreter interface.
 *
 *	Trys to match a string beginning at offset
 *	against the compiled pattern.
 */
EXPORT Uchar *
patmatch(pat, aux, str, soff, slen, alt, state)
	const Uchar	*pat;
	const int	*aux;
	const Uchar	*str;
	int		soff;
	int		slen;
	int		alt;
	int		state[];
{
	register int	*sp;
	register int	*n;
	register int	*i;
	register int	p;
	register int	q, s, k;
	int		c;
	const Uchar	*lastp = (Uchar *)NULL;

#ifdef	__LINE_MATCH
for (; soff <= slen; soff++) {
#endif

	sp = state;
	put(sp, state, state, 0);
	if (alt != ENDSTATE)
		put(sp, state, sp, alt);

	for (s = soff; ; s++) {
		/*
		 * next char from input string
		 */
		if (s >= slen)
			c = 0;
		else
			c = str[s];
		/*
		 * first complete the closure
		 */
		for (n = state; n < sp; ) {
			p = *n++;		/* next state number */
			if (p == ENDSTATE)
				continue;
			q = aux[p];		/* get next state for pat */
			k = pat[p];		/* get next char from pat */
			switch (k) {

			case REP:
				put(sp, state, sp, p+1);
			case NIL:		/* NIL matches always */
			case STAR:
				put(sp, state, sp, q);
				break;
			case LBRACK:		/* alternations */
			case ALT:
				put(sp, state, sp, p+1);
				if (q != ENDSTATE)
					put(sp, state, sp, q);
				break;
			case START:
				if (s == 0)
					put(sp, state, sp, q);
				break;
			case END:
				if (c == '\0')
					put(sp, state, sp, q);
				break;
			}
		}

		for (i = state; i < sp; ) {
			if (*i++ == ENDSTATE) {
				lastp = &str[s];
				break;
			}
		}
		if (c == 0)
			return ((Uchar *)lastp);

		/*
		 * now try to match next character
		 */
		n = sp;
		sp = state;
		for (i = sp; i < n; ) {
			p = *i++;		/* next active state number */
			if (p == ENDSTATE)
				continue;
			k = pat[p];
			switch (k) {

			case ALT:
			case REP:
			case NIL:
			case LBRACK:
			case START:
			case END:
				continue;
			case LCLASS:
				in_class(q, &pat[p+1], c);
				if (!q)
					continue;
				break;
			case STAR:
				put(sp, state, sp, p);
				continue;
			case QUOTE:
				k = pat[p+1];
			default:
				if (k != c)
					continue;
			case ANY:
				break;
			}
			put(sp, state, sp, aux[p]);
		}
		if (sp == state) {		/* if no new states return */
#ifdef	__LINE_MATCH

			if (lastp || (soff == slen - 1))
				return ((Uchar *)lastp);
			else
				break;
#else
			return ((Uchar *)lastp);
#endif
		}
	}
#ifdef	__LINE_MATCH
}
return ((Uchar *)lastp);
#endif
}


#ifndef	__LINE_MATCH
/*---------------------------------------------------------------------------
|
|	The Compiler
|
+---------------------------------------------------------------------------*/

typedef	struct args {
	const Uchar	*pattern;
	int		*aux;
	int		patp;
	int		length;
	Uchar		Ch;
} arg_t;

LOCAL	void	nextitem __PR((arg_t *));
LOCAL	int	prim	 __PR((arg_t *));
LOCAL	int	expr	 __PR((arg_t *, int *));
LOCAL	void	setexits __PR((int *, int, int));
LOCAL	int	join	 __PR((int *, int, int));

/*
 *	'read' the next character from pattern
 */
#define	rch(ap)						\
{							\
	if (++(ap)->patp >= (ap)->length)		\
		(ap)->Ch = 0;				\
	else						\
		(ap)->Ch = (ap)->pattern[(ap)->patp];	\
}

/*
 *	get the next item from pattern
 */
LOCAL void
nextitem(ap)
	arg_t	*ap;
{
	if (ap->Ch == QUOTE)
		rch(ap);
	rch(ap);
}

/*
 *	parse a primary
 */
LOCAL int
prim(ap)
	arg_t	*ap;
{
	int	a  = ap->patp;
	int	op = ap->Ch;
	int	t;

	nextitem(ap);
	switch (op) {

	case '\0':
	case ALT:
	case RBRACK:
		return (ENDSTATE);
	case LCLASS:
		while (ap->Ch != RCLASS && ap->Ch != '\0')
			nextitem(ap);
		if (ap->Ch == '\0')
			return (ENDSTATE);
		nextitem(ap);
		break;
	case REP:
		t = prim(ap);
		if (t == ENDSTATE)
			return (ENDSTATE);
		setexits(ap->aux, t, a);
		break;
	case LBRACK:
		a = expr(ap, &ap->aux[a]);
		if (a == ENDSTATE || ap->Ch != RBRACK)
			return (ENDSTATE);
		nextitem(ap);
		break;
	}
	return (a);
}

/*
 *	parse an expression (a sequence of primaries)
 */
LOCAL int
expr(ap, altp)
	arg_t	*ap;
	int	*altp;
{
	int	exits = ENDSTATE;
	int	a;
	int	*aux = ap->aux;
	Uchar	Ch;

	for (;;) {
		a = prim(ap);
		Ch = ap->Ch;
		if (Ch == ALT || Ch == RBRACK || Ch == '\0') {
			exits = join(aux, exits, a);
			if (Ch != ALT)
				return (exits);
			*altp = ap->patp;
			altp = &aux[ap->patp];
			nextitem(ap);
		} else
			setexits(aux, a, ap->patp);
	}
}

/*
 *	set all exits in a list to a specified value
 */
LOCAL void
setexits(aux, list, val)
	int	*aux;
	int	list;
	int	val;
{
	int	a;

	while (list != ENDSTATE) {
		a = aux[list];
		aux[list] = val;
		list = a;
	}
}

/*
 *	concatenate two lists
 */
LOCAL int
join(aux, a, b)
	int	*aux;
	int	a;
	int	b;
{
	int	t;

	if (a == ENDSTATE)
		return (b);
	t = a;
	while (aux[t] != ENDSTATE)
		t = aux[t];
	aux[t] = b;
	return (a);
}

/*
 *	patcompile - the external compiler interface.
 *
 *	The pattern is compiled into the aux array.
 *	Return value on success, is the outermost alternate which is != 0.
 *	Error is indicated by return of 0.
 */
EXPORT int
patcompile(pat, len, aux)
	const Uchar	*pat;
	int		len;
	int		*aux;
{
	arg_t	a;
	int	alt = ENDSTATE;
	int	i;

	a.pattern = pat;
	a.length  = len;
	a.aux	  = aux;
	a.patp    = -1;

	for (i = 0; i < len; i++)
		aux[i] = ENDSTATE;
	rch(&a);
	i = expr(&a, &alt);
	if (i == ENDSTATE)
		return (0);
	setexits(aux, i, ENDSTATE);
	return (alt);
}
#endif	/* LMATCH */