Blob Blame History Raw
/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1985-2012 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                 Eclipse Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*          http://www.eclipse.org/org/documents/epl-v10.html           *
*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                   Phong Vo <kpv@research.att.com>                    *
*                                                                      *
***********************************************************************/
#pragma prototyped

/*
 * posix regex compiler
 */

#include "reglib.h"

#if _PACKAGE_ast
#include "lclib.h"
#endif

#define serialize		re_serialize	/* hp.ia64 <unistd.h>! */

#define C_ESC			(-1)
#define C_MB			(-2)

#if _AST_REGEX_DEBUG

#define DEBUG_TEST(f,y,n)	((debug&(debug_flag=f))?(y):(n))
#define DEBUG_CODE(f,y,n)	do if(debug&(f)){y}else{n} while(0)
#define DEBUG_INIT()		do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_comp_debug")) debug |= strtoul(t, NiL, 0); } } while (0)

static unsigned long	debug;
static unsigned long	debug_flag;

#else

#define DEBUG_INIT()
#define DEBUG_TEST(f,y,n)	(n)
#define DEBUG_CODE(f,y,n)	do {n} while(0)

#endif

#if _PACKAGE_ast

typedef struct Cchr_s
{
	Dtlink_t	lnk;
	unsigned char	nam[2];
	Ckey_t		key;
} Cchr_t;

#endif

#define eat(p)		do{if ((p)->token.push)(p)->token.push=0;else (p)->cursor+=(p)->token.len;}while (0)

/*
 * determine whether greedy matching will work, i.e. produce
 * the best match first.  such expressions are "easy", and
 * need no backtracking once a complete match is found.  
 * if an expression has backreferences or alts it's hard
 * else if it has only one closure it's easy
 * else if all closures are simple (i.e. one-character) it's easy
 * else it's hard.
 */

typedef struct Stats_s
{
	unsigned long	l;	/* min length to left of x		*/
	unsigned long	k;	/* min length to left of y		*/
	unsigned long	m;	/* min length				*/
	unsigned long	n;	/* max length				*/
	unsigned short	a;	/* number of alternations		*/
	unsigned short	b;	/* number of backrefs			*/
	unsigned short	c;	/* number of closures			*/
	unsigned short	e;	/* $					*/
	unsigned short	i;	/* number of negations			*/
	unsigned short	p;	/* number of named subexpressions	*/
	unsigned short	s;	/* number of simple closures		*/
	unsigned short	t;	/* number of tries			*/
	unsigned short	u;	/* number of unnamed subexpressions	*/
	Rex_t*		x;	/* max length REX_STRING		*/
	Rex_t*		y;	/* max length REX_TRIE			*/
} Stats_t;

typedef struct Token_s
{
	unsigned long	min;
	unsigned long	max;
	short		lex;
	short		len;
	short		esc;
	short		att;
	short		push;
} Token_t;

typedef struct Cenv_s
{
	int		delimiter;	/* pattern delimiter		*/
	int		error;		/* last error			*/
	int		explicit;	/* explicit match on this char	*/
	int		mappeddot;	/* inverse mapped '.'		*/
	int		mappednewline;	/* inverse mapped '\n'		*/
	int		mappedslash;	/* inverse mapped '/'		*/
	regflags_t	flags;		/* flags arg to regcomp		*/
	int		type;		/* BRE,ERE,ARE,SRE,KRE		*/
	unsigned char*	cursor;		/* curent point in re		*/
	unsigned char*	pattern;	/* the original pattern		*/
	unsigned char*	literal;	/* literal restart pattern	*/
	int		parno;		/* number of last open paren	*/
	int		parnest;	/* paren nest count		*/
	int		posixkludge; 	/* to make * nonspecial		*/
	Token_t		token;		/* token lookahead		*/
	Stats_t		stats;		/* RE statistics		*/
	int		terminator;	/* pattern terminator		*/
	Rex_t*		paren[2*(BACK_REF_MAX+2)];
					/* paren[i]!=0 if \i defined	*/
	regex_t*	regex;		/* user handle			*/
	regdisc_t*	disc;		/* user discipline		*/
	unsigned char*	map;		/* external to native ccode map	*/
	unsigned char*	MAP;		/* fold and/or map		*/
} Cenv_t;

/*
 * allocate a new Rex_t node
 */

#if _BLD_DEBUG
#define node(a,b,c,d,e)	node(a,b,c,d,e, const char* file, unsigned int line)
#endif

static Rex_t*
node(Cenv_t* env, int type, int lo, int hi, size_t extra)
{
	register Rex_t*	e;

	DEBUG_TEST(0x0800,(sfprintf(sfstdout, "%s:%d node(%d,%d,%d,%u)\n", file, line, type, lo, hi, sizeof(Rex_t) + extra)),(0));
	if (e = (Rex_t*)alloc(env->disc, 0, sizeof(Rex_t) + extra))
	{
		memset(e, 0, sizeof(Rex_t) + extra);
		e->type = type;
		e->marked = 0;
		e->lo = lo;
		e->hi = hi;
		e->flags = env->flags;
		e->map = (e->flags & REG_ICASE) ? env->MAP : env->map;
		e->explicit = env->explicit;
		if (extra)
			e->re.data = (char*)e + sizeof(Rex_t);
	}
	return e;
}

#if _BLD_DEBUG
#undef	node
#define node(a,b,c,d,e)	node(a,b,c,d,e,__FILE__,__LINE__)
#endif

/*
 * free a Trie_node_t node
 */

static void
triedrop(regdisc_t* disc, Trie_node_t* e)
{
	if (e)
	{
		triedrop(disc, e->son);
		triedrop(disc, e->sib);
		alloc(disc, e, 0);
	}
}

/*
 * free a Rex_t node
 */

void
drop(regdisc_t* disc, Rex_t* e)
{
	int	i;
	Rex_t*	x;

	if (e && !(disc->re_flags & REG_NOFREE))
		do
		{
			switch (e->type)
			{
			case REX_ALT:
			case REX_CONJ:
				drop(disc, e->re.group.expr.binary.left);
				drop(disc, e->re.group.expr.binary.right);
				break;
			case REX_GROUP:
			case REX_GROUP_AHEAD:
			case REX_GROUP_AHEAD_NOT:
			case REX_GROUP_BEHIND:
			case REX_GROUP_BEHIND_NOT:
			case REX_GROUP_CUT:
			case REX_NEG:
			case REX_REP:
				drop(disc, e->re.group.expr.rex);
				break;
			case REX_TRIE:
				for (i = 0; i <= UCHAR_MAX; i++)
					triedrop(disc, e->re.trie.root[i]);
				break;
			}
			x = e->next;
			alloc(disc, e, 0);
		} while (e = x);
}

/*
 * mark e and descendants minimal
 */

static void
mark(register Rex_t* e, int set)
{
	if (e && !e->marked)
		do
		{
			e->marked = 1;
			if (set)
				e->flags |= REG_MINIMAL;
			else
				e->flags &= ~REG_MINIMAL;
			switch (e->type)
			{
			case REX_ALT:
			case REX_CONJ:
			case REX_GROUP_COND:
				if (e->re.group.expr.binary.left)
					mark(e->re.group.expr.binary.left, set);
				if (e->re.group.expr.binary.right)
					mark(e->re.group.expr.binary.right, set);
				break;
			case REX_GROUP:
			case REX_GROUP_AHEAD:
			case REX_GROUP_AHEAD_NOT:
			case REX_GROUP_BEHIND:
			case REX_GROUP_BEHIND_NOT:
			case REX_GROUP_CUT:
			case REX_NEG:
			case REX_REP:
			case REX_TRIE:
				mark(e->re.group.expr.rex, set);
				break;
			}
		} while (e = e->next);
}

/*
 * assign subexpression numbers by a preorder tree walk
 */

static int
serialize(Cenv_t* env, Rex_t* e, int n)
{
	do
	{
		e->serial = n++;
		switch (e->type)
		{
		case REX_ALT:
		case REX_GROUP_COND:
			if (e->re.group.expr.binary.left)
				n = serialize(env, e->re.group.expr.binary.left, n);
			e->re.group.expr.binary.serial = n++;
			if (e->re.group.expr.binary.right)
				n = serialize(env, e->re.group.expr.binary.right, n);
			break;
		case REX_CONJ:
			n = serialize(env, e->re.group.expr.binary.left, n);
			n = serialize(env, e->re.group.expr.binary.right, n);
			break;
		case REX_GROUP:
		case REX_GROUP_AHEAD:
		case REX_GROUP_AHEAD_NOT:
		case REX_GROUP_BEHIND:
		case REX_GROUP_BEHIND_NOT:
		case REX_GROUP_CUT:
		case REX_NEG:
		case REX_REP:
			n = serialize(env, e->re.group.expr.rex, n);
			break;
		}
	} while (e = e->next);
	return n;
}

/*
 * catenate e and f into a sequence, collapsing them if possible
 */

static Rex_t*
cat(Cenv_t* env, Rex_t* e, Rex_t* f)
{
	Rex_t*	g;

	if (!f)
	{
		drop(env->disc, e);
		return 0;
	}
	if (e->type == REX_NULL)
	{
		drop(env->disc, e);
		return f;
	}
	if (f->type == REX_NULL)
	{
		g = f->next;
		f->next = 0;
		drop(env->disc, f);
		f = g;
	}
	else if (e->type == REX_DOT && f->type == REX_DOT)
	{
		unsigned int	m = e->lo + f->lo;
		unsigned int	n = e->hi + f->hi;

		if (m <= RE_DUP_MAX)
		{
			if (e->hi > RE_DUP_MAX || f->hi > RE_DUP_MAX)
			{
				n = RE_DUP_INF;	
				goto combine;
			}
			else if (n <= RE_DUP_MAX)
			{
			combine:
				e->lo = m;
				e->hi = n;
				g = f->next;
				f->next = 0;
				drop(env->disc, f);
				f = g;
			}
		}
	}
	e->next = f;
	return e;
}

/*
 * collect re statistics
 */

static int
stats(register Cenv_t* env, register Rex_t* e)
{
	register unsigned long	n;
	register unsigned long	m;
	unsigned long		cm;
	unsigned long		nm;
	unsigned long		cn;
	unsigned long		nn;
	unsigned long		l;
	unsigned long		k;
	unsigned long		t;
	Rex_t*			q;
	Rex_t*			x;
	Rex_t*			y;
	unsigned char		c;
	unsigned char		b;

	do
	{
		switch (e->type)
		{
		case REX_ALT:
			x = env->stats.x;
			l = env->stats.l;
			y = env->stats.y;
			k = env->stats.k;
			t = env->stats.t;
			if (++env->stats.a <= 0)
				return 1;
			cm = env->stats.m;
			env->stats.m = 0;
			cn = env->stats.n;
			env->stats.n = 0;
			if (stats(env, e->re.group.expr.binary.left))
				return 1;
			m = env->stats.m;
			env->stats.m = 0;
			n = env->stats.n;
			env->stats.n = 0;
			if (e->re.group.expr.binary.right && stats(env, e->re.group.expr.binary.right))
				return 1;
			if (env->stats.m > m)
				env->stats.m = m;
			else
				m = env->stats.m;
			if ((env->stats.m += cm) < m)
				return 1;
			if (env->stats.n < n)
				env->stats.n = n;
			else
				n = env->stats.n;
			if ((env->stats.n += cn) < n)
				return 1;
			env->stats.x = x;
			env->stats.l = l;
			env->stats.y = y;
			env->stats.k = k;
			env->stats.t = t;
			break;
		case REX_BACK:
			if (++env->stats.b <= 0)
				return 1;
			break;
		case REX_CLASS:
		case REX_COLL_CLASS:
		case REX_DOT:
		case REX_ONECHAR:
			n = env->stats.m;
			if ((env->stats.m += e->lo) < n)
				return 1;
			if (e->hi != RE_DUP_INF)
			{
				n = env->stats.n;
				if ((env->stats.n += e->hi) < n)
					return 1;
			}
			if (e->lo != e->hi)
			{
				if (++env->stats.c <= 0)
					return 1;
				if (++env->stats.s <= 0)
					return 1;
			}
			break;
		case REX_CONJ:
			cm = env->stats.m;
			env->stats.m = 0;
			cn = env->stats.n;
			env->stats.n = 0;
			if (stats(env, e->re.group.expr.binary.left))
				return 1;
			nm = env->stats.m;
			env->stats.m = 0;
			nn = env->stats.n;
			env->stats.n = 0;
			if (stats(env, e->re.group.expr.binary.right))
				return 1;
			if (env->stats.m < nm)
				env->stats.m = nm;
			else
				nm = env->stats.m;
			if ((env->stats.m += cm) < nm)
				return 1;
			if (env->stats.n < nn)
				env->stats.n = nn;
			else
				nn = env->stats.n;
			if ((env->stats.n += cn) < nn)
				return 1;
			break;
		case REX_END:
			env->stats.e = 1;
			break;
		case REX_GROUP:
			if (e->re.group.number && ++env->stats.p <= 0 || !e->re.group.number && ++env->stats.u <= 0)
				return 1;
			if (stats(env, e->re.group.expr.rex))
				return 1;
			break;
		case REX_GROUP_AHEAD:
		case REX_GROUP_AHEAD_NOT:
		case REX_GROUP_BEHIND:
		case REX_GROUP_BEHIND_NOT:
			m = env->stats.m;
			n = env->stats.n;
			x = env->stats.x;
			y = env->stats.y;
			if (stats(env, e->re.group.expr.rex))
				return 1;
			env->stats.m = m;
			env->stats.n = n;
			env->stats.x = x;
			env->stats.y = y;
			switch (e->type)
			{
			case REX_GROUP_AHEAD:
			case REX_GROUP_BEHIND:
				if (++env->stats.u <= 0)
					return 1;
				break;
			}
			break;
		case REX_GROUP_COND:
			if (++env->stats.u <= 0)
				return 1;
			m = env->stats.m;
			n = env->stats.n;
			x = env->stats.x;
			y = env->stats.y;
			if (e->re.group.size > 0 && ++env->stats.b <= 0)
				return 1;
			if (e->re.group.expr.binary.left && stats(env, e->re.group.expr.binary.left))
				return 1;
			if (q = e->re.group.expr.binary.right)
			{
				if (q->re.group.expr.binary.left && stats(env, q->re.group.expr.binary.left))
					return 1;
				if (q->re.group.expr.binary.right && stats(env, q->re.group.expr.binary.right))
					return 1;
			}
			env->stats.m = m;
			env->stats.n = n;
			env->stats.x = x;
			env->stats.y = y;
			break;
		case REX_GROUP_CUT:
			if (++env->stats.u <= 0)
				return 1;
			m = env->stats.m;
			n = env->stats.n;
			x = env->stats.x;
			y = env->stats.y;
			if (stats(env, e->re.group.expr.rex))
				return 1;
			env->stats.m = m;
			env->stats.n = n;
			env->stats.x = x;
			env->stats.y = y;
			break;
		case REX_NEG:
			env->stats.i++;
			x = env->stats.x;
			l = env->stats.l;
			y = env->stats.y;
			k = env->stats.k;
			t = env->stats.t;
			cm = env->stats.m;
			env->stats.m = 0;
			if (stats(env, e->re.group.expr.rex))
				return 1;
			env->stats.m = !env->stats.m;
			if ((env->stats.m += cm) < cm)
				return 1;
			env->stats.x = x;
			env->stats.l = l;
			env->stats.y = y;
			env->stats.k = k;
			env->stats.t = t;
			break;
		case REX_REP:
			x = env->stats.x;
			l = env->stats.l;
			y = env->stats.y;
			k = env->stats.k;
			t = env->stats.t;
			if (++env->stats.c <= 0)
				return 1;
			b = env->stats.b;
			c = env->stats.c;
			cm = env->stats.m;
			env->stats.m = 0;
			if (stats(env, e->re.group.expr.rex))
				return 1;
			if (env->stats.m == 1 && b == env->stats.b && c == env->stats.c && ++env->stats.s <= 0)
				return 1;
			if (e->lo < 1)
			{
				env->stats.x = x;
				env->stats.l = l;
				env->stats.y = y;
				env->stats.k = k;
				env->stats.t = t;
				env->stats.m = cm;
			}
			else
			{
				m = env->stats.m;
				if ((env->stats.m *= e->lo) > 0 && env->stats.m < m)
					return 1;
				m = env->stats.m;
				if ((env->stats.m += cm) < m)
					return 1;
				if (env->stats.x != x)
					env->stats.l = cm;
				if (env->stats.y != y)
					env->stats.k = cm;
			}
			break;
		case REX_STRING:
			if (!e->map)
			{
				cm = env->stats.m;
				if ((env->stats.m += e->re.string.size) < cm)
					return 1;
				cn = env->stats.n;
				if ((env->stats.n += e->re.string.size) < cn)
					return 1;
				if (!env->stats.x || env->stats.x->re.string.size < e->re.string.size)
				{
					env->stats.x = e;
					env->stats.l = cm;
				}
			}
			break;
		case REX_TRIE:
			if (++env->stats.s <= 0)
				return 1;
			cm = env->stats.m;
			if ((env->stats.m += e->re.trie.min) < cm)
				return 1;
			cn = env->stats.n;
			if ((env->stats.n += e->re.trie.max) < cn)
				return 1;
			env->stats.t++;
			if (!env->stats.y || env->stats.y->re.trie.min < e->re.trie.min)
			{
				env->stats.y = e;
				env->stats.k = cm;
			}
			break;
		}
	} while (e = e->next);
	return 0;
}

static int	token(Cenv_t*);

static int
magic(register Cenv_t* env, register int c, int escaped)
{
	register char*	sp;
	register int	n;
	int		o = c;
	int		e = env->error;
	int		l = env->token.len;
	short*		mp;
	char*		ep;

	if (mp = state.magic[c])
	{
		c = mp[env->type+escaped];
		if (c >= T_META)
		{
			sp = (char*)env->cursor + env->token.len;
			switch (c)
			{
			case T_LEFT:
				n = 0;
				ep = sp;
				while (*sp >= '0' && *sp <= '9')
				{
					if (n > (INT_MAX / 10))
					{
						env->error = REG_BADBR;
						goto bad;
					}
					n = n * 10 + *sp++ - '0';
				}
				if (sp == ep)
				{
					if (env->type < SRE || *sp != ',')
					{
						env->error = *sp ? REG_BADBR : REG_EBRACE;
						goto bad;
					}
				}
				else if (n > RE_DUP_MAX)
				{
					env->error = REG_BADBR;
					goto bad;
				}
				env->token.min = n;
				if (*sp == ',')
				{
					n = 0;
					ep = ++sp;
					while (*sp >= '0' && *sp <= '9')
					{
						if (n > (INT_MAX / 10))
						{
							env->error = REG_BADBR;
							goto bad;
						}
						n = n * 10 + *sp++ - '0';
					}
					if (sp == ep)
						n = RE_DUP_INF;
					else if (n < env->token.min)
					{
						env->error = REG_BADBR;
						goto bad;
					}
				}
				env->token.max = n;
				switch (*sp)
				{
				case 0:
					env->error = REG_EBRACE;
					goto bad;
				case '\\':
					if (!escaped)
					{
						env->error = REG_BADBR;
						goto bad;
					}
					sp++;
					break;
				default:
					if (escaped)
					{
						env->error = REG_BADBR;
						goto bad;
					}
					break;
				}
				switch (*sp++)
				{
				case 0:
					env->error = REG_EBRACE;
					goto bad;
				case '}':
					break;
				default:
					env->error = REG_BADBR;
					goto bad;
				}
				env->token.len = sp - (char*)env->cursor;
				break;
			case T_RIGHT:
				env->error = REG_EBRACE;
				goto bad;
			case T_OPEN:
				if (env->type < SRE && *sp == '?')
				{
					env->token.len++;
					env->token.lex = 0;
					goto group;
				}
				break;
			case T_ESCAPE:
				c = chresc(sp - 2, &ep);
				if (ep < sp)
					goto bad;
				env->token.len += ep - sp;
				if (c >= T_META)
				{
					env->token.lex = c;
					c = C_ESC;
				}
				return c;
			case T_BACK+0:
			case T_BACK+1:
			case T_BACK+2:
			case T_BACK+3:
			case T_BACK+4:
			case T_BACK+5:
			case T_BACK+6:
			case T_BACK+7:
				n = chresc(sp - 2, &ep);
				if (ep > sp + 1)
				{
					env->token.len += ep - sp;
					return n;
				}
				/*FALLTHROUGH*/
			case T_BACK+8:
			case T_BACK+9:
				if (env->type == SRE || c == T_BACK && !(env->flags & (REG_LENIENT|REG_REGEXP)))
				{
					env->error = REG_BADESC;
					goto bad;
				}
				if ((env->flags & REG_MULTIREF) && isdigit(*sp))
				{
					c = (c - T_BACK) * 10 + (*sp - '0');
					if (c > 0 && c <= env->parno && env->paren[c])
						c += T_BACK;
					else
						c = chresc(sp - 2, &ep);
					env->token.len++;
				}
				if (c == T_BACK)
					c = 0;
				break;
			case T_BAD:
				if (escaped == 1 && (env->flags & (REG_LENIENT|REG_REGEXP)) && (c = mp[env->type+escaped+2]) >= T_META)
					return c;
				goto bad;
			}
			if (env->type >= SRE)
			{
				if (c == T_DOT)
					c = '.';
				else if (c < T_OPEN)
				{
					if (env->type == KRE && *(env->cursor + env->token.len) == '-' && *(env->cursor + env->token.len + 1) == '(')
					{
						env->token.len++;
						env->token.att = 1;
					}
					if (env->type == KRE && *(env->cursor + env->token.len) == '(')
					{
						env->token.len++;
						switch (c)
						{
						case T_AT:
							break;
						case T_PERCENT:
							env->token.lex = c;
							goto group;
						case T_TILDE:
							env->token.lex = 0;
							goto group;
						default:
							env->token.lex = c;
							break;
						}
						c = T_OPEN;
					}
					else if (c == T_STAR)
						c = T_DOTSTAR;
					else if (c == T_QUES)
						c = T_DOT;
					else
					{
						c = o;
						env->token.len = l;
					}
				}
				else if (c > T_BACK)
				{
					c = (c - T_BACK) * 2 - 1;
					c = (c > env->parno || !env->paren[c]) ? o : T_BACK + c;
				}
				else if (env->type == KRE && !env->parnest && (env->flags & REG_SHELL_GROUP))
				{
					if (c == T_AND)
						c = '&';
					else if (c == T_BAR)
						c = '|';
					else if (c == T_OPEN)
						c = '(';
				}
			}
		}
	}
	else if (escaped == 2)
	{
		if (env->type >= SRE && !(env->flags & REG_SHELL_ESCAPED) || (env->flags & REG_ESCAPE) && (c == '[' || c == '-' || c == ']' || env->delimiter && c == env->delimiter))
			/*ok*/;
		else
		{
			env->error = REG_BADESC;
			goto bad;
		}
	}
	else if (escaped && !(env->flags & (REG_LENIENT|REG_REGEXP)) && c != ']')
	{
		env->error = REG_BADESC;
		goto bad;
	}
	return c;
 group:
	sp = (char*)env->cursor + env->token.len;
	switch (*sp++)
	{
	case ')':
		break;
	case '#':
		for (;;)
		{
			switch (*sp++)
			{
			case 0:
				env->error = REG_EPAREN;
				return T_BAD;
			case ')':
				break;
			default:
				continue;
			}
			break;
		}
		break;
	default:
		return T_GROUP;
	}
	env->cursor = (unsigned char*)sp;
	return token(env);
 bad:
	if (escaped == 2)
		env->error = e;
	else if (env->flags & (REG_LENIENT|REG_REGEXP))
		return o;
	else if (escaped == 1 && !env->error)
	{
		if (mp || o == ']')
			return o;
		env->error = REG_BADESC;
	}
	return T_BAD;
}

static int
token(register Cenv_t* env)
{
	int	c;
	int	posixkludge;

	if (env->token.push)
		return env->token.lex;
	env->token.att = env->token.esc = 0;
	if ((env->token.len = MBSIZE(env->cursor)) > 1)
		return env->token.lex = C_MB;
	env->token.lex = 0;
	for (;;)
	{
		c = *env->cursor;
		if (c == 0 || c == env->delimiter || c == env->terminator)
			return T_END;
		if (!(env->flags & REG_COMMENT))
			break;
		if (c == '#')
		{
			do
			{
				c = *++env->cursor;
				if (c == 0 || c == env->delimiter)
					return T_END;
			} while (c != '\n');
		}
		else if (!isspace(c))
			break;
		env->cursor++;
	}
	if (c == '\n' && (env->flags & REG_MULTIPLE) && !env->delimiter)
	{
		if (env->parnest)
		{
			env->error = REG_EPAREN;
			return T_BAD;
		}
		env->parno = 0;
		env->pattern = env->cursor + 1;
		return T_BAR;
	}
	if (env->flags & REG_LITERAL)
		return c;
	if (posixkludge = env->posixkludge)
	{
		env->posixkludge = 0;
		if (c == '*')
			return c;
	}
	if (c == '\\')
	{
		if (env->flags & REG_SHELL_ESCAPED)
			return c;
		if (!(c = *(env->cursor + 1)) || c == env->terminator)
		{
			if (env->flags & (REG_LENIENT|REG_REGEXP))
			{
				if (c)
				{
					env->token.esc = env->token.len;
					env->token.len += MBSIZE(env->cursor + 1);
					return c;
				}
				return '\\';
			}
			env->error = REG_EESCAPE;
			return T_BAD;
		}
		env->token.esc = env->token.len;
		env->token.len += MBSIZE(env->cursor + 1);
		if (env->delimiter && c == 'n')
			return '\n';
		else if (c == env->delimiter)
			return magic(env, c, 0);
		else if (c == '(' && env->type == BRE)
			env->posixkludge = 1;
		else if (c == ')' && env->type == BRE && env->parnest <= 0)
		{
			env->error = REG_EPAREN;
			return T_BAD;
		}
		else if (isspace(c) && (env->flags & REG_COMMENT))
			return c;
		return magic(env, c, 1);
	}
	else if (c == '$')
	{
		if (env->type == BRE && (*(env->cursor + 1) == 0 || *(env->cursor + 1) == env->delimiter || *(env->cursor + 1) == env->terminator || *(env->cursor + 1) == '\\' && *(env->cursor + 2) == ')') || (env->flags & REG_MULTIPLE) && *(env->cursor + 1) == '\n')
			return T_DOLL;
	}
	else if (c == '^')
	{
		if (env->type == BRE && (env->cursor == env->pattern || posixkludge == 1))
		{
			env->posixkludge = 2;
			return T_CFLX;
		}
	}
	else if (c == ')')
	{
		if (env->type != BRE && env->parnest <= 0)
			return c;
	}
	else if (c == '/' && env->explicit == env->mappedslash)
	{
		while (*(env->cursor + env->token.len) == c)
			env->token.len++;
		return T_SLASHPLUS;
	}
	return magic(env, c, 0);
}

static Celt_t*
col(Celt_t* ce, int ic, unsigned char* bp, int bw, int bc, unsigned char* ep, int ew, int ec)
{
	register char*		s;
	register unsigned char*	k;
	register unsigned char*	e;
	register int		c;
	register int		cc;
	int			bt;
	int			et;
	Ckey_t			key;

	cc = 0;
	for (;;)
	{
		k = key;
		if (bw == 1)
		{
			c = bc;
			if (ic)
			{
				if (isupper(c))
				{
					c = tolower(c);
					cc = -1;
				}
				else if (islower(c))
				{
					c = toupper(c);
					cc = -1;
				}
			}
			*k++ = c;
		}
		else if (bw < COLL_KEY_MAX)
		{
			s = (char*)bp;
			if (ic)
			{
				c = mbchar(s);
				if (iswupper(c))
				{
					c = towlower(c);
					cc = 1;
				}
				else if (iswlower(c))
				{
					c = towupper(c);
					cc = 1;
				}
			}
			if (cc > 0)
			{
				cc = -1;
				k += mbconv((char*)k, c);
			}
			else
				for (e = k + bw; k < e; *k++ = *s++);
		}
		*k = 0;
		mbxfrm(ce->beg, key, COLL_KEY_MAX);
		if (ep)
		{
			k = key;
			c = mbchar(k);
			if (iswupper(c))
				bt = COLL_range_uc;
			else if (iswlower(c))
				bt = COLL_range_lc;
			else
				bt = COLL_range;
			k = key;
			if (ew == 1)
			{
				c = ec;
				if (ic)
				{
					if (isupper(c))
					{
						c = tolower(c);
						cc = -1;
					}
					else if (islower(c))
					{
						c = toupper(c);
						cc = -1;
					}
				}
				*k++ = c;
			}
			else if (ew < COLL_KEY_MAX)
			{
				s = (char*)ep;
				if (ic)
				{
					c = mbchar(s);
					if (iswupper(c))
					{
						c = towlower(c);
						cc = 1;
					}
					else if (iswlower(c))
					{
						c = towupper(c);
						cc = 1;
					}
				}
				if (cc > 0)
				{
					cc = -1;
					k += mbconv((char*)k, c);
				}
				else
					for (e = k + ew; k < e; *k++ = *s++);
			}
			*k = 0;
			mbxfrm(ce->end, key, COLL_KEY_MAX);
			k = key;
			c = mbchar(k);
			if (iswupper(c))
				et = COLL_range_uc;
			else if (iswlower(c))
				et = COLL_range_lc;
			else
				et = COLL_range;
			ce->typ = bt == et ? bt : COLL_range;
		}
		else
			ce->typ = COLL_char;
		ce++;
		if (!ic || !cc)
			break;
		ic = 0;
	}
	return ce;
}

static Rex_t*
bra(Cenv_t* env)
{
	Rex_t*		e;
	int		c;
	int		i;
	int		w;
	int		neg;
	int		last;
	int		inrange;
	int		complicated;
	int		collate;
	int		elements;
	unsigned char*	first;
	unsigned char*	start;
	unsigned char*	begin;
	unsigned char*	s;
	regclass_t	f;
	unsigned char	buf[4 * (COLL_KEY_MAX + 1)];
#if _PACKAGE_ast
	int		ic;
	char		mbc[COLL_KEY_MAX + 1];
#endif

	if (!(e = node(env, REX_CLASS, 1, 1, sizeof(Set_t))))
		return 0;
	collate = complicated = elements = 0;
	if (*env->cursor == '^' || env->type >= SRE && *env->cursor == '!')
	{
		env->cursor++;
		complicated = neg = 1;
	}
	else
		neg = 0;
	first = env->cursor;
	start = first + MBSIZE(first);
	if (*env->cursor == 0 || *(env->cursor + 1) == 0 || *env->cursor == env->terminator || *(env->cursor + 1) == env->terminator || (env->flags & REG_ESCAPE) && (*env->cursor == env->delimiter || *env->cursor != '\\' && *(env->cursor + 1) == env->delimiter))
		goto error;
	begin = env->cursor + MBSIZE(env->cursor);

	/*
	 * inrange: 0=no, 1=possibly, 2=definitely
	 */

	inrange = 0;
	for (;;)
	{
		if (!(c = *env->cursor) || c == env->terminator || c == env->delimiter && (env->flags & REG_ESCAPE))
			goto error;
		env->cursor += (w = MBSIZE(env->cursor));
		if (c == '\\' && ((env->flags & REG_CLASS_ESCAPE) || *env->cursor == env->delimiter && (env->flags & REG_ESCAPE)))
		{
			if (*env->cursor)
			{
				if (*env->cursor == 'n')
				{
					env->cursor++;
					c = '\n';
				}
				else if (env->type < SRE || !(env->flags & REG_SHELL_ESCAPED))
				{
					env->token.len = 1;
					w = magic(env, *env->cursor, 2);
					if (env->token.len > 1 || w != T_BAD)
					{
						if (env->token.len == 1 && (f = classfun(w)))
						{
							if (inrange > 1)
							{
								if (env->type < SRE && !(env->flags & (REG_LENIENT|REG_REGEXP)))
									goto erange;
								inrange = 0;
							}
							env->cursor++;
							for (c = 0; c <= UCHAR_MAX; c++)
								if ((*f)(c))
									setadd(e->re.charclass, c);
							complicated++;
							elements++;
							continue;
						}
						if (env->token.len > 1 || w >= 0 && w < T_META)
						{
							c = w;
							if (c > UCHAR_MAX)
							{
								if (env->type < SRE && !(env->flags & (REG_LENIENT|REG_REGEXP)) && !mbwide())
									goto erange;
								c = UCHAR_MAX;
							}
							env->cursor += env->token.len;
						}
					}
				}
			}
		}
		else if (c == ']')
		{
			if (env->cursor == begin)
			{
				last = c;
				inrange = 1;
				continue;
			}
			if (inrange != 0)
			{
				setadd(e->re.charclass, last);
				elements++;
				if (inrange == 2)
				{
					setadd(e->re.charclass, '-');
					elements++;
				}
			}
			break;
		}
		else if (c == '-')
		{
			if (!inrange && env->cursor != begin && *env->cursor != ']')
			{
				if (env->type < SRE && !(env->flags & (REG_LENIENT|REG_REGEXP)))
					goto erange;
				continue;
			}
			else if (inrange == 1)
			{
				inrange = 2;
				complicated++;
				continue;
			}
		}
		else if (c == '[')
		{
			switch (*env->cursor)
			{
			case 0:
				goto error;
			case ':':
				if (env->flags & REG_REGEXP)
					goto normal;
				if (inrange == 1)
				{
					setadd(e->re.charclass, last);
					elements++;
				}
				if (!(f = regclass((char*)env->cursor, (char**)&env->cursor)))
				{
					if (env->cursor == start && (c = *(env->cursor + 1)))
					{
						s = start = env->cursor + 1;
						while (*++s && *s != ':');
						if (*s == ':' && *(s + 1) == ']' && *(s + 2) == ']')
						{
							if ((i = (s - start)) == 1)
							{
								switch (c)
								{
								case '<':
									i = REX_WBEG;
									break;
								case '>':
									i = REX_WEND;
									break;
								default:
									i = 0;
									break;
								}
								if (i)
								{
									env->cursor = s + 3;
									drop(env->disc, e);
									return node(env, i, 0, 0, 0);
								}
							}
						}
					}
					env->error = REG_ECTYPE;
					goto error;
				}
				for (c = 0; c <= UCHAR_MAX; c++)
					if ((*f)(c))
						setadd(e->re.charclass, c);
				inrange = 0;
				complicated++;
				elements++;
				continue;
			case '=':
				if (env->flags & REG_REGEXP)
					goto normal;
				if (inrange == 2)
					goto erange;
				if (inrange == 1)
				{
					setadd(e->re.charclass, last);
					elements++;
				}
				if ((c = regcollate((char*)env->cursor, (char**)&env->cursor, (char*)buf, sizeof(buf), NiL)) < 0)
					goto ecollate;
				if (c > 1)
					collate++;
				else
					setadd(e->re.charclass, buf[0]);
				c = buf[0];
				inrange = 0;
				complicated++;
				elements++;
				continue;
			case '.':
				if (env->flags & REG_REGEXP)
					goto normal;
				if ((c = regcollate((char*)env->cursor, (char**)&env->cursor, (char*)buf, sizeof(buf), NiL)) < 0)
					goto ecollate;
				if (c > 1)
					collate++;
				c = buf[0];
				complicated++;
				break;
			default:
			normal:
				if (*env->cursor == env->terminator || *env->cursor == env->delimiter && (env->flags & REG_ESCAPE))
					goto error;
				break;
			}
		}
		else if (w > 1)
			complicated++;
		if (inrange == 2)
		{
			if (last <= c)
			{
				for (i = last; i <= c; i++)
					setadd(e->re.charclass, i);
				inrange = env->type >= SRE || (env->flags & (REG_LENIENT|REG_REGEXP));
				elements += 2;
			}
			else if (env->type >= SRE)
			{
				setadd(e->re.charclass, last);
				setadd(e->re.charclass, c);
				elements += 2;
				inrange = 1;
			}
			else if (!(env->flags & (REG_LENIENT|REG_REGEXP)))
				goto erange;
			else
				inrange = 0;
		}
		else if (inrange == 1)
		{
			setadd(e->re.charclass, last);
			elements++;
		}
		else
			inrange = 1;
		last = c;
	}
#if _PACKAGE_ast
	if (complicated && mbcoll())
	{
		Dt_t*			dt;
		Cchr_t*			cc;
		Cchr_t*			tc;
		Cchr_t*			xc;
		Celt_t*			ce;
		Cchr_t			key;
		int			rw;
		int			rc;
		wchar_t			wc;
		unsigned char*		rp;
		unsigned char*		pp;
		char			cb[2][COLL_KEY_MAX+1];

		static Dtdisc_t		disc;

		static const char	primary[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

		if (!(dt = (Dt_t*)LCINFO(AST_LC_COLLATE)->data))
		{
			disc.key = offsetof(Cchr_t, key);
			if ((cc = newof(0, Cchr_t, elementsof(primary), 0)) && (dt = dtopen(&disc, Dtoset)))
			{
				for (i = 0; i < elementsof(primary) - 1; i++, cc++)
				{
					cc->nam[0] = primary[i];
					mbxfrm(cc->key, cc->nam, COLL_KEY_MAX);
					dtinsert(dt, cc);
				}
				for (i = 0; i < elementsof(cc->key); i++)
					cc->key[i] = ~0;
				dtinsert(dt, cc);
				LCINFO(AST_LC_COLLATE)->data = (void*)dt;
			}
			else
			{
				if (cc)
					free(cc);
				drop(env->disc, e);
				return 0;
			}
		}
		if (dt)
		{
			drop(env->disc, e);
			if (ic = env->flags & REG_ICASE)
				elements *= 2;
			if (!(e = node(env, REX_COLL_CLASS, 1, 1, (elements + 3) * sizeof(Celt_t))))
				return 0;
			ce = (Celt_t*)e->re.data;
			e->re.collate.invert = neg;
			e->re.collate.elements = ce;
			env->cursor = first;
			inrange = 0;
			for (;;)
			{
				if ((c = *env->cursor) == 0 || c == env->terminator || (env->flags & REG_ESCAPE) && c == env->delimiter)
					goto error;
				pp = env->cursor;
				env->cursor += (w = MBSIZE(env->cursor));
				if (c == '\\' && ((env->flags & REG_CLASS_ESCAPE) || *env->cursor == env->delimiter && (env->flags & REG_ESCAPE)))
				{
					if (*env->cursor)
					{
						if (*env->cursor == 'n')
						{
							pp = env->cursor++;
							c = '\n';
						}
						else if (env->type < SRE || !(env->flags & REG_SHELL_ESCAPED))
						{
							env->token.len = 1;
							w = magic(env, *env->cursor, 2);
							if (env->token.len > 1 || w != T_BAD)
							{
								if (env->token.len == 1 && (f = classfun(w)))
								{
									if (inrange > 1)
									{
										if (env->type < SRE && !(env->flags & (REG_LENIENT|REG_REGEXP)))
											goto erange;
										inrange = 0;
									}
									env->cursor++;
									ce->fun = f;
									ce->typ = COLL_call;
									ce++;
									continue;
								}
								if (env->token.len > 1 || w >= 0 && w < T_META)
								{
									c = w;
									w = mbconv(mbc, c);
									pp = (unsigned char*)mbc;
									env->cursor += env->token.len;
								}
							}
						}
					}
				}
				else if (c == ']')
				{
					if (env->cursor == begin)
					{
						rp = pp;
						rw = w;
						inrange = 1;
						continue;
					}
					if (inrange != 0)
					{
						ce = col(ce, ic, rp, rw, rc, NiL, 0, 0);
						if (inrange == 2)
							ce = col(ce, ic, NiL, 1, '-', NiL, 0, 0);
					}
					break;
				}
				else if (c == '-')
				{
					if (!inrange && env->cursor != begin && *env->cursor != ']')
					{
						if (env->type < SRE && !(env->flags & (REG_LENIENT|REG_REGEXP)))
							goto erange;
						continue;
					}
					else if (inrange == 1)
					{
						inrange = 2;
						continue;
					}
				}
				else if (c == '[')
				{
					switch (*env->cursor)
					{
					case 0:
						goto error;
					case ':':
						if (env->flags & REG_REGEXP)
							goto complicated_normal;
						if (inrange == 1)
							ce = col(ce, ic, rp, rw, rc, NiL, 0, 0);
						if (!(f = regclass((char*)env->cursor, (char**)&env->cursor)))
						{
							if (env->cursor == start && (c = *(env->cursor + 1)) && *(env->cursor + 2) == ':' && *(env->cursor + 3) == ']' && *(env->cursor + 4) == ']')
							{
								switch (c)
								{
								case '<':
									i = REX_WBEG;
									break;
								case '>':
									i = REX_WEND;
									break;
								default:
									i = 0;
									break;
								}
								if (i)
								{
									env->cursor += 5;
									drop(env->disc, e);
									return node(env, i, 0, 0, 0);
								}
							}
							env->error = REG_ECTYPE;
							goto error;
						}
						ce->fun = f;
						ce->typ = COLL_call;
						ce++;
						inrange = 0;
						continue;
					case '=':
						if (env->flags & REG_REGEXP)
							goto complicated_normal;
						if (inrange == 2)
							goto erange;
						if (inrange == 1)
							ce = col(ce, ic, rp, rw, rc, NiL, 0, 0);
						pp = (unsigned char*)cb[inrange];
						rp = env->cursor + 1;
						if ((rw = regcollate((char*)env->cursor, (char**)&env->cursor, (char*)pp, COLL_KEY_MAX, &wc)) < 0)
							goto ecollate;
						c = 0;
						if (ic)
						{
							if (iswupper(wc))
							{
								wc = towlower(wc);
								rw = mbconv((char*)pp, wc);
								c = 'u';
							}
							else if (iswlower(wc))
								c = 'l';
						}
						i = 1;
						for (;;)
						{
							mbxfrm(key.key, (char*)pp, COLL_KEY_MAX);
							if (!(cc = (Cchr_t*)dtsearch(dt, &key)) && !(cc = (Cchr_t*)dtprev(dt, &key)))
							{
								if (i)
								{
									c = *pp;
									goto singleton;
								}
								goto ecollate;
							}
							xc = (tc = (Cchr_t*)dtprev(dt, cc)) && !strcasecmp((char*)tc->nam, (char*)cc->nam) ? tc : cc;
							if (c == 'l' || c == 'L' && !(c = 0))
								ce->typ = COLL_range_lc;
							else if (c == 'u' || c == 'U' && !(c = 0))
								ce->typ = COLL_range_uc;
							else
								ce->typ = COLL_range;
							strcpy((char*)ce->beg, (char*)xc->key);
							if (!(cc = (Cchr_t*)dtnext(dt, cc)))
							{
								if (i)
								{
									c = *pp;
									goto singleton;
								}
								goto ecollate;
							}
							if (!strcasecmp((char*)xc->nam, (char*)cc->nam) && (tc = (Cchr_t*)dtnext(dt, cc)))
								cc = tc;
							strcpy((char*)ce->end, (char*)cc->key);
							ce->max = -1;
							ce++;
							if (!c)
								break;
							if (c == 'u')
							{
								wc = towlower(wc);
								c = 'L';
							}
							else
							{
								wc = towupper(wc);
								c = 'U';
							}
							rw = mbconv((char*)pp, wc);
							i = 0;
						}
						inrange = 0;
						c = *pp;
						continue;
					case '.':
						if (env->flags & REG_REGEXP)
							goto complicated_normal;
						pp = (unsigned char*)cb[inrange];
						if ((w = regcollate((char*)env->cursor, (char**)&env->cursor, (char*)pp, COLL_KEY_MAX, NiL)) < 0)
							goto ecollate;
						c = *pp;
						break;
					default:
					complicated_normal:
						if (*env->cursor == env->terminator || *env->cursor == env->delimiter && (env->flags & REG_ESCAPE))
							goto error;
						break;
					}
				}
			singleton:
				if (inrange == 2)
				{
					ce = col(ce, ic, rp, rw, rc, pp, w, c);
					if (strcmp((char*)ce->beg, (char*)ce->end) > 0)
					{
						if (env->type < SRE && !(env->flags & (REG_LENIENT|REG_REGEXP)))
							goto erange;
						(ce-1)->typ = COLL_char;
						strcpy((char*)ce->beg, (char*)(ce-1)->end);
						ce->typ = COLL_char;
						ce++;
					}
					inrange = env->type >= SRE || (env->flags & (REG_LENIENT|REG_REGEXP));
				}
				else if (inrange == 1)
					ce = col(ce, ic, rp, rw, rc, NiL, 0, 0);
				else
					inrange = 1;
				rp = pp;
				rw = w;
				rc = c;
			}
			ce->typ = COLL_end;
			return e;
		}
	}
#endif
	if (collate)
		goto ecollate;
	if (env->flags & REG_ICASE)
		for (i = 0; i <= UCHAR_MAX; i++)
			if (settst(e->re.charclass, i))
			{
				if (isupper(i))
					c = tolower(i);
				else if (islower(i))
					c = toupper(i);
				else
					continue;
				setadd(e->re.charclass, c);
			}
	if (neg)
	{
		for (i = 0; i < elementsof(e->re.charclass->bits); i++)
			e->re.charclass->bits[i] ^= ~0;
		if (env->explicit >= 0)
			setclr(e->re.charclass, env->explicit);
	}
	return e;
 ecollate:
	env->error = REG_ECOLLATE;
	goto error;
 erange:
	env->error = REG_ERANGE;
 error:
	drop(env->disc, e);
	if (!env->error)
		env->error = REG_EBRACK;
	return 0;
}

static Rex_t*
ccl(Cenv_t* env, int type)
{
	int		i;
	Rex_t*		e;
	Celt_t*		ce;
	regclass_t	f;

	if (!(f = classfun(type)))
	{
		env->error = REG_BADESC;
		return 0;
	}
	if (!mbcoll())
	{
		if (!(e = node(env, REX_CLASS, 1, 1, sizeof(Set_t))))
			return 0;
		for (i = 0; i <= UCHAR_MAX; i++)
			if ((*f)(i))
				setadd(e->re.charclass, i);
		if (env->explicit >= 0)
			setclr(e->re.charclass, env->explicit);
	}
	else
	{
		if (!(e = node(env, REX_COLL_CLASS, 1, 1, 2 * sizeof(Celt_t))))
			return 0;
		ce = (Celt_t*)e->re.data;
		e->re.collate.invert = 0;
		e->re.collate.elements = ce;
		ce->fun = f;
		ce->typ = COLL_call;
		ce++;
		ce->typ = COLL_end;
	}
	return e;
}

static Rex_t*
rep(Cenv_t* env, Rex_t* e, int number, int last)
{
	Rex_t*		f;
	unsigned long	m = 0;
	unsigned long	n = RE_DUP_INF;
	int		minimal = -1;

	if (!e)
		return 0;
	switch (token(env))
	{
	case T_BANG:
		eat(env);
		if (!(f = node(env, REX_NEG, m, n, 0)))
		{
			drop(env->disc, e);
			return 0;
		}
		f->re.group.expr.rex = e;
		return f;
	case T_QUES:
		eat(env);
		n = 1;
		break;
	case T_STAR:
		eat(env);
		break;
	case T_PLUS:
		eat(env);
		m = 1;
		break;
	case T_LEFT:
		eat(env);
		m = env->token.min;
		n = env->token.max;
		break;
	default:
		return e;
	}
	if (env->token.att)
		minimal = 1;
	else if (env->type < SRE)
		switch (token(env))
		{
		case T_QUES:
			eat(env);
			minimal = !(env->flags & REG_MINIMAL);
			break;
		case T_STAR: /*AST*/
			eat(env);
			minimal = !!(env->flags & REG_MINIMAL);
			break;
		}
	switch (e->type)
	{
	case REX_DOT:
	case REX_CLASS:
	case REX_COLL_CLASS:
	case REX_ONECHAR:
		e->lo = m;
		e->hi = n;
		if (minimal >= 0)
			mark(e, minimal);
		return e;
#if HUH_2002_08_07
	case REX_BEG:
#endif
	case REX_BEG_STR:
	case REX_END_STR:
	case REX_FIN_STR:
	case REX_WBEG:
	case REX_WEND:
	case REX_WORD:
	case REX_WORD_NOT:
		env->error = REG_BADRPT;
		drop(env->disc, e);
		return 0;
	}
	if (m == 1 && n == 1)
	{
		if (minimal >= 0)
			mark(e, minimal);
		return e;
	}
	if (!(f = node(env, REX_REP, m, n, 0)))
	{
		drop(env->disc, e);
		return 0;
	}
	f->re.group.expr.rex = e;
	f->re.group.number = number;
	f->re.group.last = last;
	if (minimal >= 0)
		mark(f, minimal);
	if (m <= n && n)
	{
		for (; e && e->type >= REX_GROUP && e->type <= REX_GROUP_CUT; e = e->re.group.expr.rex);
		if (e && e->type == REX_NEG)
			f->type = REX_GROUP;
	}
	return f;
}

static int
isstring(Rex_t* e)
{
	switch (e->type)
	{
	case REX_ONECHAR:
		return e->lo == 1 && e->hi == 1;
	case REX_STRING:
		return 1;
	}
	return 0;
}

static Trie_node_t*
trienode(Cenv_t* env, int c)
{
	Trie_node_t*	t;

	if (t = (Trie_node_t*)alloc(env->disc, 0, sizeof(Trie_node_t)))
	{
		memset(t, 0, sizeof(Trie_node_t));
		t->c = c;
	}
	return t;
}

static int
insert(Cenv_t* env, Rex_t* f, Rex_t* g)
{
	unsigned char*	s;
	unsigned char*	e;
	Trie_node_t*	t;
	int		len;
	unsigned char	tmp[2];

	switch (f->type)
	{
	case REX_ONECHAR:
		*(s = tmp) = f->re.onechar;
		e = s + 1;
		break;
	case REX_STRING:
		s = f->re.string.base;
		e = s + f->re.string.size;
		break;
	default:
		return 1;
	}
	if (!(t = g->re.trie.root[*s]) && !(t = g->re.trie.root[*s] = trienode(env, *s)))
		return 1;
	for (len = 1;;)
	{
		if (t->c == *s)
		{
			if (++s >= e)
				break;
			if (!t->son && !(t->son = trienode(env, *s)))
				return 1;
			t = t->son;
			len++;
		}
		else
		{
			if (!t->sib && !(t->sib = trienode(env, *s)))
				return 1;
			t = t->sib;
		}
	}
	if (g->re.trie.min > len)
		g->re.trie.min = len;
	if (g->re.trie.max < len)
		g->re.trie.max = len;
	t->end = 1;
	return 0;
}

/*
 * trie() tries to combine nontrivial e and f into a REX_TRIE
 * unless 0 is returned, e and f are deleted as far as possible
 * also called by regcomb
 */

static Rex_t*
trie(Cenv_t* env, Rex_t* e, Rex_t* f)
{
	Rex_t*	g;

	if (e->next || f->next || !isstring(e) || e->flags != f->flags)
		return 0;
	if (isstring(f))
	{
		if (!(g = node(env, REX_TRIE, 0, 0, (UCHAR_MAX + 1) * sizeof(Trie_node_t*))))
			return 0;
		g->re.trie.min = INT_MAX;
		if (insert(env, f, g))
			goto nospace;
		drop(env->disc, f);
	}
	else if (f->type != REX_TRIE)
		return 0;
	else
		g = f;
	if (insert(env, e, g))
		goto nospace;
	drop(env->disc, e);
	return g;
 nospace:
	if (g != f)
		drop(env->disc, g);
	return 0;
}

static Rex_t*		alt(Cenv_t*, int, int);

static int
chr(register Cenv_t* env, int* escaped)
{
	unsigned char*	p;
	int		c;

	*escaped = 0;
	if (!(c = *env->cursor))
		return -1;
	env->cursor++;
	if (c == '\\')
	{
		if (env->flags & REG_SHELL_ESCAPED)
			return c;
		if (!(c = *(env->cursor + 1)) || c == env->terminator)
		{
			if (env->flags & (REG_LENIENT|REG_REGEXP))
				return c ? c : '\\';
			env->error = REG_EESCAPE;
			return -1;
		}
		p = env->cursor;
		c = chresc((char*)env->cursor - 1, (char**)&env->cursor);
		*escaped = env->cursor - p;
	}
	return c;
}

/*
 * open the perly gates
 */

static Rex_t*
grp(Cenv_t* env, int parno)
{
	Rex_t*		e;
	Rex_t*		f;
	int		c;
	int		g;
	int		i;
	int		n;
	int		x;
	int		esc;
	int		typ;
	int		beg;
	unsigned char*	p;

	g = env->flags;
	beg = env->pattern == env->cursor - env->token.len;
	if (!(c = env->token.lex) && (c = *env->cursor))
		env->cursor++;
	env->token.len = 0;
	env->parnest++;
	typ = -1;
	switch (c)
	{
	case '-':
	case '+':
	case 'a':
	case 'g':
	case 'i':
	case 'l':
	case 'm':
	case 'p':
	case 'r':
	case 's':
	case 'x':
	case 'A':
	case 'B':
	case 'E':
	case 'F':
	case 'G':
	case 'I':
	case 'K':
	case 'L':
	case 'M':	/* glob(3) */
	case 'N':	/* glob(3) */
	case 'O':	/* glob(3) */
	case 'P':
	case 'R':	/* pcre */
	case 'S':
	case 'U':	/* pcre */
	case 'V':
	case 'X':	/* pcre */
		x = REX_GROUP;
		i = 1;
		env->token.push = 1;
		for (;;)
		{
			switch (c)
			{
			case ')':
				if (!(env->flags & REG_LITERAL))
				{
					env->error = REG_BADRPT;
					return 0;
				}
				/*FALLTHROUGH*/
			case 0:
			case T_CLOSE:
				x = 0;
				goto done;
			case ':':
				eat(env);
				if (token(env) == T_CLOSE)
					x = 0;
				goto done;
			case '-':
				i = 0;
				break;
			case '+':
				i = 1;
				break;
			case 'a':
				if (i)
					env->flags |= (REG_LEFT|REG_RIGHT);
				else
					env->flags &= ~(REG_LEFT|REG_RIGHT);
				break;
			case 'g':
				if (i)
					env->flags &= ~REG_MINIMAL;
				else
					env->flags |= REG_MINIMAL;
				break;
			case 'i':
				if (i)
					env->flags |= REG_ICASE;
				else
					env->flags &= ~REG_ICASE;
				break;
			case 'l':
				if (i)
					env->flags |= REG_LEFT;
				else
					env->flags &= ~REG_LEFT;
				break;
			case 'm':
				if (i)
					env->flags |= REG_NEWLINE;
				else
					env->flags &= ~REG_NEWLINE;
				env->explicit = (env->flags & (REG_NEWLINE|REG_SPAN)) == REG_NEWLINE ? env->mappednewline : -1;
				break;
			case 'p':
				if (i)
					env->flags &= ~REG_LENIENT;
				else
					env->flags |= REG_LENIENT;
				break;
			case 'r':
				if (i)
					env->flags |= REG_RIGHT;
				else
					env->flags &= ~REG_RIGHT;
				break;
			case 's':
				if (i)
					env->flags |= REG_SPAN;
				else
					env->flags &= ~REG_SPAN;
				env->explicit = (env->flags & (REG_NEWLINE|REG_SPAN)) == REG_NEWLINE ? env->mappednewline : -1;
				break;
			case 'x':
				if (i)
					env->flags |= REG_COMMENT;
				else
					env->flags &= ~REG_COMMENT;
				break;
			case 'X':
				if (typ >= 0 || env->type == ERE && (env->flags & REG_CLASS_ESCAPE))
					break; /* PCRE_EXTRA */
				/*FALLTHROUGH*/
			case 'A':
				env->flags &= ~(REG_AUGMENTED|REG_EXTENDED|REG_LITERAL|REG_REGEXP|REG_SHELL|REG_LEFT|REG_RIGHT);
				env->flags |= REG_AUGMENTED|REG_EXTENDED;
				typ = ARE;
				break;
			case 'B':
			case 'G':
				env->flags &= ~(REG_AUGMENTED|REG_EXTENDED|REG_LITERAL|REG_REGEXP|REG_SHELL|REG_LEFT|REG_RIGHT);
				typ = BRE;
				break;
			case 'E':
				env->flags &= ~(REG_AUGMENTED|REG_EXTENDED|REG_LITERAL|REG_REGEXP|REG_SHELL|REG_LEFT|REG_RIGHT);
				env->flags |= REG_EXTENDED;
				typ = ERE;
				break;
			case 'F':
			case 'L':
				env->flags &= ~(REG_AUGMENTED|REG_EXTENDED|REG_LITERAL|REG_REGEXP|REG_SHELL|REG_LEFT|REG_RIGHT);
				env->flags |= REG_LITERAL;
				typ = ERE;
				break;
			case 'K':
				env->flags &= ~(REG_AUGMENTED|REG_EXTENDED|REG_LITERAL|REG_REGEXP|REG_SHELL|REG_LEFT|REG_RIGHT);
				env->flags |= REG_AUGMENTED|REG_SHELL|REG_LEFT|REG_RIGHT;
				typ = KRE;
				break;
			case 'M':
				/* used by caller to disable glob(3) GLOB_BRACE */
				break;
			case 'N':
				/* used by caller to disable glob(3) GLOB_NOCHECK */
				break;
			case 'O':
				/* used by caller to disable glob(3) GLOB_STARSTAR */
				break;
			case 'P':
				env->flags &= ~(REG_AUGMENTED|REG_EXTENDED|REG_LITERAL|REG_REGEXP|REG_SHELL|REG_LEFT|REG_RIGHT);
				env->flags |= REG_EXTENDED|REG_CLASS_ESCAPE;
				typ = ERE;
				break;
			case 'S':
				env->flags &= ~(REG_AUGMENTED|REG_EXTENDED|REG_LITERAL|REG_REGEXP|REG_SHELL|REG_LEFT|REG_RIGHT);
				env->flags |= REG_SHELL|REG_LEFT|REG_RIGHT;
				typ = SRE;
				break;
			case 'U': /* PCRE_UNGREEDY */
				if (typ >= 0 || env->type == ERE && (env->flags & REG_CLASS_ESCAPE))
				{
					if (i)
						env->flags |= REG_MINIMAL;
					else
						env->flags &= ~REG_MINIMAL;
				}
				break;
			case 'V':
				env->flags &= ~(REG_AUGMENTED|REG_EXTENDED|REG_LITERAL|REG_REGEXP|REG_SHELL|REG_LEFT|REG_RIGHT);
				env->flags |= REG_REGEXP;
				typ = BRE;
				break;
			default:
				env->error = REG_BADRPT;
				return 0;
			}
			eat(env);
			c = token(env);
		}
	done:
		break;
	case ':':
		x = REX_GROUP;
		break;
	case '=':
		x = REX_GROUP_AHEAD;
		break;
	case '!':
		x = REX_GROUP_AHEAD_NOT;
		break;
	case '<':
		switch (token(env))
		{
		case '=':
			x = REX_GROUP_BEHIND;
			break;
		case '!':
		case T_BANG:
			x = REX_GROUP_BEHIND_NOT;
			break;
		default:
			env->error = REG_BADRPT;
			return 0;
		}
		eat(env);
		break;
	case '>':
		x = REX_GROUP_CUT;
		break;
	case '%':
	case T_PERCENT:
		e = node(env, REX_NEST, 0, 0, (UCHAR_MAX + 1) * sizeof(unsigned short));
		e->re.nest.primary = isalnum(*env->cursor) ? -1 : *env->cursor;
		n = 1;
		for (;;)
		{
			switch (i = chr(env, &esc))
			{
			case -1:
			case 0:
			invalid:
				env->cursor -= esc + 1;
				env->error = REG_EPAREN;
				return 0;
			case 'D':
				x = REX_NEST_delimiter;
				/*FALLTHROUGH*/
			delimiter:
				if ((i = chr(env, &esc)) < 0)
					goto invalid;
				if (e->re.nest.type[i] & ~x)
					goto invalid;
				e->re.nest.type[i] = x;
				continue;
			case 'E':
				x = REX_NEST_escape;
				goto delimiter;
			case 'L':
				x = REX_NEST_literal;
				goto quote;
			case 'O':
				switch (i = chr(env, &esc))
				{
				case 'T':
					e->re.nest.type[UCHAR_MAX+1] |= REX_NEST_terminator;
					break;
				default:
					goto invalid;
				}
				continue;
			case 'Q':
				x = REX_NEST_quote;
				/*FALLTHROUGH*/
			quote:
				if ((i = chr(env, &esc)) < 0)
					goto invalid;
				if (e->re.nest.type[i] & ~x)
					goto invalid;
				e->re.nest.type[i] = x|REX_NEST_open|REX_NEST_close|(i<<REX_NEST_SHIFT);
				continue;
			case 'S':
				x = REX_NEST_separator;
				goto delimiter;
			case 'T':
				x = REX_NEST_terminator;
				goto delimiter;
			case '|':
			case '&':
				if (!esc)
					goto invalid;
				goto nesting;
			case '(':
				if (!esc)
					n++;
				goto nesting;
			case ')':
				if (!esc && !--n)
					break;
				goto nesting;
			default:
			nesting:
				if (isalnum(i) || (e->re.nest.type[i] & (REX_NEST_close|REX_NEST_escape|REX_NEST_literal|REX_NEST_quote|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)))
					goto invalid;
				e->re.nest.type[i] = REX_NEST_open;
				if ((x = chr(env, &esc)) < 0 || (e->re.nest.type[x] & (REX_NEST_close|REX_NEST_escape|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)))
					goto invalid;
				if (!esc)
				{
					if (x == ')' && !--n)
						goto invalid;
					else if (x == '(')
						n++;
				}
				e->re.nest.type[x] |= REX_NEST_close;
				e->re.nest.type[i] |= x << REX_NEST_SHIFT;
				continue;
			}
			break;
		}
		env->parnest--;
		if (c == T_PERCENT)
			for (n = 0; n < 2; n++)
			{
				parno = ++env->parno;
				if (!(f = node(env, REX_GROUP, 0, 0, 0)))
				{
					drop(env->disc, e);
					return 0;
				}
				if (parno < elementsof(env->paren))
					env->paren[parno] = f;
				f->re.group.back = 0;
				f->re.group.number = parno;
				f->re.group.expr.rex = e;
				e = f;
			}
		return e;
	case '(':
		c = 0;
		if (isdigit(*env->cursor))
		{
			f = 0;
			do
			{
				if (c > (INT_MAX / 10))
				{
					env->error = REG_BADRPT;
					return 0;
				}
				c = c * 10 + (*env->cursor++ - '0');
			} while (isdigit(*env->cursor));
			if (*env->cursor++ != ')')
			{
				env->error = REG_BADRPT;
				return 0;
			}
			if (c && env->type >= SRE)
				c = c * 2 - 1;
			if (!c || c > env->parno || !env->paren[c])
			{
				if (!(env->flags & (REG_LENIENT|REG_REGEXP)))
				{
					env->error = REG_ESUBREG;
					return 0;
				}
				if (c)
					c = -1;
			}
		}
		else
		{
			if (env->type < SRE && *env->cursor++ != '?')
			{
				env->error = REG_BADRPT;
				return 0;
			}
			if (!(f = grp(env, parno + 1)) && env->error)
				return 0;
		}
		if (!(e = node(env, REX_GROUP_COND, 0, 0, 0)))
		{
			drop(env->disc, f);
			return 0;
		}
		e->re.group.size = c;
		e->re.group.expr.binary.left = f;
		if (!(e->re.group.expr.binary.right = alt(env, parno, 1)))
		{
			drop(env->disc, e);
			return 0;
		}
		if (token(env) != T_CLOSE)
		{
			env->error = REG_EPAREN;
			return 0;
		}
		eat(env);
		env->parnest--;
		return rep(env, e, parno, parno);
	case '{':
		p = env->cursor;
		n = 1;
		while (c = *env->cursor)
		{
			if (c == '\\' && *(env->cursor + 1))
				env->cursor++;
			else if (c == '{')
				n++;
			else if (c == '}' && !--n)
				break;
			else if (c == env->delimiter || c == env->terminator)
				break;
			env->cursor++;
		}
		if (c != '}')
		{
			env->error = REG_EBRACE;
			return 0;
		}
		if (*++env->cursor != ')')
		{
			env->error = REG_EPAREN;
			return 0;
		}
		env->cursor++;
		env->parnest--;
		if (env->disc->re_version < REG_VERSION_EXEC)
		{
			env->error = REG_BADRPT;
			return 0;
		}
		if (!env->disc->re_execf)
			return 0;
		if (!(e = node(env, REX_EXEC, 0, 0, 0)))
			return 0;
		e->re.exec.text = (const char*)p;
		e->re.exec.size = env->cursor - p - 2;
		if (!env->disc->re_compf)
			e->re.exec.data = 0;
		else
			e->re.exec.data = (*env->disc->re_compf)(env->regex, e->re.exec.text, e->re.exec.size, env->disc);
		return e;
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		c -= '0';
		while (isdigit(*env->cursor))
		{
			if (c > (INT_MAX / 10))
			{
				env->error = REG_ESUBREG;
				return 0;
			}
			c = c * 10 + *env->cursor++ - '0';
		}
		if (*env->cursor == ')')
		{
			env->cursor++;
			env->parnest--;
			env->token.len = 1;
			if (c > env->parno || !env->paren[c])
			{
				env->error = REG_ESUBREG;
				return 0;
			}
			env->paren[c]->re.group.back = 1;
			return rep(env, node(env, REX_BACK, c, 0, 0), 0, 0);
		}
		/*FALLTHROUGH*/
	default:
		env->error = REG_BADRPT;
		return 0;
	}
	p = env->pattern;
	i = env->type;
	if (x)
	{
		if (typ >= 0)
			env->type = typ;
		if (!(e = alt(env, parno, 0)))
			goto nope;
		env->flags = g;
		env->type = i;
	}
	c = token(env);
	env->parnest--;
	if (c != T_CLOSE && (!(env->flags & REG_LITERAL) || c != ')'))
	{
		env->error = REG_EPAREN;
		goto nope;
	}
	eat(env);
	if (typ >= 0 && beg)
		env->pattern = env->cursor;
	if (!x)
	{
		if (typ >= 0)
			env->type = typ;
		return 0;
	}
	if (!(f = node(env, x, 0, 0, 0)))
	{
		drop(env->disc, e);
		goto nope;
	}
	f->re.group.expr.rex = e;
	if (x == REX_GROUP_BEHIND || x == REX_GROUP_BEHIND_NOT)
	{
		if (stats(env, e))
		{
			drop(env->disc, f);
			if (!env->error)
				env->error = REG_ECOUNT;
			goto nope;
		}
		f->re.group.size = env->stats.m;
		memset(&env->stats, 0, sizeof(env->stats));
	}
	switch (x)
	{
	case REX_GROUP:
	case REX_GROUP_CUT:
		f = rep(env, f, parno, env->parno);
		break;
	}
	if (f)
		return f;
 nope:
	env->flags = g;
	env->pattern = p;
	env->type = i;
	return 0;
}

static Rex_t*
seq(Cenv_t* env)
{
	Rex_t*		e;
	Rex_t*		f;
	Token_t		tok;
	int		c;
	int		i;
	int		n;
	int		x;
	int		parno;
	int		type;
	regflags_t	flags;
	unsigned char*	s;
	unsigned char*	p;
	unsigned char*	t;
	unsigned char*	u;
	unsigned char	buf[256];

	for (;;)
	{
		s = buf;
		while ((c = token(env)) < T_META && s < &buf[sizeof(buf) - env->token.len])
		{	
			x = c;
			p = env->cursor;
			if (c >= 0)
			{
				n = 1;
				*s++ = (env->flags & REG_ICASE) ? toupper(c) : c;
			}
			else if (c == C_ESC || (env->flags & REG_ICASE))
			{
				c = (c == C_ESC) ? env->token.lex : mbchar(p);
				if (env->flags & REG_ICASE)
					c = towupper(c);
				if ((&buf[sizeof(buf)] - s) < MB_CUR_MAX)
					break;
				if ((n = mbconv((char*)s, c)) < 0)
					*s++ = c;
				else if (n)
					s += n;
				else
				{
					n = 1;
					*s++ = 0;
				}
			}
			else
			{
				n = env->token.len - env->token.esc;
				for (t = p, u = s + n; s < u; *s++ = *t++);
			}
			eat(env);
		}
		if (c == T_BAD)
			return 0;
		if (s > buf)
			switch (c)
			{
			case T_STAR:
			case T_PLUS:
			case T_LEFT:
			case T_QUES:
			case T_BANG:
				if ((s -= n) == buf)
					e = 0;
				else
				{
					i = s - buf;
					if (!(e = node(env, REX_STRING, 0, 0, i)))
						return 0;
					memcpy((char*)(e->re.string.base = (unsigned char*)e->re.data), (char*)buf, i);
					e->re.string.size = i;
				}
				if (x >= 0)
				{
					if (!(f = node(env, REX_ONECHAR, 1, 1, 0)))
					{
						drop(env->disc, e);
						return 0;
					}
					f->re.onechar = (env->flags & REG_ICASE) ? toupper(x) : x;
				}
				else
				{
					if (!(f = node(env, REX_STRING, 0, 0, n)))
						return 0;
					memcpy((char*)(f->re.string.base = (unsigned char*)f->re.data), (char*)p, n);
					f->re.string.size = n;
				}
				if (!(f = rep(env, f, 0, 0)) || !(f = cat(env, f, seq(env))))
				{
					drop(env->disc, e);
					return 0;
				}
				if (e)
					f = cat(env, e, f);
				return f;
			default:
				c = s - buf;
				if (!(e = node(env, REX_STRING, 0, 0, c)))
					return 0;
				memcpy((char*)(e->re.string.base = (unsigned char*)e->re.data), (char*)buf, c);
				e->re.string.size = c;
				return cat(env, e, seq(env));
			}
		else if (c > T_BACK)
		{
			eat(env);
			c -= T_BACK;
			if (c > env->parno || !env->paren[c])
			{
				env->error = REG_ESUBREG;
				return 0;
			}
			env->paren[c]->re.group.back = 1;
			e = rep(env, node(env, REX_BACK, c, 0, 0), 0, 0);
		}
		else
			switch (c)
			{
			case T_AND:
			case T_CLOSE:
			case T_BAR:
			case T_END:
				return node(env, REX_NULL, 0, 0, 0);
			case T_DOLL:
				eat(env);
				e = rep(env, node(env, REX_END, 0, 0, 0), 0, 0);
				break;
			case T_CFLX:
				eat(env);
				if ((e = node(env, REX_BEG, 0, 0, 0)) && (env->flags & REG_EXTENDED))
					e = rep(env, e, 0, 0);
				break;
			case T_OPEN:
				tok = env->token;
				eat(env);
				flags = env->flags;
				type = env->type;
				if (env->token.att)
					env->flags |= REG_MINIMAL;
				env->parnest++;
				if (env->type == KRE)
					++env->parno;
				parno = ++env->parno;
				if (!(e = alt(env, parno + 1, 0)))
					break;
				if (e->type == REX_NULL && env->type == ERE && !(env->flags & (REG_NULL|REG_REGEXP)))
				{
					drop(env->disc, e);
					env->error = (*env->cursor == 0 || *env->cursor == env->delimiter || *env->cursor == env->terminator) ? REG_EPAREN : REG_ENULL;
					return 0;
				} 
				if (token(env) != T_CLOSE)
				{
					drop(env->disc, e);
					env->error = REG_EPAREN;
					return 0;
				} 
				env->parnest--;
				eat(env);
				if (!(f = node(env, REX_GROUP, 0, 0, 0)))
				{
					drop(env->disc, e);
					return 0;
				}
				if (parno < elementsof(env->paren))
					env->paren[parno] = f;
				f->re.group.back = 0;
				f->re.group.number = parno;
				f->re.group.expr.rex = e;
				if (tok.lex)
				{
					tok.push = 1;
					env->token = tok;
				}
				if (!(e = rep(env, f, parno, env->parno)))
					return 0;
				if (env->type == KRE)
				{
					if (!(f = node(env, REX_GROUP, 0, 0, 0)))
					{
						drop(env->disc, e);
						return 0;
					}
					if (--parno < elementsof(env->paren))
						env->paren[parno] = f;
					f->re.group.back = 0;
					f->re.group.number = parno;
					f->re.group.expr.rex = e;
					e = f;
				}
				env->flags = flags;
				env->type = type;
				break;
			case T_GROUP:
				p = env->cursor;
				eat(env);
				flags = env->flags;
				type = env->type;
				if (!(e = grp(env, env->parno + 1)))
				{
					if (env->error)
						return 0;
					if (env->literal == env->pattern && env->literal == p)
						env->literal = env->cursor;
					continue;
				}
				env->flags = flags;
				env->type = type;
				break;
			case T_BRA:
				eat(env);
				if (e = bra(env))
					e = rep(env, e, 0, 0);
				break;
			case T_ALNUM:
			case T_ALNUM_NOT:
			case T_DIGIT:
			case T_DIGIT_NOT:
			case T_SPACE:
			case T_SPACE_NOT:
				eat(env);
				if (e = ccl(env, c))
					e = rep(env, e, 0, 0);
				break;
			case T_LT:
				eat(env);
				e = rep(env, node(env, REX_WBEG, 0, 0, 0), 0, 0);
				break;
			case T_GT:
				eat(env);
				e = rep(env, node(env, REX_WEND, 0, 0, 0), 0, 0);
				break;
			case T_DOT:
				eat(env);
				e = rep(env, node(env, REX_DOT, 1, 1, 0), 0, 0);
				break;
			case T_DOTSTAR:
				eat(env);
				env->token.lex = T_STAR;
				env->token.push = 1;
				e = rep(env, node(env, REX_DOT, 1, 1, 0), 0, 0);
				break;
			case T_SLASHPLUS:
				eat(env);
				env->token.lex = T_PLUS;
				env->token.push = 1;
				if (e = node(env, REX_ONECHAR, 1, 1, 0))
				{
					e->re.onechar = '/';
					e = rep(env, e, 0, 0);
				}
				break;
			case T_WORD:
				eat(env);
				e = rep(env, node(env, REX_WORD, 0, 0, 0), 0, 0);
				break;
			case T_WORD_NOT:
				eat(env);
				e = rep(env, node(env, REX_WORD_NOT, 0, 0, 0), 0, 0);
				break;
			case T_BEG_STR:
				eat(env);
				e = rep(env, node(env, REX_BEG_STR, 0, 0, 0), 0, 0);
				break;
			case T_END_STR:
				eat(env);
				e = rep(env, node(env, REX_END_STR, 0, 0, 0), 0, 0);
				break;
			case T_FIN_STR:
				eat(env);
				e = rep(env, node(env, REX_FIN_STR, 0, 0, 0), 0, 0);
				break;
			default:
				env->error = REG_BADRPT;
				return 0;
			}
		if (e && *env->cursor != 0 && *env->cursor != env->delimiter && *env->cursor != env->terminator)
			e = cat(env, e, seq(env));
		return e;
	}
}

static Rex_t*
con(Cenv_t* env)
{
	Rex_t*	e;
	Rex_t*	f;
	Rex_t*	g;

	if (!(e = seq(env)) || !(env->flags & REG_AUGMENTED) || token(env) != T_AND)
		return e;
	eat(env);
	if (!(f = con(env)))
	{
		drop(env->disc, e);
		return 0;
	}
	if (!(g = node(env, REX_CONJ, 0, 0, 0)))
	{
		drop(env->disc, e);
		drop(env->disc, f);
		return 0;
	}
	g->re.group.expr.binary.left = e;
	g->re.group.expr.binary.right = f;
	return g;
}

static Rex_t*
alt(Cenv_t* env, int number, int cond)
{
	Rex_t*	e;
	Rex_t*	f;
	Rex_t*	g;

	if (!(e = con(env)))
		return 0;
	else if (token(env) != T_BAR)
	{
		if (!cond)
			return e;
		f = 0;
		if (e->type == REX_NULL)
			goto bad;
	}
	else
	{
		eat(env);
		if (!(f = alt(env, number, 0)))
		{
			drop(env->disc, e);
			return 0;
		}
		if ((e->type == REX_NULL || f->type == REX_NULL) && !(env->flags & (REG_NULL|REG_REGEXP)))
			goto bad;
		if (!cond && (g = trie(env, e, f)))
			return g;
	}
	if (!(g = node(env, REX_ALT, 0, 0, 0)))
	{
		env->error = REG_ESPACE;
		goto bad;
	}
	g->re.group.number = number;
	g->re.group.last = env->parno;
	g->re.group.expr.binary.left = e;
	g->re.group.expr.binary.right = f;
	return g;
 bad:
	drop(env->disc, e);
	drop(env->disc, f);
	if (!env->error)
		env->error = REG_ENULL;
	return 0;
}

/*
 * add v to REX_BM tables
 */

static void
bmstr(Cenv_t* env, register Rex_t* a, unsigned char* v, int n, Bm_mask_t b)
{
	int	c;
	int	m;
	size_t	z;

	for (m = 0; m < n; m++)
	{
		if (!(z = n - m - 1))
			z = HIT;
		c = v[m];
		a->re.bm.mask[m][c] |= b;
		if (z == HIT || !a->re.bm.skip[c] || a->re.bm.skip[c] > z && a->re.bm.skip[c] < HIT)
			a->re.bm.skip[c] = z;
		if (a->flags & REG_ICASE)
		{
			if (isupper(c))
				c = tolower(c);
			else if (islower(c))
				c = toupper(c);
			else
				continue;
			a->re.bm.mask[m][c] |= b;
			if (z == HIT || !a->re.bm.skip[c] || a->re.bm.skip[c] > z && a->re.bm.skip[c] < HIT)
				a->re.bm.skip[c] = z;
		}
	}
}

/*
 * set up BM table from trie
 */

static int
bmtrie(Cenv_t* env, Rex_t* a, unsigned char* v, Trie_node_t* x, int n, int m, Bm_mask_t b)
{
	do
	{
		v[m] = x->c;
		if (m >= (n - 1))
		{
			bmstr(env, a, v, n, b);
			if (!(b <<= 1))
			{
				b = 1;
				a->re.bm.complete = 0;
			}
			else if (x->son)
				a->re.bm.complete = 0;
		}
		else if (x->son)
			b = bmtrie(env, a, v, x->son, n, m + 1, b);
	} while (x = x->sib);
	return b;
}

/*
 * rewrite the expression tree for some special cases
 * 1. it is a null expression - illegal
 * 2. max length fixed string found -- use BM algorithm
 * 3. it begins with an unanchored string - use KMP algorithm
 * 0 returned on success
 */		

static int
special(Cenv_t* env, regex_t* p)
{
	Rex_t*		a;
	Rex_t*		e;
	Rex_t*		t;
	Rex_t*		x;
	Rex_t*		y;
	unsigned char*	s;
	int*		f;
	int		n;
	int		m;
	int		k;

	DEBUG_INIT();
	if (e = p->env->rex)
	{
		if ((x = env->stats.x) && x->re.string.size < 3)
			x = 0;
		if ((t = env->stats.y) && t->re.trie.min < 3)
			t = 0;
		if (x && t)
		{
			if (x->re.string.size >= t->re.trie.min)
				t = 0;
			else
				x = 0;
		}
		if (x || t)
		{
			Bm_mask_t**	mask;
			Bm_mask_t*	h;
			unsigned char*	v;
			size_t*		q;
			size_t		l;
			int		i;
			int		j;

			if (x)
			{
				y = x;
				n = m = x->re.string.size;
				l = env->stats.l;
			}
			else
			{
				y = t;
				n = t->re.trie.min;
				m = t->re.trie.max;
				l = env->stats.k;
			}
			if (!(q = (size_t*)alloc(env->disc, 0, (n + 1) * sizeof(size_t))))
				return 1;
			if (!(a = node(env, REX_BM, 0, 0, n * (sizeof(Bm_mask_t*) + (UCHAR_MAX + 1) * sizeof(Bm_mask_t)) + (UCHAR_MAX + n + 2) * sizeof(size_t))))
			{
				alloc(env->disc, q, 0);
				return 1;
			}
			a->flags = y->flags;
			a->map = y->map;
			a->re.bm.size = n;
			a->re.bm.back = (y == e || y == e->re.group.expr.rex) ? (m - n) : -1;
			a->re.bm.left = l - 1;
			a->re.bm.right = env->stats.m - l - n;
			a->re.bm.complete = (env->stats.e || y != e && (e->type != REX_GROUP || y != e->re.group.expr.rex) || e->next || ((a->re.bm.left + a->re.bm.right) >= 0)) ? 0 : n;
			h = (Bm_mask_t*)&a->re.bm.mask[n];
			a->re.bm.skip = (size_t*)(h + n * (UCHAR_MAX + 1));
			a->re.bm.fail = &a->re.bm.skip[UCHAR_MAX + 1];
			for (m = 0; m <= UCHAR_MAX; m++)
				a->re.bm.skip[m] = n;
			a->re.bm.skip[0] = a->re.bm.skip[env->mappednewline] = (y->next && y->next->type == REX_END) ? HIT : (n + a->re.bm.left);
			for (i = 1; i <= n; i++)
				a->re.bm.fail[i] = 2 * n - i;
			mask = a->re.bm.mask;
			for (m = 0; m < n; m++)
			{
				mask[m] = h;
				h += UCHAR_MAX + 1;
			}
			if (x)
				bmstr(env, a, x->re.string.base, n, 1);
			else
			{
				v = (unsigned char*)q;
				memset(v, 0, n);
				m = 1;
				for (i = 0; i <= UCHAR_MAX; i++)
					if (t->re.trie.root[i])
						m = bmtrie(env, a, v, t->re.trie.root[i], n, 0, m);
			}
			mask--;
			memset(q, 0, n * sizeof(*q));
			for (k = (j = n) + 1; j > 0; j--, k--)
			{
				DEBUG_TEST(0x0010,(sfprintf(sfstderr, "BM#0: k=%d j=%d\n", k, j)),(0));
				for (q[j] = k; k <= n; k = q[k])
				{
					for (m = 0; m <= UCHAR_MAX; m++)
						if (mask[k][m] == mask[j][m])
						{
							DEBUG_TEST(0x0010,sfprintf(sfstderr, "CUT1: mask[%d][%c]=mask[%d][%c]\n", k, m, j, m), (0));
							goto cut;
						}
					DEBUG_TEST(0x0010,sfprintf(sfstderr, "BM#2: fail[%d]=%d => %d\n", k, a->re.bm.fail[k], (a->re.bm.fail[k] > n - j) ? (n - j) : a->re.bm.fail[k]), (0));
					if (a->re.bm.fail[k] > n - j)
						a->re.bm.fail[k] = n - j;
				}
			cut:	;
			}
			for (i = 1; i <= n; i++)
				if (a->re.bm.fail[i] > n + k - i)
				{
					DEBUG_TEST(0x0010,sfprintf(sfstderr, "BM#4: fail[%d]=%d => %d\n", i, a->re.bm.fail[i], n + k - i), (0));
					a->re.bm.fail[i] = n + k - i;
				}
#if _AST_REGEX_DEBUG
			if (DEBUG_TEST(0x0020,(1),(0)))
			{
				sfprintf(sfstderr, "STAT: complete=%d n=%d k=%d l=%d r=%d y=%d:%d e=%d:%d\n", a->re.bm.complete, n, k, a->re.bm.left, a->re.bm.right, y->type, y->next ? y->next->type : 0, e->type, e->next ? e->next->type : 0);
				for (m = 0; m < n; m++)
					for (i = 1; i <= UCHAR_MAX; i++)
						if (a->re.bm.mask[m][i])
							sfprintf(sfstderr, "MASK: [%d]['%c'] = %032..2u\n", m, i, a->re.bm.mask[m][i]);
				for (i = ' '; i <= UCHAR_MAX; i++)
					if (a->re.bm.skip[i] >= HIT)
						sfprintf(sfstderr, "SKIP: ['%c'] =   *\n", i);
					else if (a->re.bm.skip[i] > 0 && a->re.bm.skip[i] < n)
						sfprintf(sfstderr, "SKIP: ['%c'] = %3d\n", i, a->re.bm.skip[i]);
				for (j = 31; j >= 0; j--)
				{
					sfprintf(sfstderr, "      ");
				next:
					for (m = 0; m < n; m++)
					{
						for (i = 0040; i < 0177; i++)
							if (a->re.bm.mask[m][i] & (1 << j))
							{
								sfprintf(sfstderr, "  %c", i);
								break;
							}
						if (i >= 0177)
						{
							if (j > 0)
							{
								j--;
								goto next;
							}
							sfprintf(sfstderr, "  ?");
						}
					}
					sfprintf(sfstderr, "\n");
				}
				sfprintf(sfstderr, "FAIL: ");
				for (m = 1; m <= n; m++)
					sfprintf(sfstderr, "%3d", a->re.bm.fail[m]);
				sfprintf(sfstderr, "\n");
			}
#endif
			alloc(env->disc, q, 0);
			a->next = e;
			p->env->rex = a;
			return 0;
		}
		switch (e->type)
		{
		case REX_BEG:
			if (env->flags & REG_NEWLINE)
				return 0;
			break;
		case REX_GROUP:
			if (env->stats.b)
				return 0;
			e = e->re.group.expr.rex;
			if (e->type != REX_DOT)
				return 0;
			/*FALLTHROUGH*/
		case REX_DOT:
			if (e->lo == 0 && e->hi == RE_DUP_INF)
				break;
			return 0;
		case REX_NULL:
			if (env->flags & (REG_NULL|REG_REGEXP))
				break;
			env->error = REG_ENULL;
			return 1;
		case REX_STRING:
			if ((env->flags & (REG_LEFT|REG_LITERAL|REG_RIGHT)) || e->map)
				return 0;
			s = e->re.string.base;
			n = e->re.string.size;
			if (!(a = node(env, REX_KMP, 0, 0, n * (sizeof(int*) + 1))))
				return 1;
			a->flags = e->flags;
			a->map = e->map;
			f = a->re.string.fail;
			memcpy((char*)(a->re.string.base = (unsigned char*)&f[n]), (char*)s, n);
			s = a->re.string.base;
			a->re.string.size = n;
			f[0] = m = -1;
			for (k = 1; k < n; k++)
			{
				while (m >= 0 && s[m+1] != s[k])
					m = f[m];
				if (s[m+1] == s[k])
					m++;
				f[k] = m;
			}
			a->next = e->next;
			p->env->rex = a;
			e->next = 0;
			drop(env->disc, e);
			break;
		default:
			return 0;
		}
	}
	p->env->once = 1;
	return 0;
}		

int
regcomp(regex_t* p, const char* pattern, regflags_t flags)
{
	Rex_t*			e;
	Rex_t*			f;
	regdisc_t*		disc;
	unsigned char*		fold;
	int			i;
	Cenv_t			env;

	if (!p)
		return REG_BADPAT;
	if (flags & REG_DISCIPLINE)
	{
		flags &= ~REG_DISCIPLINE;
		disc = p->re_disc;
	}
	else
		disc = &state.disc;
	if (!disc->re_errorlevel)
		disc->re_errorlevel = 2;
	p->env = 0;
	if (!pattern)
		return fatal(disc, REG_BADPAT, pattern);
	if (!state.initialized)
	{
		state.initialized = 1;
		for (i = 0; i < elementsof(state.escape); i++)
			state.magic[state.escape[i].key] = state.escape[i].val;
	}
	if (!(fold = (unsigned char*)LCINFO(AST_LC_CTYPE)->data))
	{
		if (!(fold = newof(0, unsigned char, UCHAR_MAX, 1)))
			return fatal(disc, REG_ESPACE, pattern);
		for (i = 0; i <= UCHAR_MAX; i++)
			fold[i] = toupper(i);
		LCINFO(AST_LC_CTYPE)->data = (void*)fold;
	}
 again:
	if (!(p->env = (Env_t*)alloc(disc, 0, sizeof(Env_t))))
		return fatal(disc, REG_ESPACE, pattern);
	memset(p->env, 0, sizeof(*p->env));
	memset(&env, 0, sizeof(env));
	env.regex = p;
	env.flags = flags;
	env.disc = p->env->disc = disc;
	if (env.flags & REG_AUGMENTED)
		env.flags |= REG_EXTENDED;
	env.mappeddot = '.';
	env.mappednewline = '\n';
	env.mappedslash = '/';
	if (disc->re_version >= REG_VERSION_MAP && disc->re_map)
	{
		env.map = disc->re_map;
		env.MAP = p->env->fold;
		for (i = 0; i <= UCHAR_MAX; i++)
		{
			env.MAP[i] = fold[env.map[i]];
			if (env.map[i] == '.')
				env.mappeddot = i;
			if (env.map[i] == '\n')
				env.mappednewline = i;
			if (env.map[i] == '/')
				env.mappedslash = i;
		}
	}
	else
		env.MAP = fold;
	env.type = (env.flags & REG_AUGMENTED) ? ARE : (env.flags & REG_EXTENDED) ? ERE : BRE;
	env.explicit = -1;
	if (env.flags & REG_SHELL)
	{
		if (env.flags & REG_SHELL_PATH)
			env.explicit = env.mappedslash;
		if (!(env.flags & REG_SHELL_ESCAPED))
			env.flags |= REG_CLASS_ESCAPE;
		env.flags |= REG_LENIENT|REG_NULL;
		env.type = env.type == BRE ? SRE : KRE;
	}
	else
		env.flags &= ~(REG_SHELL_DOT|REG_SHELL_ESCAPED|REG_SHELL_GROUP|REG_SHELL_PATH);
	if ((env.flags & (REG_NEWLINE|REG_SPAN)) == REG_NEWLINE)
		env.explicit = env.mappednewline;
	p->env->leading = (env.flags & REG_SHELL_DOT) ? env.mappeddot : -1;
	env.posixkludge = !(env.flags & (REG_EXTENDED|REG_SHELL));
	env.token.lex = 0;
	env.token.push = 0;
	if (env.flags & REG_DELIMITED)
	{
		switch (env.delimiter = *pattern++)
		{
		case 0:
		case '\\':
		case '\n':
		case '\r':
			env.error = REG_EDELIM;
			goto bad;
		}
		env.terminator = '\n';
	}
	env.literal = env.pattern = env.cursor = (unsigned char*)pattern;
	if (!(p->env->rex = alt(&env, 1, 0)))
		goto bad;
	if (env.parnest)
	{
		env.error = REG_EPAREN;
		goto bad;
	}
	if ((env.flags & REG_LEFT) && p->env->rex->type != REX_BEG)
	{
		if (p->env->rex->type == REX_ALT)
			env.flags &= ~REG_FIRST;
		if (!(e = node(&env, REX_BEG, 0, 0, 0)))
		{
			regfree(p);
			return fatal(disc, REG_ESPACE, pattern);
		}
		e->next = p->env->rex;
		p->env->rex = e;
		p->env->once = 1;
	}
	for (e = p->env->rex; e->next; e = e->next);
	p->env->done.type = REX_DONE;
	p->env->done.flags = e->flags;
	if ((env.flags & REG_RIGHT) && e->type != REX_END)
	{
		if (p->env->rex->type == REX_ALT)
			env.flags &= ~REG_FIRST;
		if (!(f = node(&env, REX_END, 0, 0, 0)))
		{
			regfree(p);
			return fatal(disc, REG_ESPACE, pattern);
		}
		f->flags = e->flags;
		f->map = e->map;
		e->next = f;
	}
	if (stats(&env, p->env->rex))
	{
		if (!env.error)
			env.error = REG_ECOUNT;
		goto bad;
	}
	if (env.stats.b)
		p->env->hard = p->env->separate = 1;
	else if (!(env.flags & REG_FIRST) && (env.stats.a || env.stats.c > 1 && env.stats.c != env.stats.s || env.stats.t && (env.stats.t > 1 || env.stats.a || env.stats.c)))
		p->env->hard = 1;
	if (p->env->hard || env.stats.c || env.stats.i)
		p->env->stats.re_min = p->env->stats.re_max = -1;
	else
	{
		if (!(p->env->stats.re_min = env.stats.m))
			p->env->stats.re_min = -1;
		if (!(p->env->stats.re_max = env.stats.n))
			p->env->stats.re_max = -1;
	}
	if (special(&env, p))
		goto bad;
	serialize(&env, p->env->rex, 1);
	p->re_nsub = env.stats.p;
	if (env.type == KRE)
		p->re_nsub /= 2;
	if (env.flags & REG_DELIMITED)
	{
		p->re_npat = env.cursor - env.pattern + 1;
		if (*env.cursor == env.delimiter)
			p->re_npat++;
		else if (env.flags & REG_MUSTDELIM)
		{
			env.error = REG_EDELIM;
			goto bad;
		}
		else
			env.flags &= ~REG_DELIMITED;
	}
	p->env->explicit = env.explicit;
	p->env->flags = env.flags & REG_COMP;
	p->env->min = env.stats.m;
	p->env->nsub = env.stats.p + env.stats.u;
	p->env->refs = 1;
	return 0;
 bad:
	regfree(p);
	if (!env.error)
		env.error = REG_ESPACE;
	if (env.type >= SRE && env.error != REG_ESPACE && !(flags & REG_LITERAL))
	{
		flags |= REG_LITERAL;
		pattern = (const char*)env.literal;
		goto again;
	}
	return fatal(disc, env.error, pattern);
}

/*
 * regcomp() on sized pattern
 * the lazy way around adding and checking env.end
 */

int
regncomp(regex_t* p, const char* pattern, size_t size, regflags_t flags)
{
	char*	s;
	int	r;

	if (!(s = malloc(size + 1)))
		return fatal((flags & REG_DISCIPLINE) ? p->re_disc : &state.disc, REG_ESPACE, pattern);
	memcpy(s, pattern, size);
	s[size] = 0;
	r = regcomp(p, s, flags);
	free(s);
	return r;
}

/*
 * combine two compiled regular expressions if possible,
 * replacing first with the combination and freeing second.
 * return 0 on success.
 * the only combinations handled are building a Trie
 * from String|Kmp|Trie and String|Kmp
 */

int
regcomb(regex_t* p, regex_t* q)
{
	Rex_t*	e = p->env->rex;
	Rex_t*	f = q->env->rex;
	Rex_t*	g;
	Rex_t*	h;
	Cenv_t	env;

	if (!e || !f)
		return fatal(p->env->disc, REG_BADPAT, NiL);
	if (p->env->separate || q->env->separate)
		return REG_ESUBREG;
	memset(&env, 0, sizeof(env));
	env.disc = p->env->disc;
	if (e->type == REX_BM)
	{
		p->env->rex = e->next;
		e->next = 0;
		drop(env.disc, e);
		e = p->env->rex;
	}
	if (f->type == REX_BM)
	{
		q->env->rex = f->next;
		f->next = 0;
		drop(env.disc, f);
		f = q->env->rex;
	}
	if (e->type == REX_BEG && f->type == REX_BEG)
	{
		p->env->flags |= REG_LEFT;
		p->env->rex = e->next;
		e->next = 0;
		drop(env.disc, e);
		e = p->env->rex;
		q->env->rex = f->next;
		f->next = 0;
		drop(env.disc, f);
		f = q->env->rex;
	}
	for (g = e; g->next; g = g->next);
	for (h = f; h->next; h = h->next);
	if (g->next && g->next->type == REX_END && h->next && h->next->type == REX_END)
	{
		p->env->flags |= REG_RIGHT;
		drop(env.disc, g->next);
		g->next = 0;
		drop(env.disc, h->next);
		h->next = 0;
	}
	if (!(g = trie(&env, f, e)))
		return fatal(p->env->disc, REG_BADPAT, NiL);
	p->env->rex = g;
	if (!q->env->once)
		p->env->once = 0;
	q->env->rex = 0;
	if (p->env->flags & REG_LEFT)
	{
		if (!(e = node(&env, REX_BEG, 0, 0, 0)))
		{
			regfree(p);
			return fatal(p->env->disc, REG_ESPACE, NiL);
		}
		e->next = p->env->rex;
		p->env->rex = e;
		p->env->once = 1;
	}
	if (p->env->flags & REG_RIGHT)
	{
		for (f = p->env->rex; f->next; f = f->next);
		if (f->type != REX_END)
		{
			if (!(e = node(&env, REX_END, 0, 0, 0)))
			{
				regfree(p);
				return fatal(p->env->disc, REG_ESPACE, NiL);
			}
			f->next = e;
		}
	}
	env.explicit = p->env->explicit;
	env.flags = p->env->flags;
	env.disc = p->env->disc;
	if (stats(&env, p->env->rex))
	{
		regfree(p);
		return fatal(p->env->disc, env.error ? env.error : REG_ECOUNT, NiL);
	}
	if (special(&env, p))
	{
		regfree(p);
		return fatal(p->env->disc, env.error ? env.error : REG_ESPACE, NiL);
	}
	p->env->min = g->re.trie.min;
	return 0;
}

/*
 * copy a reference of p into q
 * p and q may then have separate regsubcomp() instantiations
 */

int
regdup(regex_t* p, regex_t* q)
{
	if (!p || !q)
		return REG_BADPAT;
	*q = *p;
	p->env->refs++;
	q->re_sub = 0;
	return 0;
}