/*
 * re.c - compile regular expressions.
 */

/*
 * Copyright (C) 1991-2017 the Free Software Foundation, Inc.
 *
 * This file is part of GAWK, the GNU implementation of the
 * AWK Programming Language.
 *
 * GAWK is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * GAWK is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 */
#include "awk.h"

#include "localeinfo.h"

static reg_syntax_t syn;
Packit Service f629e6
static void check_bracket_exp(char *s, size_t len);
Packit Service f629e6
const char *regexflags2str(int flags);
Packit Service f629e6
Packit Service f629e6
static struct localeinfo localeinfo;
Packit Service f629e6
Packit Service f629e6
/* make_regexp --- generate compiled regular expressions */
Packit Service f629e6
Packit Service f629e6
Regexp *
Packit Service f629e6
make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
Packit Service f629e6
{
Packit Service f629e6
	static char metas[] = ".*+(){}[]|?^$\\";
Packit Service f629e6
	Regexp *rp;
Packit Service f629e6
	const char *rerr;
Packit Service f629e6
	const char *src = s;
Packit Service f629e6
	static char *buf = NULL;
Packit Service f629e6
	static size_t buflen;
Packit Service f629e6
	const char *end = s + len;
Packit Service f629e6
	char *dest;
Packit Service f629e6
	int c, c2;
Packit Service f629e6
	static bool first = true;
Packit Service f629e6
	static bool no_dfa = false;
Packit Service f629e6
	int i;
Packit Service f629e6
	static struct dfa* dfaregs[2] = { NULL, NULL };
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * The number of bytes in the current multibyte character.
Packit Service f629e6
	 * It is 0, when the current character is a singlebyte character.
Packit Service f629e6
	 */
Packit Service f629e6
	size_t is_multibyte = 0;
Packit Service f629e6
	mbstate_t mbs;
Packit Service f629e6
Packit Service f629e6
	memset(&mbs, 0, sizeof(mbstate_t)); /* Initialize.  */
Packit Service f629e6
Packit Service f629e6
	if (first) {
Packit Service f629e6
		/* for debugging and testing */
Packit Service f629e6
		no_dfa = (getenv("GAWK_NO_DFA") != NULL);
Packit Service f629e6
		/* don't set first to false here, we do it below */
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	/* always check */
Packit Service f629e6
	check_bracket_exp((char *) s, len);
Packit Service f629e6
Packit Service f629e6
	/* Handle escaped characters first. */
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * Build a copy of the string (in buf) with the
Packit Service f629e6
	 * escaped characters translated, and generate the regex
Packit Service f629e6
	 * from that.
Packit Service f629e6
	 */
Packit Service f629e6
	if (buf == NULL) {
Packit Service f629e6
		emalloc(buf, char *, len + 1, "make_regexp");
Packit Service f629e6
		buflen = len;
Packit Service f629e6
	} else if (len > buflen) {
Packit Service f629e6
		erealloc(buf, char *, len + 1, "make_regexp");
Packit Service f629e6
		buflen = len;
Packit Service f629e6
	}
Packit Service f629e6
	dest = buf;
Packit Service f629e6
Packit Service f629e6
	while (src < end) {
Packit Service f629e6
		if (gawk_mb_cur_max > 1 && ! is_multibyte) {
Packit Service f629e6
			/* The previous byte is a singlebyte character, or last byte
Packit Service f629e6
			   of a multibyte character.  We check the next character.  */
Packit Service f629e6
			is_multibyte = mbrlen(src, end - src, &mbs);
Packit Service f629e6
			if (   is_multibyte == 1
Packit Service f629e6
			    || is_multibyte == (size_t) -1
Packit Service f629e6
			    || is_multibyte == (size_t) -2
Packit Service f629e6
			    || is_multibyte == 0) {
Packit Service f629e6
				/* We treat it as a single-byte character.  */
Packit Service f629e6
				is_multibyte = 0;
Packit Service f629e6
			}
Packit Service f629e6
		}
Packit Service f629e6
Packit Service f629e6
		/* We skip multibyte character, since it must not be a special
Packit Service f629e6
		   character.  */
Packit Service f629e6
		if ((gawk_mb_cur_max == 1 || ! is_multibyte) &&
Packit Service f629e6
		    (*src == '\\')) {
Packit Service f629e6
			c = *++src;
Packit Service f629e6
			switch (c) {
Packit Service f629e6
			case 'a':
Packit Service f629e6
			case 'b':
Packit Service f629e6
			case 'f':
Packit Service f629e6
			case 'n':
Packit Service f629e6
			case 'r':
Packit Service f629e6
			case 't':
Packit Service f629e6
			case 'v':
Packit Service f629e6
			case 'x':
Packit Service f629e6
			case '0':
Packit Service f629e6
			case '1':
Packit Service f629e6
			case '2':
Packit Service f629e6
			case '3':
Packit Service f629e6
			case '4':
Packit Service f629e6
			case '5':
Packit Service f629e6
			case '6':
Packit Service f629e6
			case '7':
Packit Service f629e6
				c2 = parse_escape(&src;;
Packit Service f629e6
				if (c2 < 0)
Packit Service f629e6
					cant_happen();
Packit Service f629e6
				/*
Packit Service f629e6
				 * Unix awk treats octal (and hex?) chars
Packit Service f629e6
				 * literally in re's, so escape regexp
Packit Service f629e6
				 * metacharacters.
Packit Service f629e6
				 */
Packit Service f629e6
				if (do_traditional
Packit Service f629e6
				    && ! do_posix
Packit Service f629e6
				    && (isdigit(c) || c == 'x')
Packit Service f629e6
				    && strchr("()|*+?.^$\\[]", c2) != NULL)
Packit Service f629e6
					*dest++ = '\\';
Packit Service f629e6
				*dest++ = (char) c2;
Packit Service f629e6
				break;
Packit Service f629e6
			case '8':
Packit Service f629e6
			case '9':	/* a\9b not valid */
Packit Service f629e6
				*dest++ = c;
Packit Service f629e6
				src++;
Packit Service f629e6
				break;
Packit Service f629e6
			case 'y':	/* normally \b */
Packit Service f629e6
				/* gnu regex op */
Packit Service f629e6
				if (! do_traditional) {
Packit Service f629e6
					*dest++ = '\\';
Packit Service f629e6
					*dest++ = 'b';
Packit Service f629e6
					src++;
Packit Service f629e6
					break;
Packit Service f629e6
				}
Packit Service f629e6
				/* else, fall through */
Packit Service f629e6
			default:
Packit Service f629e6
				*dest++ = '\\';
Packit Service f629e6
				*dest++ = (char) c;
Packit Service f629e6
				src++;
Packit Service f629e6
				break;
Packit Service f629e6
			} /* switch */
Packit Service f629e6
		} else {
Packit Service f629e6
			c = *src;
Packit Service f629e6
			*dest++ = *src++;	/* not '\\' */
Packit Service f629e6
		}
Packit Service f629e6
		if (gawk_mb_cur_max > 1 && is_multibyte)
Packit Service f629e6
			is_multibyte--;
Packit Service f629e6
	} /* while */
Packit Service f629e6
Packit Service f629e6
	*dest = '\0';
Packit Service f629e6
	len = dest - buf;
Packit Service f629e6
Packit Service f629e6
	ezalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
Packit Service f629e6
	rp->pat.allocated = 0;	/* regex will allocate the buffer */
Packit Service f629e6
	emalloc(rp->pat.fastmap, char *, 256, "make_regexp");
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * Lo these many years ago, had I known what a P.I.T.A. IGNORECASE
Packit Service f629e6
	 * was going to turn out to be, I wouldn't have bothered with it.
Packit Service f629e6
	 *
Packit Service f629e6
	 * In the case where we have a multibyte character set, we have no
Packit Service f629e6
	 * choice but to use RE_ICASE, since the casetable is for single-byte
Packit Service f629e6
	 * character sets only.
Packit Service f629e6
	 *
Packit Service f629e6
	 * On the other hand, if we do have a single-byte character set,
Packit Service f629e6
	 * using the casetable should give  a performance improvement, since
Packit Service f629e6
	 * it's computed only once, not each time a regex is compiled.  We
Packit Service f629e6
	 * also think it's probably better for portability.  See the
Packit Service f629e6
	 * discussion by the definition of casetable[] in eval.c.
Packit Service f629e6
	 */
Packit Service f629e6
Packit Service f629e6
	ignorecase = !! ignorecase;	/* force to 1 or 0 */
Packit Service f629e6
	if (ignorecase) {
Packit Service f629e6
		if (gawk_mb_cur_max > 1) {
Packit Service f629e6
			syn |= RE_ICASE;
Packit Service f629e6
			rp->pat.translate = NULL;
Packit Service f629e6
		} else {
Packit Service f629e6
			syn &= ~RE_ICASE;
Packit Service f629e6
			rp->pat.translate = (RE_TRANSLATE_TYPE) casetable;
Packit Service f629e6
		}
Packit Service f629e6
	} else {
Packit Service f629e6
		rp->pat.translate = NULL;
Packit Service f629e6
		syn &= ~RE_ICASE;
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	/* initialize dfas to hold syntax */
Packit Service f629e6
	if (first) {
Packit Service f629e6
		first = false;
Packit Service f629e6
		dfaregs[0] = dfaalloc();
Packit Service f629e6
		dfaregs[1] = dfaalloc();
Packit Service f629e6
		dfasyntax(dfaregs[0], & localeinfo, syn, DFA_ANCHOR);
Packit Service f629e6
		dfasyntax(dfaregs[1], & localeinfo, syn | RE_ICASE, DFA_ANCHOR);
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	re_set_syntax(syn);
Packit Service f629e6
Packit Service f629e6
	if ((rerr = re_compile_pattern(buf, len, &(rp->pat))) != NULL) {
Packit Service f629e6
		refree(rp);
Packit Service f629e6
		if (! canfatal) {
Packit Service f629e6
			/* rerr already gettextized inside regex routines */
Packit Service f629e6
			error("%s: /%s/", rerr, buf);
Packit Service f629e6
 			return NULL;
Packit Service f629e6
		}
Packit Service f629e6
		fatal("%s: /%s/", rerr, buf);
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	/* gack. this must be done *after* re_compile_pattern */
Packit Service f629e6
	rp->pat.newline_anchor = false; /* don't get \n in middle of string */
Packit Service f629e6
	if (dfa && ! no_dfa) {
Packit Service f629e6
		rp->dfareg = dfaalloc();
Packit Service f629e6
		dfacopysyntax(rp->dfareg, dfaregs[ignorecase]);
Packit Service f629e6
		dfacomp(buf, len, rp->dfareg, true);
Packit Service f629e6
	} else
Packit Service f629e6
		rp->dfareg = NULL;
Packit Service f629e6
Packit Service f629e6
	/* Additional flags that help with RS as regexp. */
Packit Service f629e6
	for (i = 0; i < len; i++) {
Packit Service f629e6
		if (strchr(metas, buf[i]) != NULL) {
Packit Service f629e6
			rp->has_meta = true;
Packit Service f629e6
			break;
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	for (i = len - 1; i >= 0; i--) {
Packit Service f629e6
		if (strchr("*+|?", buf[i]) != NULL) {
Packit Service f629e6
			rp->maybe_long = true;
Packit Service f629e6
			break;
Packit Service f629e6
		}
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	return rp;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* research --- do a regexp search. use dfa if possible */
Packit Service f629e6
Packit Service f629e6
int
Packit Service f629e6
research(Regexp *rp, char *str, int start,
Packit Service f629e6
	 size_t len, int flags)
Packit Service f629e6
{
Packit Service f629e6
	const char *ret = str;
Packit Service f629e6
	bool try_backref = false;
Packit Service f629e6
	int need_start;
Packit Service f629e6
	int no_bol;
Packit Service f629e6
	int res;
Packit Service f629e6
Packit Service f629e6
	need_start = ((flags & RE_NEED_START) != 0);
Packit Service f629e6
	no_bol = ((flags & RE_NO_BOL) != 0);
Packit Service f629e6
Packit Service f629e6
	if (no_bol)
Packit Service f629e6
		rp->pat.not_bol = 1;
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * Always do dfa search if can; if it fails, then even if
Packit Service f629e6
	 * need_start is true, we won't bother with the regex search.
Packit Service f629e6
	 *
Packit Service f629e6
	 * The dfa matcher doesn't have a no_bol flag, so don't bother
Packit Service f629e6
	 * trying it in that case.
Packit Service f629e6
	 *
Packit Service f629e6
	 * 7/2008: Skip the dfa matcher if need_start. The dfa matcher
Packit Service f629e6
	 * has bugs in certain multibyte cases and it's too difficult
Packit Service f629e6
	 * to try to special case things.
Packit Service f629e6
	 * 7/2017: Apparently there are some cases where DFA gets
Packit Service f629e6
	 * stuck, even in the C locale, so we use dfa only if not need_start.
Packit Service f629e6
	 *
Packit Service f629e6
	 * Should that issue ever get resolved, note this comment:
Packit Service f629e6
	 *
Packit Service f629e6
	 * 7/2016: The dfa matcher can't handle a case where searching
Packit Service f629e6
	 * starts in the middle of a string, so don't bother trying it
Packit Service f629e6
	 * in that case.
Packit Service f629e6
	 *	if (rp->dfa && ! no_bol && start == 0) ...
Packit Service f629e6
	 */
Packit Service f629e6
	if (rp->dfareg != NULL && ! no_bol && ! need_start) {
Packit Service f629e6
		struct dfa *superset = dfasuperset(rp->dfareg);
Packit Service f629e6
		if (superset)
Packit Service f629e6
			ret = dfaexec(superset, str+start, str+start+len,
Packit Service f629e6
							true, NULL, NULL);
Packit Service f629e6
Packit Service f629e6
		if (ret && (! need_start
Packit Service f629e6
				|| (! superset && dfaisfast(rp->dfareg))))
Packit Service f629e6
			ret = dfaexec(rp->dfareg, str+start, str+start+len,
Packit Service f629e6
						true, NULL, &try_backref);
Packit Service f629e6
	}
Packit Service f629e6
Packit Service f629e6
	if (ret) {
Packit Service f629e6
		if (   rp->dfareg == NULL
Packit Service f629e6
			|| start != 0
Packit Service f629e6
			|| no_bol
Packit Service f629e6
			|| need_start
Packit Service f629e6
			|| try_backref) {
Packit Service f629e6
			/*
Packit Service f629e6
			 * Passing NULL as last arg speeds up search for cases
Packit Service f629e6
			 * where we don't need the start/end info.
Packit Service f629e6
			 */
Packit Service f629e6
			res = re_search(&(rp->pat), str, start+len,
Packit Service f629e6
				start, len, need_start ? &(rp->regs) : NULL);
Packit Service f629e6
		} else
Packit Service f629e6
			res = 1;
Packit Service f629e6
	} else
Packit Service f629e6
		res = -1;
Packit Service f629e6
Packit Service f629e6
	rp->pat.not_bol = 0;
Packit Service f629e6
	return res;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* refree --- free up the dynamic memory used by a compiled regexp */
Packit Service f629e6
Packit Service f629e6
void
Packit Service f629e6
refree(Regexp *rp)
Packit Service f629e6
{
Packit Service f629e6
	if (rp == NULL)
Packit Service f629e6
		return;
Packit Service f629e6
	rp->pat.translate = NULL;
Packit Service f629e6
	regfree(& rp->pat);
Packit Service f629e6
	if (rp->regs.start)
Packit Service f629e6
		free(rp->regs.start);
Packit Service f629e6
	if (rp->regs.end)
Packit Service f629e6
		free(rp->regs.end);
Packit Service f629e6
	if (rp->dfareg != NULL) {
Packit Service f629e6
		dfafree(rp->dfareg);
Packit Service f629e6
		free(rp->dfareg);
Packit Service f629e6
	}
Packit Service f629e6
	efree(rp);
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* dfaerror --- print an error message for the dfa routines */
Packit Service f629e6
Packit Service f629e6
void
Packit Service f629e6
dfaerror(const char *s)
Packit Service f629e6
{
Packit Service f629e6
	fatal("%s", s);
Packit Service f629e6
	exit(EXIT_FATAL);	/* for DJGPP */
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* re_update --- recompile a dynamic regexp */
Packit Service f629e6
Packit Service f629e6
Regexp *
Packit Service f629e6
re_update(NODE *t)
Packit Service f629e6
{
Packit Service f629e6
	NODE *t1;
Packit Service f629e6
Packit Service f629e6
	if (t->type == Node_val && (t->flags & REGEX) != 0)
Packit Service f629e6
		return t->typed_re->re_reg[IGNORECASE];
Packit Service f629e6
Packit Service f629e6
	if ((t->re_flags & CONSTANT) != 0) {
Packit Service f629e6
		/* it's a constant, so just return it as is */
Packit Service f629e6
		assert(t->type == Node_regex);
Packit Service f629e6
		return t->re_reg[IGNORECASE];
Packit Service f629e6
	}
Packit Service f629e6
	t1 = t->re_exp;
Packit Service f629e6
	if (t->re_text != NULL) {
Packit Service f629e6
		/* if contents haven't changed, just return it */
Packit Service f629e6
		if (cmp_nodes(t->re_text, t1, true) == 0)
Packit Service f629e6
			return t->re_reg[IGNORECASE];
Packit Service f629e6
		/* things changed, fall through to recompile */
Packit Service f629e6
		unref(t->re_text);
Packit Service f629e6
	}
Packit Service f629e6
	/* get fresh copy of the text of the regexp */
Packit Service f629e6
	t->re_text = dupnode(t1);
Packit Service f629e6
Packit Service f629e6
	/* text changed */
Packit Service f629e6
Packit Service f629e6
	/* free old */
Packit Service f629e6
	if (t->re_reg[0] != NULL)
Packit Service f629e6
		refree(t->re_reg[0]);
Packit Service f629e6
	if (t->re_reg[1] != NULL)
Packit Service f629e6
		refree(t->re_reg[1]);
Packit Service f629e6
	if (t->re_cnt > 0)
Packit Service f629e6
		t->re_cnt++;
Packit Service f629e6
	if (t->re_cnt > 10)
Packit Service f629e6
		t->re_cnt = 0;
Packit Service f629e6
	if (t->re_text == NULL) {
Packit Service f629e6
		/* reset regexp text if needed */
Packit Service f629e6
		t1 = t->re_exp;
Packit Service f629e6
		unref(t->re_text);
Packit Service f629e6
		t->re_text = dupnode(t1);
Packit Service f629e6
	}
Packit Service f629e6
	/* compile it */
Packit Service f629e6
	t->re_reg[0] = make_regexp(t->re_text->stptr, t->re_text->stlen,
Packit Service f629e6
				false, t->re_cnt, true);
Packit Service f629e6
	t->re_reg[1] = make_regexp(t->re_text->stptr, t->re_text->stlen,
Packit Service f629e6
				true, t->re_cnt, true);
Packit Service f629e6
Packit Service f629e6
	return t->re_reg[IGNORECASE];
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* resetup --- choose what kind of regexps we match */
Packit Service f629e6
Packit Service f629e6
void
Packit Service f629e6
resetup()
Packit Service f629e6
{
Packit Service f629e6
	// init localeinfo for dfa
Packit Service f629e6
	init_localeinfo(& localeinfo);
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * Syntax bits: _that_ is yet another mind trip.  Recreational drugs
Packit Service f629e6
	 * are helpful for recovering from the experience.
Packit Service f629e6
	 *
Packit Service f629e6
	 *	Aharon Robbins <arnold@skeeve.com>
Packit Service f629e6
	 *	Sun, 21 Oct 2007 23:55:33 +0200
Packit Service f629e6
	 */
Packit Service f629e6
	if (do_posix)
Packit Service f629e6
		syn = RE_SYNTAX_POSIX_AWK;	/* strict POSIX re's */
Packit Service f629e6
	else if (do_traditional)
Packit Service f629e6
		syn = RE_SYNTAX_AWK;		/* traditional Unix awk re's */
Packit Service f629e6
	else
Packit Service f629e6
		syn = RE_SYNTAX_GNU_AWK;	/* POSIX re's + GNU ops */
Packit Service f629e6
Packit Service f629e6
	/*
Packit Service f629e6
	 * Interval expressions are now on by default, as POSIX is
Packit Service f629e6
	 * wide-spread enough that people want it. The do_intervals
Packit Service f629e6
	 * variable remains for use with --traditional.
Packit Service f629e6
	 */
Packit Service f629e6
	if (do_intervals)
Packit Service f629e6
		syn |= RE_INTERVALS | RE_INVALID_INTERVAL_ORD | RE_NO_BK_BRACES;
Packit Service f629e6
Packit Service f629e6
	(void) re_set_syntax(syn);
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* using_utf8 --- are we using utf8 */
Packit Service f629e6
Packit Service f629e6
bool
Packit Service f629e6
using_utf8(void)
Packit Service f629e6
{
Packit Service f629e6
	return localeinfo.using_utf8;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* reisstring --- return true if the RE match is a simple string match */
Packit Service f629e6
Packit Service f629e6
int
Packit Service f629e6
reisstring(const char *text, size_t len, Regexp *re, const char *buf)
Packit Service f629e6
{
Packit Service f629e6
	int res;
Packit Service f629e6
	const char *matched;
Packit Service f629e6
Packit Service f629e6
	/* simple checking for meta characters in re */
Packit Service f629e6
	if (re->has_meta)
Packit Service f629e6
		return false;	/* give up early, can't be string match */
Packit Service f629e6
Packit Service f629e6
	/* make accessable to gdb */
Packit Service f629e6
	matched = &buf[RESTART(re, buf)];
Packit Service f629e6
Packit Service f629e6
	res = (memcmp(text, matched, len) == 0);
Packit Service f629e6
Packit Service f629e6
	return res;
Packit Service f629e6
}
Packit Service f629e6
Packit Service f629e6
/* reflags2str --- make a regex flags value readable */
Packit Service f629e6
Packit Service f629e6
const char *
Packit Service f629e6
reflags2str(int flagval)
Packit Service f629e6
{
Packit Service f629e6
	static const struct flagtab values[] = {
Packit Service f629e6
		{ RE_BACKSLASH_ESCAPE_IN_LISTS, "RE_BACKSLASH_ESCAPE_IN_LISTS" },
Packit Service f629e6
		{ RE_BK_PLUS_QM, "RE_BK_PLUS_QM" },
Packit Service f629e6
		{ RE_CHAR_CLASSES, "RE_CHAR_CLASSES" },
Packit Service f629e6
		{ RE_CONTEXT_INDEP_ANCHORS, "RE_CONTEXT_INDEP_ANCHORS" },
Packit Service f629e6
		{ RE_CONTEXT_INDEP_OPS, "RE_CONTEXT_INDEP_OPS" },
Packit Service f629e6
		{ RE_CONTEXT_INVALID_OPS, "RE_CONTEXT_INVALID_OPS" },
Packit Service f629e6
		{ RE_DOT_NEWLINE, "RE_DOT_NEWLINE" },
Packit Service f629e6
		{ RE_DOT_NOT_NULL, "RE_DOT_NOT_NULL" },
Packit Service f629e6
		{ RE_HAT_LISTS_NOT_NEWLINE, "RE_HAT_LISTS_NOT_NEWLINE" },
Packit Service f629e6
		{ RE_INTERVALS, "RE_INTERVALS" },
Packit Service f629e6
		{ RE_LIMITED_OPS, "RE_LIMITED_OPS" },
Packit Service f629e6
		{ RE_NEWLINE_ALT, "RE_NEWLINE_ALT" },
Packit Service f629e6
		{ RE_NO_BK_BRACES, "RE_NO_BK_BRACES" },
Packit Service f629e6
		{ RE_NO_BK_PARENS, "RE_NO_BK_PARENS" },
Packit Service f629e6
		{ RE_NO_BK_REFS, "RE_NO_BK_REFS" },
Packit Service f629e6
		{ RE_NO_BK_VBAR, "RE_NO_BK_VBAR" },
Packit Service f629e6
		{ RE_NO_EMPTY_RANGES, "RE_NO_EMPTY_RANGES" },
Packit Service f629e6
		{ RE_UNMATCHED_RIGHT_PAREN_ORD, "RE_UNMATCHED_RIGHT_PAREN_ORD" },
Packit Service f629e6
		{ RE_NO_POSIX_BACKTRACKING, "RE_NO_POSIX_BACKTRACKING" },
Packit Service f629e6
		{ RE_NO_GNU_OPS, "RE_NO_GNU_OPS" },
Packit Service f629e6
		{ RE_INVALID_INTERVAL_ORD, "RE_INVALID_INTERVAL_ORD" },
Packit Service f629e6
		{ RE_ICASE, "RE_ICASE" },
Packit Service f629e6
		{ RE_CARET_ANCHORS_HERE, "RE_CARET_ANCHORS_HERE" },
Packit Service f629e6
		{ RE_CONTEXT_INVALID_DUP, "RE_CONTEXT_INVALID_DUP" },
Packit Service f629e6
		{ RE_NO_SUB, "RE_NO_SUB" },
Packit Service f629e6
		{ 0,	NULL },
Packit Service f629e6
	};
Packit Service f629e6
Packit Service f629e6
	if (flagval == RE_SYNTAX_EMACS) /* == 0 */
Packit Service f629e6
		return "RE_SYNTAX_EMACS";
Packit Service f629e6
Packit Service f629e6
	return genflags2str(flagval, values);
Packit Service f629e6
}
/*
 * dfawarn --- warning hook for the dfa routines
 *
 * The dfa routines call dfawarn() when a regex is compiled, so a
 * program that uses them must supply one.  The message is ignored here.
 */

void
dfawarn(const char *dfa_warning)
{
	/*
	 * This routine does nothing, since gawk does its own
	 * (better) check for bad [[:foo:]] syntax.
	 */
	(void) dfa_warning;	/* deliberately unused */
}
/*
 * check_bracket_exp --- look for /[:space:]/ that should be /[[:space:]]/
 *
 * Scans the LENGTH bytes at S for bracket expressions.  When a complete
 * bracket expression consists of nothing but a character class name,
 * e.g. "[:alpha:]", a warning is issued, at most once per class name.
 * Scanning stops at the first unterminated bracket expression.  The
 * string itself is only read, never modified.
 */

static void
check_bracket_exp(char *s, size_t length)
{
	static struct reclass {
		const char *name;
		size_t len;
		bool warned;	/* this class has been warned about already */
	} classes[] = {
		/*
		 * Ordered by what we hope is frequency,
		 * since it's linear searched.
		 */
		{ "[:alpha:]", 9, false },
		{ "[:digit:]", 9, false },
		{ "[:alnum:]", 9, false },
		{ "[:upper:]", 9, false },
		{ "[:lower:]", 9, false },
		{ "[:space:]", 9, false },
		{ "[:xdigit:]", 10, false },
		{ "[:punct:]", 9, false },
		{ "[:print:]", 9, false },
		{ "[:graph:]", 9, false },
		{ "[:cntrl:]", 9, false },
		{ "[:blank:]", 9, false },
		{ NULL, 0, false }
	};
	const char *end = s + length;
	const char *sp = s;

	while (sp < end) {
		const char *open;	/* the '[' that starts this expression */
		int depth = 1;		/* brackets opened and not yet closed */
		int i;

		open = memchr(sp, '[', (size_t) (end - sp));
		if (open == NULL)
			return;

		/* find the matching ']'; a NUL byte ends the scan as well */
		for (sp = open + 1; sp < end && *sp != '\0'; sp++) {
			if (*sp == '[') {
				depth++;
				continue;
			}
			if (*sp != ']')
				continue;

			/*
			 * ] as first char after open [ is skipped
			 * \] is skipped
			 * [^]] is skipped
			 */
			if (sp[-1] == '[' || sp[-1] == '\\')
				continue;
			if ((sp - open) >= 2
			    && sp[-1] == '^' && sp[-2] == '[')
				continue;

			if (--depth == 0) {
				sp++;	/* skip past ']' */
				break;
			}
		}

		if (depth > 0)	/* bad regex, give up */
			return;

		/* open .. sp now spans one complete bracket expression */
		for (i = 0; classes[i].name != NULL; i++) {
			if (classes[i].warned)
				continue;
			if (   classes[i].len == (size_t) (sp - open)
			    && memcmp(open, classes[i].name, classes[i].len) == 0) {
				/* %.*s takes its precision as an int */
				warning(_("regexp component `%.*s' should probably be `[%.*s]'"),
						(int) classes[i].len, open,
						(int) classes[i].len, open);
				classes[i].warned = true;
				break;
			}
		}
	}
}