Tree - source-git/gawk - CentOS Git server

source-git / gawk

Blame re.c

Blob History Raw

Packit	575503	`/*`
Packit	575503	`* re.c - compile regular expressions.`
Packit	575503	`*/`
Packit	575503
Packit	575503	`/*`
Packit	575503	`* Copyright (C) 1991-2017 the Free Software Foundation, Inc.`
Packit	575503	`*`
Packit	575503	`* This file is part of GAWK, the GNU implementation of the`
Packit	575503	`* AWK Programming Language.`
Packit	575503	`*`
Packit	575503	`* GAWK is free software; you can redistribute it and/or modify`
Packit	575503	`* it under the terms of the GNU General Public License as published by`
Packit	575503	`* the Free Software Foundation; either version 3 of the License, or`
Packit	575503	`* (at your option) any later version.`
Packit	575503	`*`
Packit	575503	`* GAWK is distributed in the hope that it will be useful,`
Packit	575503	`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
Packit	575503	`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
Packit	575503	`* GNU General Public License for more details.`
Packit	575503	`*`
Packit	575503	`* You should have received a copy of the GNU General Public License`
Packit	575503	`* along with this program; if not, write to the Free Software`
Packit	575503	`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA`
Packit	575503	`*/`
Packit	575503
Packit	575503	`#include "awk.h"`
Packit	575503
Packit	575503	`#include "localeinfo.h"`
Packit	575503
Packit	575503	`static reg_syntax_t syn;`
Packit	575503	`static void check_bracket_exp(char *s, size_t len);`
Packit	575503	`const char *regexflags2str(int flags);`
Packit	575503
Packit	575503	`static struct localeinfo localeinfo;`
Packit	575503
Packit	575503	`/* make_regexp --- generate compiled regular expressions */`
Packit	575503
Packit	575503	`Regexp *`
Packit	575503	`make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)`
Packit	575503	`{`
Packit	575503	`static char metas[] = ".*+(){}[]\|?^$\\";`
Packit	575503	`Regexp *rp;`
Packit	575503	`const char *rerr;`
Packit	575503	`const char *src = s;`
Packit	575503	`static char *buf = NULL;`
Packit	575503	`static size_t buflen;`
Packit	575503	`const char *end = s + len;`
Packit	575503	`char *dest;`
Packit	575503	`int c, c2;`
Packit	575503	`static bool first = true;`
Packit	575503	`static bool no_dfa = false;`
Packit	575503	`int i;`
Packit	575503	`static struct dfa* dfaregs[2] = { NULL, NULL };`
Packit	575503
Packit	575503	`/*`
Packit	575503	`* The number of bytes in the current multibyte character.`
Packit	575503	`* It is 0, when the current character is a singlebyte character.`
Packit	575503	`*/`
Packit	575503	`size_t is_multibyte = 0;`
Packit	575503	`mbstate_t mbs;`
Packit	575503
Packit	575503	`memset(&mbs, 0, sizeof(mbstate_t)); /* Initialize. */`
Packit	575503
Packit	575503	`if (first) {`
Packit	575503	`/* for debugging and testing */`
Packit	575503	`no_dfa = (getenv("GAWK_NO_DFA") != NULL);`
Packit	575503	`/* don't set first to false here, we do it below */`
Packit	575503	`}`
Packit	575503
Packit	575503	`/* always check */`
Packit	575503	`check_bracket_exp((char *) s, len);`
Packit	575503
Packit	575503	`/* Handle escaped characters first. */`
Packit	575503
Packit	575503	`/*`
Packit	575503	`* Build a copy of the string (in buf) with the`
Packit	575503	`* escaped characters translated, and generate the regex`
Packit	575503	`* from that.`
Packit	575503	`*/`
Packit	575503	`if (buf == NULL) {`
Packit	575503	`emalloc(buf, char *, len + 1, "make_regexp");`
Packit	575503	`buflen = len;`
Packit	575503	`} else if (len > buflen) {`
Packit	575503	`erealloc(buf, char *, len + 1, "make_regexp");`
Packit	575503	`buflen = len;`
Packit	575503	`}`
Packit	575503	`dest = buf;`
Packit	575503
Packit	575503	`while (src < end) {`
Packit	575503	`if (gawk_mb_cur_max > 1 && ! is_multibyte) {`
Packit	575503	`/* The previous byte is a singlebyte character, or last byte`
Packit	575503	`of a multibyte character. We check the next character. */`
Packit	575503	`is_multibyte = mbrlen(src, end - src, &mbs);`
Packit	575503	`if ( is_multibyte == 1`
Packit	575503	`\|\| is_multibyte == (size_t) -1`
Packit	575503	`\|\| is_multibyte == (size_t) -2`
Packit	575503	`\|\| is_multibyte == 0) {`
Packit	575503	`/* We treat it as a single-byte character. */`
Packit	575503	`is_multibyte = 0;`
Packit	575503	`}`
Packit	575503	`}`
Packit	575503
Packit	575503	`/* We skip multibyte character, since it must not be a special`
Packit	575503	`character. */`
Packit	575503	`if ((gawk_mb_cur_max == 1 \|\| ! is_multibyte) &&`
Packit	575503	`(*src == '\\')) {`
Packit	575503	`c = *++src;`
Packit	575503	`switch (c) {`
Packit	575503	`case 'a':`
Packit	575503	`case 'b':`
Packit	575503	`case 'f':`
Packit	575503	`case 'n':`
Packit	575503	`case 'r':`
Packit	575503	`case 't':`
Packit	575503	`case 'v':`
Packit	575503	`case 'x':`
Packit	575503	`case '0':`
Packit	575503	`case '1':`
Packit	575503	`case '2':`
Packit	575503	`case '3':`
Packit	575503	`case '4':`
Packit	575503	`case '5':`
Packit	575503	`case '6':`
Packit	575503	`case '7':`
Packit	575503	`c2 = parse_escape(&src;;`
Packit	575503	`if (c2 < 0)`
Packit	575503	`cant_happen();`
Packit	575503	`/*`
Packit	575503	`* Unix awk treats octal (and hex?) chars`
Packit	575503	`* literally in re's, so escape regexp`
Packit	575503	`* metacharacters.`
Packit	575503	`*/`
Packit	575503	`if (do_traditional`
Packit	575503	`&& ! do_posix`
Packit	575503	`&& (isdigit(c) \|\| c == 'x')`
Packit	575503	`&& strchr("()\|*+?.^$\\[]", c2) != NULL)`
Packit	575503	`*dest++ = '\\';`
Packit	575503	`*dest++ = (char) c2;`
Packit	575503	`break;`
Packit	575503	`case '8':`
Packit	575503	`case '9': /* a\9b not valid */`
Packit	575503	`*dest++ = c;`
Packit	575503	`src++;`
Packit	575503	`break;`
Packit	575503	`case 'y': /* normally \b */`
Packit	575503	`/* gnu regex op */`
Packit	575503	`if (! do_traditional) {`
Packit	575503	`*dest++ = '\\';`
Packit	575503	`*dest++ = 'b';`
Packit	575503	`src++;`
Packit	575503	`break;`
Packit	575503	`}`
Packit	575503	`/* else, fall through */`
Packit	575503	`default:`
Packit	575503	`*dest++ = '\\';`
Packit	575503	`*dest++ = (char) c;`
Packit	575503	`src++;`
Packit	575503	`break;`
Packit	575503	`} /* switch */`
Packit	575503	`} else {`
Packit	575503	`c = *src;`
Packit	575503	`dest++ = src++; /* not '\\' */`
Packit	575503	`}`
Packit	575503	`if (gawk_mb_cur_max > 1 && is_multibyte)`
Packit	575503	`is_multibyte--;`
Packit	575503	`} /* while */`
Packit	575503
Packit	575503	`*dest = '\0';`
Packit	575503	`len = dest - buf;`
Packit	575503
Packit	575503	`ezalloc(rp, Regexp , sizeof(rp), "make_regexp");`
Packit	575503	`rp->pat.allocated = 0; /* regex will allocate the buffer */`
Packit	575503	`emalloc(rp->pat.fastmap, char *, 256, "make_regexp");`
Packit	575503
Packit	575503	`/*`
Packit	575503	`* Lo these many years ago, had I known what a P.I.T.A. IGNORECASE`
Packit	575503	`* was going to turn out to be, I wouldn't have bothered with it.`
Packit	575503	`*`
Packit	575503	`* In the case where we have a multibyte character set, we have no`
Packit	575503	`* choice but to use RE_ICASE, since the casetable is for single-byte`
Packit	575503	`* character sets only.`
Packit	575503	`*`
Packit	575503	`* On the other hand, if we do have a single-byte character set,`
Packit	575503	`* using the casetable should give a performance improvement, since`
Packit	575503	`* it's computed only once, not each time a regex is compiled. We`
Packit	575503	`* also think it's probably better for portability. See the`
Packit	575503	`* discussion by the definition of casetable[] in eval.c.`
Packit	575503	`*/`
Packit	575503
Packit	575503	`ignorecase = !! ignorecase; /* force to 1 or 0 */`
Packit	575503	`if (ignorecase) {`
Packit	575503	`if (gawk_mb_cur_max > 1) {`
Packit	575503	`syn \|= RE_ICASE;`
Packit	575503	`rp->pat.translate = NULL;`
Packit	575503	`} else {`
Packit	575503	`syn &= ~RE_ICASE;`
Packit	575503	`rp->pat.translate = (RE_TRANSLATE_TYPE) casetable;`
Packit	575503	`}`
Packit	575503	`} else {`
Packit	575503	`rp->pat.translate = NULL;`
Packit	575503	`syn &= ~RE_ICASE;`
Packit	575503	`}`
Packit	575503
Packit	575503	`/* initialize dfas to hold syntax */`
Packit	575503	`if (first) {`
Packit	575503	`first = false;`
Packit	575503	`dfaregs[0] = dfaalloc();`
Packit	575503	`dfaregs[1] = dfaalloc();`
Packit	575503	`dfasyntax(dfaregs[0], & localeinfo, syn, DFA_ANCHOR);`
Packit	575503	`dfasyntax(dfaregs[1], & localeinfo, syn \| RE_ICASE, DFA_ANCHOR);`
Packit	575503	`}`
Packit	575503
Packit	575503	`re_set_syntax(syn);`
Packit	575503
Packit	575503	`if ((rerr = re_compile_pattern(buf, len, &(rp->pat))) != NULL) {`
Packit	575503	`refree(rp);`
Packit	575503	`if (! canfatal) {`
Packit	575503	`/* rerr already gettextized inside regex routines */`
Packit	575503	`error("%s: /%s/", rerr, buf);`
Packit	575503	`return NULL;`
Packit	575503	`}`
Packit	575503	`fatal("%s: /%s/", rerr, buf);`
Packit	575503	`}`
Packit	575503
Packit	575503	`/* gack. this must be done after re_compile_pattern */`
Packit	575503	`rp->pat.newline_anchor = false; /* don't get \n in middle of string */`
Packit	575503	`if (dfa && ! no_dfa) {`
Packit	575503	`rp->dfareg = dfaalloc();`
Packit	575503	`dfacopysyntax(rp->dfareg, dfaregs[ignorecase]);`
Packit	575503	`dfacomp(buf, len, rp->dfareg, true);`
Packit	575503	`} else`
Packit	575503	`rp->dfareg = NULL;`
Packit	575503
Packit	575503	`/* Additional flags that help with RS as regexp. */`
Packit	575503	`for (i = 0; i < len; i++) {`
Packit	575503	`if (strchr(metas, buf[i]) != NULL) {`
Packit	575503	`rp->has_meta = true;`
Packit	575503	`break;`
Packit	575503	`}`
Packit	575503	`}`
Packit	575503
Packit	575503	`for (i = len - 1; i >= 0; i--) {`
Packit	575503	`if (strchr("*+\|?", buf[i]) != NULL) {`
Packit	575503	`rp->maybe_long = true;`
Packit	575503	`break;`
Packit	575503	`}`
Packit	575503	`}`
Packit	575503
Packit	575503	`return rp;`
Packit	575503	`}`
Packit	575503
Packit	575503	`/* research --- do a regexp search. use dfa if possible */`
Packit	575503
Packit	575503	`int`
Packit	575503	`research(Regexp rp, char str, int start,`
Packit	575503	`size_t len, int flags)`
Packit	575503	`{`
Packit	575503	`const char *ret = str;`
Packit	575503	`bool try_backref = false;`
Packit	575503	`int need_start;`
Packit	575503	`int no_bol;`
Packit	575503	`int res;`
Packit	575503
Packit	575503	`need_start = ((flags & RE_NEED_START) != 0);`
Packit	575503	`no_bol = ((flags & RE_NO_BOL) != 0);`
Packit	575503
Packit	575503	`if (no_bol)`
Packit	575503	`rp->pat.not_bol = 1;`
Packit	575503
Packit	575503	`/*`
Packit	575503	`* Always do dfa search if can; if it fails, then even if`
Packit	575503	`* need_start is true, we won't bother with the regex search.`
Packit	575503	`*`
Packit	575503	`* The dfa matcher doesn't have a no_bol flag, so don't bother`
Packit	575503	`* trying it in that case.`
Packit	575503	`*`
Packit	575503	`* 7/2008: Skip the dfa matcher if need_start. The dfa matcher`
Packit	575503	`* has bugs in certain multibyte cases and it's too difficult`
Packit	575503	`* to try to special case things.`
Packit	575503	`* 7/2017: Apparently there are some cases where DFA gets`
Packit	575503	`* stuck, even in the C locale, so we use dfa only if not need_start.`
Packit	575503	`*`
Packit	575503	`* Should that issue ever get resolved, note this comment:`
Packit	575503	`*`
Packit	575503	`* 7/2016: The dfa matcher can't handle a case where searching`
Packit	575503	`* starts in the middle of a string, so don't bother trying it`
Packit	575503	`* in that case.`
Packit	575503	`* if (rp->dfa && ! no_bol && start == 0) ...`
Packit	575503	`*/`
Packit	575503	`if (rp->dfareg != NULL && ! no_bol && ! need_start) {`
Packit	575503	`struct dfa *superset = dfasuperset(rp->dfareg);`
Packit	575503	`if (superset)`
Packit	575503	`ret = dfaexec(superset, str+start, str+start+len,`
Packit	575503	`true, NULL, NULL);`
Packit	575503
Packit	575503	`if (ret && (! need_start`
Packit	575503	`\|\| (! superset && dfaisfast(rp->dfareg))))`
Packit	575503	`ret = dfaexec(rp->dfareg, str+start, str+start+len,`
Packit	575503	`true, NULL, &try_backref);`
Packit	575503	`}`
Packit	575503
Packit	575503	`if (ret) {`
Packit	575503	`if ( rp->dfareg == NULL`
Packit	575503	`\|\| start != 0`
Packit	575503	`\|\| no_bol`
Packit	575503	`\|\| need_start`
Packit	575503	`\|\| try_backref) {`
Packit	575503	`/*`
Packit	575503	`* Passing NULL as last arg speeds up search for cases`
Packit	575503	`* where we don't need the start/end info.`
Packit	575503	`*/`
Packit	575503	`res = re_search(&(rp->pat), str, start+len,`
Packit	575503	`start, len, need_start ? &(rp->regs) : NULL);`
Packit	575503	`} else`
Packit	575503	`res = 1;`
Packit	575503	`} else`
Packit	575503	`res = -1;`
Packit	575503
Packit	575503	`rp->pat.not_bol = 0;`
Packit	575503	`return res;`
Packit	575503	`}`
Packit	575503
Packit	575503	`/* refree --- free up the dynamic memory used by a compiled regexp */`
Packit	575503
Packit	575503	`void`
Packit	575503	`refree(Regexp *rp)`
Packit	575503	`{`
Packit	575503	`if (rp == NULL)`
Packit	575503	`return;`
Packit	575503	`rp->pat.translate = NULL;`
Packit	575503	`regfree(& rp->pat);`
Packit	575503	`if (rp->regs.start)`
Packit	575503	`free(rp->regs.start);`
Packit	575503	`if (rp->regs.end)`
Packit	575503	`free(rp->regs.end);`
Packit	575503	`if (rp->dfareg != NULL) {`
Packit	575503	`dfafree(rp->dfareg);`
Packit	575503	`free(rp->dfareg);`
Packit	575503	`}`
Packit	575503	`efree(rp);`
Packit	575503	`}`
Packit	575503
Packit	575503	`/* dfaerror --- print an error message for the dfa routines */`
Packit	575503
Packit	575503	`void`
Packit	575503	`dfaerror(const char *s)`
Packit	575503	`{`
Packit	575503	`fatal("%s", s);`
Packit	575503	`exit(EXIT_FATAL); /* for DJGPP */`
Packit	575503	`}`
Packit	575503
Packit	575503	`/* re_update --- recompile a dynamic regexp */`
Packit	575503
Packit	575503	`Regexp *`
Packit	575503	`re_update(NODE *t)`
Packit	575503	`{`
Packit	575503	`NODE *t1;`
Packit	575503
Packit	575503	`if (t->type == Node_val && (t->flags & REGEX) != 0)`
Packit	575503	`return t->typed_re->re_reg[IGNORECASE];`
Packit	575503
Packit	575503	`if ((t->re_flags & CONSTANT) != 0) {`
Packit	575503	`/* it's a constant, so just return it as is */`
Packit	575503	`assert(t->type == Node_regex);`
Packit	575503	`return t->re_reg[IGNORECASE];`
Packit	575503	`}`
Packit	575503	`t1 = t->re_exp;`
Packit	575503	`if (t->re_text != NULL) {`
Packit	575503	`/* if contents haven't changed, just return it */`
Packit	575503	`if (cmp_nodes(t->re_text, t1, true) == 0)`
Packit	575503	`return t->re_reg[IGNORECASE];`
Packit	575503	`/* things changed, fall through to recompile */`
Packit	575503	`unref(t->re_text);`
Packit	575503	`}`
Packit	575503	`/* get fresh copy of the text of the regexp */`
Packit	575503	`t->re_text = dupnode(t1);`
Packit	575503
Packit	575503	`/* text changed */`
Packit	575503
Packit	575503	`/* free old */`
Packit	575503	`if (t->re_reg[0] != NULL)`
Packit	575503	`refree(t->re_reg[0]);`
Packit	575503	`if (t->re_reg[1] != NULL)`
Packit	575503	`refree(t->re_reg[1]);`
Packit	575503	`if (t->re_cnt > 0)`
Packit	575503	`t->re_cnt++;`
Packit	575503	`if (t->re_cnt > 10)`
Packit	575503	`t->re_cnt = 0;`
Packit	575503	`if (t->re_text == NULL) {`
Packit	575503	`/* reset regexp text if needed */`
Packit	575503	`t1 = t->re_exp;`
Packit	575503	`unref(t->re_text);`
Packit	575503	`t->re_text = dupnode(t1);`
Packit	575503	`}`
Packit	575503	`/* compile it */`
Packit	575503	`t->re_reg[0] = make_regexp(t->re_text->stptr, t->re_text->stlen,`
Packit	575503	`false, t->re_cnt, true);`
Packit	575503	`t->re_reg[1] = make_regexp(t->re_text->stptr, t->re_text->stlen,`
Packit	575503	`true, t->re_cnt, true);`
Packit	575503
Packit	575503	`return t->re_reg[IGNORECASE];`
Packit	575503	`}`
Packit	575503
Packit	575503	`/* resetup --- choose what kind of regexps we match */`
Packit	575503
Packit	575503	`void`
Packit	575503	`resetup()`
Packit	575503	`{`
Packit	575503	`// init localeinfo for dfa`
Packit	575503	`init_localeinfo(& localeinfo);`
Packit	575503
Packit	575503	`/*`
Packit	575503	`* Syntax bits: _that_ is yet another mind trip. Recreational drugs`
Packit	575503	`* are helpful for recovering from the experience.`
Packit	575503	`*`
Packit	575503	`* Aharon Robbins <arnold@skeeve.com>`
Packit	575503	`* Sun, 21 Oct 2007 23:55:33 +0200`
Packit	575503	`*/`
Packit	575503	`if (do_posix)`
Packit	575503	`syn = RE_SYNTAX_POSIX_AWK; /* strict POSIX re's */`
Packit	575503	`else if (do_traditional)`
Packit	575503	`syn = RE_SYNTAX_AWK; /* traditional Unix awk re's */`
Packit	575503	`else`
Packit	575503	`syn = RE_SYNTAX_GNU_AWK; /* POSIX re's + GNU ops */`
Packit	575503
Packit	575503	`/*`
Packit	575503	`* Interval expressions are now on by default, as POSIX is`
Packit	575503	`* wide-spread enough that people want it. The do_intervals`
Packit	575503	`* variable remains for use with --traditional.`
Packit	575503	`*/`
Packit	575503	`if (do_intervals)`
Packit	575503	`syn \|= RE_INTERVALS \| RE_INVALID_INTERVAL_ORD \| RE_NO_BK_BRACES;`
Packit	575503
Packit	575503	`(void) re_set_syntax(syn);`
Packit	575503	`}`
Packit	575503
Packit	575503	`/* using_utf8 --- are we using utf8 */`
Packit	575503
Packit	575503	`bool`
Packit	575503	`using_utf8(void)`
Packit	575503	`{`
Packit	575503	`return localeinfo.using_utf8;`
Packit	575503	`}`
Packit	575503
Packit	575503	`/* reisstring --- return true if the RE match is a simple string match */`
Packit	575503
Packit	575503	`int`
Packit	575503	`reisstring(const char text, size_t len, Regexp re, const char *buf)`
Packit	575503	`{`
Packit	575503	`int res;`
Packit	575503	`const char *matched;`
Packit	575503
Packit	575503	`/* simple checking for meta characters in re */`
Packit	575503	`if (re->has_meta)`
Packit	575503	`return false; /* give up early, can't be string match */`
Packit	575503
Packit	575503	`/* make accessable to gdb */`
Packit	575503	`matched = &buf[RESTART(re, buf)];`
Packit	575503
Packit	575503	`res = (memcmp(text, matched, len) == 0);`
Packit	575503
Packit	575503	`return res;`
Packit	575503	`}`
Packit	575503
Packit	575503	`/* reflags2str --- make a regex flags value readable */`
Packit	575503
Packit	575503	`const char *`
Packit	575503	`reflags2str(int flagval)`
Packit	575503	`{`
Packit	575503	`static const struct flagtab values[] = {`
Packit	575503	`{ RE_BACKSLASH_ESCAPE_IN_LISTS, "RE_BACKSLASH_ESCAPE_IN_LISTS" },`
Packit	575503	`{ RE_BK_PLUS_QM, "RE_BK_PLUS_QM" },`
Packit	575503	`{ RE_CHAR_CLASSES, "RE_CHAR_CLASSES" },`
Packit	575503	`{ RE_CONTEXT_INDEP_ANCHORS, "RE_CONTEXT_INDEP_ANCHORS" },`
Packit	575503	`{ RE_CONTEXT_INDEP_OPS, "RE_CONTEXT_INDEP_OPS" },`
Packit	575503	`{ RE_CONTEXT_INVALID_OPS, "RE_CONTEXT_INVALID_OPS" },`
Packit	575503	`{ RE_DOT_NEWLINE, "RE_DOT_NEWLINE" },`
Packit	575503	`{ RE_DOT_NOT_NULL, "RE_DOT_NOT_NULL" },`
Packit	575503	`{ RE_HAT_LISTS_NOT_NEWLINE, "RE_HAT_LISTS_NOT_NEWLINE" },`
Packit	575503	`{ RE_INTERVALS, "RE_INTERVALS" },`
Packit	575503	`{ RE_LIMITED_OPS, "RE_LIMITED_OPS" },`
Packit	575503	`{ RE_NEWLINE_ALT, "RE_NEWLINE_ALT" },`
Packit	575503	`{ RE_NO_BK_BRACES, "RE_NO_BK_BRACES" },`
Packit	575503	`{ RE_NO_BK_PARENS, "RE_NO_BK_PARENS" },`
Packit	575503	`{ RE_NO_BK_REFS, "RE_NO_BK_REFS" },`
Packit	575503	`{ RE_NO_BK_VBAR, "RE_NO_BK_VBAR" },`
Packit	575503	`{ RE_NO_EMPTY_RANGES, "RE_NO_EMPTY_RANGES" },`
Packit	575503	`{ RE_UNMATCHED_RIGHT_PAREN_ORD, "RE_UNMATCHED_RIGHT_PAREN_ORD" },`
Packit	575503	`{ RE_NO_POSIX_BACKTRACKING, "RE_NO_POSIX_BACKTRACKING" },`
Packit	575503	`{ RE_NO_GNU_OPS, "RE_NO_GNU_OPS" },`
Packit	575503	`{ RE_INVALID_INTERVAL_ORD, "RE_INVALID_INTERVAL_ORD" },`
Packit	575503	`{ RE_ICASE, "RE_ICASE" },`
Packit	575503	`{ RE_CARET_ANCHORS_HERE, "RE_CARET_ANCHORS_HERE" },`
Packit	575503	`{ RE_CONTEXT_INVALID_DUP, "RE_CONTEXT_INVALID_DUP" },`
Packit	575503	`{ RE_NO_SUB, "RE_NO_SUB" },`
Packit	575503	`{ 0, NULL },`
Packit	575503	`};`
Packit	575503
Packit	575503	`if (flagval == RE_SYNTAX_EMACS) /* == 0 */`
Packit	575503	`return "RE_SYNTAX_EMACS";`
Packit	575503
Packit	575503	`return genflags2str(flagval, values);`
Packit	575503	`}`
Packit	575503
/*
 * dfawarn --- warning hook for the dfa routines
 *
 * The dfa code expects its user to supply a dfawarn() and passes it the
 * text of a warning about a regex being compiled.
 */

void
dfawarn(const char *dfa_warning)
{
	/*
	 * Deliberately empty: gawk does its own (better) check
	 * for bad [[:foo:]] syntax.
	 */
}
Packit	575503
/* check_bracket_exp --- look for /[:space:]/ that should be /[[:space:]]/ */

/*
 * Scan the LENGTH bytes at S for bracket expressions that consist of
 * nothing but a character class name, such as [:space:], and warn that
 * [[:space:]] was probably meant.  Each class is warned about only once
 * per run.
 *
 * s[length] is temporarily overwritten with a NUL and restored before
 * returning, so that byte must be writable.
 */

static void
check_bracket_exp(char *s, size_t length)
{
	static struct reclass {
		const char *name;
		size_t len;
		bool warned;
	} classes[] = {
		/*
		 * Ordered by what we hope is frequency,
		 * since it's linear searched.
		 */
		{ "[:alpha:]", 9, false },
		{ "[:digit:]", 9, false },
		{ "[:alnum:]", 9, false },
		{ "[:upper:]", 9, false },
		{ "[:lower:]", 9, false },
		{ "[:space:]", 9, false },
		{ "[:xdigit:]", 10, false },
		{ "[:punct:]", 9, false },
		{ "[:print:]", 9, false },
		{ "[:graph:]", 9, false },
		{ "[:cntrl:]", 9, false },
		{ "[:blank:]", 9, false },
		{ NULL, 0, false }
	};
	int i;
	bool found = false;
	char save;
	char *sp, *sp2, *end;
	int len;
	int count = 0;

	if (length == 0)
		return;

	end = s + length;
	save = s[length];
	s[length] = '\0';
	sp = s;

again:
	/* sp2 remembers where this bracket expression opens */
	sp = sp2 = memchr(sp, '[', (end - sp));
	if (sp == NULL)
		goto done;

	/* count tracks bracket nesting; the expression ends when it returns to 0 */
	for (count++, sp++; *sp != '\0'; sp++) {
		if (*sp == '[')
			count++;
		/*
		 * ] as first char after open [ is skipped
		 * \] is skipped
		 * [^]] is skipped
		 *
		 * Any other ] closes a bracket.  (The first test used to be
		 * inverted, so an ordinary ] never closed anything and
		 * /[:space:]/ was never noticed.)
		 */
		if (*sp == ']' && sp > sp2) {
			if (sp[-1] == '[' || sp[-1] == '\\')
				;	/* literal ], not a close */
			else if ((sp - sp2) >= 2
				 && sp[-1] == '^' && sp[-2] == '[')
				;	/* literal ] right after [^ */
			else
				count--;
		}

		if (count == 0) {
			sp++;	/* skip past ']' */
			break;
		}
	}

	if (count > 0) {	/* bad regex, give up */
		goto done;
	}

	/* sp2 has start */

	/* is the whole bracket expression exactly one class name? */
	for (i = 0; classes[i].name != NULL; i++) {
		if (classes[i].warned)
			continue;
		len = classes[i].len;
		if (   len == (sp - sp2)
		    && memcmp(sp2, classes[i].name, len) == 0) {
			found = true;
			break;
		}
	}

	if (found && ! classes[i].warned) {
		/* %.*s: print exactly len bytes starting at sp2 */
		warning(_("regexp component `%.*s' should probably be `[%.*s]'"),
				len, sp2, len, sp2);
		classes[i].warned = true;
	}

	/* keep going: there may be more bracket expressions further on */
	if (sp < end) {
		found = false;
		goto again;
	}
done:
	s[length] = save;
}

source-git / gawk

Source Code

Blame re.c