|
Packit |
709fb3 |
/* dfasearch.c - searching subroutines using dfa and regex for grep.
|
|
Packit |
709fb3 |
Copyright 1992, 1998, 2000, 2007, 2009-2017 Free Software Foundation, Inc.
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
This program is free software; you can redistribute it and/or modify
|
|
Packit |
709fb3 |
it under the terms of the GNU General Public License as published by
|
|
Packit |
709fb3 |
the Free Software Foundation; either version 3, or (at your option)
|
|
Packit |
709fb3 |
any later version.
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
This program is distributed in the hope that it will be useful,
|
|
Packit |
709fb3 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit |
709fb3 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
Packit |
709fb3 |
GNU General Public License for more details.
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
You should have received a copy of the GNU General Public License
|
|
Packit |
709fb3 |
along with this program; if not, write to the Free Software
|
|
Packit |
709fb3 |
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
|
Packit |
709fb3 |
02110-1301, USA. */
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Written August 1992 by Mike Haertel. */
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
#include <config.h>
|
|
Packit |
709fb3 |
#include "intprops.h"
|
|
Packit |
709fb3 |
#include "search.h"
|
|
Packit |
709fb3 |
#include "die.h"
|
|
Packit |
709fb3 |
#include <error.h>
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
struct dfa_comp
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
/* KWset compiled pattern. For Ecompile and Gcompile, we compile
|
|
Packit |
709fb3 |
a list of strings, at least one of which is known to occur in
|
|
Packit |
709fb3 |
any string matching the regexp. */
|
|
Packit |
709fb3 |
kwset_t kwset;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* DFA compiled regexp. */
|
|
Packit |
709fb3 |
struct dfa *dfa;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Regex compiled regexps. */
|
|
Packit |
709fb3 |
struct re_pattern_buffer* patterns;
|
|
Packit |
709fb3 |
size_t pcount;
|
|
Packit |
709fb3 |
struct re_registers regs;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Number of compiled fixed strings known to exactly match the regexp.
|
|
Packit |
709fb3 |
If kwsexec returns < kwset_exact_matches, then we don't need to
|
|
Packit |
709fb3 |
call the regexp matcher at all. */
|
|
Packit |
709fb3 |
ptrdiff_t kwset_exact_matches;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
bool begline;
|
|
Packit |
709fb3 |
};
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
void
|
|
Packit |
709fb3 |
dfaerror (char const *mesg)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
die (EXIT_TROUBLE, 0, "%s", mesg);
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* For now, the sole dfawarn-eliciting condition (use of a regexp
|
|
Packit |
709fb3 |
like '[:lower:]') is unequivocally an error, so treat it as such,
|
|
Packit |
709fb3 |
when possible. */
|
|
Packit |
709fb3 |
void
|
|
Packit |
709fb3 |
dfawarn (char const *mesg)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
if (!getenv ("POSIXLY_CORRECT"))
|
|
Packit |
709fb3 |
dfaerror (mesg);
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* If the DFA turns out to have some set of fixed strings one of
|
|
Packit |
709fb3 |
which must occur in the match, then we build a kwset matcher
|
|
Packit |
709fb3 |
to find those strings, and thus quickly filter out impossible
|
|
Packit |
709fb3 |
matches. */
|
|
Packit |
709fb3 |
static void
|
|
Packit |
709fb3 |
kwsmusts (struct dfa_comp *dc)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
struct dfamust *dm = dfamust (dc->dfa);
|
|
Packit |
709fb3 |
if (!dm)
|
|
Packit |
709fb3 |
return;
|
|
Packit |
709fb3 |
dc->kwset = kwsinit (false);
|
|
Packit |
709fb3 |
if (dm->exact)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
/* Prepare a substring whose presence implies a match.
|
|
Packit |
709fb3 |
The kwset matcher will return the index of the matching
|
|
Packit |
709fb3 |
string that it chooses. */
|
|
Packit |
709fb3 |
++dc->kwset_exact_matches;
|
|
Packit |
709fb3 |
ptrdiff_t old_len = strlen (dm->must);
|
|
Packit |
709fb3 |
ptrdiff_t new_len = old_len + dm->begline + dm->endline;
|
|
Packit |
709fb3 |
char *must = xmalloc (new_len);
|
|
Packit |
709fb3 |
char *mp = must;
|
|
Packit |
709fb3 |
*mp = eolbyte;
|
|
Packit |
709fb3 |
mp += dm->begline;
|
|
Packit |
709fb3 |
dc->begline |= dm->begline;
|
|
Packit |
709fb3 |
memcpy (mp, dm->must, old_len);
|
|
Packit |
709fb3 |
if (dm->endline)
|
|
Packit |
709fb3 |
mp[old_len] = eolbyte;
|
|
Packit |
709fb3 |
kwsincr (dc->kwset, must, new_len);
|
|
Packit |
709fb3 |
free (must);
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
else
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
/* Otherwise, filtering with this substring should help reduce the
|
|
Packit |
709fb3 |
search space, but we'll still have to use the regexp matcher. */
|
|
Packit |
709fb3 |
kwsincr (dc->kwset, dm->must, strlen (dm->must));
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
kwsprep (dc->kwset);
|
|
Packit |
709fb3 |
dfamustfree (dm);
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
void *
|
|
Packit |
709fb3 |
GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
char *motif;
|
|
Packit |
709fb3 |
struct dfa_comp *dc = xcalloc (1, sizeof (*dc));
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
dc->dfa = dfaalloc ();
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
if (match_icase)
|
|
Packit |
709fb3 |
syntax_bits |= RE_ICASE;
|
|
Packit |
709fb3 |
re_set_syntax (syntax_bits);
|
|
Packit |
709fb3 |
int dfaopts = eolbyte ? 0 : DFA_EOL_NUL;
|
|
Packit |
709fb3 |
dfasyntax (dc->dfa, &localeinfo, syntax_bits, dfaopts);
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* For GNU regex, pass the patterns separately to detect errors like
|
|
Packit |
709fb3 |
"[\nallo\n]\n", where the patterns are "[", "allo" and "]", and
|
|
Packit |
709fb3 |
this should be a syntax error. The same for backref, where the
|
|
Packit |
709fb3 |
backref should be local to each pattern. */
|
|
Packit |
709fb3 |
char const *p = pattern;
|
|
Packit |
709fb3 |
char const *patlim = pattern + size;
|
|
Packit |
709fb3 |
bool compilation_failed = false;
|
|
Packit |
709fb3 |
size_t palloc = 0;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
do
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
size_t len;
|
|
Packit |
709fb3 |
char const *sep = memchr (p, '\n', patlim - p);
|
|
Packit |
709fb3 |
if (sep)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
len = sep - p;
|
|
Packit |
709fb3 |
sep++;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
else
|
|
Packit |
709fb3 |
len = patlim - p;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
if (palloc <= dc->pcount)
|
|
Packit |
709fb3 |
dc->patterns = x2nrealloc (dc->patterns, &palloc, sizeof *dc->patterns);
|
|
Packit |
709fb3 |
struct re_pattern_buffer *pat = &dc->patterns[dc->pcount];
|
|
Packit |
709fb3 |
pat->buffer = NULL;
|
|
Packit |
709fb3 |
pat->allocated = 0;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Do not use a fastmap with -i, to work around glibc Bug#20381. */
|
|
Packit |
709fb3 |
pat->fastmap = match_icase ? NULL : xmalloc (UCHAR_MAX + 1);
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
pat->translate = NULL;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
char const *err = re_compile_pattern (p, len, pat);
|
|
Packit |
709fb3 |
if (err)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
/* With patterns specified only on the command line, emit the bare
|
|
Packit |
709fb3 |
diagnostic. Otherwise, include a filename:lineno: prefix. */
|
|
Packit |
709fb3 |
size_t lineno;
|
|
Packit |
709fb3 |
char const *pat_filename = pattern_file_name (dc->pcount + 1,
|
|
Packit |
709fb3 |
&lineno);
|
|
Packit |
709fb3 |
if (*pat_filename == '\0')
|
|
Packit |
709fb3 |
error (0, 0, "%s", err);
|
|
Packit |
709fb3 |
else
|
|
Packit |
709fb3 |
error (0, 0, "%s:%zu: %s", pat_filename, lineno, err);
|
|
Packit |
709fb3 |
compilation_failed = true;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
dc->pcount++;
|
|
Packit |
709fb3 |
p = sep;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
while (p);
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
if (compilation_failed)
|
|
Packit |
709fb3 |
exit (EXIT_TROUBLE);
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* In the match_words and match_lines cases, we use a different pattern
|
|
Packit |
709fb3 |
for the DFA matcher that will quickly throw out cases that won't work.
|
|
Packit |
709fb3 |
Then if DFA succeeds we do some hairy stuff using the regex matcher
|
|
Packit |
709fb3 |
to decide whether the match should really count. */
|
|
Packit |
709fb3 |
if (match_words || match_lines)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
static char const line_beg_no_bk[] = "^(";
|
|
Packit |
709fb3 |
static char const line_end_no_bk[] = ")$";
|
|
Packit |
709fb3 |
static char const word_beg_no_bk[] = "(^|[^[:alnum:]_])(";
|
|
Packit |
709fb3 |
static char const word_end_no_bk[] = ")([^[:alnum:]_]|$)";
|
|
Packit |
709fb3 |
static char const line_beg_bk[] = "^\\(";
|
|
Packit |
709fb3 |
static char const line_end_bk[] = "\\)$";
|
|
Packit |
709fb3 |
static char const word_beg_bk[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
|
|
Packit |
709fb3 |
static char const word_end_bk[] = "\\)\\([^[:alnum:]_]\\|$\\)";
|
|
Packit |
709fb3 |
int bk = !(syntax_bits & RE_NO_BK_PARENS);
|
|
Packit |
709fb3 |
char *n = xmalloc (sizeof word_beg_bk - 1 + size + sizeof word_end_bk);
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
strcpy (n, match_lines ? (bk ? line_beg_bk : line_beg_no_bk)
|
|
Packit |
709fb3 |
: (bk ? word_beg_bk : word_beg_no_bk));
|
|
Packit |
709fb3 |
size_t total = strlen (n);
|
|
Packit |
709fb3 |
memcpy (n + total, pattern, size);
|
|
Packit |
709fb3 |
total += size;
|
|
Packit |
709fb3 |
strcpy (n + total, match_lines ? (bk ? line_end_bk : line_end_no_bk)
|
|
Packit |
709fb3 |
: (bk ? word_end_bk : word_end_no_bk));
|
|
Packit |
709fb3 |
total += strlen (n + total);
|
|
Packit |
709fb3 |
pattern = motif = n;
|
|
Packit |
709fb3 |
size = total;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
else
|
|
Packit |
709fb3 |
motif = NULL;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
dfacomp (pattern, size, dc->dfa, 1);
|
|
Packit |
709fb3 |
kwsmusts (dc);
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
free (motif);
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
return dc;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
size_t
|
|
Packit |
709fb3 |
EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
|
|
Packit |
709fb3 |
char const *start_ptr)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
char const *buflim, *beg, *end, *ptr, *match, *best_match, *mb_start;
|
|
Packit |
709fb3 |
char eol = eolbyte;
|
|
Packit |
709fb3 |
regoff_t start;
|
|
Packit |
709fb3 |
size_t len, best_len;
|
|
Packit |
709fb3 |
struct kwsmatch kwsm;
|
|
Packit |
709fb3 |
size_t i;
|
|
Packit |
709fb3 |
struct dfa_comp *dc = vdc;
|
|
Packit |
709fb3 |
struct dfa *superset = dfasuperset (dc->dfa);
|
|
Packit |
709fb3 |
bool dfafast = dfaisfast (dc->dfa);
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
mb_start = buf;
|
|
Packit |
709fb3 |
buflim = buf + size;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
for (beg = end = buf; end < buflim; beg = end)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
end = buflim;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
if (!start_ptr)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
char const *next_beg, *dfa_beg = beg;
|
|
Packit |
709fb3 |
size_t count = 0;
|
|
Packit |
709fb3 |
bool exact_kwset_match = false;
|
|
Packit |
709fb3 |
bool backref = false;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Try matching with KWset, if it's defined. */
|
|
Packit |
709fb3 |
if (dc->kwset)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
char const *prev_beg;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Find a possible match using the KWset matcher. */
|
|
Packit |
709fb3 |
ptrdiff_t offset = kwsexec (dc->kwset, beg - dc->begline,
|
|
Packit |
709fb3 |
buflim - beg + dc->begline,
|
|
Packit |
709fb3 |
&kwsm, true);
|
|
Packit |
709fb3 |
if (offset < 0)
|
|
Packit |
709fb3 |
goto failure;
|
|
Packit |
709fb3 |
match = beg + offset;
|
|
Packit |
709fb3 |
prev_beg = beg;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Narrow down to the line containing the possible match. */
|
|
Packit |
709fb3 |
beg = memrchr (buf, eol, match - buf);
|
|
Packit |
709fb3 |
beg = beg ? beg + 1 : buf;
|
|
Packit |
709fb3 |
dfa_beg = beg;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Determine the end pointer to give the DFA next. Typically
|
|
Packit |
709fb3 |
this is after the first newline after MATCH; but if the KWset
|
|
Packit |
709fb3 |
match is not exact, the DFA is fast, and the offset from
|
|
Packit |
709fb3 |
PREV_BEG is less than 64 or (MATCH - PREV_BEG), this is the
|
|
Packit |
709fb3 |
greater of the latter two values; this temporarily prefers
|
|
Packit |
709fb3 |
the DFA to KWset. */
|
|
Packit |
709fb3 |
exact_kwset_match = kwsm.index < dc->kwset_exact_matches;
|
|
Packit |
709fb3 |
end = ((exact_kwset_match || !dfafast
|
|
Packit |
709fb3 |
|| MAX (16, match - beg) < (match - prev_beg) >> 2)
|
|
Packit |
709fb3 |
? match
|
|
Packit |
709fb3 |
: MAX (16, match - beg) < (buflim - prev_beg) >> 2
|
|
Packit |
709fb3 |
? prev_beg + 4 * MAX (16, match - beg)
|
|
Packit |
709fb3 |
: buflim);
|
|
Packit |
709fb3 |
end = memchr (end, eol, buflim - end);
|
|
Packit |
709fb3 |
end = end ? end + 1 : buflim;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
if (exact_kwset_match)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
if (!localeinfo.multibyte | localeinfo.using_utf8)
|
|
Packit |
709fb3 |
goto success;
|
|
Packit |
709fb3 |
if (mb_start < beg)
|
|
Packit |
709fb3 |
mb_start = beg;
|
|
Packit |
709fb3 |
if (mb_goback (&mb_start, match, buflim) == 0)
|
|
Packit |
709fb3 |
goto success;
|
|
Packit |
709fb3 |
/* The matched line starts in the middle of a multibyte
|
|
Packit |
709fb3 |
character. Perform the DFA search starting from the
|
|
Packit |
709fb3 |
beginning of the next character. */
|
|
Packit |
709fb3 |
dfa_beg = mb_start;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Try matching with the superset of DFA, if it's defined. */
|
|
Packit |
709fb3 |
if (superset && !exact_kwset_match)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
/* Keep using the superset while it reports multiline
|
|
Packit |
709fb3 |
potential matches; this is more likely to be fast
|
|
Packit |
709fb3 |
than falling back to KWset would be. */
|
|
Packit |
709fb3 |
next_beg = dfaexec (superset, dfa_beg, (char *) end, 0,
|
|
Packit |
709fb3 |
&count, NULL);
|
|
Packit |
709fb3 |
if (next_beg == NULL || next_beg == end)
|
|
Packit |
709fb3 |
continue;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Narrow down to the line we've found. */
|
|
Packit |
709fb3 |
if (count != 0)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
beg = memrchr (buf, eol, next_beg - buf);
|
|
Packit |
709fb3 |
beg++;
|
|
Packit |
709fb3 |
dfa_beg = beg;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
end = memchr (next_beg, eol, buflim - next_beg);
|
|
Packit |
709fb3 |
end = end ? end + 1 : buflim;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
count = 0;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Try matching with DFA. */
|
|
Packit |
709fb3 |
next_beg = dfaexec (dc->dfa, dfa_beg, (char *) end, 0, &count,
|
|
Packit |
709fb3 |
&backref);
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* If there's no match, or if we've matched the sentinel,
|
|
Packit |
709fb3 |
we're done. */
|
|
Packit |
709fb3 |
if (next_beg == NULL || next_beg == end)
|
|
Packit |
709fb3 |
continue;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Narrow down to the line we've found. */
|
|
Packit |
709fb3 |
if (count != 0)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
beg = memrchr (buf, eol, next_beg - buf);
|
|
Packit |
709fb3 |
beg++;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
end = memchr (next_beg, eol, buflim - next_beg);
|
|
Packit |
709fb3 |
end = end ? end + 1 : buflim;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Successful, no backreferences encountered! */
|
|
Packit |
709fb3 |
if (!backref)
|
|
Packit |
709fb3 |
goto success;
|
|
Packit |
709fb3 |
ptr = beg;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
else
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
/* We are looking for the leftmost (then longest) exact match.
|
|
Packit |
709fb3 |
We will go through the outer loop only once. */
|
|
Packit |
709fb3 |
ptr = start_ptr;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* If the "line" is longer than the maximum regexp offset,
|
|
Packit |
709fb3 |
die as if we've run out of memory. */
|
|
Packit |
709fb3 |
if (TYPE_MAXIMUM (regoff_t) < end - beg - 1)
|
|
Packit |
709fb3 |
xalloc_die ();
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Run the possible match through Regex. */
|
|
Packit |
709fb3 |
best_match = end;
|
|
Packit |
709fb3 |
best_len = 0;
|
|
Packit |
709fb3 |
for (i = 0; i < dc->pcount; i++)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
dc->patterns[i].not_eol = 0;
|
|
Packit |
709fb3 |
dc->patterns[i].newline_anchor = eolbyte == '\n';
|
|
Packit |
709fb3 |
start = re_search (&dc->patterns[i], beg, end - beg - 1,
|
|
Packit |
709fb3 |
ptr - beg, end - ptr - 1, &dc->regs);
|
|
Packit |
709fb3 |
if (start < -1)
|
|
Packit |
709fb3 |
xalloc_die ();
|
|
Packit |
709fb3 |
else if (0 <= start)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
len = dc->regs.end[0] - start;
|
|
Packit |
709fb3 |
match = beg + start;
|
|
Packit |
709fb3 |
if (match > best_match)
|
|
Packit |
709fb3 |
continue;
|
|
Packit |
709fb3 |
if (start_ptr && !match_words)
|
|
Packit |
709fb3 |
goto assess_pattern_match;
|
|
Packit |
709fb3 |
if ((!match_lines && !match_words)
|
|
Packit |
709fb3 |
|| (match_lines && len == end - ptr - 1))
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
match = ptr;
|
|
Packit |
709fb3 |
len = end - ptr;
|
|
Packit |
709fb3 |
goto assess_pattern_match;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
/* If -w and not -x, check whether the match aligns with
|
|
Packit |
709fb3 |
word boundaries. Do this iteratively because:
|
|
Packit |
709fb3 |
(a) the line may contain more than one occurrence of the
|
|
Packit |
709fb3 |
pattern, and
|
|
Packit |
709fb3 |
(b) Several alternatives in the pattern might be valid at a
|
|
Packit |
709fb3 |
given point, and we may need to consider a shorter one to
|
|
Packit |
709fb3 |
find a word boundary. */
|
|
Packit |
709fb3 |
if (!match_lines && match_words)
|
|
Packit |
709fb3 |
while (match <= best_match)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
regoff_t shorter_len = 0;
|
|
Packit |
709fb3 |
if (! wordchar_next (match + len, end - 1)
|
|
Packit |
709fb3 |
&& ! wordchar_prev (beg, match, end - 1))
|
|
Packit |
709fb3 |
goto assess_pattern_match;
|
|
Packit |
709fb3 |
if (len > 0)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
/* Try a shorter length anchored at the same place. */
|
|
Packit |
709fb3 |
--len;
|
|
Packit |
709fb3 |
dc->patterns[i].not_eol = 1;
|
|
Packit |
709fb3 |
shorter_len = re_match (&dc->patterns[i], beg,
|
|
Packit |
709fb3 |
match + len - ptr, match - beg,
|
|
Packit |
709fb3 |
&dc->regs);
|
|
Packit |
709fb3 |
if (shorter_len < -1)
|
|
Packit |
709fb3 |
xalloc_die ();
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
if (0 < shorter_len)
|
|
Packit |
709fb3 |
len = shorter_len;
|
|
Packit |
709fb3 |
else
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
/* Try looking further on. */
|
|
Packit |
709fb3 |
if (match == end - 1)
|
|
Packit |
709fb3 |
break;
|
|
Packit |
709fb3 |
match++;
|
|
Packit |
709fb3 |
dc->patterns[i].not_eol = 0;
|
|
Packit |
709fb3 |
start = re_search (&dc->patterns[i], beg, end - beg - 1,
|
|
Packit |
709fb3 |
match - beg, end - match - 1,
|
|
Packit |
709fb3 |
&dc->regs);
|
|
Packit |
709fb3 |
if (start < 0)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
if (start < -1)
|
|
Packit |
709fb3 |
xalloc_die ();
|
|
Packit |
709fb3 |
break;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
len = dc->regs.end[0] - start;
|
|
Packit |
709fb3 |
match = beg + start;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
} /* while (match <= best_match) */
|
|
Packit |
709fb3 |
continue;
|
|
Packit |
709fb3 |
assess_pattern_match:
|
|
Packit |
709fb3 |
if (!start_ptr)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
/* Good enough for a non-exact match.
|
|
Packit |
709fb3 |
No need to look at further patterns, if any. */
|
|
Packit |
709fb3 |
goto success;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
if (match < best_match || (match == best_match && len > best_len))
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
/* Best exact match: leftmost, then longest. */
|
|
Packit |
709fb3 |
best_match = match;
|
|
Packit |
709fb3 |
best_len = len;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
} /* if re_search >= 0 */
|
|
Packit |
709fb3 |
} /* for Regex patterns. */
|
|
Packit |
709fb3 |
if (best_match < end)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
/* We have found an exact match. We were just
|
|
Packit |
709fb3 |
waiting for the best one (leftmost then longest). */
|
|
Packit |
709fb3 |
beg = best_match;
|
|
Packit |
709fb3 |
len = best_len;
|
|
Packit |
709fb3 |
goto success_in_len;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
} /* for (beg = end ..) */
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
failure:
|
|
Packit |
709fb3 |
return -1;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
success:
|
|
Packit |
709fb3 |
len = end - beg;
|
|
Packit |
709fb3 |
success_in_len:;
|
|
Packit |
709fb3 |
size_t off = beg - buf;
|
|
Packit |
709fb3 |
*match_size = len;
|
|
Packit |
709fb3 |
return off;
|
|
Packit |
709fb3 |
}
|