/* pcresearch.c - searching subroutines using PCRE for grep.
   Copyright 2000, 2007, 2009-2017 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* Written August 1992 by Mike Haertel. */

#include <config.h>
#include "search.h"
#include "die.h"

#if HAVE_LIBPCRE
# include <pcre.h>

/* Number of ints in the output vector passed to pcre_exec.
   This must be at least 2; everything after that is for performance
   in pcre_exec.  */
enum { NSUB = 300 };

/* If <pcre.h> does not define PCRE_STUDY_JIT_COMPILE, define it to 0.
   That makes it a no-op option bit when passed to pcre_study and
   compiles out every '# if PCRE_STUDY_JIT_COMPILE' section.  */
# ifndef PCRE_STUDY_JIT_COMPILE
#  define PCRE_STUDY_JIT_COMPILE 0
# endif

struct pcre_comp
Packit 709fb3
{
Packit 709fb3
  /* Compiled internal form of a Perl regular expression.  */
Packit 709fb3
  pcre *cre;
Packit 709fb3
Packit 709fb3
  /* Additional information about the pattern.  */
Packit 709fb3
  pcre_extra *extra;
Packit 709fb3
Packit 709fb3
# if PCRE_STUDY_JIT_COMPILE
Packit 709fb3
  /* The JIT stack and its maximum size.  */
Packit 709fb3
  pcre_jit_stack *jit_stack;
Packit 709fb3
  int jit_stack_size;
Packit 709fb3
# endif
Packit 709fb3
Packit 709fb3
  /* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty
Packit 709fb3
     string matches when that flag is used.  */
Packit 709fb3
  int empty_match[2];
Packit 709fb3
};
Packit 709fb3
Packit 709fb3
Packit 709fb3
/* Match the already-compiled PCRE pattern against the data in SUBJECT,
Packit 709fb3
   of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with
Packit 709fb3
   options OPTIONS, and storing resulting matches into SUB.  Return
Packit 709fb3
   the (nonnegative) match location or a (negative) error number.  */
Packit 709fb3
static int
Packit 709fb3
jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
Packit 709fb3
          int search_offset, int options, int *sub)
Packit 709fb3
{
Packit 709fb3
  while (true)
Packit 709fb3
    {
Packit 709fb3
      int e = pcre_exec (pc->cre, pc->extra, subject, search_bytes,
Packit 709fb3
                         search_offset, options, sub, NSUB);
Packit 709fb3
Packit 709fb3
# if PCRE_STUDY_JIT_COMPILE
Packit 709fb3
      if (e == PCRE_ERROR_JIT_STACKLIMIT
Packit 709fb3
          && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2)
Packit 709fb3
        {
Packit 709fb3
          int old_size = pc->jit_stack_size;
Packit 709fb3
          int new_size = pc->jit_stack_size = old_size * 2;
Packit 709fb3
          if (pc->jit_stack)
Packit 709fb3
            pcre_jit_stack_free (pc->jit_stack);
Packit 709fb3
          pc->jit_stack = pcre_jit_stack_alloc (old_size, new_size);
Packit 709fb3
          if (!pc->jit_stack)
Packit 709fb3
            die (EXIT_TROUBLE, 0,
Packit 709fb3
                 _("failed to allocate memory for the PCRE JIT stack"));
Packit 709fb3
          pcre_assign_jit_stack (pc->extra, NULL, pc->jit_stack);
Packit 709fb3
          continue;
Packit 709fb3
        }
Packit 709fb3
# endif
Packit 709fb3
Packit 709fb3
      return e;
Packit 709fb3
    }
Packit 709fb3
}

#endif

void *
Packit 709fb3
Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
Packit 709fb3
{
Packit 709fb3
#if !HAVE_LIBPCRE
Packit 709fb3
  die (EXIT_TROUBLE, 0,
Packit 709fb3
       _("support for the -P option is not compiled into "
Packit 709fb3
         "this --disable-perl-regexp binary"));
Packit 709fb3
#else
Packit 709fb3
  int e;
Packit 709fb3
  char const *ep;
Packit 709fb3
  static char const wprefix[] = "(?
Packit 709fb3
  static char const wsuffix[] = ")(?!\\w)";
Packit 709fb3
  static char const xprefix[] = "^(?:";
Packit 709fb3
  static char const xsuffix[] = ")$";
Packit 709fb3
  int fix_len_max = MAX (sizeof wprefix - 1 + sizeof wsuffix - 1,
Packit 709fb3
                         sizeof xprefix - 1 + sizeof xsuffix - 1);
Packit 709fb3
  char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
Packit 709fb3
  int flags = PCRE_DOLLAR_ENDONLY | (match_icase ? PCRE_CASELESS : 0);
Packit 709fb3
  char const *patlim = pattern + size;
Packit 709fb3
  char *n = re;
Packit 709fb3
  char const *p;
Packit 709fb3
  char const *pnul;
Packit 709fb3
  struct pcre_comp *pc = xcalloc (1, sizeof (*pc));
Packit 709fb3
Packit 709fb3
  if (localeinfo.multibyte)
Packit 709fb3
    {
Packit 709fb3
      if (! localeinfo.using_utf8)
Packit 709fb3
        die (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));
Packit 709fb3
      flags |= PCRE_UTF8;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* FIXME: Remove this restriction.  */
Packit 709fb3
  if (memchr (pattern, '\n', size))
Packit 709fb3
    die (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
Packit 709fb3
Packit 709fb3
  *n = '\0';
Packit 709fb3
  if (match_words)
Packit 709fb3
    strcpy (n, wprefix);
Packit 709fb3
  if (match_lines)
Packit 709fb3
    strcpy (n, xprefix);
Packit 709fb3
  n += strlen (n);
Packit 709fb3
Packit 709fb3
  /* The PCRE interface doesn't allow NUL bytes in the pattern, so
Packit 709fb3
     replace each NUL byte in the pattern with the four characters
Packit 709fb3
     "\000", removing a preceding backslash if there are an odd
Packit 709fb3
     number of backslashes before the NUL.  */
Packit 709fb3
  for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
Packit 709fb3
    {
Packit 709fb3
      memcpy (n, p, pnul - p);
Packit 709fb3
      n += pnul - p;
Packit 709fb3
      for (p = pnul; pattern < p && p[-1] == '\\'; p--)
Packit 709fb3
        continue;
Packit 709fb3
      n -= (pnul - p) & 1;
Packit 709fb3
      strcpy (n, "\\000");
Packit 709fb3
      n += 4;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  memcpy (n, p, patlim - p);
Packit 709fb3
  n += patlim - p;
Packit 709fb3
  *n = '\0';
Packit 709fb3
  if (match_words)
Packit 709fb3
    strcpy (n, wsuffix);
Packit 709fb3
  if (match_lines)
Packit 709fb3
    strcpy (n, xsuffix);
Packit 709fb3
Packit 709fb3
  pc->cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
Packit 709fb3
  if (!pc->cre)
Packit 709fb3
    die (EXIT_TROUBLE, 0, "%s", ep);
Packit 709fb3
Packit 709fb3
  pc->extra = pcre_study (pc->cre, PCRE_STUDY_JIT_COMPILE, &ep);
Packit 709fb3
  if (ep)
Packit 709fb3
    die (EXIT_TROUBLE, 0, "%s", ep);
Packit 709fb3
Packit 709fb3
# if PCRE_STUDY_JIT_COMPILE
Packit 709fb3
  if (pcre_fullinfo (pc->cre, pc->extra, PCRE_INFO_JIT, &e))
Packit 709fb3
    die (EXIT_TROUBLE, 0, _("internal error (should never happen)"));
Packit 709fb3
Packit 709fb3
  /* The PCRE documentation says that a 32 KiB stack is the default.  */
Packit 709fb3
  if (e)
Packit 709fb3
    pc->jit_stack_size = 32 << 10;
Packit 709fb3
# endif
Packit 709fb3
Packit 709fb3
  free (re);
Packit 709fb3
Packit 709fb3
  int sub[NSUB];
Packit 709fb3
  pc->empty_match[false] = pcre_exec (pc->cre, pc->extra, "", 0, 0,
Packit 709fb3
                                      PCRE_NOTBOL, sub, NSUB);
Packit 709fb3
  pc->empty_match[true] = pcre_exec (pc->cre, pc->extra, "", 0, 0, 0, sub,
Packit 709fb3
                                     NSUB);
Packit 709fb3
Packit 709fb3
  return pc;
Packit 709fb3
#endif /* HAVE_LIBPCRE */
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
size_t
Packit 709fb3
Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
Packit 709fb3
          char const *start_ptr)
Packit 709fb3
{
Packit 709fb3
#if !HAVE_LIBPCRE
Packit 709fb3
  /* We can't get here, because Pcompile would have been called earlier.  */
Packit 709fb3
  die (EXIT_TROUBLE, 0, _("internal error"));
Packit 709fb3
#else
Packit 709fb3
  int sub[NSUB];
Packit 709fb3
  char const *p = start_ptr ? start_ptr : buf;
Packit 709fb3
  bool bol = p[-1] == eolbyte;
Packit 709fb3
  char const *line_start = buf;
Packit 709fb3
  int e = PCRE_ERROR_NOMATCH;
Packit 709fb3
  char const *line_end;
Packit 709fb3
  struct pcre_comp *pc = vcp;
Packit 709fb3
Packit 709fb3
  /* The search address to pass to pcre_exec.  This is the start of
Packit 709fb3
     the buffer, or just past the most-recently discovered encoding
Packit 709fb3
     error or line end.  */
Packit 709fb3
  char const *subject = buf;
Packit 709fb3
Packit 709fb3
  do
Packit 709fb3
    {
Packit 709fb3
      /* Search line by line.  Although this code formerly used
Packit 709fb3
         PCRE_MULTILINE for performance, the performance wasn't always
Packit 709fb3
         better and the correctness issues were too puzzling.  See
Packit 709fb3
         Bug#22655.  */
Packit 709fb3
      line_end = memchr (p, eolbyte, buf + size - p);
Packit 709fb3
      if (INT_MAX < line_end - p)
Packit 709fb3
        die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
Packit 709fb3
Packit 709fb3
      for (;;)
Packit 709fb3
        {
Packit 709fb3
          /* Skip past bytes that are easily determined to be encoding
Packit 709fb3
             errors, treating them as data that cannot match.  This is
Packit 709fb3
             faster than having pcre_exec check them.  */
Packit 709fb3
          while (localeinfo.sbclen[to_uchar (*p)] == -1)
Packit 709fb3
            {
Packit 709fb3
              p++;
Packit 709fb3
              subject = p;
Packit 709fb3
              bol = false;
Packit 709fb3
            }
Packit 709fb3
Packit 709fb3
          int search_offset = p - subject;
Packit 709fb3
Packit 709fb3
          /* Check for an empty match; this is faster than letting
Packit 709fb3
             pcre_exec do it.  */
Packit 709fb3
          if (p == line_end)
Packit 709fb3
            {
Packit 709fb3
              sub[0] = sub[1] = search_offset;
Packit 709fb3
              e = pc->empty_match[bol];
Packit 709fb3
              break;
Packit 709fb3
            }
Packit 709fb3
Packit 709fb3
          int options = 0;
Packit 709fb3
          if (!bol)
Packit 709fb3
            options |= PCRE_NOTBOL;
Packit 709fb3
Packit 709fb3
          e = jit_exec (pc, subject, line_end - subject, search_offset,
Packit 709fb3
                        options, sub);
Packit 709fb3
          if (e != PCRE_ERROR_BADUTF8)
Packit 709fb3
            break;
Packit 709fb3
          int valid_bytes = sub[0];
Packit 709fb3
Packit 709fb3
          if (search_offset <= valid_bytes)
Packit 709fb3
            {
Packit 709fb3
              /* Try to match the string before the encoding error.  */
Packit 709fb3
              if (valid_bytes == 0)
Packit 709fb3
                {
Packit 709fb3
                  /* Handle the empty-match case specially, for speed.
Packit 709fb3
                     This optimization is valid if VALID_BYTES is zero,
Packit 709fb3
                     which means SEARCH_OFFSET is also zero.  */
Packit 709fb3
                  sub[1] = 0;
Packit 709fb3
                  e = pc->empty_match[bol];
Packit 709fb3
                }
Packit 709fb3
              else
Packit 709fb3
                e = jit_exec (pc, subject, valid_bytes, search_offset,
Packit 709fb3
                              options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, sub);
Packit 709fb3
Packit 709fb3
              if (e != PCRE_ERROR_NOMATCH)
Packit 709fb3
                break;
Packit 709fb3
Packit 709fb3
              /* Treat the encoding error as data that cannot match.  */
Packit 709fb3
              p = subject + valid_bytes + 1;
Packit 709fb3
              bol = false;
Packit 709fb3
            }
Packit 709fb3
Packit 709fb3
          subject += valid_bytes + 1;
Packit 709fb3
        }
Packit 709fb3
Packit 709fb3
      if (e != PCRE_ERROR_NOMATCH)
Packit 709fb3
        break;
Packit 709fb3
      bol = true;
Packit 709fb3
      p = subject = line_start = line_end + 1;
Packit 709fb3
    }
Packit 709fb3
  while (p < buf + size);
Packit 709fb3
Packit 709fb3
  if (e <= 0)
Packit 709fb3
    {
Packit 709fb3
      switch (e)
Packit 709fb3
        {
Packit 709fb3
        case PCRE_ERROR_NOMATCH:
Packit 709fb3
          break;
Packit 709fb3
Packit 709fb3
        case PCRE_ERROR_NOMEMORY:
Packit 709fb3
          die (EXIT_TROUBLE, 0, _("memory exhausted"));
Packit 709fb3
Packit 709fb3
# if PCRE_STUDY_JIT_COMPILE
Packit 709fb3
        case PCRE_ERROR_JIT_STACKLIMIT:
Packit 709fb3
          die (EXIT_TROUBLE, 0, _("exhausted PCRE JIT stack"));
Packit 709fb3
# endif
Packit 709fb3
Packit 709fb3
        case PCRE_ERROR_MATCHLIMIT:
Packit 709fb3
          die (EXIT_TROUBLE, 0, _("exceeded PCRE's backtracking limit"));
Packit 709fb3
Packit 709fb3
        default:
Packit 709fb3
          /* For now, we lump all remaining PCRE failures into this basket.
Packit 709fb3
             If anyone cares to provide sample grep usage that can trigger
Packit 709fb3
             particular PCRE errors, we can add to the list (above) of more
Packit 709fb3
             detailed diagnostics.  */
Packit 709fb3
          die (EXIT_TROUBLE, 0, _("internal PCRE error: %d"), e);
Packit 709fb3
        }
Packit 709fb3
Packit 709fb3
      return -1;
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      char const *matchbeg = subject + sub[0];
Packit 709fb3
      char const *matchend = subject + sub[1];
Packit 709fb3
      char const *beg;
Packit 709fb3
      char const *end;
Packit 709fb3
      if (start_ptr)
Packit 709fb3
        {
Packit 709fb3
          beg = matchbeg;
Packit 709fb3
          end = matchend;
Packit 709fb3
        }
Packit 709fb3
      else
Packit 709fb3
        {
Packit 709fb3
          beg = line_start;
Packit 709fb3
          end = line_end + 1;
Packit 709fb3
        }
Packit 709fb3
      *match_size = end - beg;
Packit 709fb3
      return beg - buf;
Packit 709fb3
    }
Packit 709fb3
#endif
Packit 709fb3
}