Blame glib/pcre/pcre_study.c

Packit ae235b
/*************************************************
Packit ae235b
*      Perl-Compatible Regular Expressions       *
Packit ae235b
*************************************************/
Packit ae235b
Packit ae235b
/* PCRE is a library of functions to support regular expressions whose syntax
Packit ae235b
and semantics are as close as possible to those of the Perl 5 language.
Packit ae235b
Packit ae235b
                       Written by Philip Hazel
Packit ae235b
           Copyright (c) 1997-2012 University of Cambridge
Packit ae235b
Packit ae235b
-----------------------------------------------------------------------------
Packit ae235b
Redistribution and use in source and binary forms, with or without
Packit ae235b
modification, are permitted provided that the following conditions are met:
Packit ae235b
Packit ae235b
    * Redistributions of source code must retain the above copyright notice,
Packit ae235b
      this list of conditions and the following disclaimer.
Packit ae235b
Packit ae235b
    * Redistributions in binary form must reproduce the above copyright
Packit ae235b
      notice, this list of conditions and the following disclaimer in the
Packit ae235b
      documentation and/or other materials provided with the distribution.
Packit ae235b
Packit ae235b
    * Neither the name of the University of Cambridge nor the names of its
Packit ae235b
      contributors may be used to endorse or promote products derived from
Packit ae235b
      this software without specific prior written permission.
Packit ae235b
Packit ae235b
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
Packit ae235b
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
Packit ae235b
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Packit ae235b
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
Packit ae235b
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
Packit ae235b
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
Packit ae235b
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
Packit ae235b
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
Packit ae235b
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
Packit ae235b
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
Packit ae235b
POSSIBILITY OF SUCH DAMAGE.
Packit ae235b
-----------------------------------------------------------------------------
Packit ae235b
*/
Packit ae235b
Packit ae235b
Packit ae235b
/* This module contains the external function pcre_study(), along with local
Packit ae235b
supporting functions. */
Packit ae235b
Packit ae235b
Packit ae235b
#include "config.h"
Packit ae235b
Packit ae235b
#include "pcre_internal.h"
Packit ae235b
Packit ae235b
/* Set the bit for value c in the bitmap named start_bits, which must be in
scope wherever the macro is used (bit c&7 of byte c/8). The argument and the
whole expansion are parenthesized so that an expression argument such as
"x + 8" is handled correctly. Note that c is evaluated twice, so it must not
have side effects. */

#define SET_BIT(c) (start_bits[(c)/8] |= (1 << ((c)&7)))
Packit ae235b
Packit ae235b
/* Returns from set_start_bits() */
Packit ae235b
Packit ae235b
enum
  {
  SSB_FAIL = 0,      /* Failed to find any starting bytes */
  SSB_DONE = 1,      /* Found mandatory starting bytes */
  SSB_CONTINUE = 2,  /* Found optional starting bytes */
  SSB_UNKNOWN = 3    /* Hit an unrecognized opcode */
  };
Packit ae235b
Packit ae235b
Packit ae235b
Packit ae235b
/*************************************************
Packit ae235b
*   Find the minimum subject length for a group  *
Packit ae235b
*************************************************/
Packit ae235b
Packit ae235b
/* Scan a parenthesized group and compute the minimum length of subject that
Packit ae235b
is needed to match it. This is a lower bound; it does not mean there is a
Packit ae235b
string of that length that matches. In UTF8 mode, the result is in characters
Packit ae235b
rather than bytes.
Packit ae235b
Packit ae235b
Arguments:
Packit ae235b
  code            pointer to start of group (the bracket)
Packit ae235b
  startcode       pointer to start of the whole pattern
Packit ae235b
  options         the compiling options
Packit ae235b
  int             RECURSE depth
Packit ae235b
Packit ae235b
Returns:   the minimum length
Packit ae235b
           -1 if \C in UTF-8 mode or (*ACCEPT) was encountered
Packit ae235b
           -2 internal error (missing capturing bracket)
Packit ae235b
           -3 internal error (opcode not listed)
Packit ae235b
*/
Packit ae235b
Packit ae235b
static int
Packit ae235b
find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options,
Packit ae235b
  int recurse_depth)
Packit ae235b
{
Packit ae235b
int length = -1;
Packit ae235b
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
Packit ae235b
BOOL utf = (options & PCRE_UTF8) != 0;
Packit ae235b
BOOL had_recurse = FALSE;
Packit ae235b
int branchlength = 0;
Packit ae235b
pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
Packit ae235b
Packit ae235b
if (*code == OP_CBRA || *code == OP_SCBRA ||
Packit ae235b
    *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
Packit ae235b
Packit ae235b
/* Scan along the opcodes for this branch. If we get to the end of the
Packit ae235b
branch, check the length against that of the other branches. */
Packit ae235b
Packit ae235b
for (;;)
Packit ae235b
  {
Packit ae235b
  int d, min;
Packit ae235b
  pcre_uchar *cs, *ce;
Packit ae235b
  int op = *cc;
Packit ae235b
Packit ae235b
  switch (op)
Packit ae235b
    {
Packit ae235b
    case OP_COND:
Packit ae235b
    case OP_SCOND:
Packit ae235b
Packit ae235b
    /* If there is only one branch in a condition, the implied branch has zero
Packit ae235b
    length, so we don't add anything. This covers the DEFINE "condition"
Packit ae235b
    automatically. */
Packit ae235b
Packit ae235b
    cs = cc + GET(cc, 1);
Packit ae235b
    if (*cs != OP_ALT)
Packit ae235b
      {
Packit ae235b
      cc = cs + 1 + LINK_SIZE;
Packit ae235b
      break;
Packit ae235b
      }
Packit ae235b
Packit ae235b
    /* Otherwise we can fall through and treat it the same as any other
Packit ae235b
    subpattern. */
Packit ae235b
Packit ae235b
    case OP_CBRA:
Packit ae235b
    case OP_SCBRA:
Packit ae235b
    case OP_BRA:
Packit ae235b
    case OP_SBRA:
Packit ae235b
    case OP_CBRAPOS:
Packit ae235b
    case OP_SCBRAPOS:
Packit ae235b
    case OP_BRAPOS:
Packit ae235b
    case OP_SBRAPOS:
Packit ae235b
    case OP_ONCE:
Packit ae235b
    case OP_ONCE_NC:
Packit ae235b
    d = find_minlength(cc, startcode, options, recurse_depth);
Packit ae235b
    if (d < 0) return d;
Packit ae235b
    branchlength += d;
Packit ae235b
    do cc += GET(cc, 1); while (*cc == OP_ALT);
Packit ae235b
    cc += 1 + LINK_SIZE;
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* ACCEPT makes things far too complicated; we have to give up. */
Packit ae235b
Packit ae235b
    case OP_ACCEPT:
Packit ae235b
    case OP_ASSERT_ACCEPT:
Packit ae235b
    return -1;
Packit ae235b
Packit ae235b
    /* Reached end of a branch; if it's a ket it is the end of a nested
Packit ae235b
    call. If it's ALT it is an alternation in a nested call. If it is END it's
Packit ae235b
    the end of the outer call. All can be handled by the same code. If an
Packit ae235b
    ACCEPT was previously encountered, use the length that was in force at that
Packit ae235b
    time, and pass back the shortest ACCEPT length. */
Packit ae235b
Packit ae235b
    case OP_ALT:
Packit ae235b
    case OP_KET:
Packit ae235b
    case OP_KETRMAX:
Packit ae235b
    case OP_KETRMIN:
Packit ae235b
    case OP_KETRPOS:
Packit ae235b
    case OP_END:
Packit ae235b
    if (length < 0 || (!had_recurse && branchlength < length))
Packit ae235b
      length = branchlength;
Packit ae235b
    if (op != OP_ALT) return length;
Packit ae235b
    cc += 1 + LINK_SIZE;
Packit ae235b
    branchlength = 0;
Packit ae235b
    had_recurse = FALSE;
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* Skip over assertive subpatterns */
Packit ae235b
Packit ae235b
    case OP_ASSERT:
Packit ae235b
    case OP_ASSERT_NOT:
Packit ae235b
    case OP_ASSERTBACK:
Packit ae235b
    case OP_ASSERTBACK_NOT:
Packit ae235b
    do cc += GET(cc, 1); while (*cc == OP_ALT);
Packit ae235b
    /* Fall through */
Packit ae235b
Packit ae235b
    /* Skip over things that don't match chars */
Packit ae235b
Packit ae235b
    case OP_REVERSE:
Packit ae235b
    case OP_CREF:
Packit ae235b
    case OP_NCREF:
Packit ae235b
    case OP_RREF:
Packit ae235b
    case OP_NRREF:
Packit ae235b
    case OP_DEF:
Packit ae235b
    case OP_CALLOUT:
Packit ae235b
    case OP_SOD:
Packit ae235b
    case OP_SOM:
Packit ae235b
    case OP_EOD:
Packit ae235b
    case OP_EODN:
Packit ae235b
    case OP_CIRC:
Packit ae235b
    case OP_CIRCM:
Packit ae235b
    case OP_DOLL:
Packit ae235b
    case OP_DOLLM:
Packit ae235b
    case OP_NOT_WORD_BOUNDARY:
Packit ae235b
    case OP_WORD_BOUNDARY:
Packit ae235b
    cc += PRIV(OP_lengths)[*cc];
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* Skip over a subpattern that has a {0} or {0,x} quantifier */
Packit ae235b
Packit ae235b
    case OP_BRAZERO:
Packit ae235b
    case OP_BRAMINZERO:
Packit ae235b
    case OP_BRAPOSZERO:
Packit ae235b
    case OP_SKIPZERO:
Packit ae235b
    cc += PRIV(OP_lengths)[*cc];
Packit ae235b
    do cc += GET(cc, 1); while (*cc == OP_ALT);
Packit ae235b
    cc += 1 + LINK_SIZE;
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* Handle literal characters and + repetitions */
Packit ae235b
Packit ae235b
    case OP_CHAR:
Packit ae235b
    case OP_CHARI:
Packit ae235b
    case OP_NOT:
Packit ae235b
    case OP_NOTI:
Packit ae235b
    case OP_PLUS:
Packit ae235b
    case OP_PLUSI:
Packit ae235b
    case OP_MINPLUS:
Packit ae235b
    case OP_MINPLUSI:
Packit ae235b
    case OP_POSPLUS:
Packit ae235b
    case OP_POSPLUSI:
Packit ae235b
    case OP_NOTPLUS:
Packit ae235b
    case OP_NOTPLUSI:
Packit ae235b
    case OP_NOTMINPLUS:
Packit ae235b
    case OP_NOTMINPLUSI:
Packit ae235b
    case OP_NOTPOSPLUS:
Packit ae235b
    case OP_NOTPOSPLUSI:
Packit ae235b
    branchlength++;
Packit ae235b
    cc += 2;
Packit ae235b
#ifdef SUPPORT_UTF
Packit ae235b
    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
Packit ae235b
#endif
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    case OP_TYPEPLUS:
Packit ae235b
    case OP_TYPEMINPLUS:
Packit ae235b
    case OP_TYPEPOSPLUS:
Packit ae235b
    branchlength++;
Packit ae235b
    cc += (cc[1] == OP_PROP || cc[1] == OP_NOTPROP)? 4 : 2;
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* Handle exact repetitions. The count is already in characters, but we
Packit ae235b
    need to skip over a multibyte character in UTF8 mode.  */
Packit ae235b
Packit ae235b
    case OP_EXACT:
Packit ae235b
    case OP_EXACTI:
Packit ae235b
    case OP_NOTEXACT:
Packit ae235b
    case OP_NOTEXACTI:
Packit ae235b
    branchlength += GET2(cc,1);
Packit ae235b
    cc += 2 + IMM2_SIZE;
Packit ae235b
#ifdef SUPPORT_UTF
Packit ae235b
    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
Packit ae235b
#endif
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    case OP_TYPEEXACT:
Packit ae235b
    branchlength += GET2(cc,1);
Packit ae235b
    cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP
Packit ae235b
      || cc[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* Handle single-char non-literal matchers */
Packit ae235b
Packit ae235b
    case OP_PROP:
Packit ae235b
    case OP_NOTPROP:
Packit ae235b
    cc += 2;
Packit ae235b
    /* Fall through */
Packit ae235b
Packit ae235b
    case OP_NOT_DIGIT:
Packit ae235b
    case OP_DIGIT:
Packit ae235b
    case OP_NOT_WHITESPACE:
Packit ae235b
    case OP_WHITESPACE:
Packit ae235b
    case OP_NOT_WORDCHAR:
Packit ae235b
    case OP_WORDCHAR:
Packit ae235b
    case OP_ANY:
Packit ae235b
    case OP_ALLANY:
Packit ae235b
    case OP_EXTUNI:
Packit ae235b
    case OP_HSPACE:
Packit ae235b
    case OP_NOT_HSPACE:
Packit ae235b
    case OP_VSPACE:
Packit ae235b
    case OP_NOT_VSPACE:
Packit ae235b
    branchlength++;
Packit ae235b
    cc++;
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* "Any newline" might match two characters, but it also might match just
Packit ae235b
    one. */
Packit ae235b
Packit ae235b
    case OP_ANYNL:
Packit ae235b
    branchlength += 1;
Packit ae235b
    cc++;
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* The single-byte matcher means we can't proceed in UTF-8 mode. (In
Packit ae235b
    non-UTF-8 mode \C will actually be turned into OP_ALLANY, so won't ever
Packit ae235b
    appear, but leave the code, just in case.) */
Packit ae235b
Packit ae235b
    case OP_ANYBYTE:
Packit ae235b
#ifdef SUPPORT_UTF
Packit ae235b
    if (utf) return -1;
Packit ae235b
#endif
Packit ae235b
    branchlength++;
Packit ae235b
    cc++;
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* For repeated character types, we have to test for \p and \P, which have
Packit ae235b
    an extra two bytes of parameters. */
Packit ae235b
Packit ae235b
    case OP_TYPESTAR:
Packit ae235b
    case OP_TYPEMINSTAR:
Packit ae235b
    case OP_TYPEQUERY:
Packit ae235b
    case OP_TYPEMINQUERY:
Packit ae235b
    case OP_TYPEPOSSTAR:
Packit ae235b
    case OP_TYPEPOSQUERY:
Packit ae235b
    if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
Packit ae235b
    cc += PRIV(OP_lengths)[op];
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    case OP_TYPEUPTO:
Packit ae235b
    case OP_TYPEMINUPTO:
Packit ae235b
    case OP_TYPEPOSUPTO:
Packit ae235b
    if (cc[1 + IMM2_SIZE] == OP_PROP
Packit ae235b
      || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;
Packit ae235b
    cc += PRIV(OP_lengths)[op];
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* Check a class for variable quantification */
Packit ae235b
Packit ae235b
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
Packit ae235b
    case OP_XCLASS:
Packit ae235b
    cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];
Packit ae235b
    /* Fall through */
Packit ae235b
#endif
Packit ae235b
Packit ae235b
    case OP_CLASS:
Packit ae235b
    case OP_NCLASS:
Packit ae235b
    cc += PRIV(OP_lengths)[OP_CLASS];
Packit ae235b
Packit ae235b
    switch (*cc)
Packit ae235b
      {
Packit ae235b
      case OP_CRPLUS:
Packit ae235b
      case OP_CRMINPLUS:
Packit ae235b
      branchlength++;
Packit ae235b
      /* Fall through */
Packit ae235b
Packit ae235b
      case OP_CRSTAR:
Packit ae235b
      case OP_CRMINSTAR:
Packit ae235b
      case OP_CRQUERY:
Packit ae235b
      case OP_CRMINQUERY:
Packit ae235b
      cc++;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      case OP_CRRANGE:
Packit ae235b
      case OP_CRMINRANGE:
Packit ae235b
      branchlength += GET2(cc,1);
Packit ae235b
      cc += 1 + 2 * IMM2_SIZE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      default:
Packit ae235b
      branchlength++;
Packit ae235b
      break;
Packit ae235b
      }
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* Backreferences and subroutine calls are treated in the same way: we find
Packit ae235b
    the minimum length for the subpattern. A recursion, however, causes an
Packit ae235b
    a flag to be set that causes the length of this branch to be ignored. The
Packit ae235b
    logic is that a recursion can only make sense if there is another
Packit ae235b
    alternation that stops the recursing. That will provide the minimum length
Packit ae235b
    (when no recursion happens). A backreference within the group that it is
Packit ae235b
    referencing behaves in the same way.
Packit ae235b
Packit ae235b
    If PCRE_JAVASCRIPT_COMPAT is set, a backreference to an unset bracket
Packit ae235b
    matches an empty string (by default it causes a matching failure), so in
Packit ae235b
    that case we must set the minimum length to zero. */
Packit ae235b
Packit ae235b
    case OP_REF:
Packit ae235b
    case OP_REFI:
Packit ae235b
    if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
Packit ae235b
      {
Packit ae235b
      ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
Packit ae235b
      if (cs == NULL) return -2;
Packit ae235b
      do ce += GET(ce, 1); while (*ce == OP_ALT);
Packit ae235b
      if (cc > cs && cc < ce)
Packit ae235b
        {
Packit ae235b
        d = 0;
Packit ae235b
        had_recurse = TRUE;
Packit ae235b
        }
Packit ae235b
      else
Packit ae235b
        {
Packit ae235b
        d = find_minlength(cs, startcode, options, recurse_depth);
Packit ae235b
        }
Packit ae235b
      }
Packit ae235b
    else d = 0;
Packit ae235b
    cc += 1 + IMM2_SIZE;
Packit ae235b
Packit ae235b
    /* Handle repeated back references */
Packit ae235b
Packit ae235b
    switch (*cc)
Packit ae235b
      {
Packit ae235b
      case OP_CRSTAR:
Packit ae235b
      case OP_CRMINSTAR:
Packit ae235b
      case OP_CRQUERY:
Packit ae235b
      case OP_CRMINQUERY:
Packit ae235b
      min = 0;
Packit ae235b
      cc++;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      case OP_CRPLUS:
Packit ae235b
      case OP_CRMINPLUS:
Packit ae235b
      min = 1;
Packit ae235b
      cc++;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      case OP_CRRANGE:
Packit ae235b
      case OP_CRMINRANGE:
Packit ae235b
      min = GET2(cc, 1);
Packit ae235b
      cc += 1 + 2 * IMM2_SIZE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      default:
Packit ae235b
      min = 1;
Packit ae235b
      break;
Packit ae235b
      }
Packit ae235b
Packit ae235b
    branchlength += min * d;
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* We can easily detect direct recursion, but not mutual recursion. This is
Packit ae235b
    caught by a recursion depth count. */
Packit ae235b
Packit ae235b
    case OP_RECURSE:
Packit ae235b
    cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
Packit ae235b
    do ce += GET(ce, 1); while (*ce == OP_ALT);
Packit ae235b
    if ((cc > cs && cc < ce) || recurse_depth > 10)
Packit ae235b
      had_recurse = TRUE;
Packit ae235b
    else
Packit ae235b
      {
Packit ae235b
      branchlength += find_minlength(cs, startcode, options, recurse_depth + 1);
Packit ae235b
      }
Packit ae235b
    cc += 1 + LINK_SIZE;
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* Anything else does not or need not match a character. We can get the
Packit ae235b
    item's length from the table, but for those that can match zero occurrences
Packit ae235b
    of a character, we must take special action for UTF-8 characters. As it
Packit ae235b
    happens, the "NOT" versions of these opcodes are used at present only for
Packit ae235b
    ASCII characters, so they could be omitted from this list. However, in
Packit ae235b
    future that may change, so we include them here so as not to leave a
Packit ae235b
    gotcha for a future maintainer. */
Packit ae235b
Packit ae235b
    case OP_UPTO:
Packit ae235b
    case OP_UPTOI:
Packit ae235b
    case OP_NOTUPTO:
Packit ae235b
    case OP_NOTUPTOI:
Packit ae235b
    case OP_MINUPTO:
Packit ae235b
    case OP_MINUPTOI:
Packit ae235b
    case OP_NOTMINUPTO:
Packit ae235b
    case OP_NOTMINUPTOI:
Packit ae235b
    case OP_POSUPTO:
Packit ae235b
    case OP_POSUPTOI:
Packit ae235b
    case OP_NOTPOSUPTO:
Packit ae235b
    case OP_NOTPOSUPTOI:
Packit ae235b
Packit ae235b
    case OP_STAR:
Packit ae235b
    case OP_STARI:
Packit ae235b
    case OP_NOTSTAR:
Packit ae235b
    case OP_NOTSTARI:
Packit ae235b
    case OP_MINSTAR:
Packit ae235b
    case OP_MINSTARI:
Packit ae235b
    case OP_NOTMINSTAR:
Packit ae235b
    case OP_NOTMINSTARI:
Packit ae235b
    case OP_POSSTAR:
Packit ae235b
    case OP_POSSTARI:
Packit ae235b
    case OP_NOTPOSSTAR:
Packit ae235b
    case OP_NOTPOSSTARI:
Packit ae235b
Packit ae235b
    case OP_QUERY:
Packit ae235b
    case OP_QUERYI:
Packit ae235b
    case OP_NOTQUERY:
Packit ae235b
    case OP_NOTQUERYI:
Packit ae235b
    case OP_MINQUERY:
Packit ae235b
    case OP_MINQUERYI:
Packit ae235b
    case OP_NOTMINQUERY:
Packit ae235b
    case OP_NOTMINQUERYI:
Packit ae235b
    case OP_POSQUERY:
Packit ae235b
    case OP_POSQUERYI:
Packit ae235b
    case OP_NOTPOSQUERY:
Packit ae235b
    case OP_NOTPOSQUERYI:
Packit ae235b
Packit ae235b
    cc += PRIV(OP_lengths)[op];
Packit ae235b
#ifdef SUPPORT_UTF
Packit ae235b
    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
Packit ae235b
#endif
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* Skip these, but we need to add in the name length. */
Packit ae235b
Packit ae235b
    case OP_MARK:
Packit ae235b
    case OP_PRUNE_ARG:
Packit ae235b
    case OP_SKIP_ARG:
Packit ae235b
    case OP_THEN_ARG:
Packit ae235b
    cc += PRIV(OP_lengths)[op] + cc[1];
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* The remaining opcodes are just skipped over. */
Packit ae235b
Packit ae235b
    case OP_CLOSE:
Packit ae235b
    case OP_COMMIT:
Packit ae235b
    case OP_FAIL:
Packit ae235b
    case OP_PRUNE:
Packit ae235b
    case OP_SET_SOM:
Packit ae235b
    case OP_SKIP:
Packit ae235b
    case OP_THEN:
Packit ae235b
    cc += PRIV(OP_lengths)[op];
Packit ae235b
    break;
Packit ae235b
Packit ae235b
    /* This should not occur: we list all opcodes explicitly so that when
Packit ae235b
    new ones get added they are properly considered. */
Packit ae235b
Packit ae235b
    default:
Packit ae235b
    return -3;
Packit ae235b
    }
Packit ae235b
  }
Packit ae235b
/* Control never gets here */
Packit ae235b
}
Packit ae235b
Packit ae235b
Packit ae235b
Packit ae235b
/*************************************************
Packit ae235b
*      Set a bit and maybe its alternate case    *
Packit ae235b
*************************************************/
Packit ae235b
Packit ae235b
/* Given a character, set its first byte's bit in the table, and also the
Packit ae235b
corresponding bit for the other version of a letter if we are caseless. In
Packit ae235b
UTF-8 mode, for characters greater than 127, we can only do the caseless thing
Packit ae235b
when Unicode property support is available.
Packit ae235b
Packit ae235b
Arguments:
Packit ae235b
  start_bits    points to the bit map
Packit ae235b
  p             points to the character
Packit ae235b
  caseless      the caseless flag
Packit ae235b
  cd            the block with char table pointers
Packit ae235b
  utf           TRUE for UTF-8 / UTF-16 mode
Packit ae235b
Packit ae235b
Returns:        pointer after the character
Packit ae235b
*/
Packit ae235b
Packit ae235b
static const pcre_uchar *
Packit ae235b
set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless,
Packit ae235b
  compile_data *cd, BOOL utf)
Packit ae235b
{
Packit ae235b
unsigned int c = *p;
Packit ae235b
Packit ae235b
#ifdef COMPILE_PCRE8
Packit ae235b
SET_BIT(c);
Packit ae235b
Packit ae235b
#ifdef SUPPORT_UTF
Packit ae235b
if (utf && c > 127)
Packit ae235b
  {
Packit ae235b
  GETCHARINC(c, p);
Packit ae235b
#ifdef SUPPORT_UCP
Packit ae235b
  if (caseless)
Packit ae235b
    {
Packit ae235b
    pcre_uchar buff[6];
Packit ae235b
    c = UCD_OTHERCASE(c);
Packit ae235b
    (void)PRIV(ord2utf)(c, buff);
Packit ae235b
    SET_BIT(buff[0]);
Packit ae235b
    }
Packit ae235b
#endif
Packit ae235b
  return p;
Packit ae235b
  }
Packit ae235b
#endif
Packit ae235b
Packit ae235b
/* Not UTF-8 mode, or character is less than 127. */
Packit ae235b
Packit ae235b
if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
Packit ae235b
return p + 1;
Packit ae235b
#endif
Packit ae235b
Packit ae235b
#ifdef COMPILE_PCRE16
Packit ae235b
if (c > 0xff)
Packit ae235b
  {
Packit ae235b
  c = 0xff;
Packit ae235b
  caseless = FALSE;
Packit ae235b
  }
Packit ae235b
SET_BIT(c);
Packit ae235b
Packit ae235b
#ifdef SUPPORT_UTF
Packit ae235b
if (utf && c > 127)
Packit ae235b
  {
Packit ae235b
  GETCHARINC(c, p);
Packit ae235b
#ifdef SUPPORT_UCP
Packit ae235b
  if (caseless)
Packit ae235b
    {
Packit ae235b
    c = UCD_OTHERCASE(c);
Packit ae235b
    if (c > 0xff)
Packit ae235b
      c = 0xff;
Packit ae235b
    SET_BIT(c);
Packit ae235b
    }
Packit ae235b
#endif
Packit ae235b
  return p;
Packit ae235b
  }
Packit ae235b
#endif
Packit ae235b
Packit ae235b
if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
Packit ae235b
return p + 1;
Packit ae235b
#endif
Packit ae235b
}
Packit ae235b
Packit ae235b
Packit ae235b
Packit ae235b
/*************************************************
Packit ae235b
*     Set bits for a positive character type     *
Packit ae235b
*************************************************/
Packit ae235b
Packit ae235b
/* This function sets starting bits for a character type. In UTF-8 mode, we can
Packit ae235b
only do a direct setting for bytes less than 128, as otherwise there can be
Packit ae235b
confusion with bytes in the middle of UTF-8 characters. In a "traditional"
Packit ae235b
environment, the tables will only recognize ASCII characters anyway, but in at
Packit ae235b
least one Windows environment, some higher bytes bits were set in the tables.
Packit ae235b
So we deal with that case by considering the UTF-8 encoding.
Packit ae235b
Packit ae235b
Arguments:
Packit ae235b
  start_bits     the starting bitmap
Packit ae235b
  cbit type      the type of character wanted
Packit ae235b
  table_limit    32 for non-UTF-8; 16 for UTF-8
Packit ae235b
  cd             the block with char table pointers
Packit ae235b
Packit ae235b
Returns:         nothing
Packit ae235b
*/
Packit ae235b
Packit ae235b
static void
Packit ae235b
set_type_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit,
Packit ae235b
  compile_data *cd)
Packit ae235b
{
Packit ae235b
int c;
Packit ae235b
for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];
Packit ae235b
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
Packit ae235b
if (table_limit == 32) return;
Packit ae235b
for (c = 128; c < 256; c++)
Packit ae235b
  {
Packit ae235b
  if ((cd->cbits[c/8] & (1 << (c&7))) != 0)
Packit ae235b
    {
Packit ae235b
    pcre_uchar buff[6];
Packit ae235b
    (void)PRIV(ord2utf)(c, buff);
Packit ae235b
    SET_BIT(buff[0]);
Packit ae235b
    }
Packit ae235b
  }
Packit ae235b
#endif
Packit ae235b
}
Packit ae235b
Packit ae235b
Packit ae235b
/*************************************************
Packit ae235b
*     Set bits for a negative character type     *
Packit ae235b
*************************************************/
Packit ae235b
Packit ae235b
/* This function sets starting bits for a negative character type such as \D.
Packit ae235b
In UTF-8 mode, we can only do a direct setting for bytes less than 128, as
Packit ae235b
otherwise there can be confusion with bytes in the middle of UTF-8 characters.
Packit ae235b
Unlike in the positive case, where we can set appropriate starting bits for
Packit ae235b
specific high-valued UTF-8 characters, in this case we have to set the bits for
Packit ae235b
all high-valued characters. The lowest is 0xc2, but we overkill by starting at
Packit ae235b
0xc0 (192) for simplicity.
Packit ae235b
Packit ae235b
Arguments:
Packit ae235b
  start_bits     the starting bitmap
Packit ae235b
  cbit type      the type of character wanted
Packit ae235b
  table_limit    32 for non-UTF-8; 16 for UTF-8
Packit ae235b
  cd             the block with char table pointers
Packit ae235b
Packit ae235b
Returns:         nothing
Packit ae235b
*/
Packit ae235b
Packit ae235b
static void
Packit ae235b
set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit,
Packit ae235b
  compile_data *cd)
Packit ae235b
{
Packit ae235b
int c;
Packit ae235b
for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];
Packit ae235b
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
Packit ae235b
if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;
Packit ae235b
#endif
Packit ae235b
}
Packit ae235b
Packit ae235b
Packit ae235b
Packit ae235b
/*************************************************
Packit ae235b
*          Create bitmap of starting bytes       *
Packit ae235b
*************************************************/
Packit ae235b
Packit ae235b
/* This function scans a compiled unanchored expression recursively and
Packit ae235b
attempts to build a bitmap of the set of possible starting bytes. As time goes
Packit ae235b
by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
Packit ae235b
useful for parenthesized groups in patterns such as (a*)b where the group
Packit ae235b
provides some optional starting bytes but scanning must continue at the outer
Packit ae235b
level to find at least one mandatory byte. At the outermost level, this
Packit ae235b
function fails unless the result is SSB_DONE.
Packit ae235b
Packit ae235b
Arguments:
Packit ae235b
  code         points to an expression
Packit ae235b
  start_bits   points to a 32-byte table, initialized to 0
Packit ae235b
  utf          TRUE if in UTF-8 / UTF-16 mode
Packit ae235b
  cd           the block with char table pointers
Packit ae235b
Packit ae235b
Returns:       SSB_FAIL     => Failed to find any starting bytes
Packit ae235b
               SSB_DONE     => Found mandatory starting bytes
Packit ae235b
               SSB_CONTINUE => Found optional starting bytes
Packit ae235b
               SSB_UNKNOWN  => Hit an unrecognized opcode
Packit ae235b
*/
Packit ae235b
Packit ae235b
static int
Packit ae235b
set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf,
Packit ae235b
  compile_data *cd)
Packit ae235b
{
Packit ae235b
int c;
Packit ae235b
int yield = SSB_DONE;
Packit ae235b
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
Packit ae235b
int table_limit = utf? 16:32;
Packit ae235b
#else
Packit ae235b
int table_limit = 32;
Packit ae235b
#endif
Packit ae235b
Packit ae235b
#if 0
Packit ae235b
/* ========================================================================= */
Packit ae235b
/* The following comment and code was inserted in January 1999. In May 2006,
Packit ae235b
when it was observed to cause compiler warnings about unused values, I took it
Packit ae235b
out again. If anybody is still using OS/2, they will have to put it back
Packit ae235b
manually. */
Packit ae235b
Packit ae235b
/* This next statement and the later reference to dummy are here in order to
Packit ae235b
trick the optimizer of the IBM C compiler for OS/2 into generating correct
Packit ae235b
code. Apparently IBM isn't going to fix the problem, and we would rather not
Packit ae235b
disable optimization (in this module it actually makes a big difference, and
Packit ae235b
the pcre module can use all the optimization it can get). */
Packit ae235b
Packit ae235b
volatile int dummy;
Packit ae235b
/* ========================================================================= */
Packit ae235b
#endif
Packit ae235b
Packit ae235b
do
Packit ae235b
  {
Packit ae235b
  BOOL try_next = TRUE;
Packit ae235b
  const pcre_uchar *tcode = code + 1 + LINK_SIZE;
Packit ae235b
Packit ae235b
  if (*code == OP_CBRA || *code == OP_SCBRA ||
Packit ae235b
      *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE;
Packit ae235b
Packit ae235b
  while (try_next)    /* Loop for items in this branch */
Packit ae235b
    {
Packit ae235b
    int rc;
Packit ae235b
Packit ae235b
    switch(*tcode)
Packit ae235b
      {
Packit ae235b
      /* If we reach something we don't understand, it means a new opcode has
Packit ae235b
      been created that hasn't been added to this code. Hopefully this problem
Packit ae235b
      will be discovered during testing. */
Packit ae235b
Packit ae235b
      default:
Packit ae235b
      return SSB_UNKNOWN;
Packit ae235b
Packit ae235b
      /* Fail for a valid opcode that implies no starting bits. */
Packit ae235b
Packit ae235b
      case OP_ACCEPT:
Packit ae235b
      case OP_ASSERT_ACCEPT:
Packit ae235b
      case OP_ALLANY:
Packit ae235b
      case OP_ANY:
Packit ae235b
      case OP_ANYBYTE:
Packit ae235b
      case OP_CIRC:
Packit ae235b
      case OP_CIRCM:
Packit ae235b
      case OP_CLOSE:
Packit ae235b
      case OP_COMMIT:
Packit ae235b
      case OP_COND:
Packit ae235b
      case OP_CREF:
Packit ae235b
      case OP_DEF:
Packit ae235b
      case OP_DOLL:
Packit ae235b
      case OP_DOLLM:
Packit ae235b
      case OP_END:
Packit ae235b
      case OP_EOD:
Packit ae235b
      case OP_EODN:
Packit ae235b
      case OP_EXTUNI:
Packit ae235b
      case OP_FAIL:
Packit ae235b
      case OP_MARK:
Packit ae235b
      case OP_NCREF:
Packit ae235b
      case OP_NOT:
Packit ae235b
      case OP_NOTEXACT:
Packit ae235b
      case OP_NOTEXACTI:
Packit ae235b
      case OP_NOTI:
Packit ae235b
      case OP_NOTMINPLUS:
Packit ae235b
      case OP_NOTMINPLUSI:
Packit ae235b
      case OP_NOTMINQUERY:
Packit ae235b
      case OP_NOTMINQUERYI:
Packit ae235b
      case OP_NOTMINSTAR:
Packit ae235b
      case OP_NOTMINSTARI:
Packit ae235b
      case OP_NOTMINUPTO:
Packit ae235b
      case OP_NOTMINUPTOI:
Packit ae235b
      case OP_NOTPLUS:
Packit ae235b
      case OP_NOTPLUSI:
Packit ae235b
      case OP_NOTPOSPLUS:
Packit ae235b
      case OP_NOTPOSPLUSI:
Packit ae235b
      case OP_NOTPOSQUERY:
Packit ae235b
      case OP_NOTPOSQUERYI:
Packit ae235b
      case OP_NOTPOSSTAR:
Packit ae235b
      case OP_NOTPOSSTARI:
Packit ae235b
      case OP_NOTPOSUPTO:
Packit ae235b
      case OP_NOTPOSUPTOI:
Packit ae235b
      case OP_NOTPROP:
Packit ae235b
      case OP_NOTQUERY:
Packit ae235b
      case OP_NOTQUERYI:
Packit ae235b
      case OP_NOTSTAR:
Packit ae235b
      case OP_NOTSTARI:
Packit ae235b
      case OP_NOTUPTO:
Packit ae235b
      case OP_NOTUPTOI:
Packit ae235b
      case OP_NOT_HSPACE:
Packit ae235b
      case OP_NOT_VSPACE:
Packit ae235b
      case OP_NRREF:
Packit ae235b
      case OP_PROP:
Packit ae235b
      case OP_PRUNE:
Packit ae235b
      case OP_PRUNE_ARG:
Packit ae235b
      case OP_RECURSE:
Packit ae235b
      case OP_REF:
Packit ae235b
      case OP_REFI:
Packit ae235b
      case OP_REVERSE:
Packit ae235b
      case OP_RREF:
Packit ae235b
      case OP_SCOND:
Packit ae235b
      case OP_SET_SOM:
Packit ae235b
      case OP_SKIP:
Packit ae235b
      case OP_SKIP_ARG:
Packit ae235b
      case OP_SOD:
Packit ae235b
      case OP_SOM:
Packit ae235b
      case OP_THEN:
Packit ae235b
      case OP_THEN_ARG:
Packit ae235b
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
Packit ae235b
      case OP_XCLASS:
Packit ae235b
#endif
Packit ae235b
      return SSB_FAIL;
Packit ae235b
Packit ae235b
      /* We can ignore word boundary tests. */
Packit ae235b
Packit ae235b
      case OP_WORD_BOUNDARY:
Packit ae235b
      case OP_NOT_WORD_BOUNDARY:
Packit ae235b
      tcode++;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      /* If we hit a bracket or a positive lookahead assertion, recurse to set
Packit ae235b
      bits from within the subpattern. If it can't find anything, we have to
Packit ae235b
      give up. If it finds some mandatory character(s), we are done for this
Packit ae235b
      branch. Otherwise, carry on scanning after the subpattern. */
Packit ae235b
Packit ae235b
      case OP_BRA:
Packit ae235b
      case OP_SBRA:
Packit ae235b
      case OP_CBRA:
Packit ae235b
      case OP_SCBRA:
Packit ae235b
      case OP_BRAPOS:
Packit ae235b
      case OP_SBRAPOS:
Packit ae235b
      case OP_CBRAPOS:
Packit ae235b
      case OP_SCBRAPOS:
Packit ae235b
      case OP_ONCE:
Packit ae235b
      case OP_ONCE_NC:
Packit ae235b
      case OP_ASSERT:
Packit ae235b
      rc = set_start_bits(tcode, start_bits, utf, cd);
Packit ae235b
      if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
Packit ae235b
      if (rc == SSB_DONE) try_next = FALSE; else
Packit ae235b
        {
Packit ae235b
        do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
Packit ae235b
        tcode += 1 + LINK_SIZE;
Packit ae235b
        }
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      /* If we hit ALT or KET, it means we haven't found anything mandatory in
Packit ae235b
      this branch, though we might have found something optional. For ALT, we
Packit ae235b
      continue with the next alternative, but we have to arrange that the final
Packit ae235b
      result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
Packit ae235b
      return SSB_CONTINUE: if this is the top level, that indicates failure,
Packit ae235b
      but after a nested subpattern, it causes scanning to continue. */
Packit ae235b
Packit ae235b
      case OP_ALT:
Packit ae235b
      yield = SSB_CONTINUE;
Packit ae235b
      try_next = FALSE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      case OP_KET:
Packit ae235b
      case OP_KETRMAX:
Packit ae235b
      case OP_KETRMIN:
Packit ae235b
      case OP_KETRPOS:
Packit ae235b
      return SSB_CONTINUE;
Packit ae235b
Packit ae235b
      /* Skip over callout */
Packit ae235b
Packit ae235b
      case OP_CALLOUT:
Packit ae235b
      tcode += 2 + 2*LINK_SIZE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      /* Skip over lookbehind and negative lookahead assertions */
Packit ae235b
Packit ae235b
      case OP_ASSERT_NOT:
Packit ae235b
      case OP_ASSERTBACK:
Packit ae235b
      case OP_ASSERTBACK_NOT:
Packit ae235b
      do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
Packit ae235b
      tcode += 1 + LINK_SIZE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      /* BRAZERO does the bracket, but carries on. */
Packit ae235b
Packit ae235b
      case OP_BRAZERO:
Packit ae235b
      case OP_BRAMINZERO:
Packit ae235b
      case OP_BRAPOSZERO:
Packit ae235b
      rc = set_start_bits(++tcode, start_bits, utf, cd);
Packit ae235b
      if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
Packit ae235b
/* =========================================================================
Packit ae235b
      See the comment at the head of this function concerning the next line,
Packit ae235b
      which was an old fudge for the benefit of OS/2.
Packit ae235b
      dummy = 1;
Packit ae235b
  ========================================================================= */
Packit ae235b
      do tcode += GET(tcode,1); while (*tcode == OP_ALT);
Packit ae235b
      tcode += 1 + LINK_SIZE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      /* SKIPZERO skips the bracket. */
Packit ae235b
Packit ae235b
      case OP_SKIPZERO:
Packit ae235b
      tcode++;
Packit ae235b
      do tcode += GET(tcode,1); while (*tcode == OP_ALT);
Packit ae235b
      tcode += 1 + LINK_SIZE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      /* Single-char * or ? sets the bit and tries the next item */
Packit ae235b
Packit ae235b
      case OP_STAR:
Packit ae235b
      case OP_MINSTAR:
Packit ae235b
      case OP_POSSTAR:
Packit ae235b
      case OP_QUERY:
Packit ae235b
      case OP_MINQUERY:
Packit ae235b
      case OP_POSQUERY:
Packit ae235b
      tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      case OP_STARI:
Packit ae235b
      case OP_MINSTARI:
Packit ae235b
      case OP_POSSTARI:
Packit ae235b
      case OP_QUERYI:
Packit ae235b
      case OP_MINQUERYI:
Packit ae235b
      case OP_POSQUERYI:
Packit ae235b
      tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      /* Single-char upto sets the bit and tries the next */
Packit ae235b
Packit ae235b
      case OP_UPTO:
Packit ae235b
      case OP_MINUPTO:
Packit ae235b
      case OP_POSUPTO:
Packit ae235b
      tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf);
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      case OP_UPTOI:
Packit ae235b
      case OP_MINUPTOI:
Packit ae235b
      case OP_POSUPTOI:
Packit ae235b
      tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf);
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      /* At least one single char sets the bit and stops */
Packit ae235b
Packit ae235b
      case OP_EXACT:
Packit ae235b
      tcode += IMM2_SIZE;
Packit ae235b
      /* Fall through */
Packit ae235b
      case OP_CHAR:
Packit ae235b
      case OP_PLUS:
Packit ae235b
      case OP_MINPLUS:
Packit ae235b
      case OP_POSPLUS:
Packit ae235b
      (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
Packit ae235b
      try_next = FALSE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      case OP_EXACTI:
Packit ae235b
      tcode += IMM2_SIZE;
Packit ae235b
      /* Fall through */
Packit ae235b
      case OP_CHARI:
Packit ae235b
      case OP_PLUSI:
Packit ae235b
      case OP_MINPLUSI:
Packit ae235b
      case OP_POSPLUSI:
Packit ae235b
      (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
Packit ae235b
      try_next = FALSE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      /* Special spacing and line-terminating items. These recognize specific
Packit ae235b
      lists of characters. The difference between VSPACE and ANYNL is that the
Packit ae235b
      latter can match the two-character CRLF sequence, but that is not
Packit ae235b
      relevant for finding the first character, so their code here is
Packit ae235b
      identical. */
Packit ae235b
Packit ae235b
      case OP_HSPACE:
Packit ae235b
      SET_BIT(0x09);
Packit ae235b
      SET_BIT(0x20);
Packit ae235b
#ifdef SUPPORT_UTF
Packit ae235b
      if (utf)
Packit ae235b
        {
Packit ae235b
#ifdef COMPILE_PCRE8
Packit ae235b
        SET_BIT(0xC2);  /* For U+00A0 */
Packit ae235b
        SET_BIT(0xE1);  /* For U+1680, U+180E */
Packit ae235b
        SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
Packit ae235b
        SET_BIT(0xE3);  /* For U+3000 */
Packit ae235b
#endif
Packit ae235b
#ifdef COMPILE_PCRE16
Packit ae235b
        SET_BIT(0xA0);
Packit ae235b
        SET_BIT(0xFF);  /* For characters > 255 */
Packit ae235b
#endif
Packit ae235b
        }
Packit ae235b
      else
Packit ae235b
#endif /* SUPPORT_UTF */
Packit ae235b
        {
Packit ae235b
        SET_BIT(0xA0);
Packit ae235b
#ifdef COMPILE_PCRE16
Packit ae235b
        SET_BIT(0xFF);  /* For characters > 255 */
Packit ae235b
#endif
Packit ae235b
        }
Packit ae235b
      try_next = FALSE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      case OP_ANYNL:
Packit ae235b
      case OP_VSPACE:
Packit ae235b
      SET_BIT(0x0A);
Packit ae235b
      SET_BIT(0x0B);
Packit ae235b
      SET_BIT(0x0C);
Packit ae235b
      SET_BIT(0x0D);
Packit ae235b
#ifdef SUPPORT_UTF
Packit ae235b
      if (utf)
Packit ae235b
        {
Packit ae235b
#ifdef COMPILE_PCRE8
Packit ae235b
        SET_BIT(0xC2);  /* For U+0085 */
Packit ae235b
        SET_BIT(0xE2);  /* For U+2028, U+2029 */
Packit ae235b
#endif
Packit ae235b
#ifdef COMPILE_PCRE16
Packit ae235b
        SET_BIT(0x85);
Packit ae235b
        SET_BIT(0xFF);  /* For characters > 255 */
Packit ae235b
#endif
Packit ae235b
        }
Packit ae235b
      else
Packit ae235b
#endif /* SUPPORT_UTF */
Packit ae235b
        {
Packit ae235b
        SET_BIT(0x85);
Packit ae235b
#ifdef COMPILE_PCRE16
Packit ae235b
        SET_BIT(0xFF);  /* For characters > 255 */
Packit ae235b
#endif
Packit ae235b
        }
Packit ae235b
      try_next = FALSE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      /* Single character types set the bits and stop. Note that if PCRE_UCP
Packit ae235b
      is set, we do not see these op codes because \d etc are converted to
Packit ae235b
      properties. Therefore, these apply in the case when only characters less
Packit ae235b
      than 256 are recognized to match the types. */
Packit ae235b
Packit ae235b
      case OP_NOT_DIGIT:
Packit ae235b
      set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
Packit ae235b
      try_next = FALSE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      case OP_DIGIT:
Packit ae235b
      set_type_bits(start_bits, cbit_digit, table_limit, cd);
Packit ae235b
      try_next = FALSE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      /* The cbit_space table has vertical tab as whitespace; we have to
Packit ae235b
      ensure it is set as not whitespace. */
Packit ae235b
Packit ae235b
      case OP_NOT_WHITESPACE:
Packit ae235b
      set_nottype_bits(start_bits, cbit_space, table_limit, cd);
Packit ae235b
      start_bits[1] |= 0x08;
Packit ae235b
      try_next = FALSE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      /* The cbit_space table has vertical tab as whitespace; we have to
Packit ae235b
      not set it from the table. */
Packit ae235b
Packit ae235b
      case OP_WHITESPACE:
Packit ae235b
      c = start_bits[1];    /* Save in case it was already set */
Packit ae235b
      set_type_bits(start_bits, cbit_space, table_limit, cd);
Packit ae235b
      start_bits[1] = (start_bits[1] & ~0x08) | c;
Packit ae235b
      try_next = FALSE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      case OP_NOT_WORDCHAR:
Packit ae235b
      set_nottype_bits(start_bits, cbit_word, table_limit, cd);
Packit ae235b
      try_next = FALSE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      case OP_WORDCHAR:
Packit ae235b
      set_type_bits(start_bits, cbit_word, table_limit, cd);
Packit ae235b
      try_next = FALSE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      /* One or more character type fudges the pointer and restarts, knowing
Packit ae235b
      it will hit a single character type and stop there. */
Packit ae235b
Packit ae235b
      case OP_TYPEPLUS:
Packit ae235b
      case OP_TYPEMINPLUS:
Packit ae235b
      case OP_TYPEPOSPLUS:
Packit ae235b
      tcode++;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      case OP_TYPEEXACT:
Packit ae235b
      tcode += 1 + IMM2_SIZE;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      /* Zero or more repeats of character types set the bits and then
Packit ae235b
      try again. */
Packit ae235b
Packit ae235b
      case OP_TYPEUPTO:
Packit ae235b
      case OP_TYPEMINUPTO:
Packit ae235b
      case OP_TYPEPOSUPTO:
Packit ae235b
      tcode += IMM2_SIZE;  /* Fall through */
Packit ae235b
Packit ae235b
      case OP_TYPESTAR:
Packit ae235b
      case OP_TYPEMINSTAR:
Packit ae235b
      case OP_TYPEPOSSTAR:
Packit ae235b
      case OP_TYPEQUERY:
Packit ae235b
      case OP_TYPEMINQUERY:
Packit ae235b
      case OP_TYPEPOSQUERY:
Packit ae235b
      switch(tcode[1])
Packit ae235b
        {
Packit ae235b
        default:
Packit ae235b
        case OP_ANY:
Packit ae235b
        case OP_ALLANY:
Packit ae235b
        return SSB_FAIL;
Packit ae235b
Packit ae235b
        case OP_HSPACE:
Packit ae235b
        SET_BIT(0x09);
Packit ae235b
        SET_BIT(0x20);
Packit ae235b
#ifdef SUPPORT_UTF
Packit ae235b
        if (utf)
Packit ae235b
          {
Packit ae235b
#ifdef COMPILE_PCRE8
Packit ae235b
          SET_BIT(0xC2);  /* For U+00A0 */
Packit ae235b
          SET_BIT(0xE1);  /* For U+1680, U+180E */
Packit ae235b
          SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
Packit ae235b
          SET_BIT(0xE3);  /* For U+3000 */
Packit ae235b
#endif
Packit ae235b
#ifdef COMPILE_PCRE16
Packit ae235b
          SET_BIT(0xA0);
Packit ae235b
          SET_BIT(0xFF);  /* For characters > 255 */
Packit ae235b
#endif
Packit ae235b
          }
Packit ae235b
        else
Packit ae235b
#endif /* SUPPORT_UTF */
Packit ae235b
          SET_BIT(0xA0);
Packit ae235b
        break;
Packit ae235b
Packit ae235b
        case OP_ANYNL:
Packit ae235b
        case OP_VSPACE:
Packit ae235b
        SET_BIT(0x0A);
Packit ae235b
        SET_BIT(0x0B);
Packit ae235b
        SET_BIT(0x0C);
Packit ae235b
        SET_BIT(0x0D);
Packit ae235b
#ifdef SUPPORT_UTF
Packit ae235b
        if (utf)
Packit ae235b
          {
Packit ae235b
#ifdef COMPILE_PCRE8
Packit ae235b
          SET_BIT(0xC2);  /* For U+0085 */
Packit ae235b
          SET_BIT(0xE2);  /* For U+2028, U+2029 */
Packit ae235b
#endif
Packit ae235b
#ifdef COMPILE_PCRE16
Packit ae235b
          SET_BIT(0x85);
Packit ae235b
          SET_BIT(0xFF);  /* For characters > 255 */
Packit ae235b
#endif
Packit ae235b
          }
Packit ae235b
        else
Packit ae235b
#endif /* SUPPORT_UTF */
Packit ae235b
          SET_BIT(0x85);
Packit ae235b
        break;
Packit ae235b
Packit ae235b
        case OP_NOT_DIGIT:
Packit ae235b
        set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
Packit ae235b
        break;
Packit ae235b
Packit ae235b
        case OP_DIGIT:
Packit ae235b
        set_type_bits(start_bits, cbit_digit, table_limit, cd);
Packit ae235b
        break;
Packit ae235b
Packit ae235b
        /* The cbit_space table has vertical tab as whitespace; we have to
Packit ae235b
        ensure it gets set as not whitespace. */
Packit ae235b
Packit ae235b
        case OP_NOT_WHITESPACE:
Packit ae235b
        set_nottype_bits(start_bits, cbit_space, table_limit, cd);
Packit ae235b
        start_bits[1] |= 0x08;
Packit ae235b
        break;
Packit ae235b
Packit ae235b
        /* The cbit_space table has vertical tab as whitespace; we have to
Packit ae235b
        avoid setting it. */
Packit ae235b
Packit ae235b
        case OP_WHITESPACE:
Packit ae235b
        c = start_bits[1];    /* Save in case it was already set */
Packit ae235b
        set_type_bits(start_bits, cbit_space, table_limit, cd);
Packit ae235b
        start_bits[1] = (start_bits[1] & ~0x08) | c;
Packit ae235b
        break;
Packit ae235b
Packit ae235b
        case OP_NOT_WORDCHAR:
Packit ae235b
        set_nottype_bits(start_bits, cbit_word, table_limit, cd);
Packit ae235b
        break;
Packit ae235b
Packit ae235b
        case OP_WORDCHAR:
Packit ae235b
        set_type_bits(start_bits, cbit_word, table_limit, cd);
Packit ae235b
        break;
Packit ae235b
        }
Packit ae235b
Packit ae235b
      tcode += 2;
Packit ae235b
      break;
Packit ae235b
Packit ae235b
      /* Character class where all the information is in a bit map: set the
Packit ae235b
      bits and either carry on or not, according to the repeat count. If it was
Packit ae235b
      a negative class, and we are operating with UTF-8 characters, any byte
Packit ae235b
      with a value >= 0xc4 is a potentially valid starter because it starts a
Packit ae235b
      character with a value > 255. */
Packit ae235b
Packit ae235b
      case OP_NCLASS:
Packit ae235b
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
Packit ae235b
      if (utf)
Packit ae235b
        {
Packit ae235b
        start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc8 */
Packit ae235b
        memset(start_bits+25, 0xff, 7);      /* Bits for 0xc9 - 0xff */
Packit ae235b
        }
Packit ae235b
#endif
Packit ae235b
#ifdef COMPILE_PCRE16
Packit ae235b
      SET_BIT(0xFF);                         /* For characters > 255 */
Packit ae235b
#endif
Packit ae235b
      /* Fall through */
Packit ae235b
Packit ae235b
      case OP_CLASS:
Packit ae235b
        {
Packit ae235b
        pcre_uint8 *map;
Packit ae235b
        tcode++;
Packit ae235b
        map = (pcre_uint8 *)tcode;
Packit ae235b
Packit ae235b
        /* In UTF-8 mode, the bits in a bit map correspond to character
Packit ae235b
        values, not to byte values. However, the bit map we are constructing is
Packit ae235b
        for byte values. So we have to do a conversion for characters whose
Packit ae235b
        value is > 127. In fact, there are only two possible starting bytes for
Packit ae235b
        characters in the range 128 - 255. */
Packit ae235b
Packit ae235b
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
Packit ae235b
        if (utf)
Packit ae235b
          {
Packit ae235b
          for (c = 0; c < 16; c++) start_bits[c] |= map[c];
Packit ae235b
          for (c = 128; c < 256; c++)
Packit ae235b
            {
Packit ae235b
            if ((map[c/8] && (1 << (c&7))) != 0)
Packit ae235b
              {
Packit ae235b
              int d = (c >> 6) | 0xc0;            /* Set bit for this starter */
Packit ae235b
              start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */
Packit ae235b
              c = (c & 0xc0) + 0x40 - 1;          /* next relevant character. */
Packit ae235b
              }
Packit ae235b
            }
Packit ae235b
          }
Packit ae235b
        else
Packit ae235b
#endif
Packit ae235b
          {
Packit ae235b
          /* In non-UTF-8 mode, the two bit maps are completely compatible. */
Packit ae235b
          for (c = 0; c < 32; c++) start_bits[c] |= map[c];
Packit ae235b
          }
Packit ae235b
Packit ae235b
        /* Advance past the bit map, and act on what follows. For a zero
Packit ae235b
        minimum repeat, continue; otherwise stop processing. */
Packit ae235b
Packit ae235b
        tcode += 32 / sizeof(pcre_uchar);
Packit ae235b
        switch (*tcode)
Packit ae235b
          {
Packit ae235b
          case OP_CRSTAR:
Packit ae235b
          case OP_CRMINSTAR:
Packit ae235b
          case OP_CRQUERY:
Packit ae235b
          case OP_CRMINQUERY:
Packit ae235b
          tcode++;
Packit ae235b
          break;
Packit ae235b
Packit ae235b
          case OP_CRRANGE:
Packit ae235b
          case OP_CRMINRANGE:
Packit ae235b
          if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
Packit ae235b
            else try_next = FALSE;
Packit ae235b
          break;
Packit ae235b
Packit ae235b
          default:
Packit ae235b
          try_next = FALSE;
Packit ae235b
          break;
Packit ae235b
          }
Packit ae235b
        }
Packit ae235b
      break; /* End of bitmap class handling */
Packit ae235b
Packit ae235b
      }      /* End of switch */
Packit ae235b
    }        /* End of try_next loop */
Packit ae235b
Packit ae235b
  code += GET(code, 1);   /* Advance to next branch */
Packit ae235b
  }
Packit ae235b
while (*code == OP_ALT);
Packit ae235b
return yield;
Packit ae235b
}
Packit ae235b
Packit ae235b
Packit ae235b
Packit ae235b
Packit ae235b
Packit ae235b
/*************************************************
Packit ae235b
*          Study a compiled expression           *
Packit ae235b
*************************************************/
Packit ae235b
Packit ae235b
/* This function is handed a compiled expression that it must study to produce
Packit ae235b
information that will speed up the matching. It returns a pcre[16]_extra block
Packit ae235b
which then gets handed back to pcre_exec().
Packit ae235b
Packit ae235b
Arguments:
Packit ae235b
  re        points to the compiled expression
Packit ae235b
  options   contains option bits
Packit ae235b
  errorptr  points to where to place error messages;
Packit ae235b
            set NULL unless error
Packit ae235b
Packit ae235b
Returns:    pointer to a pcre[16]_extra block, with study_data filled in and
Packit ae235b
              the appropriate flags set;
Packit ae235b
            NULL on error or if no optimization possible
Packit ae235b
*/
Packit ae235b
Packit ae235b
#ifdef COMPILE_PCRE8
Packit ae235b
PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
Packit ae235b
pcre_study(const pcre *external_re, int options, const char **errorptr)
Packit ae235b
#else
Packit ae235b
PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION
Packit ae235b
pcre16_study(const pcre16 *external_re, int options, const char **errorptr)
Packit ae235b
#endif
Packit ae235b
{
Packit ae235b
int min;
Packit ae235b
BOOL bits_set = FALSE;
Packit ae235b
pcre_uint8 start_bits[32];
Packit ae235b
PUBL(extra) *extra = NULL;
Packit ae235b
pcre_study_data *study;
Packit ae235b
const pcre_uint8 *tables;
Packit ae235b
pcre_uchar *code;
Packit ae235b
compile_data compile_block;
Packit ae235b
const REAL_PCRE *re = (const REAL_PCRE *)external_re;
Packit ae235b
Packit ae235b
*errorptr = NULL;
Packit ae235b
Packit ae235b
if (re == NULL || re->magic_number != MAGIC_NUMBER)
Packit ae235b
  {
Packit ae235b
  *errorptr = "argument is not a compiled regular expression";
Packit ae235b
  return NULL;
Packit ae235b
  }
Packit ae235b
Packit ae235b
if ((re->flags & PCRE_MODE) == 0)
Packit ae235b
  {
Packit ae235b
#ifdef COMPILE_PCRE8
Packit ae235b
  *errorptr = "argument is compiled in 16 bit mode";
Packit ae235b
#else
Packit ae235b
  *errorptr = "argument is compiled in 8 bit mode";
Packit ae235b
#endif
Packit ae235b
  return NULL;
Packit ae235b
  }
Packit ae235b
Packit ae235b
if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
Packit ae235b
  {
Packit ae235b
  *errorptr = "unknown or incorrect option bit(s) set";
Packit ae235b
  return NULL;
Packit ae235b
  }
Packit ae235b
Packit ae235b
code = (pcre_uchar *)re + re->name_table_offset +
Packit ae235b
  (re->name_count * re->name_entry_size);
Packit ae235b
Packit ae235b
/* For an anchored pattern, or an unanchored pattern that has a first char, or
Packit ae235b
a multiline pattern that matches only at "line starts", there is no point in
Packit ae235b
seeking a list of starting bytes. */
Packit ae235b
Packit ae235b
if ((re->options & PCRE_ANCHORED) == 0 &&
Packit ae235b
    (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0)
Packit ae235b
  {
Packit ae235b
  int rc;
Packit ae235b
Packit ae235b
  /* Set the character tables in the block that is passed around */
Packit ae235b
Packit ae235b
  tables = re->tables;
Packit ae235b
Packit ae235b
#ifdef COMPILE_PCRE8
Packit ae235b
  if (tables == NULL)
Packit ae235b
    (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
Packit ae235b
    (void *)(&tables));
Packit ae235b
#else
Packit ae235b
  if (tables == NULL)
Packit ae235b
    (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
Packit ae235b
    (void *)(&tables));
Packit ae235b
#endif
Packit ae235b
Packit ae235b
  compile_block.lcc = tables + lcc_offset;
Packit ae235b
  compile_block.fcc = tables + fcc_offset;
Packit ae235b
  compile_block.cbits = tables + cbits_offset;
Packit ae235b
  compile_block.ctypes = tables + ctypes_offset;
Packit ae235b
Packit ae235b
  /* See if we can find a fixed set of initial characters for the pattern. */
Packit ae235b
Packit ae235b
  memset(start_bits, 0, 32 * sizeof(pcre_uint8));
Packit ae235b
  rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
Packit ae235b
    &compile_block);
Packit ae235b
  bits_set = rc == SSB_DONE;
Packit ae235b
  if (rc == SSB_UNKNOWN)
Packit ae235b
    {
Packit ae235b
    *errorptr = "internal error: opcode not recognized";
Packit ae235b
    return NULL;
Packit ae235b
    }
Packit ae235b
  }
Packit ae235b
Packit ae235b
/* Find the minimum length of subject string. */
Packit ae235b
Packit ae235b
switch(min = find_minlength(code, code, re->options, 0))
Packit ae235b
  {
Packit ae235b
  case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
Packit ae235b
  case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
Packit ae235b
  default: break;
Packit ae235b
  }
Packit ae235b
Packit ae235b
/* If a set of starting bytes has been identified, or if the minimum length is
Packit ae235b
greater than zero, or if JIT optimization has been requested, get a
Packit ae235b
pcre[16]_extra block and a pcre_study_data block. The study data is put in the
Packit ae235b
latter, which is pointed to by the former, which may also get additional data
Packit ae235b
set later by the calling program. At the moment, the size of pcre_study_data
Packit ae235b
is fixed. We nevertheless save it in a field for returning via the
Packit ae235b
pcre_fullinfo() function so that if it becomes variable in the future,
Packit ae235b
we don't have to change that code. */
Packit ae235b
Packit ae235b
if (bits_set || min > 0
Packit ae235b
#ifdef SUPPORT_JIT
Packit ae235b
    || (options & (PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
Packit ae235b
                 | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)) != 0
Packit ae235b
#endif
Packit ae235b
  )
Packit ae235b
  {
Packit ae235b
  extra = (PUBL(extra) *)(PUBL(malloc))
Packit ae235b
    (sizeof(PUBL(extra)) + sizeof(pcre_study_data));
Packit ae235b
  if (extra == NULL)
Packit ae235b
    {
Packit ae235b
    *errorptr = "failed to get memory";
Packit ae235b
    return NULL;
Packit ae235b
    }
Packit ae235b
Packit ae235b
  study = (pcre_study_data *)((char *)extra + sizeof(PUBL(extra)));
Packit ae235b
  extra->flags = PCRE_EXTRA_STUDY_DATA;
Packit ae235b
  extra->study_data = study;
Packit ae235b
Packit ae235b
  study->size = sizeof(pcre_study_data);
Packit ae235b
  study->flags = 0;
Packit ae235b
Packit ae235b
  /* Set the start bits always, to avoid unset memory errors if the
Packit ae235b
  study data is written to a file, but set the flag only if any of the bits
Packit ae235b
  are set, to save time looking when none are. */
Packit ae235b
Packit ae235b
  if (bits_set)
Packit ae235b
    {
Packit ae235b
    study->flags |= PCRE_STUDY_MAPPED;
Packit ae235b
    memcpy(study->start_bits, start_bits, sizeof(start_bits));
Packit ae235b
    }
Packit ae235b
  else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8));
Packit ae235b
Packit ae235b
#ifdef PCRE_DEBUG
Packit ae235b
  if (bits_set)
Packit ae235b
    {
Packit ae235b
    pcre_uint8 *ptr = start_bits;
Packit ae235b
    int i;
Packit ae235b
Packit ae235b
    printf("Start bits:\n");
Packit ae235b
    for (i = 0; i < 32; i++)
Packit ae235b
      printf("%3d: %02x%s", i * 8, *ptr++, ((i + 1) & 0x7) != 0? " " : "\n");
Packit ae235b
    }
Packit ae235b
#endif
Packit ae235b
Packit ae235b
  /* Always set the minlength value in the block, because the JIT compiler
Packit ae235b
  makes use of it. However, don't set the bit unless the length is greater than
Packit ae235b
  zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time
Packit ae235b
  checking the zero case. */
Packit ae235b
Packit ae235b
  if (min > 0)
Packit ae235b
    {
Packit ae235b
    study->flags |= PCRE_STUDY_MINLEN;
Packit ae235b
    study->minlength = min;
Packit ae235b
    }
Packit ae235b
  else study->minlength = 0;
Packit ae235b
Packit ae235b
  /* If JIT support was compiled and requested, attempt the JIT compilation.
Packit ae235b
  If no starting bytes were found, and the minimum length is zero, and JIT
Packit ae235b
  compilation fails, abandon the extra block and return NULL. */
Packit ae235b
Packit ae235b
#ifdef SUPPORT_JIT
Packit ae235b
  extra->executable_jit = NULL;
Packit ae235b
  if ((options & PCRE_STUDY_JIT_COMPILE) != 0)
Packit ae235b
    PRIV(jit_compile)(re, extra, JIT_COMPILE);
Packit ae235b
  if ((options & PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE) != 0)
Packit ae235b
    PRIV(jit_compile)(re, extra, JIT_PARTIAL_SOFT_COMPILE);
Packit ae235b
  if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0)
Packit ae235b
    PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE);
Packit ae235b
Packit ae235b
  if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0)
Packit ae235b
    {
Packit ae235b
#ifdef COMPILE_PCRE8
Packit ae235b
    pcre_free_study(extra);
Packit ae235b
#endif
Packit ae235b
#ifdef COMPILE_PCRE16
Packit ae235b
    pcre16_free_study(extra);
Packit ae235b
#endif
Packit ae235b
    extra = NULL;
Packit ae235b
    }
Packit ae235b
#endif
Packit ae235b
  }
Packit ae235b
Packit ae235b
return extra;
Packit ae235b
}
Packit ae235b
Packit ae235b
Packit ae235b
/*************************************************
Packit ae235b
*          Free the study data                   *
Packit ae235b
*************************************************/
Packit ae235b
Packit ae235b
/* This function frees the memory that was obtained by pcre_study().
Packit ae235b
Packit ae235b
Argument:   a pointer to the pcre[16]_extra block
Packit ae235b
Returns:    nothing
Packit ae235b
*/
Packit ae235b
Packit ae235b
#ifdef COMPILE_PCRE8
Packit ae235b
PCRE_EXP_DEFN void
Packit ae235b
pcre_free_study(pcre_extra *extra)
Packit ae235b
#else
Packit ae235b
PCRE_EXP_DEFN void
Packit ae235b
pcre16_free_study(pcre16_extra *extra)
Packit ae235b
#endif
Packit ae235b
{
Packit ae235b
if (extra == NULL)
Packit ae235b
  return;
Packit ae235b
#ifdef SUPPORT_JIT
Packit ae235b
if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
Packit ae235b
     extra->executable_jit != NULL)
Packit ae235b
  PRIV(jit_free)(extra->executable_jit);
Packit ae235b
#endif
Packit ae235b
PUBL(free)(extra);
Packit ae235b
}
Packit ae235b
Packit ae235b
/* End of pcre_study.c */