Blame retoglob.c

Packit a69f91
/*
Packit a69f91
 * re2glob - C implementation
Packit a69f91
 * (c) 2007 ActiveState Software Inc.
Packit a69f91
 */
Packit a69f91
Packit a69f91
#include <tcl.h>
Packit a69f91
Packit a69f91
#define DEBUG 0
Packit a69f91
Packit a69f91
static void
Packit a69f91
ExpChopNested _ANSI_ARGS_ ((Tcl_UniChar** xstr,
Packit a69f91
			    int*          xstrlen,
Packit a69f91
			    Tcl_UniChar   open,
Packit a69f91
			    Tcl_UniChar   close));
Packit a69f91
Packit a69f91
static Tcl_UniChar*
Packit a69f91
ExpLiteral _ANSI_ARGS_ ((Tcl_UniChar* nexto,
Packit a69f91
			 Tcl_UniChar* str,
Packit a69f91
			 int          strlen));
Packit a69f91
Packit a69f91
static Tcl_UniChar*
Packit a69f91
ExpCollapseStar _ANSI_ARGS_ ((Tcl_UniChar* src,
Packit a69f91
			      Tcl_UniChar* last));
Packit a69f91
static Tcl_UniChar*
Packit a69f91
ExpCollapseQForward _ANSI_ARGS_ ((Tcl_UniChar* src,
Packit a69f91
				  Tcl_UniChar* last));
Packit a69f91
Packit a69f91
static Tcl_UniChar*
Packit a69f91
ExpCollapseQBack _ANSI_ARGS_ ((Tcl_UniChar* src,
Packit a69f91
			       Tcl_UniChar* last));
Packit a69f91
Packit a69f91
static Tcl_UniChar
Packit a69f91
ExpBackslash _ANSI_ARGS_ ((char prefix,
Packit a69f91
			 Tcl_UniChar* str,
Packit a69f91
			 int          strlen));
Packit a69f91
Packit a69f91
static int
Packit a69f91
ExpCountStar _ANSI_ARGS_ ((Tcl_UniChar* src, Tcl_UniChar* last));
Packit a69f91
Packit a69f91
Packit a69f91
static char*
Packit a69f91
xxx (Tcl_UniChar* x, int xl)
Packit a69f91
{
Packit a69f91
  static Tcl_DString ds;
Packit a69f91
  Tcl_DStringInit (&ds);
Packit a69f91
  return Tcl_UniCharToUtfDString (x,xl,&ds);
Packit a69f91
}
Packit a69f91
Packit a69f91
Packit a69f91
Tcl_Obj*
Packit a69f91
exp_retoglob (
Packit a69f91
    Tcl_UniChar* str,
Packit a69f91
    int          strlen)
Packit a69f91
{
Packit a69f91
  /*
Packit a69f91
   * Output: x2 size of input (literal where every character has to be
Packit a69f91
   * quoted.
Packit a69f91
   * Location: For next translated unit, in output.
Packit a69f91
   * Size of last generated unit, in characters.
Packit a69f91
   * Stack of output locations at opening parens. x1 size of input.
Packit a69f91
   * Location for next location on stack.
Packit a69f91
   */
Packit a69f91
Packit a69f91
  static Tcl_UniChar litprefix [] = {'*','*','*','='};
Packit a69f91
  static Tcl_UniChar areprefix [] = {'*','*','*',':'};
Packit a69f91
  static Tcl_UniChar areopts   [] = {'(','?'};
Packit a69f91
  static Tcl_UniChar nocapture [] = {'?',':'};
Packit a69f91
  static Tcl_UniChar lookhas   [] = {'?','='};
Packit a69f91
  static Tcl_UniChar looknot   [] = {'?','!'};
Packit a69f91
  static Tcl_UniChar xcomment  [] = {'?','#'};
Packit a69f91
Packit a69f91
  static Tcl_UniChar classa  [] = {'[','.'};
Packit a69f91
  static Tcl_UniChar classb  [] = {'[','='};
Packit a69f91
  static Tcl_UniChar classc  [] = {'[',':'};
Packit a69f91
Packit a69f91
Packit a69f91
  int lastsz, expanded;
Packit a69f91
  Tcl_UniChar*  out;
Packit a69f91
  Tcl_UniChar*  nexto;
Packit a69f91
  Tcl_UniChar** paren;
Packit a69f91
  Tcl_UniChar** nextp;
Packit a69f91
  Tcl_Obj*     glob = NULL;
Packit a69f91
  Tcl_UniChar* mark;
Packit a69f91
  Tcl_UniChar  ch;
Packit a69f91
Packit a69f91
  /*
Packit a69f91
   * Set things up.
Packit a69f91
   */
Packit a69f91
Packit a69f91
  out    = nexto = (Tcl_UniChar*)  Tcl_Alloc (strlen*2*sizeof (Tcl_UniChar));
Packit a69f91
  paren  = nextp = (Tcl_UniChar**) Tcl_Alloc (strlen*  sizeof (Tcl_UniChar*));
Packit a69f91
  lastsz = -1;
Packit a69f91
  expanded = 0;
Packit a69f91
Packit a69f91
  /*
Packit a69f91
   * Start processing ...
Packit a69f91
   */
Packit a69f91
Packit a69f91
#define CHOP(n)  {str += (n); strlen -= (n);}
Packit a69f91
#define CHOPC(c) {while (*str != (c) && strlen) CHOP(1) ;}
Packit a69f91
#define EMIT(c)  {lastsz = 1; *nexto++ = (c);}
Packit a69f91
#define EMITX(c) {lastsz++;   *nexto++ = (c);}
Packit a69f91
#define MATCH(lit) ((strlen >= (sizeof (lit)/sizeof (Tcl_UniChar))) && (0 == Tcl_UniCharNcmp (str,(lit),sizeof(lit)/sizeof (Tcl_UniChar))))
Packit a69f91
#define MATCHC(c) (strlen && (*str == (c)))
Packit a69f91
#define PUSHPAREN {*nextp++ = nexto;}
Packit a69f91
#define UNEMIT {nexto -= lastsz; lastsz = -1;}
Packit a69f91
  /* Tcl_UniCharIsDigit ? */
Packit a69f91
#define MATCH_DIGIT (MATCHC ('0') || MATCHC ('1') || \
Packit a69f91
	  MATCHC ('2') || MATCHC ('3') || \
Packit a69f91
	  MATCHC ('4') || MATCHC ('5') || \
Packit a69f91
	  MATCHC ('6') || MATCHC ('7') || \
Packit a69f91
	  MATCHC ('8') || MATCHC ('9'))
Packit a69f91
#define MATCH_HEXDIGIT (MATCH_DIGIT || \
Packit a69f91
		       MATCHC ('a') || MATCHC ('A') || \
Packit a69f91
		       MATCHC ('b') || MATCHC ('B') || \
Packit a69f91
		       MATCHC ('c') || MATCHC ('C') || \
Packit a69f91
		       MATCHC ('d') || MATCHC ('D') || \
Packit a69f91
		       MATCHC ('e') || MATCHC ('E') || \
Packit a69f91
		       MATCHC ('f') || MATCHC ('F'))
Packit a69f91
#define EMITC(c) {if (((c) == '\\') || \
Packit a69f91
		      ((c) == '*') || \
Packit a69f91
		      ((c) == '?') || \
Packit a69f91
		      ((c) == '$') || \
Packit a69f91
		      ((c) == '^') || \
Packit a69f91
		      ((c) == '[')) { \
Packit a69f91
			EMIT ('\\'); EMITX ((c)); \
Packit a69f91
		      } else { \
Packit a69f91
			EMIT ((c));}}
Packit a69f91
#define MATCH_AREOPTS(c) (c == 'b' || c == 'c' || \
Packit a69f91
          c == 'e' || c == 'i' || c == 'm' || c == 'n' || \
Packit a69f91
          c == 'p' || c == 'q' || c == 's' || c == 't' || \
Packit a69f91
          c == 'w' || c == 'x')
Packit a69f91
Packit a69f91
#if DEBUG
Packit a69f91
#define LOG if (1) fprintf
Packit a69f91
#define FF fflush (stderr)
Packit a69f91
#define MARK(s) LOG (stderr,#s "\n"); FF;
Packit a69f91
#else
Packit a69f91
#define LOG if (0) fprintf
Packit a69f91
#define FF 
Packit a69f91
#define MARK(s) 
Packit a69f91
#endif
Packit a69f91
Packit a69f91
  /* ***= -> literal string follows */
Packit a69f91
Packit a69f91
  LOG (stderr,"RE-2-GLOB '%s'\n", xxx(str,strlen)); FF;
Packit a69f91
Packit a69f91
  if (MATCH (litprefix)) {
Packit a69f91
    CHOP (4);
Packit a69f91
    nexto = ExpLiteral (nexto, str, strlen);
Packit a69f91
    goto done;
Packit a69f91
  }
Packit a69f91
Packit a69f91
  /* ***: -> RE is ARE. Always for Expect. Therefore ignore */
Packit a69f91
Packit a69f91
  if (MATCH (areprefix)) {
Packit a69f91
    CHOP (4);
Packit a69f91
    LOG (stderr,"ARE '%s'\n", xxx(str,strlen)); FF;
Packit a69f91
  }
Packit a69f91
Packit a69f91
  /* (?xyz) ARE options, in {bceimnpqstwx}. Not validating that the
Packit a69f91
   * options are legal. We assume that the RE is valid.
Packit a69f91
   */
Packit a69f91
Packit a69f91
  if (MATCH (areopts)) { /* "(?" */
Packit a69f91
    Tcl_UniChar* save = str;
Packit a69f91
    Tcl_UniChar* stop;
Packit a69f91
    int stoplen;
Packit a69f91
    int save_strlen = strlen;
Packit a69f91
    int all_ARE_opts = 1;
Packit a69f91
Packit a69f91
    /* First, ensure that this is actually an ARE opts string.
Packit a69f91
     * It could be something else (e.g., a non-capturing block).
Packit a69f91
     */
Packit a69f91
    CHOP (2);
Packit a69f91
    mark = str; CHOPC (')');
Packit a69f91
    stop = str;       /* Remember closing parens location, allows */
Packit a69f91
    stoplen = strlen; /* us to avoid a second CHOPC run later */
Packit a69f91
Packit a69f91
    while (mark < str) {
Packit a69f91
      if (MATCH_AREOPTS(*mark)) {
Packit a69f91
        mark++;
Packit a69f91
      } else {
Packit a69f91
        all_ARE_opts = 0;
Packit a69f91
        break;
Packit a69f91
      }
Packit a69f91
    }
Packit a69f91
Packit a69f91
    /* Reset back to our entry point. */
Packit a69f91
    str    = save;
Packit a69f91
    strlen = save_strlen;
Packit a69f91
Packit a69f91
    if (all_ARE_opts) {
Packit a69f91
      /* Now actually perform the ARE option processing */
Packit a69f91
      LOG (stderr, "%s\n", "Processing AREOPTS"); FF;
Packit a69f91
Packit a69f91
      CHOP (2);
Packit a69f91
      mark = str;
Packit a69f91
      /* Equivalent to CHOPC (')') */
Packit a69f91
      str    = stop; 
Packit a69f91
      strlen = stoplen;
Packit a69f91
Packit a69f91
      while (mark < str) {
Packit a69f91
        if (*mark == 'q') {
Packit a69f91
          CHOP (1);
Packit a69f91
          nexto = ExpLiteral (nexto, str, strlen);
Packit a69f91
          goto done;
Packit a69f91
        } else if (*mark == 'x') {
Packit a69f91
          expanded = 1;
Packit a69f91
          LOG (stderr,"EXPANDED\n"); FF;
Packit a69f91
        }
Packit a69f91
        mark++;
Packit a69f91
      }
Packit a69f91
      CHOP (1);
Packit a69f91
    }
Packit a69f91
  }
Packit a69f91
Packit a69f91
  while (strlen) {
Packit a69f91
Packit a69f91
    LOG (stderr,"'%s' <-- ",xxx(out,nexto-out)); FF;
Packit a69f91
    LOG (stderr,"'%s'\n",   xxx(str,strlen));    FF;
Packit a69f91
Packit a69f91
    if (expanded) {
Packit a69f91
      /* Expanded syntax, whitespace and comments, ignore. */
Packit a69f91
      while (MATCHC (' ')  ||
Packit a69f91
	     MATCHC (0x9) ||
Packit a69f91
	     MATCHC (0xa)) CHOP (1);
Packit a69f91
      if (MATCHC ('#')) {
Packit a69f91
	CHOPC (0xa);
Packit a69f91
	if (strlen) CHOP (1);
Packit a69f91
	continue;
Packit a69f91
      }
Packit a69f91
    }
Packit a69f91
Packit a69f91
    if (MATCHC ('|')) {
Packit a69f91
      /* branching is too complex */
Packit a69f91
      goto error;
Packit a69f91
    } else if (MATCHC ('(')) {
Packit a69f91
      /* open parens */
Packit a69f91
      CHOP (1);
Packit a69f91
      if (MATCH (nocapture)) { /* "?:" */
Packit a69f91
	/* non capturing -save location */
Packit a69f91
	PUSHPAREN;
Packit a69f91
	CHOP (2);
Packit a69f91
      } else if (MATCH (lookhas) || /* "?=" */
Packit a69f91
		 MATCH (looknot)) { /* "?!" */
Packit a69f91
	/* lookahead - ignore */
Packit a69f91
	CHOP (2);
Packit a69f91
	ExpChopNested (&str, &strlen, '(', ')');
Packit a69f91
      } else if (MATCH (xcomment)) { /* "?#" */
Packit a69f91
	/* comment - ignore */
Packit a69f91
	CHOPC (')'); CHOP (1);
Packit a69f91
      } else {
Packit a69f91
	/* plain capturing */
Packit a69f91
	PUSHPAREN;
Packit a69f91
      }
Packit a69f91
    } else if (MATCHC (')')) {
Packit a69f91
      /* Closing parens. */
Packit a69f91
      CHOP (1);
Packit a69f91
      /* Everything coming after the saved result is new, and
Packit a69f91
       * collapsed into a single entry for a possible coming operator
Packit a69f91
       * to handle.
Packit a69f91
       */
Packit a69f91
      nextp --; /* Back to last save */
Packit a69f91
      mark   = *nextp; /* Location where generation for this parens started */
Packit a69f91
      lastsz = (nexto - mark); /* This many chars generated */
Packit a69f91
      /* Now lastsz has the correct value for a possibly following
Packit a69f91
       * UNEMIT
Packit a69f91
       */
Packit a69f91
    } else if (MATCHC ('$') || MATCHC ('^')) {
Packit a69f91
      /* anchor constraints - ignore */
Packit a69f91
      CHOP (1);
Packit a69f91
    } else if (MATCHC ('[')) {
Packit a69f91
      /* Classes - reduce to any char [[=chars=]] [[.chars.]]
Packit a69f91
       * [[:name:]] [chars] Count brackets to find end.
Packit a69f91
Packit a69f91
       * These are a bit complicated ... [= =], [. .], [: {] sequences
Packit a69f91
       * always have to be complete. '[' does NOT nest otherwise.  And
Packit a69f91
       * a ']' after the opening '[' (with only '^' allowed to
Packit a69f91
       * intervene is a character, not the closing bracket. We have to
Packit a69f91
       * process the class in pieces to handle all this. The Tcl level
Packit a69f91
       * implementations (0-2 all have bugs one way or other, all
Packit a69f91
       * different.
Packit a69f91
       */
Packit a69f91
Packit a69f91
      int first   = 1;
Packit a69f91
      int allowed = 1;
Packit a69f91
      CHOP (1);
Packit a69f91
      while (strlen) {
Packit a69f91
	if (first && MATCHC ('^')) {
Packit a69f91
	  /* ^ as first keeps allowed ok for one more cycle */
Packit a69f91
	  CHOP (1);
Packit a69f91
	  first = 0;
Packit a69f91
	  continue;
Packit a69f91
	} else if (allowed && MATCHC (']')) {
Packit a69f91
	  /* Not a closing bracket! */
Packit a69f91
	  CHOP (1);
Packit a69f91
	} else if (MATCHC (']')) {
Packit a69f91
	  /* Closing bracket found */
Packit a69f91
	  CHOP (1);
Packit a69f91
	  break;
Packit a69f91
	} else if (MATCH (classa) ||
Packit a69f91
		   MATCH (classb) ||
Packit a69f91
		   MATCH (classc)) {
Packit a69f91
	  Tcl_UniChar delim[2];
Packit a69f91
	  delim[0] = str [1];
Packit a69f91
	  delim[1] = ']';
Packit a69f91
	  CHOP (2);
Packit a69f91
	  while (!MATCH (delim)) CHOP (1);
Packit a69f91
	  CHOP (2);
Packit a69f91
	} else {
Packit a69f91
	  /* Any char in class */
Packit a69f91
	  CHOP (1);
Packit a69f91
	}
Packit a69f91
	/* Reset flags handling start of class */
Packit a69f91
	allowed = first = 0;
Packit a69f91
      }
Packit a69f91
Packit a69f91
      EMIT ('?');
Packit a69f91
    } else if (MATCHC ('\\')) {
Packit a69f91
      /* Escapes */
Packit a69f91
      CHOP (1);
Packit a69f91
      if (MATCHC ('d') || MATCHC ('D') ||
Packit a69f91
	  MATCHC ('s') || MATCHC ('S') ||
Packit a69f91
	  MATCHC ('w') || MATCHC ('W')) {
Packit a69f91
	/* Class shorthands - reduce to any char */
Packit a69f91
	EMIT ('?');
Packit a69f91
	CHOP (1);
Packit a69f91
      } else if (MATCHC ('m') || MATCHC ('M') ||
Packit a69f91
		 MATCHC ('y') || MATCHC ('Y') ||
Packit a69f91
		 MATCHC ('A') || MATCHC ('Z')) {
Packit a69f91
	/* constraint escapes - ignore */
Packit a69f91
	CHOP (1);
Packit a69f91
      } else if (MATCHC ('B')) {
Packit a69f91
	/* Backslash */
Packit a69f91
	EMIT  ('\\');
Packit a69f91
	EMITX ('\\');
Packit a69f91
	CHOP (1);
Packit a69f91
      } else if (MATCHC ('0')) {
Packit a69f91
	/* Escape NULL */
Packit a69f91
	EMIT ('\0');
Packit a69f91
	CHOP (1);
Packit a69f91
      } else if (MATCHC ('e')) {
Packit a69f91
	/* Escape ESC */
Packit a69f91
	EMIT ('\033');
Packit a69f91
	CHOP (1);
Packit a69f91
      } else if (MATCHC ('a')) {
Packit a69f91
	/* Escape \a */
Packit a69f91
	EMIT (0x7);
Packit a69f91
	CHOP (1);
Packit a69f91
      } else if (MATCHC ('b')) {
Packit a69f91
	/* Escape \b */
Packit a69f91
	EMIT (0x8);
Packit a69f91
	CHOP (1);
Packit a69f91
      } else if (MATCHC ('f')) {
Packit a69f91
	/* Escape \f */
Packit a69f91
	EMIT (0xc);
Packit a69f91
	CHOP (1);
Packit a69f91
      } else if (MATCHC ('n')) {
Packit a69f91
	/* Escape \n */
Packit a69f91
	EMIT (0xa);
Packit a69f91
	CHOP (1);
Packit a69f91
      } else if (MATCHC ('r')) {
Packit a69f91
	/* Escape \r */
Packit a69f91
	EMIT (0xd);
Packit a69f91
	CHOP (1);
Packit a69f91
      } else if (MATCHC ('t')) {
Packit a69f91
	/* Escape \t */
Packit a69f91
	EMIT (0x9);
Packit a69f91
	CHOP (1);
Packit a69f91
      } else if (MATCHC ('v')) {
Packit a69f91
	/* Escape \v */
Packit a69f91
	EMIT (0xb);
Packit a69f91
	CHOP (1);
Packit a69f91
      } else if (MATCHC ('c') && (strlen >= 2)) {
Packit a69f91
	/* Escape \cX - reduce to (.) */
Packit a69f91
	EMIT ('?');
Packit a69f91
	CHOP (2);
Packit a69f91
      } else if (MATCHC ('x')) {
Packit a69f91
	CHOP (1);
Packit a69f91
	if (MATCH_HEXDIGIT) {
Packit a69f91
	  /* Escape hex character */
Packit a69f91
	  mark = str;
Packit a69f91
	  while (MATCH_HEXDIGIT) CHOP (1);
Packit a69f91
	  if ((str - mark) > 2) { mark = str - 2; }
Packit a69f91
	  ch = ExpBackslash ('x',mark,str-mark);
Packit a69f91
	  EMITC (ch);
Packit a69f91
	} else {
Packit a69f91
	  /* Without hex digits following this is a plain char */
Packit a69f91
	  EMIT ('x');
Packit a69f91
	}
Packit a69f91
      } else if (MATCHC ('u')) {
Packit a69f91
	/*  Escapes unicode short. */
Packit a69f91
	CHOP (1);
Packit a69f91
	mark = str;
Packit a69f91
	CHOP (4);
Packit a69f91
	ch = ExpBackslash ('u',mark,str-mark);
Packit a69f91
	EMITC (ch);
Packit a69f91
      } else if (MATCHC ('U')) {
Packit a69f91
	/* Escapes unicode long. */
Packit a69f91
	CHOP (1);
Packit a69f91
	mark = str;
Packit a69f91
	CHOP (8);
Packit a69f91
	ch = ExpBackslash ('U',mark,str-mark);
Packit a69f91
	EMITC (ch);
Packit a69f91
      } else if (MATCH_DIGIT) {
Packit a69f91
	/* Escapes, octal, and backreferences - reduce (.*) */
Packit a69f91
	CHOP (1);
Packit a69f91
	while (MATCH_DIGIT) CHOP (1);
Packit a69f91
	EMIT ('*');
Packit a69f91
      } else {
Packit a69f91
	/* Plain escaped characters - copy over, requote */
Packit a69f91
	EMITC (*str);
Packit a69f91
	CHOP (1);
Packit a69f91
      }
Packit a69f91
    } else if (MATCHC ('{')) {
Packit a69f91
      /* Non-greedy and greedy bounds - reduce to (*) */
Packit a69f91
      CHOP (1);
Packit a69f91
      if (MATCH_DIGIT) {
Packit a69f91
	/* Locate closing brace and remove operator */
Packit a69f91
	CHOPC ('}'); CHOP (1);
Packit a69f91
	/* Remove optional greedy quantifier */
Packit a69f91
	if (MATCHC ('?')) { CHOP (1);}
Packit a69f91
	UNEMIT;
Packit a69f91
	EMIT ('*');
Packit a69f91
      } else {
Packit a69f91
	/* Brace is plain character, copy over */
Packit a69f91
	EMIT ('{');
Packit a69f91
	/* CHOP already done */
Packit a69f91
      }
Packit a69f91
    } else if (MATCHC ('*') ||
Packit a69f91
	       MATCHC ('+') ||
Packit a69f91
	       MATCHC ('?')) {
Packit a69f91
      /* (Non-)greedy operators - reduce to (*) */
Packit a69f91
      CHOP (1);
Packit a69f91
      /* Remove optional greedy quantifier */
Packit a69f91
      if (MATCHC ('?')) { CHOP (1);}
Packit a69f91
      UNEMIT;
Packit a69f91
      EMIT ('*');
Packit a69f91
    } else if (MATCHC ('.')) {
Packit a69f91
      /* anychar - copy over */
Packit a69f91
      EMIT ('?');
Packit a69f91
      CHOP (1);
Packit a69f91
    } else {
Packit a69f91
      /* Plain char, copy over. */
Packit a69f91
      EMIT (*str);
Packit a69f91
      CHOP (1);
Packit a69f91
    }
Packit a69f91
  }
Packit a69f91
Packit a69f91
  LOG (stderr,"'%s' <-- ",xxx(out,nexto-out)); FF;
Packit a69f91
  LOG (stderr,"'%s'\n",   xxx(str,strlen));    FF;
Packit a69f91
Packit a69f91
  /*
Packit a69f91
   * Clean up the output a bit (collapse *-sequences and absorb ?'s
Packit a69f91
   * into adjacent *'s.
Packit a69f91
   */
Packit a69f91
Packit a69f91
  MARK (QF)
Packit a69f91
  nexto = ExpCollapseQForward (out,nexto);
Packit a69f91
  LOG (stderr,"QF '%s'\n",xxx(out,nexto-out)); FF;
Packit a69f91
Packit a69f91
  MARK (QB)
Packit a69f91
  nexto = ExpCollapseQBack    (out,nexto);
Packit a69f91
  LOG (stderr,"QB '%s'\n",xxx(out,nexto-out)); FF;
Packit a69f91
Packit a69f91
  MARK (QS)
Packit a69f91
  nexto = ExpCollapseStar     (out,nexto);
Packit a69f91
  LOG (stderr,"ST '%s'\n",xxx(out,nexto-out)); FF;
Packit a69f91
Packit a69f91
  /*
Packit a69f91
   * Heuristic: if there are more than two *s, the risk is far too
Packit a69f91
   * large that the result actually is slower than the normal re
Packit a69f91
   * matching.  So bail out.
Packit a69f91
   */
Packit a69f91
  if (ExpCountStar (out,nexto) > 2) {
Packit a69f91
      goto error;
Packit a69f91
  }
Packit a69f91
Packit a69f91
  /*
Packit a69f91
   * Check if the result is actually useful.
Packit a69f91
   * Empty or just a *, or ? are not. A series
Packit a69f91
   * of ?'s is borderline, as they semi-count
Packit a69f91
   * the buffer.
Packit a69f91
   */
Packit a69f91
Packit a69f91
  if ((nexto == out) ||
Packit a69f91
      (((nexto-out) == 1) &&
Packit a69f91
       ((*out == '*') ||
Packit a69f91
	(*out == '?')))) {
Packit a69f91
    goto error;
Packit a69f91
  }
Packit a69f91
Packit a69f91
  /*
Packit a69f91
   * Result generation and cleanup.
Packit a69f91
   */
Packit a69f91
 done:
Packit a69f91
  LOG (stderr,"RESULT_ '%s'\n", xxx(out,nexto-out)); FF;
Packit a69f91
  glob = Tcl_NewUnicodeObj (out,(nexto-out));
Packit a69f91
  goto cleanup;
Packit a69f91
Packit a69f91
 error:
Packit a69f91
  LOG (stderr,"RESULT_ ERROR\n"); FF;
Packit a69f91
Packit a69f91
 cleanup:
Packit a69f91
  Tcl_Free ((char*)out);
Packit a69f91
  Tcl_Free ((char*)paren);
Packit a69f91
Packit a69f91
  return glob;
Packit a69f91
}
Packit a69f91
Packit a69f91
/*
 * ExpChopNested --
 *
 *	Skips over a nested construct. On entry *xstr / *xstrlen describe
 *	the input directly behind an 'open' character which was already
 *	consumed. The input is consumed up to and including the 'close'
 *	character matching it, honoring nested open/close pairs, or up
 *	to the end of the input if there is no such character.
 *
 *	*xstr and *xstrlen are updated to the remaining input.
 *
 *	Defined in prototype form unconditionally. An old-style
 *	definition with Tcl_UniChar parameters does not agree with the
 *	prototype at the top of the file, because old-style parameters
 *	undergo the default argument promotions.
 */

static void
ExpChopNested (Tcl_UniChar** xstr,
               int*          xstrlen,
               Tcl_UniChar   open,
               Tcl_UniChar   close)
{
  Tcl_UniChar* str    = *xstr;
  int          strlen = *xstrlen;
  int          level  = 0; /* Number of nested, still open constructs */

  while (strlen > 0) {
    const Tcl_UniChar c = *str;

    /* Every character looked at is consumed, the closer as well. */
    str ++;
    strlen --;

    if (c == open) {
      level ++;
    } else if (c == close) {
      level --;
      if (level < 0) {
        /* This closes the construct we were called for. */
        break;
      }
    }
  }

  *xstr    = str;
  *xstrlen = strlen;
}
Packit a69f91
Packit a69f91
/*
 * ExpLiteral --
 *
 *	Copies the 'strlen' characters at 'str' to the output at 'nexto',
 *	as a literal. Each of the characters \ * ? $ ^ [ is preceded by
 *	a backslash in the output.
 *
 *	Returns the location behind the last character written.
 */

static Tcl_UniChar*
ExpLiteral (Tcl_UniChar* nexto, Tcl_UniChar* str, int strlen)
{
  LOG (stderr,"LITERAL '%s'\n", xxx(str,strlen)); FF;

  while (strlen != 0) {
    const Tcl_UniChar c = *str;

    switch (c) {
    case '\\':
    case '*':
    case '?':
    case '$':
    case '^':
    case '[':
      /* Quote the character. */
      *nexto++ = '\\';
      break;
    default:
      break;
    }
    *nexto++ = c;

    str ++;
    strlen --;
  }
  return nexto;
}
Packit a69f91
Packit a69f91
/*
 * ExpBackslash --
 *
 *	Determines the character denoted by a \x, \u, or \U escape.
 *	'prefix' is the letter of the escape, 'str' / 'strlen' are the
 *	characters following it (callers pass at most 8). The sequence
 *	is rebuilt as a NUL-terminated 8-bit string and handed to
 *	Tcl_UtfBackslash for decoding.
 *
 *	The terminator matters: without it the decoder would continue
 *	into whatever the stack buffer holds behind the sequence and
 *	could consume stray bytes which look like hex digits.
 *
 *	Defined in prototype form unconditionally. An old-style
 *	definition with a 'char' parameter does not agree with the
 *	prototype at the top of the file.
 *
 *	Returns the decoded character.
 */

static Tcl_UniChar
ExpBackslash (char prefix,
              Tcl_UniChar* str,
              int          strlen)
{
  /* Room for backslash, prefix, the digits, and the terminator. */
  char buf[20];
  char dst[TCL_UTF_MAX+1] = {0};
  Tcl_UniChar ch;
  int at = 0;

  /* Construct an utf backslash sequence we can throw to Tcl */

  buf [at++] = '\\';
  buf [at++] = prefix;
  while ((strlen > 0) && (at < (int) sizeof (buf) - 1)) {
    buf [at++] = (char) *str++;
    strlen --;
  }
  buf [at] = '\0';

  Tcl_UtfBackslash (buf, NULL, dst);
  TclUtfToUniChar (dst, &ch);
  return ch;
}
Packit a69f91
Packit a69f91
/*
 * ExpCollapseStar --
 *
 *	Rewrites the glob pattern in [src, last) in place, reducing each
 *	run of unescaped '*' to a single '*'. A backslash and the
 *	character behind it are copied untouched.
 *
 *	Returns the new end of the pattern.
 */

static Tcl_UniChar*
ExpCollapseStar (Tcl_UniChar* src, Tcl_UniChar* last)
{
  Tcl_UniChar* base = src; /* Start of the pattern, for logging */
  Tcl_UniChar* dst  = src; /* Where the next kept character goes */
  int skip = 0;            /* Current character is escaped */
  int star = 0;            /* Previous kept character was an unescaped * */

  LOG (stderr,"Q-STAR\n"); FF;

  while (src < last) {

    LOG (stderr,"@%1d /%1d '%s' <-- ", star,skip,xxx(base,dst-base)); FF;
    LOG (stderr,"'%s'\n",   xxx(src,last-src));  FF;

    if (skip) {
      /* Escaped character, keep it whatever its value. */
      skip = 0;
      star = 0;
    } else if (*src == '*') {
      if (star) {
        /* Inside of a *-series, drop this one. */
        src++;
        continue;
      }
      /* First * of a series, keep it. */
      star = 1;
    } else {
      /* A backslash escapes the character coming next. */
      skip = (*src == '\\');
      star = 0;
    }
    *dst++ = *src++;
  }

  LOG (stderr,"@%1d /%1d '%s' <-- ", star,skip,xxx(base,dst-base)); FF;
  LOG (stderr,"'%s'\n",   xxx(src,last-src));  FF;

  return dst;
}
Packit a69f91
Packit a69f91
/*
 * ExpCollapseQForward --
 *
 *	Rewrites the glob pattern in [src, last) in place, removing the
 *	unescaped '?'s which directly follow an unescaped '*'. A
 *	backslash and the character behind it are copied untouched.
 *
 *	Returns the new end of the pattern.
 */

static Tcl_UniChar*
ExpCollapseQForward (Tcl_UniChar* src, Tcl_UniChar* last)
{
  Tcl_UniChar* base = src; /* Start of the pattern, for logging */
  Tcl_UniChar* dst  = src; /* Where the next kept character goes */
  int skip  = 0;           /* Current character is escaped */
  int quest = 0;           /* We are behind a *, ?'s get dropped */

  LOG (stderr,"Q-Forward\n"); FF;

  while (src < last) {

    LOG (stderr,"?%1d /%1d '%s' <-- ", quest,skip,xxx(base,dst-base)); FF;
    LOG (stderr,"'%s'\n",   xxx(src,last-src));  FF;

    if (skip) {
      /* Escaped character, keep it whatever its value. */
      skip  = 0;
      quest = 0;
    } else {
      switch (*src) {
      case '\\':
        /* The character coming next is escaped. */
        skip  = 1;
        quest = 0;
        break;
      case '?':
        if (quest) {
          /* Behind a *, drop the ? and stay in that state. */
          src++;
          continue;
        }
        break;
      case '*':
        quest = 1;
        break;
      default:
        quest = 0;
        break;
      }
    }
    *dst++ = *src++;
  }

  LOG (stderr,"?%1d /%1d '%s' <-- ", quest,skip,xxx(base,dst-base)); FF;
  LOG (stderr,"'%s'\n",   xxx(src,last-src));  FF;
  return dst;
}
Packit a69f91
Packit a69f91
/*
 * ExpCollapseQBack --
 *
 *	Rewrites the glob pattern in [src, last) in place, removing the
 *	unescaped '?'s which directly precede an unescaped '*'. A
 *	backslash and the character behind it are copied untouched.
 *
 *	A '?' in the output counts as escaped when the character before
 *	it is a backslash. This is conservative: a '?' behind an escaped
 *	backslash is kept as well.
 *
 *	Returns the new end of the pattern.
 */

static Tcl_UniChar*
ExpCollapseQBack (Tcl_UniChar* src, Tcl_UniChar* last)
{
  Tcl_UniChar* dst, *base;
  int skip = 0;

  LOG (stderr,"Q-Backward\n"); FF;

  for (dst = base = src; src < last;) {
    LOG (stderr,"/%1d '%s' <-- ",skip,xxx(base,dst-base)); FF;
    LOG (stderr,"'%s'\n",   xxx(src,last-src));  FF;

    if (skip) {
      skip = 0;
    } else if (*src == '\\') {
      skip = 1;
      /* Copy next char, whatever its value */
    } else if (*src == '*') {
      /* Move backward in the output while the previous character is
       * an unescaped question mark. The character before it is only
       * inspected when there is one. This also covers an output of
       * exactly two characters, so that "??*" and "a?*" collapse
       * like the longer sequences do.
       */

      while ((dst > base) && (dst[-1] == '?') &&
             (((dst-base) == 1) || (dst[-2] != '\\'))) {
        dst --;
      }
    }
    *dst++ = *src++;
  }

  LOG (stderr,"/%1d '%s' <-- \n",skip,xxx(base,dst-base)); FF;
  LOG (stderr,"'%s'\n",   xxx(src,last-src));  FF;
  return dst;
}
Packit a69f91
Packit a69f91
/*
 * ExpCountStar --
 *
 *	Counts the unescaped '*' characters of the glob pattern in
 *	[src, last). A character directly behind a backslash is escaped
 *	and never counted.
 *
 *	Returns the number of stars found.
 */

static int
ExpCountStar (Tcl_UniChar* src, Tcl_UniChar* last)
{
    int stars = 0;

    while (src < last) {
	const Tcl_UniChar c = *src++;

	if (c == '\\') {
	    /* Step over the escaped character, if there is one. */
	    if (src < last) {
		src++;
	    }
	} else if (c == '*') {
	    stars++;
	}
    }

    return stars;
}
Packit a69f91
Packit a69f91
/*
 * Remove the scanner macros defined inside of exp_retoglob, so that
 * they do not leak into code coming after this file. EMITC and
 * MATCH_AREOPTS are removed as well; EMITC is built on EMIT and EMITX
 * and cannot be used anymore once those are gone.
 */

#undef CHOP
#undef CHOPC
#undef EMIT
#undef EMITX
#undef EMITC
#undef MATCH
#undef MATCHC
#undef MATCH_AREOPTS
#undef MATCH_DIGIT
#undef MATCH_HEXDIGIT
#undef PUSHPAREN
#undef UNEMIT