Blame lib/regcomp.c

Packit Service fdd496
/* Extended regular expression matching and search library.
Packit Service fdd496
   Copyright (C) 2002-2017 Free Software Foundation, Inc.
Packit Service fdd496
   This file is part of the GNU C Library.
Packit Service fdd496
   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
Packit Service fdd496
Packit Service fdd496
   The GNU C Library is free software; you can redistribute it and/or
Packit Service fdd496
   modify it under the terms of the GNU General Public
Packit Service fdd496
   License as published by the Free Software Foundation; either
Packit Service fdd496
   version 3 of the License, or (at your option) any later version.
Packit Service fdd496
Packit Service fdd496
   The GNU C Library is distributed in the hope that it will be useful,
Packit Service fdd496
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service fdd496
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service fdd496
   General Public License for more details.
Packit Service fdd496
Packit Service fdd496
   You should have received a copy of the GNU General Public
Packit Service fdd496
   License along with the GNU C Library; if not, see
Packit Service fdd496
   <http://www.gnu.org/licenses/>.  */
Packit Service fdd496
Packit Service fdd496
#ifdef _LIBC
Packit Service fdd496
# include <locale/weight.h>
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
Packit Service fdd496
					  size_t length, reg_syntax_t syntax);
Packit Service fdd496
static void re_compile_fastmap_iter (regex_t *bufp,
Packit Service fdd496
				     const re_dfastate_t *init_state,
Packit Service fdd496
				     char *fastmap);
Packit Service fdd496
static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
static void free_charset (re_charset_t *cset);
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
static void free_workarea_compile (regex_t *preg);
Packit Service fdd496
static reg_errcode_t create_initial_state (re_dfa_t *dfa);
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
static void optimize_utf8 (re_dfa_t *dfa);
Packit Service fdd496
#endif
Packit Service fdd496
static reg_errcode_t analyze (regex_t *preg);
Packit Service fdd496
static reg_errcode_t preorder (bin_tree_t *root,
Packit Service fdd496
			       reg_errcode_t (fn (void *, bin_tree_t *)),
Packit Service fdd496
			       void *extra);
Packit Service fdd496
static reg_errcode_t postorder (bin_tree_t *root,
Packit Service fdd496
				reg_errcode_t (fn (void *, bin_tree_t *)),
Packit Service fdd496
				void *extra);
Packit Service fdd496
static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
Packit Service fdd496
static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
Packit Service fdd496
static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
Packit Service fdd496
				 bin_tree_t *node);
Packit Service fdd496
static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
Packit Service fdd496
static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
Packit Service fdd496
static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
Packit Service fdd496
static Idx duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint);
Packit Service fdd496
static Idx search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
Packit Service fdd496
				   unsigned int constraint);
Packit Service fdd496
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
Packit Service fdd496
static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
Packit Service fdd496
					 Idx node, bool root);
Packit Service fdd496
static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
Packit Service fdd496
static Idx fetch_number (re_string_t *input, re_token_t *token,
Packit Service fdd496
			 reg_syntax_t syntax);
Packit Service fdd496
static int peek_token (re_token_t *token, re_string_t *input,
Packit Service fdd496
			reg_syntax_t syntax) internal_function;
Packit Service fdd496
static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
Packit Service fdd496
			  reg_syntax_t syntax, reg_errcode_t *err);
Packit Service fdd496
static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
Packit Service fdd496
				  re_token_t *token, reg_syntax_t syntax,
Packit Service fdd496
				  Idx nest, reg_errcode_t *err);
Packit Service fdd496
static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
Packit Service fdd496
				 re_token_t *token, reg_syntax_t syntax,
Packit Service fdd496
				 Idx nest, reg_errcode_t *err);
Packit Service fdd496
static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
Packit Service fdd496
				     re_token_t *token, reg_syntax_t syntax,
Packit Service fdd496
				     Idx nest, reg_errcode_t *err);
Packit Service fdd496
static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
Packit Service fdd496
				  re_token_t *token, reg_syntax_t syntax,
Packit Service fdd496
				  Idx nest, reg_errcode_t *err);
Packit Service fdd496
static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
Packit Service fdd496
				 re_dfa_t *dfa, re_token_t *token,
Packit Service fdd496
				 reg_syntax_t syntax, reg_errcode_t *err);
Packit Service fdd496
static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
Packit Service fdd496
				      re_token_t *token, reg_syntax_t syntax,
Packit Service fdd496
				      reg_errcode_t *err);
Packit Service fdd496
static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
Packit Service fdd496
					    re_string_t *regexp,
Packit Service fdd496
					    re_token_t *token, int token_len,
Packit Service fdd496
					    re_dfa_t *dfa,
Packit Service fdd496
					    reg_syntax_t syntax,
Packit Service fdd496
					    bool accept_hyphen);
Packit Service fdd496
static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
Packit Service fdd496
					  re_string_t *regexp,
Packit Service fdd496
					  re_token_t *token);
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
static reg_errcode_t build_equiv_class (bitset_t sbcset,
Packit Service fdd496
					re_charset_t *mbcset,
Packit Service fdd496
					Idx *equiv_class_alloc,
Packit Service fdd496
					const unsigned char *name);
Packit Service fdd496
static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
Packit Service fdd496
				      bitset_t sbcset,
Packit Service fdd496
				      re_charset_t *mbcset,
Packit Service fdd496
				      Idx *char_class_alloc,
Packit Service fdd496
				      const char *class_name,
Packit Service fdd496
				      reg_syntax_t syntax);
Packit Service fdd496
#else  /* not RE_ENABLE_I18N */
Packit Service fdd496
static reg_errcode_t build_equiv_class (bitset_t sbcset,
Packit Service fdd496
					const unsigned char *name);
Packit Service fdd496
static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
Packit Service fdd496
				      bitset_t sbcset,
Packit Service fdd496
				      const char *class_name,
Packit Service fdd496
				      reg_syntax_t syntax);
Packit Service fdd496
#endif /* not RE_ENABLE_I18N */
Packit Service fdd496
static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
Packit Service fdd496
				       RE_TRANSLATE_TYPE trans,
Packit Service fdd496
				       const char *class_name,
Packit Service fdd496
				       const char *extra,
Packit Service fdd496
				       bool non_match, reg_errcode_t *err);
Packit Service fdd496
static bin_tree_t *create_tree (re_dfa_t *dfa,
Packit Service fdd496
				bin_tree_t *left, bin_tree_t *right,
Packit Service fdd496
				re_token_type_t type);
Packit Service fdd496
static bin_tree_t *create_token_tree (re_dfa_t *dfa,
Packit Service fdd496
				      bin_tree_t *left, bin_tree_t *right,
Packit Service fdd496
				      const re_token_t *token);
Packit Service fdd496
static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
Packit Service fdd496
static void free_token (re_token_t *node);
Packit Service fdd496
static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
Packit Service fdd496
static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
Packit Service fdd496

Packit Service fdd496
/* This table gives an error message for each of the error codes listed
Packit Service fdd496
   in regex.h.  Obviously the order here has to be same as there.
Packit Service fdd496
   POSIX doesn't require that we do anything for REG_NOERROR,
Packit Service fdd496
   but why not be nice?  */
Packit Service fdd496
Packit Service fdd496
static const char __re_error_msgid[] =
Packit Service fdd496
  {
Packit Service fdd496
#define REG_NOERROR_IDX	0
Packit Service fdd496
    gettext_noop ("Success")	/* REG_NOERROR */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
Packit Service fdd496
    gettext_noop ("No match")	/* REG_NOMATCH */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
Packit Service fdd496
    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
Packit Service fdd496
    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
Packit Service fdd496
    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
Packit Service fdd496
    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
Packit Service fdd496
    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
Packit Service fdd496
    gettext_noop ("Unmatched [, [^, [:, [., or [=")	/* REG_EBRACK */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [, [^, [:, [., or [=")
Packit Service fdd496
    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
Packit Service fdd496
    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
Packit Service fdd496
    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
Packit Service fdd496
    gettext_noop ("Invalid range end")	/* REG_ERANGE */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
Packit Service fdd496
    gettext_noop ("Memory exhausted") /* REG_ESPACE */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
Packit Service fdd496
    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
Packit Service fdd496
    gettext_noop ("Premature end of regular expression") /* REG_EEND */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
Packit Service fdd496
    gettext_noop ("Regular expression too big") /* REG_ESIZE */
Packit Service fdd496
    "\0"
Packit Service fdd496
#define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
Packit Service fdd496
    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
Packit Service fdd496
  };
Packit Service fdd496
Packit Service fdd496
static const size_t __re_error_msgid_idx[] =
Packit Service fdd496
  {
Packit Service fdd496
    REG_NOERROR_IDX,
Packit Service fdd496
    REG_NOMATCH_IDX,
Packit Service fdd496
    REG_BADPAT_IDX,
Packit Service fdd496
    REG_ECOLLATE_IDX,
Packit Service fdd496
    REG_ECTYPE_IDX,
Packit Service fdd496
    REG_EESCAPE_IDX,
Packit Service fdd496
    REG_ESUBREG_IDX,
Packit Service fdd496
    REG_EBRACK_IDX,
Packit Service fdd496
    REG_EPAREN_IDX,
Packit Service fdd496
    REG_EBRACE_IDX,
Packit Service fdd496
    REG_BADBR_IDX,
Packit Service fdd496
    REG_ERANGE_IDX,
Packit Service fdd496
    REG_ESPACE_IDX,
Packit Service fdd496
    REG_BADRPT_IDX,
Packit Service fdd496
    REG_EEND_IDX,
Packit Service fdd496
    REG_ESIZE_IDX,
Packit Service fdd496
    REG_ERPAREN_IDX
Packit Service fdd496
  };
Packit Service fdd496

Packit Service fdd496
/* Entry points for GNU code.  */
Packit Service fdd496
Packit Service fdd496
/* re_compile_pattern is the GNU regular expression compiler: it
Packit Service fdd496
   compiles PATTERN (of length LENGTH) and puts the result in BUFP.
Packit Service fdd496
   Returns 0 if the pattern was valid, otherwise an error string.
Packit Service fdd496
Packit Service fdd496
   Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields
Packit Service fdd496
   are set in BUFP on entry.  */
Packit Service fdd496
Packit Service fdd496
const char *
Packit Service fdd496
re_compile_pattern (const char *pattern, size_t length,
Packit Service fdd496
		    struct re_pattern_buffer *bufp)
Packit Service fdd496
{
Packit Service fdd496
  reg_errcode_t ret;
Packit Service fdd496
Packit Service fdd496
  /* And GNU code determines whether or not to get register information
Packit Service fdd496
     by passing null for the REGS argument to re_match, etc., not by
Packit Service fdd496
     setting no_sub, unless RE_NO_SUB is set.  */
Packit Service fdd496
  bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
Packit Service fdd496
Packit Service fdd496
  /* Match anchors at newline.  */
Packit Service fdd496
  bufp->newline_anchor = 1;
Packit Service fdd496
Packit Service fdd496
  ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
Packit Service fdd496
Packit Service fdd496
  if (!ret)
Packit Service fdd496
    return NULL;
Packit Service fdd496
  return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
Packit Service fdd496
}
Packit Service fdd496
#ifdef _LIBC
Packit Service fdd496
weak_alias (__re_compile_pattern, re_compile_pattern)
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
/* Set by 're_set_syntax' to the current regexp syntax to recognize.  Can
Packit Service fdd496
   also be assigned to arbitrarily: each pattern buffer stores its own
Packit Service fdd496
   syntax, so it can be changed between regex compilations.  */
Packit Service fdd496
/* This has no initializer because initialized variables in Emacs
Packit Service fdd496
   become read-only after dumping.  */
Packit Service fdd496
reg_syntax_t re_syntax_options;
Packit Service fdd496
Packit Service fdd496
Packit Service fdd496
/* Specify the precise syntax of regexps for compilation.  This provides
Packit Service fdd496
   for compatibility for various utilities which historically have
Packit Service fdd496
   different, incompatible syntaxes.
Packit Service fdd496
Packit Service fdd496
   The argument SYNTAX is a bit mask comprised of the various bits
Packit Service fdd496
   defined in regex.h.  We return the old syntax.  */
Packit Service fdd496
Packit Service fdd496
reg_syntax_t
Packit Service fdd496
re_set_syntax (reg_syntax_t syntax)
Packit Service fdd496
{
Packit Service fdd496
  reg_syntax_t ret = re_syntax_options;
Packit Service fdd496
Packit Service fdd496
  re_syntax_options = syntax;
Packit Service fdd496
  return ret;
Packit Service fdd496
}
Packit Service fdd496
#ifdef _LIBC
Packit Service fdd496
weak_alias (__re_set_syntax, re_set_syntax)
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
int
Packit Service fdd496
re_compile_fastmap (struct re_pattern_buffer *bufp)
Packit Service fdd496
{
Packit Service fdd496
  re_dfa_t *dfa = bufp->buffer;
Packit Service fdd496
  char *fastmap = bufp->fastmap;
Packit Service fdd496
Packit Service fdd496
  memset (fastmap, '\0', sizeof (char) * SBC_MAX);
Packit Service fdd496
  re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
Packit Service fdd496
  if (dfa->init_state != dfa->init_state_word)
Packit Service fdd496
    re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
Packit Service fdd496
  if (dfa->init_state != dfa->init_state_nl)
Packit Service fdd496
    re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
Packit Service fdd496
  if (dfa->init_state != dfa->init_state_begbuf)
Packit Service fdd496
    re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
Packit Service fdd496
  bufp->fastmap_accurate = 1;
Packit Service fdd496
  return 0;
Packit Service fdd496
}
Packit Service fdd496
#ifdef _LIBC
Packit Service fdd496
weak_alias (__re_compile_fastmap, re_compile_fastmap)
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
static inline void
Packit Service fdd496
__attribute__ ((always_inline))
Packit Service fdd496
re_set_fastmap (char *fastmap, bool icase, int ch)
Packit Service fdd496
{
Packit Service fdd496
  fastmap[ch] = 1;
Packit Service fdd496
  if (icase)
Packit Service fdd496
    fastmap[tolower (ch)] = 1;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Helper function for re_compile_fastmap.
Packit Service fdd496
   Compile fastmap for the initial_state INIT_STATE.  */
Packit Service fdd496
Packit Service fdd496
static void
Packit Service fdd496
re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
Packit Service fdd496
			 char *fastmap)
Packit Service fdd496
{
Packit Service fdd496
  re_dfa_t *dfa = bufp->buffer;
Packit Service fdd496
  Idx node_cnt;
Packit Service fdd496
  bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
Packit Service fdd496
  for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
Packit Service fdd496
    {
Packit Service fdd496
      Idx node = init_state->nodes.elems[node_cnt];
Packit Service fdd496
      re_token_type_t type = dfa->nodes[node].type;
Packit Service fdd496
Packit Service fdd496
      if (type == CHARACTER)
Packit Service fdd496
	{
Packit Service fdd496
	  re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
	  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
Packit Service fdd496
	    {
Packit Service fdd496
	      unsigned char buf[MB_LEN_MAX];
Packit Service fdd496
	      unsigned char *p;
Packit Service fdd496
	      wchar_t wc;
Packit Service fdd496
	      mbstate_t state;
Packit Service fdd496
Packit Service fdd496
	      p = buf;
Packit Service fdd496
	      *p++ = dfa->nodes[node].opr.c;
Packit Service fdd496
	      while (++node < dfa->nodes_len
Packit Service fdd496
		     &&	dfa->nodes[node].type == CHARACTER
Packit Service fdd496
		     && dfa->nodes[node].mb_partial)
Packit Service fdd496
		*p++ = dfa->nodes[node].opr.c;
Packit Service fdd496
	      memset (&state, '\0', sizeof (state));
Packit Service fdd496
	      if (__mbrtowc (&wc, (const char *) buf, p - buf,
Packit Service fdd496
			     &state) == p - buf
Packit Service fdd496
		  && (__wcrtomb ((char *) buf, __towlower (wc), &state)
Packit Service fdd496
		      != (size_t) -1))
Packit Service fdd496
		re_set_fastmap (fastmap, false, buf[0]);
Packit Service fdd496
	    }
Packit Service fdd496
#endif
Packit Service fdd496
	}
Packit Service fdd496
      else if (type == SIMPLE_BRACKET)
Packit Service fdd496
	{
Packit Service fdd496
	  int i, ch;
Packit Service fdd496
	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
Packit Service fdd496
	    {
Packit Service fdd496
	      int j;
Packit Service fdd496
	      bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
Packit Service fdd496
	      for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
Packit Service fdd496
		if (w & ((bitset_word_t) 1 << j))
Packit Service fdd496
		  re_set_fastmap (fastmap, icase, ch);
Packit Service fdd496
	    }
Packit Service fdd496
	}
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
      else if (type == COMPLEX_BRACKET)
Packit Service fdd496
	{
Packit Service fdd496
	  re_charset_t *cset = dfa->nodes[node].opr.mbcset;
Packit Service fdd496
	  Idx i;
Packit Service fdd496
Packit Service fdd496
# ifdef _LIBC
Packit Service fdd496
	  /* See if we have to try all bytes which start multiple collation
Packit Service fdd496
	     elements.
Packit Service fdd496
	     e.g. In da_DK, we want to catch 'a' since "aa" is a valid
Packit Service fdd496
		  collation element, and don't catch 'b' since 'b' is
Packit Service fdd496
		  the only collation element which starts from 'b' (and
Packit Service fdd496
		  it is caught by SIMPLE_BRACKET).  */
Packit Service fdd496
	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
Packit Service fdd496
		  && (cset->ncoll_syms || cset->nranges))
Packit Service fdd496
		{
Packit Service fdd496
		  const int32_t *table = (const int32_t *)
Packit Service fdd496
		    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
Packit Service fdd496
		  for (i = 0; i < SBC_MAX; ++i)
Packit Service fdd496
		    if (table[i] < 0)
Packit Service fdd496
		      re_set_fastmap (fastmap, icase, i);
Packit Service fdd496
		}
Packit Service fdd496
# endif /* _LIBC */
Packit Service fdd496
Packit Service fdd496
	  /* See if we have to start the match at all multibyte characters,
Packit Service fdd496
	     i.e. where we would not find an invalid sequence.  This only
Packit Service fdd496
	     applies to multibyte character sets; for single byte character
Packit Service fdd496
	     sets, the SIMPLE_BRACKET again suffices.  */
Packit Service fdd496
	  if (dfa->mb_cur_max > 1
Packit Service fdd496
	      && (cset->nchar_classes || cset->non_match || cset->nranges
Packit Service fdd496
# ifdef _LIBC
Packit Service fdd496
		  || cset->nequiv_classes
Packit Service fdd496
# endif /* _LIBC */
Packit Service fdd496
		 ))
Packit Service fdd496
	    {
Packit Service fdd496
	      unsigned char c = 0;
Packit Service fdd496
	      do
Packit Service fdd496
		{
Packit Service fdd496
		  mbstate_t mbs;
Packit Service fdd496
		  memset (&mbs, 0, sizeof (mbs));
Packit Service fdd496
		  if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
Packit Service fdd496
		    re_set_fastmap (fastmap, false, (int) c);
Packit Service fdd496
		}
Packit Service fdd496
	      while (++c != 0);
Packit Service fdd496
	    }
Packit Service fdd496
Packit Service fdd496
	  else
Packit Service fdd496
	    {
Packit Service fdd496
	      /* ... Else catch all bytes which can start the mbchars.  */
Packit Service fdd496
	      for (i = 0; i < cset->nmbchars; ++i)
Packit Service fdd496
		{
Packit Service fdd496
		  char buf[256];
Packit Service fdd496
		  mbstate_t state;
Packit Service fdd496
		  memset (&state, '\0', sizeof (state));
Packit Service fdd496
		  if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
Packit Service fdd496
		    re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
Packit Service fdd496
		  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
Packit Service fdd496
		    {
Packit Service fdd496
		      if (__wcrtomb (buf, __towlower (cset->mbchars[i]), &state)
Packit Service fdd496
			  != (size_t) -1)
Packit Service fdd496
			re_set_fastmap (fastmap, false, *(unsigned char *) buf);
Packit Service fdd496
		    }
Packit Service fdd496
		}
Packit Service fdd496
	    }
Packit Service fdd496
	}
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
      else if (type == OP_PERIOD
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
	       || type == OP_UTF8_PERIOD
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
	       || type == END_OF_RE)
Packit Service fdd496
	{
Packit Service fdd496
	  memset (fastmap, '\1', sizeof (char) * SBC_MAX);
Packit Service fdd496
	  if (type == END_OF_RE)
Packit Service fdd496
	    bufp->can_be_null = 1;
Packit Service fdd496
	  return;
Packit Service fdd496
	}
Packit Service fdd496
    }
Packit Service fdd496
}
Packit Service fdd496

Packit Service fdd496
/* Entry point for POSIX code.  */
Packit Service fdd496
/* regcomp takes a regular expression as a string and compiles it.
Packit Service fdd496
Packit Service fdd496
   PREG is a regex_t *.  We do not expect any fields to be initialized,
Packit Service fdd496
   since POSIX says we shouldn't.  Thus, we set
Packit Service fdd496
Packit Service fdd496
     'buffer' to the compiled pattern;
Packit Service fdd496
     'used' to the length of the compiled pattern;
Packit Service fdd496
     'syntax' to RE_SYNTAX_POSIX_EXTENDED if the
Packit Service fdd496
       REG_EXTENDED bit in CFLAGS is set; otherwise, to
Packit Service fdd496
       RE_SYNTAX_POSIX_BASIC;
Packit Service fdd496
     'newline_anchor' to REG_NEWLINE being set in CFLAGS;
Packit Service fdd496
     'fastmap' to an allocated space for the fastmap;
Packit Service fdd496
     'fastmap_accurate' to zero;
Packit Service fdd496
     're_nsub' to the number of subexpressions in PATTERN.
Packit Service fdd496
Packit Service fdd496
   PATTERN is the address of the pattern string.
Packit Service fdd496
Packit Service fdd496
   CFLAGS is a series of bits which affect compilation.
Packit Service fdd496
Packit Service fdd496
     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
Packit Service fdd496
     use POSIX basic syntax.
Packit Service fdd496
Packit Service fdd496
     If REG_NEWLINE is set, then . and [^...] don't match newline.
Packit Service fdd496
     Also, regexec will try a match beginning after every newline.
Packit Service fdd496
Packit Service fdd496
     If REG_ICASE is set, then we considers upper- and lowercase
Packit Service fdd496
     versions of letters to be equivalent when matching.
Packit Service fdd496
Packit Service fdd496
     If REG_NOSUB is set, then when PREG is passed to regexec, that
Packit Service fdd496
     routine will report only success or failure, and nothing about the
Packit Service fdd496
     registers.
Packit Service fdd496
Packit Service fdd496
   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
Packit Service fdd496
   the return codes and their meanings.)  */
Packit Service fdd496
Packit Service fdd496
int
Packit Service fdd496
regcomp (regex_t *_Restrict_ preg, const char *_Restrict_ pattern, int cflags)
Packit Service fdd496
{
Packit Service fdd496
  reg_errcode_t ret;
Packit Service fdd496
  reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
Packit Service fdd496
			 : RE_SYNTAX_POSIX_BASIC);
Packit Service fdd496
Packit Service fdd496
  preg->buffer = NULL;
Packit Service fdd496
  preg->allocated = 0;
Packit Service fdd496
  preg->used = 0;
Packit Service fdd496
Packit Service fdd496
  /* Try to allocate space for the fastmap.  */
Packit Service fdd496
  preg->fastmap = re_malloc (char, SBC_MAX);
Packit Service fdd496
  if (BE (preg->fastmap == NULL, 0))
Packit Service fdd496
    return REG_ESPACE;
Packit Service fdd496
Packit Service fdd496
  syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
Packit Service fdd496
Packit Service fdd496
  /* If REG_NEWLINE is set, newlines are treated differently.  */
Packit Service fdd496
  if (cflags & REG_NEWLINE)
Packit Service fdd496
    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
Packit Service fdd496
      syntax &= ~RE_DOT_NEWLINE;
Packit Service fdd496
      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
Packit Service fdd496
      /* It also changes the matching behavior.  */
Packit Service fdd496
      preg->newline_anchor = 1;
Packit Service fdd496
    }
Packit Service fdd496
  else
Packit Service fdd496
    preg->newline_anchor = 0;
Packit Service fdd496
  preg->no_sub = !!(cflags & REG_NOSUB);
Packit Service fdd496
  preg->translate = NULL;
Packit Service fdd496
Packit Service fdd496
  ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
Packit Service fdd496
Packit Service fdd496
  /* POSIX doesn't distinguish between an unmatched open-group and an
Packit Service fdd496
     unmatched close-group: both are REG_EPAREN.  */
Packit Service fdd496
  if (ret == REG_ERPAREN)
Packit Service fdd496
    ret = REG_EPAREN;
Packit Service fdd496
Packit Service fdd496
  /* We have already checked preg->fastmap != NULL.  */
Packit Service fdd496
  if (BE (ret == REG_NOERROR, 1))
Packit Service fdd496
    /* Compute the fastmap now, since regexec cannot modify the pattern
Packit Service fdd496
       buffer.  This function never fails in this implementation.  */
Packit Service fdd496
    (void) re_compile_fastmap (preg);
Packit Service fdd496
  else
Packit Service fdd496
    {
Packit Service fdd496
      /* Some error occurred while compiling the expression.  */
Packit Service fdd496
      re_free (preg->fastmap);
Packit Service fdd496
      preg->fastmap = NULL;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  return (int) ret;
Packit Service fdd496
}
Packit Service fdd496
#ifdef _LIBC
Packit Service fdd496
weak_alias (__regcomp, regcomp)
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
/* Returns a message corresponding to an error code, ERRCODE, returned
Packit Service fdd496
   from either regcomp or regexec.   We don't use PREG here.  */
Packit Service fdd496
Packit Service fdd496
size_t
Packit Service fdd496
regerror (int errcode, const regex_t *_Restrict_ preg, char *_Restrict_ errbuf,
Packit Service fdd496
	  size_t errbuf_size)
Packit Service fdd496
{
Packit Service fdd496
  const char *msg;
Packit Service fdd496
  size_t msg_size;
Packit Service fdd496
Packit Service fdd496
  if (BE (errcode < 0
Packit Service fdd496
	  || errcode >= (int) (sizeof (__re_error_msgid_idx)
Packit Service fdd496
			       / sizeof (__re_error_msgid_idx[0])), 0))
Packit Service fdd496
    /* Only error codes returned by the rest of the code should be passed
Packit Service fdd496
       to this routine.  If we are given anything else, or if other regex
Packit Service fdd496
       code generates an invalid error code, then the program has a bug.
Packit Service fdd496
       Dump core so we can fix it.  */
Packit Service fdd496
    abort ();
Packit Service fdd496
Packit Service fdd496
  msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
Packit Service fdd496
Packit Service fdd496
  msg_size = strlen (msg) + 1; /* Includes the null.  */
Packit Service fdd496
Packit Service fdd496
  if (BE (errbuf_size != 0, 1))
Packit Service fdd496
    {
Packit Service fdd496
      size_t cpy_size = msg_size;
Packit Service fdd496
      if (BE (msg_size > errbuf_size, 0))
Packit Service fdd496
	{
Packit Service fdd496
	  cpy_size = errbuf_size - 1;
Packit Service fdd496
	  errbuf[cpy_size] = '\0';
Packit Service fdd496
	}
Packit Service fdd496
      memcpy (errbuf, msg, cpy_size);
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  return msg_size;
Packit Service fdd496
}
Packit Service fdd496
#ifdef _LIBC
Packit Service fdd496
weak_alias (__regerror, regerror)
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
/* This static array is used for the map to single-byte characters when
Packit Service fdd496
   UTF-8 is used.  Otherwise we would allocate memory just to initialize
Packit Service fdd496
   it the same all the time.  UTF-8 is the preferred encoding so this is
Packit Service fdd496
   a worthwhile optimization.  */
Packit Service fdd496
static const bitset_t utf8_sb_map =
Packit Service fdd496
{
Packit Service fdd496
  /* Set the first 128 bits.  */
Packit Service fdd496
# if defined __GNUC__ && !defined __STRICT_ANSI__
Packit Service fdd496
  [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
Packit Service fdd496
# else
Packit Service fdd496
#  if 4 * BITSET_WORD_BITS < ASCII_CHARS
Packit Service fdd496
#   error "bitset_word_t is narrower than 32 bits"
Packit Service fdd496
#  elif 3 * BITSET_WORD_BITS < ASCII_CHARS
Packit Service fdd496
  BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,
Packit Service fdd496
#  elif 2 * BITSET_WORD_BITS < ASCII_CHARS
Packit Service fdd496
  BITSET_WORD_MAX, BITSET_WORD_MAX,
Packit Service fdd496
#  elif 1 * BITSET_WORD_BITS < ASCII_CHARS
Packit Service fdd496
  BITSET_WORD_MAX,
Packit Service fdd496
#  endif
Packit Service fdd496
  (BITSET_WORD_MAX
Packit Service fdd496
   >> (SBC_MAX % BITSET_WORD_BITS == 0
Packit Service fdd496
       ? 0
Packit Service fdd496
       : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
Packit Service fdd496
# endif
Packit Service fdd496
};
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
Packit Service fdd496
static void
Packit Service fdd496
free_dfa_content (re_dfa_t *dfa)
Packit Service fdd496
{
Packit Service fdd496
  Idx i, j;
Packit Service fdd496
Packit Service fdd496
  if (dfa->nodes)
Packit Service fdd496
    for (i = 0; i < dfa->nodes_len; ++i)
Packit Service fdd496
      free_token (dfa->nodes + i);
Packit Service fdd496
  re_free (dfa->nexts);
Packit Service fdd496
  for (i = 0; i < dfa->nodes_len; ++i)
Packit Service fdd496
    {
Packit Service fdd496
      if (dfa->eclosures != NULL)
Packit Service fdd496
	re_node_set_free (dfa->eclosures + i);
Packit Service fdd496
      if (dfa->inveclosures != NULL)
Packit Service fdd496
	re_node_set_free (dfa->inveclosures + i);
Packit Service fdd496
      if (dfa->edests != NULL)
Packit Service fdd496
	re_node_set_free (dfa->edests + i);
Packit Service fdd496
    }
Packit Service fdd496
  re_free (dfa->edests);
Packit Service fdd496
  re_free (dfa->eclosures);
Packit Service fdd496
  re_free (dfa->inveclosures);
Packit Service fdd496
  re_free (dfa->nodes);
Packit Service fdd496
Packit Service fdd496
  if (dfa->state_table)
Packit Service fdd496
    for (i = 0; i <= dfa->state_hash_mask; ++i)
Packit Service fdd496
      {
Packit Service fdd496
	struct re_state_table_entry *entry = dfa->state_table + i;
Packit Service fdd496
	for (j = 0; j < entry->num; ++j)
Packit Service fdd496
	  {
Packit Service fdd496
	    re_dfastate_t *state = entry->array[j];
Packit Service fdd496
	    free_state (state);
Packit Service fdd496
	  }
Packit Service fdd496
	re_free (entry->array);
Packit Service fdd496
      }
Packit Service fdd496
  re_free (dfa->state_table);
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  if (dfa->sb_char != utf8_sb_map)
Packit Service fdd496
    re_free (dfa->sb_char);
Packit Service fdd496
#endif
Packit Service fdd496
  re_free (dfa->subexp_map);
Packit Service fdd496
#ifdef DEBUG
Packit Service fdd496
  re_free (dfa->re_str);
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
  re_free (dfa);
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
Packit Service fdd496
/* Free dynamically allocated space used by PREG.  */
Packit Service fdd496
Packit Service fdd496
void
Packit Service fdd496
regfree (regex_t *preg)
Packit Service fdd496
{
Packit Service fdd496
  re_dfa_t *dfa = preg->buffer;
Packit Service fdd496
  if (BE (dfa != NULL, 1))
Packit Service fdd496
    {
Packit Service fdd496
      lock_fini (dfa->lock);
Packit Service fdd496
      free_dfa_content (dfa);
Packit Service fdd496
    }
Packit Service fdd496
  preg->buffer = NULL;
Packit Service fdd496
  preg->allocated = 0;
Packit Service fdd496
Packit Service fdd496
  re_free (preg->fastmap);
Packit Service fdd496
  preg->fastmap = NULL;
Packit Service fdd496
Packit Service fdd496
  re_free (preg->translate);
Packit Service fdd496
  preg->translate = NULL;
Packit Service fdd496
}
Packit Service fdd496
#ifdef _LIBC
Packit Service fdd496
weak_alias (__regfree, regfree)
Packit Service fdd496
#endif
Packit Service fdd496

Packit Service fdd496
/* Entry points compatible with 4.2 BSD regex library.  We don't define
Packit Service fdd496
   them unless specifically requested.  */
Packit Service fdd496
Packit Service fdd496
#if defined _REGEX_RE_COMP || defined _LIBC
Packit Service fdd496
Packit Service fdd496
/* BSD has one and only one pattern buffer.  */
Packit Service fdd496
static struct re_pattern_buffer re_comp_buf;
Packit Service fdd496
Packit Service fdd496
char *
Packit Service fdd496
# ifdef _LIBC
Packit Service fdd496
/* Make these definitions weak in libc, so POSIX programs can redefine
Packit Service fdd496
   these names if they don't use our functions, and still use
Packit Service fdd496
   regcomp/regexec above without link errors.  */
Packit Service fdd496
weak_function
Packit Service fdd496
# endif
Packit Service fdd496
re_comp (const char *s)
Packit Service fdd496
{
Packit Service fdd496
  reg_errcode_t ret;
Packit Service fdd496
  char *fastmap;
Packit Service fdd496
Packit Service fdd496
  if (!s)
Packit Service fdd496
    {
Packit Service fdd496
      if (!re_comp_buf.buffer)
Packit Service fdd496
	return gettext ("No previous regular expression");
Packit Service fdd496
      return 0;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  if (re_comp_buf.buffer)
Packit Service fdd496
    {
Packit Service fdd496
      fastmap = re_comp_buf.fastmap;
Packit Service fdd496
      re_comp_buf.fastmap = NULL;
Packit Service fdd496
      __regfree (&re_comp_buf);
Packit Service fdd496
      memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
Packit Service fdd496
      re_comp_buf.fastmap = fastmap;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  if (re_comp_buf.fastmap == NULL)
Packit Service fdd496
    {
Packit Service fdd496
      re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
Packit Service fdd496
      if (re_comp_buf.fastmap == NULL)
Packit Service fdd496
	return (char *) gettext (__re_error_msgid
Packit Service fdd496
				 + __re_error_msgid_idx[(int) REG_ESPACE]);
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  /* Since 're_exec' always passes NULL for the 'regs' argument, we
Packit Service fdd496
     don't need to initialize the pattern buffer fields which affect it.  */
Packit Service fdd496
Packit Service fdd496
  /* Match anchors at newlines.  */
Packit Service fdd496
  re_comp_buf.newline_anchor = 1;
Packit Service fdd496
Packit Service fdd496
  ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
Packit Service fdd496
Packit Service fdd496
  if (!ret)
Packit Service fdd496
    return NULL;
Packit Service fdd496
Packit Service fdd496
  /* Yes, we're discarding 'const' here if !HAVE_LIBINTL.  */
Packit Service fdd496
  return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
#ifdef _LIBC
Packit Service fdd496
libc_freeres_fn (free_mem)
Packit Service fdd496
{
Packit Service fdd496
  __regfree (&re_comp_buf);
Packit Service fdd496
}
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
#endif /* _REGEX_RE_COMP */
Packit Service fdd496

Packit Service fdd496
/* Internal entry point.
Packit Service fdd496
   Compile the regular expression PATTERN, whose length is LENGTH.
Packit Service fdd496
   SYNTAX indicate regular expression's syntax.  */
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
re_compile_internal (regex_t *preg, const char * pattern, size_t length,
Packit Service fdd496
		     reg_syntax_t syntax)
Packit Service fdd496
{
Packit Service fdd496
  reg_errcode_t err = REG_NOERROR;
Packit Service fdd496
  re_dfa_t *dfa;
Packit Service fdd496
  re_string_t regexp;
Packit Service fdd496
Packit Service fdd496
  /* Initialize the pattern buffer.  */
Packit Service fdd496
  preg->fastmap_accurate = 0;
Packit Service fdd496
  preg->syntax = syntax;
Packit Service fdd496
  preg->not_bol = preg->not_eol = 0;
Packit Service fdd496
  preg->used = 0;
Packit Service fdd496
  preg->re_nsub = 0;
Packit Service fdd496
  preg->can_be_null = 0;
Packit Service fdd496
  preg->regs_allocated = REGS_UNALLOCATED;
Packit Service fdd496
Packit Service fdd496
  /* Initialize the dfa.  */
Packit Service fdd496
  dfa = preg->buffer;
Packit Service fdd496
  if (BE (preg->allocated < sizeof (re_dfa_t), 0))
Packit Service fdd496
    {
Packit Service fdd496
      /* If zero allocated, but buffer is non-null, try to realloc
Packit Service fdd496
	 enough space.  This loses if buffer's address is bogus, but
Packit Service fdd496
	 that is the user's responsibility.  If ->buffer is NULL this
Packit Service fdd496
	 is a simple allocation.  */
Packit Service fdd496
      dfa = re_realloc (preg->buffer, re_dfa_t, 1);
Packit Service fdd496
      if (dfa == NULL)
Packit Service fdd496
	return REG_ESPACE;
Packit Service fdd496
      preg->allocated = sizeof (re_dfa_t);
Packit Service fdd496
      preg->buffer = dfa;
Packit Service fdd496
    }
Packit Service fdd496
  preg->used = sizeof (re_dfa_t);
Packit Service fdd496
Packit Service fdd496
  err = init_dfa (dfa, length);
Packit Service fdd496
  if (BE (err == REG_NOERROR && lock_init (dfa->lock) != 0, 0))
Packit Service fdd496
    err = REG_ESPACE;
Packit Service fdd496
  if (BE (err != REG_NOERROR, 0))
Packit Service fdd496
    {
Packit Service fdd496
      free_dfa_content (dfa);
Packit Service fdd496
      preg->buffer = NULL;
Packit Service fdd496
      preg->allocated = 0;
Packit Service fdd496
      return err;
Packit Service fdd496
    }
Packit Service fdd496
#ifdef DEBUG
Packit Service fdd496
  /* Note: length+1 will not overflow since it is checked in init_dfa.  */
Packit Service fdd496
  dfa->re_str = re_malloc (char, length + 1);
Packit Service fdd496
  strncpy (dfa->re_str, pattern, length + 1);
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
  err = re_string_construct (&regexp, pattern, length, preg->translate,
Packit Service fdd496
			     (syntax & RE_ICASE) != 0, dfa);
Packit Service fdd496
  if (BE (err != REG_NOERROR, 0))
Packit Service fdd496
    {
Packit Service fdd496
    re_compile_internal_free_return:
Packit Service fdd496
      free_workarea_compile (preg);
Packit Service fdd496
      re_string_destruct (&regexp);
Packit Service fdd496
      lock_fini (dfa->lock);
Packit Service fdd496
      free_dfa_content (dfa);
Packit Service fdd496
      preg->buffer = NULL;
Packit Service fdd496
      preg->allocated = 0;
Packit Service fdd496
      return err;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  /* Parse the regular expression, and build a structure tree.  */
Packit Service fdd496
  preg->re_nsub = 0;
Packit Service fdd496
  dfa->str_tree = parse (&regexp, preg, syntax, &err;;
Packit Service fdd496
  if (BE (dfa->str_tree == NULL, 0))
Packit Service fdd496
    goto re_compile_internal_free_return;
Packit Service fdd496
Packit Service fdd496
  /* Analyze the tree and create the nfa.  */
Packit Service fdd496
  err = analyze (preg);
Packit Service fdd496
  if (BE (err != REG_NOERROR, 0))
Packit Service fdd496
    goto re_compile_internal_free_return;
Packit Service fdd496
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  /* If possible, do searching in single byte encoding to speed things up.  */
Packit Service fdd496
  if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
Packit Service fdd496
    optimize_utf8 (dfa);
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
  /* Then create the initial state of the dfa.  */
Packit Service fdd496
  err = create_initial_state (dfa);
Packit Service fdd496
Packit Service fdd496
  /* Release work areas.  */
Packit Service fdd496
  free_workarea_compile (preg);
Packit Service fdd496
  re_string_destruct (&regexp);
Packit Service fdd496
Packit Service fdd496
  if (BE (err != REG_NOERROR, 0))
Packit Service fdd496
    {
Packit Service fdd496
      lock_fini (dfa->lock);
Packit Service fdd496
      free_dfa_content (dfa);
Packit Service fdd496
      preg->buffer = NULL;
Packit Service fdd496
      preg->allocated = 0;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  return err;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Initialize DFA.  We use the length of the regular expression PAT_LEN
Packit Service fdd496
   as the initial length of some arrays.  */
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
init_dfa (re_dfa_t *dfa, size_t pat_len)
Packit Service fdd496
{
Packit Service fdd496
  __re_size_t table_size;
Packit Service fdd496
#ifndef _LIBC
Packit Service fdd496
  const char *codeset_name;
Packit Service fdd496
#endif
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
Packit Service fdd496
#else
Packit Service fdd496
  size_t max_i18n_object_size = 0;
Packit Service fdd496
#endif
Packit Service fdd496
  size_t max_object_size =
Packit Service fdd496
    MAX (sizeof (struct re_state_table_entry),
Packit Service fdd496
	 MAX (sizeof (re_token_t),
Packit Service fdd496
	      MAX (sizeof (re_node_set),
Packit Service fdd496
		   MAX (sizeof (regmatch_t),
Packit Service fdd496
			max_i18n_object_size))));
Packit Service fdd496
Packit Service fdd496
  memset (dfa, '\0', sizeof (re_dfa_t));
Packit Service fdd496
Packit Service fdd496
  /* Force allocation of str_tree_storage the first time.  */
Packit Service fdd496
  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
Packit Service fdd496
Packit Service fdd496
  /* Avoid overflows.  The extra "/ 2" is for the table_size doubling
Packit Service fdd496
     calculation below, and for similar doubling calculations
Packit Service fdd496
     elsewhere.  And it's <= rather than <, because some of the
Packit Service fdd496
     doubling calculations add 1 afterwards.  */
Packit Service fdd496
  if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) / 2 <= pat_len, 0))
Packit Service fdd496
    return REG_ESPACE;
Packit Service fdd496
Packit Service fdd496
  dfa->nodes_alloc = pat_len + 1;
Packit Service fdd496
  dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
Packit Service fdd496
Packit Service fdd496
  /*  table_size = 2 ^ ceil(log pat_len) */
Packit Service fdd496
  for (table_size = 1; ; table_size <<= 1)
Packit Service fdd496
    if (table_size > pat_len)
Packit Service fdd496
      break;
Packit Service fdd496
Packit Service fdd496
  dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
Packit Service fdd496
  dfa->state_hash_mask = table_size - 1;
Packit Service fdd496
Packit Service fdd496
  dfa->mb_cur_max = MB_CUR_MAX;
Packit Service fdd496
#ifdef _LIBC
Packit Service fdd496
  if (dfa->mb_cur_max == 6
Packit Service fdd496
      && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
Packit Service fdd496
    dfa->is_utf8 = 1;
Packit Service fdd496
  dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
Packit Service fdd496
		       != 0);
Packit Service fdd496
#else
Packit Service fdd496
  codeset_name = nl_langinfo (CODESET);
Packit Service fdd496
  if ((codeset_name[0] == 'U' || codeset_name[0] == 'u')
Packit Service fdd496
      && (codeset_name[1] == 'T' || codeset_name[1] == 't')
Packit Service fdd496
      && (codeset_name[2] == 'F' || codeset_name[2] == 'f')
Packit Service fdd496
      && strcmp (codeset_name + 3 + (codeset_name[3] == '-'), "8") == 0)
Packit Service fdd496
    dfa->is_utf8 = 1;
Packit Service fdd496
Packit Service fdd496
  /* We check exhaustively in the loop below if this charset is a
Packit Service fdd496
     superset of ASCII.  */
Packit Service fdd496
  dfa->map_notascii = 0;
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  if (dfa->mb_cur_max > 1)
Packit Service fdd496
    {
Packit Service fdd496
      if (dfa->is_utf8)
Packit Service fdd496
	dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
Packit Service fdd496
      else
Packit Service fdd496
	{
Packit Service fdd496
	  int i, j, ch;
Packit Service fdd496
Packit Service fdd496
	  dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
Packit Service fdd496
	  if (BE (dfa->sb_char == NULL, 0))
Packit Service fdd496
	    return REG_ESPACE;
Packit Service fdd496
Packit Service fdd496
	  /* Set the bits corresponding to single byte chars.  */
Packit Service fdd496
	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
Packit Service fdd496
	    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
Packit Service fdd496
	      {
Packit Service fdd496
		wint_t wch = __btowc (ch);
Packit Service fdd496
		if (wch != WEOF)
Packit Service fdd496
		  dfa->sb_char[i] |= (bitset_word_t) 1 << j;
Packit Service fdd496
# ifndef _LIBC
Packit Service fdd496
		if (isascii (ch) && wch != ch)
Packit Service fdd496
		  dfa->map_notascii = 1;
Packit Service fdd496
# endif
Packit Service fdd496
	      }
Packit Service fdd496
	}
Packit Service fdd496
    }
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
  if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
Packit Service fdd496
    return REG_ESPACE;
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Initialize WORD_CHAR table, which indicate which character is
Packit Service fdd496
   "word".  In this case "word" means that it is the word construction
Packit Service fdd496
   character used by some operators like "\<", "\>", etc.  */
Packit Service fdd496
Packit Service fdd496
static void
Packit Service fdd496
internal_function
Packit Service fdd496
init_word_char (re_dfa_t *dfa)
Packit Service fdd496
{
Packit Service fdd496
  int i = 0;
Packit Service fdd496
  int j;
Packit Service fdd496
  int ch = 0;
Packit Service fdd496
  dfa->word_ops_used = 1;
Packit Service fdd496
  if (BE (dfa->map_notascii == 0, 1))
Packit Service fdd496
    {
Packit Service fdd496
      bitset_word_t bits0 = 0x00000000;
Packit Service fdd496
      bitset_word_t bits1 = 0x03ff0000;
Packit Service fdd496
      bitset_word_t bits2 = 0x87fffffe;
Packit Service fdd496
      bitset_word_t bits3 = 0x07fffffe;
Packit Service fdd496
      if (BITSET_WORD_BITS == 64)
Packit Service fdd496
	{
Packit Service fdd496
	  dfa->word_char[0] = bits1 << 31 << 1 | bits0;
Packit Service fdd496
	  dfa->word_char[1] = bits3 << 31 << 1 | bits2;
Packit Service fdd496
	  i = 2;
Packit Service fdd496
	}
Packit Service fdd496
      else if (BITSET_WORD_BITS == 32)
Packit Service fdd496
	{
Packit Service fdd496
	  dfa->word_char[0] = bits0;
Packit Service fdd496
	  dfa->word_char[1] = bits1;
Packit Service fdd496
	  dfa->word_char[2] = bits2;
Packit Service fdd496
	  dfa->word_char[3] = bits3;
Packit Service fdd496
	  i = 4;
Packit Service fdd496
	}
Packit Service fdd496
      else
Packit Service fdd496
        goto general_case;
Packit Service fdd496
      ch = 128;
Packit Service fdd496
Packit Service fdd496
      if (BE (dfa->is_utf8, 1))
Packit Service fdd496
	{
Packit Service fdd496
	  memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8);
Packit Service fdd496
	  return;
Packit Service fdd496
	}
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
 general_case:
Packit Service fdd496
  for (; i < BITSET_WORDS; ++i)
Packit Service fdd496
    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
Packit Service fdd496
      if (isalnum (ch) || ch == '_')
Packit Service fdd496
	dfa->word_char[i] |= (bitset_word_t) 1 << j;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Free the work area which are only used while compiling.  */
Packit Service fdd496
Packit Service fdd496
static void
Packit Service fdd496
free_workarea_compile (regex_t *preg)
Packit Service fdd496
{
Packit Service fdd496
  re_dfa_t *dfa = preg->buffer;
Packit Service fdd496
  bin_tree_storage_t *storage, *next;
Packit Service fdd496
  for (storage = dfa->str_tree_storage; storage; storage = next)
Packit Service fdd496
    {
Packit Service fdd496
      next = storage->next;
Packit Service fdd496
      re_free (storage);
Packit Service fdd496
    }
Packit Service fdd496
  dfa->str_tree_storage = NULL;
Packit Service fdd496
  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
Packit Service fdd496
  dfa->str_tree = NULL;
Packit Service fdd496
  re_free (dfa->org_indices);
Packit Service fdd496
  dfa->org_indices = NULL;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Create initial states for all contexts.  */
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
create_initial_state (re_dfa_t *dfa)
Packit Service fdd496
{
Packit Service fdd496
  Idx first, i;
Packit Service fdd496
  reg_errcode_t err;
Packit Service fdd496
  re_node_set init_nodes;
Packit Service fdd496
Packit Service fdd496
  /* Initial states have the epsilon closure of the node which is
Packit Service fdd496
     the first node of the regular expression.  */
Packit Service fdd496
  first = dfa->str_tree->first->node_idx;
Packit Service fdd496
  dfa->init_node = first;
Packit Service fdd496
  err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
Packit Service fdd496
  if (BE (err != REG_NOERROR, 0))
Packit Service fdd496
    return err;
Packit Service fdd496
Packit Service fdd496
  /* The back-references which are in initial states can epsilon transit,
Packit Service fdd496
     since in this case all of the subexpressions can be null.
Packit Service fdd496
     Then we add epsilon closures of the nodes which are the next nodes of
Packit Service fdd496
     the back-references.  */
Packit Service fdd496
  if (dfa->nbackref > 0)
Packit Service fdd496
    for (i = 0; i < init_nodes.nelem; ++i)
Packit Service fdd496
      {
Packit Service fdd496
	Idx node_idx = init_nodes.elems[i];
Packit Service fdd496
	re_token_type_t type = dfa->nodes[node_idx].type;
Packit Service fdd496
Packit Service fdd496
	Idx clexp_idx;
Packit Service fdd496
	if (type != OP_BACK_REF)
Packit Service fdd496
	  continue;
Packit Service fdd496
	for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
Packit Service fdd496
	  {
Packit Service fdd496
	    re_token_t *clexp_node;
Packit Service fdd496
	    clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
Packit Service fdd496
	    if (clexp_node->type == OP_CLOSE_SUBEXP
Packit Service fdd496
		&& clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
Packit Service fdd496
	      break;
Packit Service fdd496
	  }
Packit Service fdd496
	if (clexp_idx == init_nodes.nelem)
Packit Service fdd496
	  continue;
Packit Service fdd496
Packit Service fdd496
	if (type == OP_BACK_REF)
Packit Service fdd496
	  {
Packit Service fdd496
	    Idx dest_idx = dfa->edests[node_idx].elems[0];
Packit Service fdd496
	    if (!re_node_set_contains (&init_nodes, dest_idx))
Packit Service fdd496
	      {
Packit Service fdd496
		reg_errcode_t merge_err
Packit Service fdd496
                  = re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
Packit Service fdd496
		if (merge_err != REG_NOERROR)
Packit Service fdd496
		  return merge_err;
Packit Service fdd496
		i = 0;
Packit Service fdd496
	      }
Packit Service fdd496
	  }
Packit Service fdd496
      }
Packit Service fdd496
Packit Service fdd496
  /* It must be the first time to invoke acquire_state.  */
Packit Service fdd496
  dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
Packit Service fdd496
  /* We don't check ERR here, since the initial state must not be NULL.  */
Packit Service fdd496
  if (BE (dfa->init_state == NULL, 0))
Packit Service fdd496
    return err;
Packit Service fdd496
  if (dfa->init_state->has_constraint)
Packit Service fdd496
    {
Packit Service fdd496
      dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
Packit Service fdd496
						       CONTEXT_WORD);
Packit Service fdd496
      dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
Packit Service fdd496
						     CONTEXT_NEWLINE);
Packit Service fdd496
      dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
Packit Service fdd496
							 &init_nodes,
Packit Service fdd496
							 CONTEXT_NEWLINE
Packit Service fdd496
							 | CONTEXT_BEGBUF);
Packit Service fdd496
      if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
Packit Service fdd496
	      || dfa->init_state_begbuf == NULL, 0))
Packit Service fdd496
	return err;
Packit Service fdd496
    }
Packit Service fdd496
  else
Packit Service fdd496
    dfa->init_state_word = dfa->init_state_nl
Packit Service fdd496
      = dfa->init_state_begbuf = dfa->init_state;
Packit Service fdd496
Packit Service fdd496
  re_node_set_free (&init_nodes);
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496

Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
/* If it is possible to do searching in single byte encoding instead of UTF-8
Packit Service fdd496
   to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
Packit Service fdd496
   DFA nodes where needed.  */
Packit Service fdd496
Packit Service fdd496
static void
Packit Service fdd496
optimize_utf8 (re_dfa_t *dfa)
Packit Service fdd496
{
Packit Service fdd496
  Idx node;
Packit Service fdd496
  int i;
Packit Service fdd496
  bool mb_chars = false;
Packit Service fdd496
  bool has_period = false;
Packit Service fdd496
Packit Service fdd496
  for (node = 0; node < dfa->nodes_len; ++node)
Packit Service fdd496
    switch (dfa->nodes[node].type)
Packit Service fdd496
      {
Packit Service fdd496
      case CHARACTER:
Packit Service fdd496
	if (dfa->nodes[node].opr.c >= ASCII_CHARS)
Packit Service fdd496
	  mb_chars = true;
Packit Service fdd496
	break;
Packit Service fdd496
      case ANCHOR:
Packit Service fdd496
	switch (dfa->nodes[node].opr.ctx_type)
Packit Service fdd496
	  {
Packit Service fdd496
	  case LINE_FIRST:
Packit Service fdd496
	  case LINE_LAST:
Packit Service fdd496
	  case BUF_FIRST:
Packit Service fdd496
	  case BUF_LAST:
Packit Service fdd496
	    break;
Packit Service fdd496
	  default:
Packit Service fdd496
	    /* Word anchors etc. cannot be handled.  It's okay to test
Packit Service fdd496
	       opr.ctx_type since constraints (for all DFA nodes) are
Packit Service fdd496
	       created by ORing one or more opr.ctx_type values.  */
Packit Service fdd496
	    return;
Packit Service fdd496
	  }
Packit Service fdd496
	break;
Packit Service fdd496
      case OP_PERIOD:
Packit Service fdd496
	has_period = true;
Packit Service fdd496
	break;
Packit Service fdd496
      case OP_BACK_REF:
Packit Service fdd496
      case OP_ALT:
Packit Service fdd496
      case END_OF_RE:
Packit Service fdd496
      case OP_DUP_ASTERISK:
Packit Service fdd496
      case OP_OPEN_SUBEXP:
Packit Service fdd496
      case OP_CLOSE_SUBEXP:
Packit Service fdd496
	break;
Packit Service fdd496
      case COMPLEX_BRACKET:
Packit Service fdd496
	return;
Packit Service fdd496
      case SIMPLE_BRACKET:
Packit Service fdd496
	/* Just double check.  */
Packit Service fdd496
	{
Packit Service fdd496
	  int rshift = (ASCII_CHARS % BITSET_WORD_BITS == 0
Packit Service fdd496
			? 0
Packit Service fdd496
			: BITSET_WORD_BITS - ASCII_CHARS % BITSET_WORD_BITS);
Packit Service fdd496
	  for (i = ASCII_CHARS / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
Packit Service fdd496
	    {
Packit Service fdd496
	      if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0)
Packit Service fdd496
		return;
Packit Service fdd496
	      rshift = 0;
Packit Service fdd496
	    }
Packit Service fdd496
	}
Packit Service fdd496
	break;
Packit Service fdd496
      default:
Packit Service fdd496
	abort ();
Packit Service fdd496
      }
Packit Service fdd496
Packit Service fdd496
  if (mb_chars || has_period)
Packit Service fdd496
    for (node = 0; node < dfa->nodes_len; ++node)
Packit Service fdd496
      {
Packit Service fdd496
	if (dfa->nodes[node].type == CHARACTER
Packit Service fdd496
	    && dfa->nodes[node].opr.c >= ASCII_CHARS)
Packit Service fdd496
	  dfa->nodes[node].mb_partial = 0;
Packit Service fdd496
	else if (dfa->nodes[node].type == OP_PERIOD)
Packit Service fdd496
	  dfa->nodes[node].type = OP_UTF8_PERIOD;
Packit Service fdd496
      }
Packit Service fdd496
Packit Service fdd496
  /* The search can be in single byte locale.  */
Packit Service fdd496
  dfa->mb_cur_max = 1;
Packit Service fdd496
  dfa->is_utf8 = 0;
Packit Service fdd496
  dfa->has_mb_node = dfa->nbackref > 0 || has_period;
Packit Service fdd496
}
Packit Service fdd496
#endif
Packit Service fdd496

Packit Service fdd496
/* Analyze the structure tree, and calculate "first", "next", "edest",
Packit Service fdd496
   "eclosure", and "inveclosure".  */
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
analyze (regex_t *preg)
Packit Service fdd496
{
Packit Service fdd496
  re_dfa_t *dfa = preg->buffer;
Packit Service fdd496
  reg_errcode_t ret;
Packit Service fdd496
Packit Service fdd496
  /* Allocate arrays.  */
Packit Service fdd496
  dfa->nexts = re_malloc (Idx, dfa->nodes_alloc);
Packit Service fdd496
  dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc);
Packit Service fdd496
  dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
Packit Service fdd496
  dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
Packit Service fdd496
  if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
Packit Service fdd496
	  || dfa->eclosures == NULL, 0))
Packit Service fdd496
    return REG_ESPACE;
Packit Service fdd496
Packit Service fdd496
  dfa->subexp_map = re_malloc (Idx, preg->re_nsub);
Packit Service fdd496
  if (dfa->subexp_map != NULL)
Packit Service fdd496
    {
Packit Service fdd496
      Idx i;
Packit Service fdd496
      for (i = 0; i < preg->re_nsub; i++)
Packit Service fdd496
	dfa->subexp_map[i] = i;
Packit Service fdd496
      preorder (dfa->str_tree, optimize_subexps, dfa);
Packit Service fdd496
      for (i = 0; i < preg->re_nsub; i++)
Packit Service fdd496
	if (dfa->subexp_map[i] != i)
Packit Service fdd496
	  break;
Packit Service fdd496
      if (i == preg->re_nsub)
Packit Service fdd496
	{
Packit Service fdd496
	  free (dfa->subexp_map);
Packit Service fdd496
	  dfa->subexp_map = NULL;
Packit Service fdd496
	}
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  ret = postorder (dfa->str_tree, lower_subexps, preg);
Packit Service fdd496
  if (BE (ret != REG_NOERROR, 0))
Packit Service fdd496
    return ret;
Packit Service fdd496
  ret = postorder (dfa->str_tree, calc_first, dfa);
Packit Service fdd496
  if (BE (ret != REG_NOERROR, 0))
Packit Service fdd496
    return ret;
Packit Service fdd496
  preorder (dfa->str_tree, calc_next, dfa);
Packit Service fdd496
  ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
Packit Service fdd496
  if (BE (ret != REG_NOERROR, 0))
Packit Service fdd496
    return ret;
Packit Service fdd496
  ret = calc_eclosure (dfa);
Packit Service fdd496
  if (BE (ret != REG_NOERROR, 0))
Packit Service fdd496
    return ret;
Packit Service fdd496
Packit Service fdd496
  /* We only need this during the prune_impossible_nodes pass in regexec.c;
Packit Service fdd496
     skip it if p_i_n will not run, as calc_inveclosure can be quadratic.  */
Packit Service fdd496
  if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
Packit Service fdd496
      || dfa->nbackref)
Packit Service fdd496
    {
Packit Service fdd496
      dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
Packit Service fdd496
      if (BE (dfa->inveclosures == NULL, 0))
Packit Service fdd496
	return REG_ESPACE;
Packit Service fdd496
      ret = calc_inveclosure (dfa);
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  return ret;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Our parse trees are very unbalanced, so we cannot use a stack to
Packit Service fdd496
   implement parse tree visits.  Instead, we use parent pointers and
Packit Service fdd496
   some hairy code in these two functions.  */
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
Packit Service fdd496
	   void *extra)
Packit Service fdd496
{
Packit Service fdd496
  bin_tree_t *node, *prev;
Packit Service fdd496
Packit Service fdd496
  for (node = root; ; )
Packit Service fdd496
    {
Packit Service fdd496
      /* Descend down the tree, preferably to the left (or to the right
Packit Service fdd496
	 if that's the only child).  */
Packit Service fdd496
      while (node->left || node->right)
Packit Service fdd496
	if (node->left)
Packit Service fdd496
	  node = node->left;
Packit Service fdd496
	else
Packit Service fdd496
	  node = node->right;
Packit Service fdd496
Packit Service fdd496
      do
Packit Service fdd496
	{
Packit Service fdd496
	  reg_errcode_t err = fn (extra, node);
Packit Service fdd496
	  if (BE (err != REG_NOERROR, 0))
Packit Service fdd496
	    return err;
Packit Service fdd496
	  if (node->parent == NULL)
Packit Service fdd496
	    return REG_NOERROR;
Packit Service fdd496
	  prev = node;
Packit Service fdd496
	  node = node->parent;
Packit Service fdd496
	}
Packit Service fdd496
      /* Go up while we have a node that is reached from the right.  */
Packit Service fdd496
      while (node->right == prev || node->right == NULL);
Packit Service fdd496
      node = node->right;
Packit Service fdd496
    }
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
Packit Service fdd496
	  void *extra)
Packit Service fdd496
{
Packit Service fdd496
  bin_tree_t *node;
Packit Service fdd496
Packit Service fdd496
  for (node = root; ; )
Packit Service fdd496
    {
Packit Service fdd496
      reg_errcode_t err = fn (extra, node);
Packit Service fdd496
      if (BE (err != REG_NOERROR, 0))
Packit Service fdd496
	return err;
Packit Service fdd496
Packit Service fdd496
      /* Go to the left node, or up and to the right.  */
Packit Service fdd496
      if (node->left)
Packit Service fdd496
	node = node->left;
Packit Service fdd496
      else
Packit Service fdd496
	{
Packit Service fdd496
	  bin_tree_t *prev = NULL;
Packit Service fdd496
	  while (node->right == prev || node->right == NULL)
Packit Service fdd496
	    {
Packit Service fdd496
	      prev = node;
Packit Service fdd496
	      node = node->parent;
Packit Service fdd496
	      if (!node)
Packit Service fdd496
		return REG_NOERROR;
Packit Service fdd496
	    }
Packit Service fdd496
	  node = node->right;
Packit Service fdd496
	}
Packit Service fdd496
    }
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
Packit Service fdd496
   re_search_internal to map the inner one's opr.idx to this one's.  Adjust
Packit Service fdd496
   backreferences as well.  Requires a preorder visit.  */
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
optimize_subexps (void *extra, bin_tree_t *node)
Packit Service fdd496
{
Packit Service fdd496
  re_dfa_t *dfa = (re_dfa_t *) extra;
Packit Service fdd496
Packit Service fdd496
  if (node->token.type == OP_BACK_REF && dfa->subexp_map)
Packit Service fdd496
    {
Packit Service fdd496
      int idx = node->token.opr.idx;
Packit Service fdd496
      node->token.opr.idx = dfa->subexp_map[idx];
Packit Service fdd496
      dfa->used_bkref_map |= 1 << node->token.opr.idx;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  else if (node->token.type == SUBEXP
Packit Service fdd496
	   && node->left && node->left->token.type == SUBEXP)
Packit Service fdd496
    {
Packit Service fdd496
      Idx other_idx = node->left->token.opr.idx;
Packit Service fdd496
Packit Service fdd496
      node->left = node->left->left;
Packit Service fdd496
      if (node->left)
Packit Service fdd496
	node->left->parent = node;
Packit Service fdd496
Packit Service fdd496
      dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
Packit Service fdd496
      if (other_idx < BITSET_WORD_BITS)
Packit Service fdd496
	dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
Packit Service fdd496
   of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP.  */
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
lower_subexps (void *extra, bin_tree_t *node)
Packit Service fdd496
{
Packit Service fdd496
  regex_t *preg = (regex_t *) extra;
Packit Service fdd496
  reg_errcode_t err = REG_NOERROR;
Packit Service fdd496
Packit Service fdd496
  if (node->left && node->left->token.type == SUBEXP)
Packit Service fdd496
    {
Packit Service fdd496
      node->left = lower_subexp (&err, preg, node->left);
Packit Service fdd496
      if (node->left)
Packit Service fdd496
	node->left->parent = node;
Packit Service fdd496
    }
Packit Service fdd496
  if (node->right && node->right->token.type == SUBEXP)
Packit Service fdd496
    {
Packit Service fdd496
      node->right = lower_subexp (&err, preg, node->right);
Packit Service fdd496
      if (node->right)
Packit Service fdd496
	node->right->parent = node;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  return err;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
static bin_tree_t *
Packit Service fdd496
lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
Packit Service fdd496
{
Packit Service fdd496
  re_dfa_t *dfa = preg->buffer;
Packit Service fdd496
  bin_tree_t *body = node->left;
Packit Service fdd496
  bin_tree_t *op, *cls, *tree1, *tree;
Packit Service fdd496
Packit Service fdd496
  if (preg->no_sub
Packit Service fdd496
      /* We do not optimize empty subexpressions, because otherwise we may
Packit Service fdd496
	 have bad CONCAT nodes with NULL children.  This is obviously not
Packit Service fdd496
	 very common, so we do not lose much.  An example that triggers
Packit Service fdd496
	 this case is the sed "script" /\(\)/x.  */
Packit Service fdd496
      && node->left != NULL
Packit Service fdd496
      && (node->token.opr.idx >= BITSET_WORD_BITS
Packit Service fdd496
	  || !(dfa->used_bkref_map
Packit Service fdd496
	       & ((bitset_word_t) 1 << node->token.opr.idx))))
Packit Service fdd496
    return node->left;
Packit Service fdd496
Packit Service fdd496
  /* Convert the SUBEXP node to the concatenation of an
Packit Service fdd496
     OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP.  */
Packit Service fdd496
  op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
Packit Service fdd496
  cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
Packit Service fdd496
  tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
Packit Service fdd496
  tree = create_tree (dfa, op, tree1, CONCAT);
Packit Service fdd496
  if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
Packit Service fdd496
    {
Packit Service fdd496
      *err = REG_ESPACE;
Packit Service fdd496
      return NULL;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
Packit Service fdd496
  op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
Packit Service fdd496
  return tree;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
Packit Service fdd496
   nodes.  Requires a postorder visit.  */
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
calc_first (void *extra, bin_tree_t *node)
Packit Service fdd496
{
Packit Service fdd496
  re_dfa_t *dfa = (re_dfa_t *) extra;
Packit Service fdd496
  if (node->token.type == CONCAT)
Packit Service fdd496
    {
Packit Service fdd496
      node->first = node->left->first;
Packit Service fdd496
      node->node_idx = node->left->node_idx;
Packit Service fdd496
    }
Packit Service fdd496
  else
Packit Service fdd496
    {
Packit Service fdd496
      node->first = node;
Packit Service fdd496
      node->node_idx = re_dfa_add_node (dfa, node->token);
Packit Service fdd496
      if (BE (node->node_idx == -1, 0))
Packit Service fdd496
	return REG_ESPACE;
Packit Service fdd496
      if (node->token.type == ANCHOR)
Packit Service fdd496
	dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
Packit Service fdd496
    }
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Pass 2: compute NEXT on the tree.  Preorder visit.  */
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
calc_next (void *extra, bin_tree_t *node)
Packit Service fdd496
{
Packit Service fdd496
  switch (node->token.type)
Packit Service fdd496
    {
Packit Service fdd496
    case OP_DUP_ASTERISK:
Packit Service fdd496
      node->left->next = node;
Packit Service fdd496
      break;
Packit Service fdd496
    case CONCAT:
Packit Service fdd496
      node->left->next = node->right->first;
Packit Service fdd496
      node->right->next = node->next;
Packit Service fdd496
      break;
Packit Service fdd496
    default:
Packit Service fdd496
      if (node->left)
Packit Service fdd496
	node->left->next = node->next;
Packit Service fdd496
      if (node->right)
Packit Service fdd496
	node->right->next = node->next;
Packit Service fdd496
      break;
Packit Service fdd496
    }
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Pass 3: link all DFA nodes to their NEXT node (any order will do).  */
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
link_nfa_nodes (void *extra, bin_tree_t *node)
Packit Service fdd496
{
Packit Service fdd496
  re_dfa_t *dfa = (re_dfa_t *) extra;
Packit Service fdd496
  Idx idx = node->node_idx;
Packit Service fdd496
  reg_errcode_t err = REG_NOERROR;
Packit Service fdd496
Packit Service fdd496
  switch (node->token.type)
Packit Service fdd496
    {
Packit Service fdd496
    case CONCAT:
Packit Service fdd496
      break;
Packit Service fdd496
Packit Service fdd496
    case END_OF_RE:
Packit Service fdd496
      assert (node->next == NULL);
Packit Service fdd496
      break;
Packit Service fdd496
Packit Service fdd496
    case OP_DUP_ASTERISK:
Packit Service fdd496
    case OP_ALT:
Packit Service fdd496
      {
Packit Service fdd496
	Idx left, right;
Packit Service fdd496
	dfa->has_plural_match = 1;
Packit Service fdd496
	if (node->left != NULL)
Packit Service fdd496
	  left = node->left->first->node_idx;
Packit Service fdd496
	else
Packit Service fdd496
	  left = node->next->node_idx;
Packit Service fdd496
	if (node->right != NULL)
Packit Service fdd496
	  right = node->right->first->node_idx;
Packit Service fdd496
	else
Packit Service fdd496
	  right = node->next->node_idx;
Packit Service fdd496
	assert (left > -1);
Packit Service fdd496
	assert (right > -1);
Packit Service fdd496
	err = re_node_set_init_2 (dfa->edests + idx, left, right);
Packit Service fdd496
      }
Packit Service fdd496
      break;
Packit Service fdd496
Packit Service fdd496
    case ANCHOR:
Packit Service fdd496
    case OP_OPEN_SUBEXP:
Packit Service fdd496
    case OP_CLOSE_SUBEXP:
Packit Service fdd496
      err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
Packit Service fdd496
      break;
Packit Service fdd496
Packit Service fdd496
    case OP_BACK_REF:
Packit Service fdd496
      dfa->nexts[idx] = node->next->node_idx;
Packit Service fdd496
      if (node->token.type == OP_BACK_REF)
Packit Service fdd496
	err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
Packit Service fdd496
      break;
Packit Service fdd496
Packit Service fdd496
    default:
Packit Service fdd496
      assert (!IS_EPSILON_NODE (node->token.type));
Packit Service fdd496
      dfa->nexts[idx] = node->next->node_idx;
Packit Service fdd496
      break;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  return err;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Duplicate the epsilon closure of the node ROOT_NODE.
Packit Service fdd496
   Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
Packit Service fdd496
   to their own constraint.  */
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
internal_function
Packit Service fdd496
duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
Packit Service fdd496
			Idx root_node, unsigned int init_constraint)
Packit Service fdd496
{
Packit Service fdd496
  Idx org_node, clone_node;
Packit Service fdd496
  bool ok;
Packit Service fdd496
  unsigned int constraint = init_constraint;
Packit Service fdd496
  for (org_node = top_org_node, clone_node = top_clone_node;;)
Packit Service fdd496
    {
Packit Service fdd496
      Idx org_dest, clone_dest;
Packit Service fdd496
      if (dfa->nodes[org_node].type == OP_BACK_REF)
Packit Service fdd496
	{
Packit Service fdd496
	  /* If the back reference epsilon-transit, its destination must
Packit Service fdd496
	     also have the constraint.  Then duplicate the epsilon closure
Packit Service fdd496
	     of the destination of the back reference, and store it in
Packit Service fdd496
	     edests of the back reference.  */
Packit Service fdd496
	  org_dest = dfa->nexts[org_node];
Packit Service fdd496
	  re_node_set_empty (dfa->edests + clone_node);
Packit Service fdd496
	  clone_dest = duplicate_node (dfa, org_dest, constraint);
Packit Service fdd496
	  if (BE (clone_dest == -1, 0))
Packit Service fdd496
	    return REG_ESPACE;
Packit Service fdd496
	  dfa->nexts[clone_node] = dfa->nexts[org_node];
Packit Service fdd496
	  ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
Packit Service fdd496
	  if (BE (! ok, 0))
Packit Service fdd496
	    return REG_ESPACE;
Packit Service fdd496
	}
Packit Service fdd496
      else if (dfa->edests[org_node].nelem == 0)
Packit Service fdd496
	{
Packit Service fdd496
	  /* In case of the node can't epsilon-transit, don't duplicate the
Packit Service fdd496
	     destination and store the original destination as the
Packit Service fdd496
	     destination of the node.  */
Packit Service fdd496
	  dfa->nexts[clone_node] = dfa->nexts[org_node];
Packit Service fdd496
	  break;
Packit Service fdd496
	}
Packit Service fdd496
      else if (dfa->edests[org_node].nelem == 1)
Packit Service fdd496
	{
Packit Service fdd496
	  /* In case of the node can epsilon-transit, and it has only one
Packit Service fdd496
	     destination.  */
Packit Service fdd496
	  org_dest = dfa->edests[org_node].elems[0];
Packit Service fdd496
	  re_node_set_empty (dfa->edests + clone_node);
Packit Service fdd496
	  /* If the node is root_node itself, it means the epsilon closure
Packit Service fdd496
	     has a loop.  Then tie it to the destination of the root_node.  */
Packit Service fdd496
	  if (org_node == root_node && clone_node != org_node)
Packit Service fdd496
	    {
Packit Service fdd496
	      ok = re_node_set_insert (dfa->edests + clone_node, org_dest);
Packit Service fdd496
	      if (BE (! ok, 0))
Packit Service fdd496
	        return REG_ESPACE;
Packit Service fdd496
	      break;
Packit Service fdd496
	    }
Packit Service fdd496
	  /* In case the node has another constraint, append it.  */
Packit Service fdd496
	  constraint |= dfa->nodes[org_node].constraint;
Packit Service fdd496
	  clone_dest = duplicate_node (dfa, org_dest, constraint);
Packit Service fdd496
	  if (BE (clone_dest == -1, 0))
Packit Service fdd496
	    return REG_ESPACE;
Packit Service fdd496
	  ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
Packit Service fdd496
	  if (BE (! ok, 0))
Packit Service fdd496
	    return REG_ESPACE;
Packit Service fdd496
	}
Packit Service fdd496
      else /* dfa->edests[org_node].nelem == 2 */
Packit Service fdd496
	{
Packit Service fdd496
	  /* In case of the node can epsilon-transit, and it has two
Packit Service fdd496
	     destinations. In the bin_tree_t and DFA, that's '|' and '*'.   */
Packit Service fdd496
	  org_dest = dfa->edests[org_node].elems[0];
Packit Service fdd496
	  re_node_set_empty (dfa->edests + clone_node);
Packit Service fdd496
	  /* Search for a duplicated node which satisfies the constraint.  */
Packit Service fdd496
	  clone_dest = search_duplicated_node (dfa, org_dest, constraint);
Packit Service fdd496
	  if (clone_dest == -1)
Packit Service fdd496
	    {
Packit Service fdd496
	      /* There is no such duplicated node, create a new one.  */
Packit Service fdd496
	      reg_errcode_t err;
Packit Service fdd496
	      clone_dest = duplicate_node (dfa, org_dest, constraint);
Packit Service fdd496
	      if (BE (clone_dest == -1, 0))
Packit Service fdd496
		return REG_ESPACE;
Packit Service fdd496
	      ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
Packit Service fdd496
	      if (BE (! ok, 0))
Packit Service fdd496
		return REG_ESPACE;
Packit Service fdd496
	      err = duplicate_node_closure (dfa, org_dest, clone_dest,
Packit Service fdd496
					    root_node, constraint);
Packit Service fdd496
	      if (BE (err != REG_NOERROR, 0))
Packit Service fdd496
		return err;
Packit Service fdd496
	    }
Packit Service fdd496
	  else
Packit Service fdd496
	    {
Packit Service fdd496
	      /* There is a duplicated node which satisfies the constraint,
Packit Service fdd496
		 use it to avoid infinite loop.  */
Packit Service fdd496
	      ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
Packit Service fdd496
	      if (BE (! ok, 0))
Packit Service fdd496
		return REG_ESPACE;
Packit Service fdd496
	    }
Packit Service fdd496
Packit Service fdd496
	  org_dest = dfa->edests[org_node].elems[1];
Packit Service fdd496
	  clone_dest = duplicate_node (dfa, org_dest, constraint);
Packit Service fdd496
	  if (BE (clone_dest == -1, 0))
Packit Service fdd496
	    return REG_ESPACE;
Packit Service fdd496
	  ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
Packit Service fdd496
	  if (BE (! ok, 0))
Packit Service fdd496
	    return REG_ESPACE;
Packit Service fdd496
	}
Packit Service fdd496
      org_node = org_dest;
Packit Service fdd496
      clone_node = clone_dest;
Packit Service fdd496
    }
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Search for a node which is duplicated from the node ORG_NODE, and
Packit Service fdd496
   satisfies the constraint CONSTRAINT.  */
Packit Service fdd496
Packit Service fdd496
static Idx
Packit Service fdd496
search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
Packit Service fdd496
			unsigned int constraint)
Packit Service fdd496
{
Packit Service fdd496
  Idx idx;
Packit Service fdd496
  for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
Packit Service fdd496
    {
Packit Service fdd496
      if (org_node == dfa->org_indices[idx]
Packit Service fdd496
	  && constraint == dfa->nodes[idx].constraint)
Packit Service fdd496
	return idx; /* Found.  */
Packit Service fdd496
    }
Packit Service fdd496
  return -1; /* Not found.  */
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
Packit Service fdd496
   Return the index of the new node, or -1 if insufficient storage is
Packit Service fdd496
   available.  */
Packit Service fdd496
Packit Service fdd496
static Idx
Packit Service fdd496
duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint)
Packit Service fdd496
{
Packit Service fdd496
  Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
Packit Service fdd496
  if (BE (dup_idx != -1, 1))
Packit Service fdd496
    {
Packit Service fdd496
      dfa->nodes[dup_idx].constraint = constraint;
Packit Service fdd496
      dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
Packit Service fdd496
      dfa->nodes[dup_idx].duplicated = 1;
Packit Service fdd496
Packit Service fdd496
      /* Store the index of the original node.  */
Packit Service fdd496
      dfa->org_indices[dup_idx] = org_idx;
Packit Service fdd496
    }
Packit Service fdd496
  return dup_idx;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
calc_inveclosure (re_dfa_t *dfa)
Packit Service fdd496
{
Packit Service fdd496
  Idx src, idx;
Packit Service fdd496
  bool ok;
Packit Service fdd496
  for (idx = 0; idx < dfa->nodes_len; ++idx)
Packit Service fdd496
    re_node_set_init_empty (dfa->inveclosures + idx);
Packit Service fdd496
Packit Service fdd496
  for (src = 0; src < dfa->nodes_len; ++src)
Packit Service fdd496
    {
Packit Service fdd496
      Idx *elems = dfa->eclosures[src].elems;
Packit Service fdd496
      for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
Packit Service fdd496
	{
Packit Service fdd496
	  ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
Packit Service fdd496
	  if (BE (! ok, 0))
Packit Service fdd496
	    return REG_ESPACE;
Packit Service fdd496
	}
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Calculate "eclosure" for all the node in DFA.  */
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
calc_eclosure (re_dfa_t *dfa)
Packit Service fdd496
{
Packit Service fdd496
  Idx node_idx;
Packit Service fdd496
  bool incomplete;
Packit Service fdd496
#ifdef DEBUG
Packit Service fdd496
  assert (dfa->nodes_len > 0);
Packit Service fdd496
#endif
Packit Service fdd496
  incomplete = false;
Packit Service fdd496
  /* For each nodes, calculate epsilon closure.  */
Packit Service fdd496
  for (node_idx = 0; ; ++node_idx)
Packit Service fdd496
    {
Packit Service fdd496
      reg_errcode_t err;
Packit Service fdd496
      re_node_set eclosure_elem;
Packit Service fdd496
      if (node_idx == dfa->nodes_len)
Packit Service fdd496
	{
Packit Service fdd496
	  if (!incomplete)
Packit Service fdd496
	    break;
Packit Service fdd496
	  incomplete = false;
Packit Service fdd496
	  node_idx = 0;
Packit Service fdd496
	}
Packit Service fdd496
Packit Service fdd496
#ifdef DEBUG
Packit Service fdd496
      assert (dfa->eclosures[node_idx].nelem != -1);
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
      /* If we have already calculated, skip it.  */
Packit Service fdd496
      if (dfa->eclosures[node_idx].nelem != 0)
Packit Service fdd496
	continue;
Packit Service fdd496
      /* Calculate epsilon closure of 'node_idx'.  */
Packit Service fdd496
      err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);
Packit Service fdd496
      if (BE (err != REG_NOERROR, 0))
Packit Service fdd496
	return err;
Packit Service fdd496
Packit Service fdd496
      if (dfa->eclosures[node_idx].nelem == 0)
Packit Service fdd496
	{
Packit Service fdd496
	  incomplete = true;
Packit Service fdd496
	  re_node_set_free (&eclosure_elem);
Packit Service fdd496
	}
Packit Service fdd496
    }
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Calculate epsilon closure of NODE.  */
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
Packit Service fdd496
{
Packit Service fdd496
  reg_errcode_t err;
Packit Service fdd496
  Idx i;
Packit Service fdd496
  re_node_set eclosure;
Packit Service fdd496
  bool ok;
Packit Service fdd496
  bool incomplete = false;
Packit Service fdd496
  err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
Packit Service fdd496
  if (BE (err != REG_NOERROR, 0))
Packit Service fdd496
    return err;
Packit Service fdd496
Packit Service fdd496
  /* This indicates that we are calculating this node now.
Packit Service fdd496
     We reference this value to avoid infinite loop.  */
Packit Service fdd496
  dfa->eclosures[node].nelem = -1;
Packit Service fdd496
Packit Service fdd496
  /* If the current node has constraints, duplicate all nodes
Packit Service fdd496
     since they must inherit the constraints.  */
Packit Service fdd496
  if (dfa->nodes[node].constraint
Packit Service fdd496
      && dfa->edests[node].nelem
Packit Service fdd496
      && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
Packit Service fdd496
    {
Packit Service fdd496
      err = duplicate_node_closure (dfa, node, node, node,
Packit Service fdd496
				    dfa->nodes[node].constraint);
Packit Service fdd496
      if (BE (err != REG_NOERROR, 0))
Packit Service fdd496
	return err;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  /* Expand each epsilon destination nodes.  */
Packit Service fdd496
  if (IS_EPSILON_NODE(dfa->nodes[node].type))
Packit Service fdd496
    for (i = 0; i < dfa->edests[node].nelem; ++i)
Packit Service fdd496
      {
Packit Service fdd496
	re_node_set eclosure_elem;
Packit Service fdd496
	Idx edest = dfa->edests[node].elems[i];
Packit Service fdd496
	/* If calculating the epsilon closure of 'edest' is in progress,
Packit Service fdd496
	   return intermediate result.  */
Packit Service fdd496
	if (dfa->eclosures[edest].nelem == -1)
Packit Service fdd496
	  {
Packit Service fdd496
	    incomplete = true;
Packit Service fdd496
	    continue;
Packit Service fdd496
	  }
Packit Service fdd496
	/* If we haven't calculated the epsilon closure of 'edest' yet,
Packit Service fdd496
	   calculate now. Otherwise use calculated epsilon closure.  */
Packit Service fdd496
	if (dfa->eclosures[edest].nelem == 0)
Packit Service fdd496
	  {
Packit Service fdd496
	    err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false);
Packit Service fdd496
	    if (BE (err != REG_NOERROR, 0))
Packit Service fdd496
	      return err;
Packit Service fdd496
	  }
Packit Service fdd496
	else
Packit Service fdd496
	  eclosure_elem = dfa->eclosures[edest];
Packit Service fdd496
	/* Merge the epsilon closure of 'edest'.  */
Packit Service fdd496
	err = re_node_set_merge (&eclosure, &eclosure_elem);
Packit Service fdd496
	if (BE (err != REG_NOERROR, 0))
Packit Service fdd496
	  return err;
Packit Service fdd496
	/* If the epsilon closure of 'edest' is incomplete,
Packit Service fdd496
	   the epsilon closure of this node is also incomplete.  */
Packit Service fdd496
	if (dfa->eclosures[edest].nelem == 0)
Packit Service fdd496
	  {
Packit Service fdd496
	    incomplete = true;
Packit Service fdd496
	    re_node_set_free (&eclosure_elem);
Packit Service fdd496
	  }
Packit Service fdd496
      }
Packit Service fdd496
Packit Service fdd496
  /* An epsilon closure includes itself.  */
Packit Service fdd496
  ok = re_node_set_insert (&eclosure, node);
Packit Service fdd496
  if (BE (! ok, 0))
Packit Service fdd496
    return REG_ESPACE;
Packit Service fdd496
  if (incomplete && !root)
Packit Service fdd496
    dfa->eclosures[node].nelem = 0;
Packit Service fdd496
  else
Packit Service fdd496
    dfa->eclosures[node] = eclosure;
Packit Service fdd496
  *new_set = eclosure;
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496

Packit Service fdd496
/* Functions for token which are used in the parser.  */
Packit Service fdd496
Packit Service fdd496
/* Fetch a token from INPUT.
Packit Service fdd496
   We must not use this function inside bracket expressions.  */
Packit Service fdd496
Packit Service fdd496
static void
Packit Service fdd496
internal_function
Packit Service fdd496
fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
Packit Service fdd496
{
Packit Service fdd496
  re_string_skip_bytes (input, peek_token (result, input, syntax));
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Peek a token from INPUT, and return the length of the token.
Packit Service fdd496
   We must not use this function inside bracket expressions.  */
Packit Service fdd496
Packit Service fdd496
static int
Packit Service fdd496
internal_function
Packit Service fdd496
peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
Packit Service fdd496
{
Packit Service fdd496
  unsigned char c;
Packit Service fdd496
Packit Service fdd496
  if (re_string_eoi (input))
Packit Service fdd496
    {
Packit Service fdd496
      token->type = END_OF_RE;
Packit Service fdd496
      return 0;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  c = re_string_peek_byte (input, 0);
Packit Service fdd496
  token->opr.c = c;
Packit Service fdd496
Packit Service fdd496
  token->word_char = 0;
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  token->mb_partial = 0;
Packit Service fdd496
  if (input->mb_cur_max > 1 &&
Packit Service fdd496
      !re_string_first_byte (input, re_string_cur_idx (input)))
Packit Service fdd496
    {
Packit Service fdd496
      token->type = CHARACTER;
Packit Service fdd496
      token->mb_partial = 1;
Packit Service fdd496
      return 1;
Packit Service fdd496
    }
Packit Service fdd496
#endif
Packit Service fdd496
  if (c == '\\')
Packit Service fdd496
    {
Packit Service fdd496
      unsigned char c2;
Packit Service fdd496
      if (re_string_cur_idx (input) + 1 >= re_string_length (input))
Packit Service fdd496
	{
Packit Service fdd496
	  token->type = BACK_SLASH;
Packit Service fdd496
	  return 1;
Packit Service fdd496
	}
Packit Service fdd496
Packit Service fdd496
      c2 = re_string_peek_byte_case (input, 1);
Packit Service fdd496
      token->opr.c = c2;
Packit Service fdd496
      token->type = CHARACTER;
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
      if (input->mb_cur_max > 1)
Packit Service fdd496
	{
Packit Service fdd496
	  wint_t wc = re_string_wchar_at (input,
Packit Service fdd496
					  re_string_cur_idx (input) + 1);
Packit Service fdd496
	  token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
Packit Service fdd496
	}
Packit Service fdd496
      else
Packit Service fdd496
#endif
Packit Service fdd496
	token->word_char = IS_WORD_CHAR (c2) != 0;
Packit Service fdd496
Packit Service fdd496
      switch (c2)
Packit Service fdd496
	{
Packit Service fdd496
	case '|':
Packit Service fdd496
	  if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
Packit Service fdd496
	    token->type = OP_ALT;
Packit Service fdd496
	  break;
Packit Service fdd496
	case '1': case '2': case '3': case '4': case '5':
Packit Service fdd496
	case '6': case '7': case '8': case '9':
Packit Service fdd496
	  if (!(syntax & RE_NO_BK_REFS))
Packit Service fdd496
	    {
Packit Service fdd496
	      token->type = OP_BACK_REF;
Packit Service fdd496
	      token->opr.idx = c2 - '1';
Packit Service fdd496
	    }
Packit Service fdd496
	  break;
Packit Service fdd496
	case '<':
Packit Service fdd496
	  if (!(syntax & RE_NO_GNU_OPS))
Packit Service fdd496
	    {
Packit Service fdd496
	      token->type = ANCHOR;
Packit Service fdd496
	      token->opr.ctx_type = WORD_FIRST;
Packit Service fdd496
	    }
Packit Service fdd496
	  break;
Packit Service fdd496
	case '>':
Packit Service fdd496
	  if (!(syntax & RE_NO_GNU_OPS))
Packit Service fdd496
	    {
Packit Service fdd496
	      token->type = ANCHOR;
Packit Service fdd496
	      token->opr.ctx_type = WORD_LAST;
Packit Service fdd496
	    }
Packit Service fdd496
	  break;
Packit Service fdd496
	case 'b':
Packit Service fdd496
	  if (!(syntax & RE_NO_GNU_OPS))
Packit Service fdd496
	    {
Packit Service fdd496
	      token->type = ANCHOR;
Packit Service fdd496
	      token->opr.ctx_type = WORD_DELIM;
Packit Service fdd496
	    }
Packit Service fdd496
	  break;
Packit Service fdd496
	case 'B':
Packit Service fdd496
	  if (!(syntax & RE_NO_GNU_OPS))
Packit Service fdd496
	    {
Packit Service fdd496
	      token->type = ANCHOR;
Packit Service fdd496
	      token->opr.ctx_type = NOT_WORD_DELIM;
Packit Service fdd496
	    }
Packit Service fdd496
	  break;
Packit Service fdd496
	case 'w':
Packit Service fdd496
	  if (!(syntax & RE_NO_GNU_OPS))
Packit Service fdd496
	    token->type = OP_WORD;
Packit Service fdd496
	  break;
Packit Service fdd496
	case 'W':
Packit Service fdd496
	  if (!(syntax & RE_NO_GNU_OPS))
Packit Service fdd496
	    token->type = OP_NOTWORD;
Packit Service fdd496
	  break;
Packit Service fdd496
	case 's':
Packit Service fdd496
	  if (!(syntax & RE_NO_GNU_OPS))
Packit Service fdd496
	    token->type = OP_SPACE;
Packit Service fdd496
	  break;
Packit Service fdd496
	case 'S':
Packit Service fdd496
	  if (!(syntax & RE_NO_GNU_OPS))
Packit Service fdd496
	    token->type = OP_NOTSPACE;
Packit Service fdd496
	  break;
Packit Service fdd496
	case '`':
Packit Service fdd496
	  if (!(syntax & RE_NO_GNU_OPS))
Packit Service fdd496
	    {
Packit Service fdd496
	      token->type = ANCHOR;
Packit Service fdd496
	      token->opr.ctx_type = BUF_FIRST;
Packit Service fdd496
	    }
Packit Service fdd496
	  break;
Packit Service fdd496
	case '\'':
Packit Service fdd496
	  if (!(syntax & RE_NO_GNU_OPS))
Packit Service fdd496
	    {
Packit Service fdd496
	      token->type = ANCHOR;
Packit Service fdd496
	      token->opr.ctx_type = BUF_LAST;
Packit Service fdd496
	    }
Packit Service fdd496
	  break;
Packit Service fdd496
	case '(':
Packit Service fdd496
	  if (!(syntax & RE_NO_BK_PARENS))
Packit Service fdd496
	    token->type = OP_OPEN_SUBEXP;
Packit Service fdd496
	  break;
Packit Service fdd496
	case ')':
Packit Service fdd496
	  if (!(syntax & RE_NO_BK_PARENS))
Packit Service fdd496
	    token->type = OP_CLOSE_SUBEXP;
Packit Service fdd496
	  break;
Packit Service fdd496
	case '+':
Packit Service fdd496
	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
Packit Service fdd496
	    token->type = OP_DUP_PLUS;
Packit Service fdd496
	  break;
Packit Service fdd496
	case '?':
Packit Service fdd496
	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
Packit Service fdd496
	    token->type = OP_DUP_QUESTION;
Packit Service fdd496
	  break;
Packit Service fdd496
	case '{':
Packit Service fdd496
	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
Packit Service fdd496
	    token->type = OP_OPEN_DUP_NUM;
Packit Service fdd496
	  break;
Packit Service fdd496
	case '}':
Packit Service fdd496
	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
Packit Service fdd496
	    token->type = OP_CLOSE_DUP_NUM;
Packit Service fdd496
	  break;
Packit Service fdd496
	default:
Packit Service fdd496
	  break;
Packit Service fdd496
	}
Packit Service fdd496
      return 2;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  token->type = CHARACTER;
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  if (input->mb_cur_max > 1)
Packit Service fdd496
    {
Packit Service fdd496
      wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
Packit Service fdd496
      token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
Packit Service fdd496
    }
Packit Service fdd496
  else
Packit Service fdd496
#endif
Packit Service fdd496
    token->word_char = IS_WORD_CHAR (token->opr.c);
Packit Service fdd496
Packit Service fdd496
  switch (c)
Packit Service fdd496
    {
Packit Service fdd496
    case '\n':
Packit Service fdd496
      if (syntax & RE_NEWLINE_ALT)
Packit Service fdd496
	token->type = OP_ALT;
Packit Service fdd496
      break;
Packit Service fdd496
    case '|':
Packit Service fdd496
      if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
Packit Service fdd496
	token->type = OP_ALT;
Packit Service fdd496
      break;
Packit Service fdd496
    case '*':
Packit Service fdd496
      token->type = OP_DUP_ASTERISK;
Packit Service fdd496
      break;
Packit Service fdd496
    case '+':
Packit Service fdd496
      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
Packit Service fdd496
	token->type = OP_DUP_PLUS;
Packit Service fdd496
      break;
Packit Service fdd496
    case '?':
Packit Service fdd496
      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
Packit Service fdd496
	token->type = OP_DUP_QUESTION;
Packit Service fdd496
      break;
Packit Service fdd496
    case '{':
Packit Service fdd496
      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
Packit Service fdd496
	token->type = OP_OPEN_DUP_NUM;
Packit Service fdd496
      break;
Packit Service fdd496
    case '}':
Packit Service fdd496
      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
Packit Service fdd496
	token->type = OP_CLOSE_DUP_NUM;
Packit Service fdd496
      break;
Packit Service fdd496
    case '(':
Packit Service fdd496
      if (syntax & RE_NO_BK_PARENS)
Packit Service fdd496
	token->type = OP_OPEN_SUBEXP;
Packit Service fdd496
      break;
Packit Service fdd496
    case ')':
Packit Service fdd496
      if (syntax & RE_NO_BK_PARENS)
Packit Service fdd496
	token->type = OP_CLOSE_SUBEXP;
Packit Service fdd496
      break;
Packit Service fdd496
    case '[':
Packit Service fdd496
      token->type = OP_OPEN_BRACKET;
Packit Service fdd496
      break;
Packit Service fdd496
    case '.':
Packit Service fdd496
      token->type = OP_PERIOD;
Packit Service fdd496
      break;
Packit Service fdd496
    case '^':
Packit Service fdd496
      if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
Packit Service fdd496
	  re_string_cur_idx (input) != 0)
Packit Service fdd496
	{
Packit Service fdd496
	  char prev = re_string_peek_byte (input, -1);
Packit Service fdd496
	  if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
Packit Service fdd496
	    break;
Packit Service fdd496
	}
Packit Service fdd496
      token->type = ANCHOR;
Packit Service fdd496
      token->opr.ctx_type = LINE_FIRST;
Packit Service fdd496
      break;
Packit Service fdd496
    case '$':
Packit Service fdd496
      if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
Packit Service fdd496
	  re_string_cur_idx (input) + 1 != re_string_length (input))
Packit Service fdd496
	{
Packit Service fdd496
	  re_token_t next;
Packit Service fdd496
	  re_string_skip_bytes (input, 1);
Packit Service fdd496
	  peek_token (&next, input, syntax);
Packit Service fdd496
	  re_string_skip_bytes (input, -1);
Packit Service fdd496
	  if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
Packit Service fdd496
	    break;
Packit Service fdd496
	}
Packit Service fdd496
      token->type = ANCHOR;
Packit Service fdd496
      token->opr.ctx_type = LINE_LAST;
Packit Service fdd496
      break;
Packit Service fdd496
    default:
Packit Service fdd496
      break;
Packit Service fdd496
    }
Packit Service fdd496
  return 1;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Peek a token from INPUT, and return the length of the token.
Packit Service fdd496
   We must not use this function out of bracket expressions.  */
Packit Service fdd496
Packit Service fdd496
static int
Packit Service fdd496
internal_function
Packit Service fdd496
peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
Packit Service fdd496
{
Packit Service fdd496
  unsigned char c;
Packit Service fdd496
  if (re_string_eoi (input))
Packit Service fdd496
    {
Packit Service fdd496
      token->type = END_OF_RE;
Packit Service fdd496
      return 0;
Packit Service fdd496
    }
Packit Service fdd496
  c = re_string_peek_byte (input, 0);
Packit Service fdd496
  token->opr.c = c;
Packit Service fdd496
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  if (input->mb_cur_max > 1 &&
Packit Service fdd496
      !re_string_first_byte (input, re_string_cur_idx (input)))
Packit Service fdd496
    {
Packit Service fdd496
      token->type = CHARACTER;
Packit Service fdd496
      return 1;
Packit Service fdd496
    }
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
Packit Service fdd496
  if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
Packit Service fdd496
      && re_string_cur_idx (input) + 1 < re_string_length (input))
Packit Service fdd496
    {
Packit Service fdd496
      /* In this case, '\' escape a character.  */
Packit Service fdd496
      unsigned char c2;
Packit Service fdd496
      re_string_skip_bytes (input, 1);
Packit Service fdd496
      c2 = re_string_peek_byte (input, 0);
Packit Service fdd496
      token->opr.c = c2;
Packit Service fdd496
      token->type = CHARACTER;
Packit Service fdd496
      return 1;
Packit Service fdd496
    }
Packit Service fdd496
  if (c == '[') /* '[' is a special char in a bracket exps.  */
Packit Service fdd496
    {
Packit Service fdd496
      unsigned char c2;
Packit Service fdd496
      int token_len;
Packit Service fdd496
      if (re_string_cur_idx (input) + 1 < re_string_length (input))
Packit Service fdd496
	c2 = re_string_peek_byte (input, 1);
Packit Service fdd496
      else
Packit Service fdd496
	c2 = 0;
Packit Service fdd496
      token->opr.c = c2;
Packit Service fdd496
      token_len = 2;
Packit Service fdd496
      switch (c2)
Packit Service fdd496
	{
Packit Service fdd496
	case '.':
Packit Service fdd496
	  token->type = OP_OPEN_COLL_ELEM;
Packit Service fdd496
	  break;
Packit Service fdd496
	case '=':
Packit Service fdd496
	  token->type = OP_OPEN_EQUIV_CLASS;
Packit Service fdd496
	  break;
Packit Service fdd496
	case ':':
Packit Service fdd496
	  if (syntax & RE_CHAR_CLASSES)
Packit Service fdd496
	    {
Packit Service fdd496
	      token->type = OP_OPEN_CHAR_CLASS;
Packit Service fdd496
	      break;
Packit Service fdd496
	    }
Packit Service fdd496
	  /* else fall through.  */
Packit Service fdd496
	default:
Packit Service fdd496
	  token->type = CHARACTER;
Packit Service fdd496
	  token->opr.c = c;
Packit Service fdd496
	  token_len = 1;
Packit Service fdd496
	  break;
Packit Service fdd496
	}
Packit Service fdd496
      return token_len;
Packit Service fdd496
    }
Packit Service fdd496
  switch (c)
Packit Service fdd496
    {
Packit Service fdd496
    case '-':
Packit Service fdd496
      token->type = OP_CHARSET_RANGE;
Packit Service fdd496
      break;
Packit Service fdd496
    case ']':
Packit Service fdd496
      token->type = OP_CLOSE_BRACKET;
Packit Service fdd496
      break;
Packit Service fdd496
    case '^':
Packit Service fdd496
      token->type = OP_NON_MATCH_LIST;
Packit Service fdd496
      break;
Packit Service fdd496
    default:
Packit Service fdd496
      token->type = CHARACTER;
Packit Service fdd496
    }
Packit Service fdd496
  return 1;
Packit Service fdd496
}
Packit Service fdd496

Packit Service fdd496
/* Functions for parser.  */
Packit Service fdd496
Packit Service fdd496
/* Entry point of the parser.
Packit Service fdd496
   Parse the regular expression REGEXP and return the structure tree.
Packit Service fdd496
   If an error occurs, ERR is set by error code, and return NULL.
Packit Service fdd496
   This function build the following tree, from regular expression <reg_exp>:
Packit Service fdd496
	   CAT
Packit Service fdd496
	   / \
Packit Service fdd496
	  /   \
Packit Service fdd496
   <reg_exp>  EOR
Packit Service fdd496
Packit Service fdd496
   CAT means concatenation.
Packit Service fdd496
   EOR means end of regular expression.  */
Packit Service fdd496
Packit Service fdd496
static bin_tree_t *
Packit Service fdd496
parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
Packit Service fdd496
       reg_errcode_t *err)
Packit Service fdd496
{
Packit Service fdd496
  re_dfa_t *dfa = preg->buffer;
Packit Service fdd496
  bin_tree_t *tree, *eor, *root;
Packit Service fdd496
  re_token_t current_token;
Packit Service fdd496
  dfa->syntax = syntax;
Packit Service fdd496
  fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE);
Packit Service fdd496
  tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
Packit Service fdd496
  if (BE (*err != REG_NOERROR && tree == NULL, 0))
Packit Service fdd496
    return NULL;
Packit Service fdd496
  eor = create_tree (dfa, NULL, NULL, END_OF_RE);
Packit Service fdd496
  if (tree != NULL)
Packit Service fdd496
    root = create_tree (dfa, tree, eor, CONCAT);
Packit Service fdd496
  else
Packit Service fdd496
    root = eor;
Packit Service fdd496
  if (BE (eor == NULL || root == NULL, 0))
Packit Service fdd496
    {
Packit Service fdd496
      *err = REG_ESPACE;
Packit Service fdd496
      return NULL;
Packit Service fdd496
    }
Packit Service fdd496
  return root;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* This function build the following tree, from regular expression
Packit Service fdd496
   <branch1>|<branch2>:
Packit Service fdd496
	   ALT
Packit Service fdd496
	   / \
Packit Service fdd496
	  /   \
Packit Service fdd496
   <branch1> <branch2>
Packit Service fdd496
Packit Service fdd496
   ALT means alternative, which represents the operator '|'.  */
Packit Service fdd496
Packit Service fdd496
static bin_tree_t *
Packit Service fdd496
parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
Packit Service fdd496
	       reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
Packit Service fdd496
{
Packit Service fdd496
  re_dfa_t *dfa = preg->buffer;
Packit Service fdd496
  bin_tree_t *tree, *branch = NULL;
Packit Service fdd496
  bitset_word_t initial_bkref_map = dfa->completed_bkref_map;
Packit Service fdd496
  tree = parse_branch (regexp, preg, token, syntax, nest, err);
Packit Service fdd496
  if (BE (*err != REG_NOERROR && tree == NULL, 0))
Packit Service fdd496
    return NULL;
Packit Service fdd496
Packit Service fdd496
  while (token->type == OP_ALT)
Packit Service fdd496
    {
Packit Service fdd496
      fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
Packit Service fdd496
      if (token->type != OP_ALT && token->type != END_OF_RE
Packit Service fdd496
	  && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
Packit Service fdd496
	{
Packit Service fdd496
	  bitset_word_t accumulated_bkref_map = dfa->completed_bkref_map;
Packit Service fdd496
	  dfa->completed_bkref_map = initial_bkref_map;
Packit Service fdd496
	  branch = parse_branch (regexp, preg, token, syntax, nest, err);
Packit Service fdd496
	  if (BE (*err != REG_NOERROR && branch == NULL, 0))
Packit Service fdd496
	    {
Packit Service fdd496
	      if (tree != NULL)
Packit Service fdd496
		postorder (tree, free_tree, NULL);
Packit Service fdd496
	      return NULL;
Packit Service fdd496
	    }
Packit Service fdd496
	  dfa->completed_bkref_map |= accumulated_bkref_map;
Packit Service fdd496
	}
Packit Service fdd496
      else
Packit Service fdd496
	branch = NULL;
Packit Service fdd496
      tree = create_tree (dfa, tree, branch, OP_ALT);
Packit Service fdd496
      if (BE (tree == NULL, 0))
Packit Service fdd496
	{
Packit Service fdd496
	  *err = REG_ESPACE;
Packit Service fdd496
	  return NULL;
Packit Service fdd496
	}
Packit Service fdd496
    }
Packit Service fdd496
  return tree;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* This function build the following tree, from regular expression
Packit Service fdd496
   <exp1><exp2>:
Packit Service fdd496
	CAT
Packit Service fdd496
	/ \
Packit Service fdd496
       /   \
Packit Service fdd496
   <exp1> <exp2>
Packit Service fdd496
Packit Service fdd496
   CAT means concatenation.  */
Packit Service fdd496
Packit Service fdd496
static bin_tree_t *
Packit Service fdd496
parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
Packit Service fdd496
	      reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
Packit Service fdd496
{
Packit Service fdd496
  bin_tree_t *tree, *expr;
Packit Service fdd496
  re_dfa_t *dfa = preg->buffer;
Packit Service fdd496
  tree = parse_expression (regexp, preg, token, syntax, nest, err);
Packit Service fdd496
  if (BE (*err != REG_NOERROR && tree == NULL, 0))
Packit Service fdd496
    return NULL;
Packit Service fdd496
Packit Service fdd496
  while (token->type != OP_ALT && token->type != END_OF_RE
Packit Service fdd496
	 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
Packit Service fdd496
    {
Packit Service fdd496
      expr = parse_expression (regexp, preg, token, syntax, nest, err);
Packit Service fdd496
      if (BE (*err != REG_NOERROR && expr == NULL, 0))
Packit Service fdd496
	{
Packit Service fdd496
	  if (tree != NULL)
Packit Service fdd496
	    postorder (tree, free_tree, NULL);
Packit Service fdd496
	  return NULL;
Packit Service fdd496
	}
Packit Service fdd496
      if (tree != NULL && expr != NULL)
Packit Service fdd496
	{
Packit Service fdd496
	  bin_tree_t *newtree = create_tree (dfa, tree, expr, CONCAT);
Packit Service fdd496
	  if (newtree == NULL)
Packit Service fdd496
	    {
Packit Service fdd496
	      postorder (expr, free_tree, NULL);
Packit Service fdd496
	      postorder (tree, free_tree, NULL);
Packit Service fdd496
	      *err = REG_ESPACE;
Packit Service fdd496
	      return NULL;
Packit Service fdd496
	    }
Packit Service fdd496
	  tree = newtree;
Packit Service fdd496
	}
Packit Service fdd496
      else if (tree == NULL)
Packit Service fdd496
	tree = expr;
Packit Service fdd496
      /* Otherwise expr == NULL, we don't need to create new tree.  */
Packit Service fdd496
    }
Packit Service fdd496
  return tree;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* This function build the following tree, from regular expression a*:
Packit Service fdd496
	 *
Packit Service fdd496
	 |
Packit Service fdd496
	 a
Packit Service fdd496
*/
Packit Service fdd496
Packit Service fdd496
static bin_tree_t *
Packit Service fdd496
parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
Packit Service fdd496
		  reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
Packit Service fdd496
{
Packit Service fdd496
  re_dfa_t *dfa = preg->buffer;
Packit Service fdd496
  bin_tree_t *tree;
Packit Service fdd496
  switch (token->type)
Packit Service fdd496
    {
Packit Service fdd496
    case CHARACTER:
Packit Service fdd496
      tree = create_token_tree (dfa, NULL, NULL, token);
Packit Service fdd496
      if (BE (tree == NULL, 0))
Packit Service fdd496
	{
Packit Service fdd496
	  *err = REG_ESPACE;
Packit Service fdd496
	  return NULL;
Packit Service fdd496
	}
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
      if (dfa->mb_cur_max > 1)
Packit Service fdd496
	{
Packit Service fdd496
	  while (!re_string_eoi (regexp)
Packit Service fdd496
		 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
Packit Service fdd496
	    {
Packit Service fdd496
	      bin_tree_t *mbc_remain;
Packit Service fdd496
	      fetch_token (token, regexp, syntax);
Packit Service fdd496
	      mbc_remain = create_token_tree (dfa, NULL, NULL, token);
Packit Service fdd496
	      tree = create_tree (dfa, tree, mbc_remain, CONCAT);
Packit Service fdd496
	      if (BE (mbc_remain == NULL || tree == NULL, 0))
Packit Service fdd496
		{
Packit Service fdd496
		  *err = REG_ESPACE;
Packit Service fdd496
		  return NULL;
Packit Service fdd496
		}
Packit Service fdd496
	    }
Packit Service fdd496
	}
Packit Service fdd496
#endif
Packit Service fdd496
      break;
Packit Service fdd496
    case OP_OPEN_SUBEXP:
Packit Service fdd496
      tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
Packit Service fdd496
      if (BE (*err != REG_NOERROR && tree == NULL, 0))
Packit Service fdd496
	return NULL;
Packit Service fdd496
      break;
Packit Service fdd496
    case OP_OPEN_BRACKET:
Packit Service fdd496
      tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
Packit Service fdd496
      if (BE (*err != REG_NOERROR && tree == NULL, 0))
Packit Service fdd496
	return NULL;
Packit Service fdd496
      break;
Packit Service fdd496
    case OP_BACK_REF:
Packit Service fdd496
      if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
Packit Service fdd496
	{
Packit Service fdd496
	  *err = REG_ESUBREG;
Packit Service fdd496
	  return NULL;
Packit Service fdd496
	}
Packit Service fdd496
      dfa->used_bkref_map |= 1 << token->opr.idx;
Packit Service fdd496
      tree = create_token_tree (dfa, NULL, NULL, token);
Packit Service fdd496
      if (BE (tree == NULL, 0))
Packit Service fdd496
	{
Packit Service fdd496
	  *err = REG_ESPACE;
Packit Service fdd496
	  return NULL;
Packit Service fdd496
	}
Packit Service fdd496
      ++dfa->nbackref;
Packit Service fdd496
      dfa->has_mb_node = 1;
Packit Service fdd496
      break;
Packit Service fdd496
    case OP_OPEN_DUP_NUM:
Packit Service fdd496
      if (syntax & RE_CONTEXT_INVALID_DUP)
Packit Service fdd496
	{
Packit Service fdd496
	  *err = REG_BADRPT;
Packit Service fdd496
	  return NULL;
Packit Service fdd496
	}
Packit Service fdd496
      /* FALLTHROUGH */
Packit Service fdd496
    case OP_DUP_ASTERISK:
Packit Service fdd496
    case OP_DUP_PLUS:
Packit Service fdd496
    case OP_DUP_QUESTION:
Packit Service fdd496
      if (syntax & RE_CONTEXT_INVALID_OPS)
Packit Service fdd496
	{
Packit Service fdd496
	  *err = REG_BADRPT;
Packit Service fdd496
	  return NULL;
Packit Service fdd496
	}
Packit Service fdd496
      else if (syntax & RE_CONTEXT_INDEP_OPS)
Packit Service fdd496
	{
Packit Service fdd496
	  fetch_token (token, regexp, syntax);
Packit Service fdd496
	  return parse_expression (regexp, preg, token, syntax, nest, err);
Packit Service fdd496
	}
Packit Service fdd496
      /* else fall through  */
Packit Service fdd496
    case OP_CLOSE_SUBEXP:
Packit Service fdd496
      if ((token->type == OP_CLOSE_SUBEXP) &&
Packit Service fdd496
	  !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
Packit Service fdd496
	{
Packit Service fdd496
	  *err = REG_ERPAREN;
Packit Service fdd496
	  return NULL;
Packit Service fdd496
	}
Packit Service fdd496
      /* else fall through  */
Packit Service fdd496
    case OP_CLOSE_DUP_NUM:
Packit Service fdd496
      /* We treat it as a normal character.  */
Packit Service fdd496
Packit Service fdd496
      /* Then we can these characters as normal characters.  */
Packit Service fdd496
      token->type = CHARACTER;
Packit Service fdd496
      /* mb_partial and word_char bits should be initialized already
Packit Service fdd496
	 by peek_token.  */
Packit Service fdd496
      tree = create_token_tree (dfa, NULL, NULL, token);
Packit Service fdd496
      if (BE (tree == NULL, 0))
Packit Service fdd496
	{
Packit Service fdd496
	  *err = REG_ESPACE;
Packit Service fdd496
	  return NULL;
Packit Service fdd496
	}
Packit Service fdd496
      break;
Packit Service fdd496
    case ANCHOR:
Packit Service fdd496
      if ((token->opr.ctx_type
Packit Service fdd496
	   & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
Packit Service fdd496
	  && dfa->word_ops_used == 0)
Packit Service fdd496
	init_word_char (dfa);
Packit Service fdd496
      if (token->opr.ctx_type == WORD_DELIM
Packit Service fdd496
	  || token->opr.ctx_type == NOT_WORD_DELIM)
Packit Service fdd496
	{
Packit Service fdd496
	  bin_tree_t *tree_first, *tree_last;
Packit Service fdd496
	  if (token->opr.ctx_type == WORD_DELIM)
Packit Service fdd496
	    {
Packit Service fdd496
	      token->opr.ctx_type = WORD_FIRST;
Packit Service fdd496
	      tree_first = create_token_tree (dfa, NULL, NULL, token);
Packit Service fdd496
	      token->opr.ctx_type = WORD_LAST;
Packit Service fdd496
	    }
Packit Service fdd496
	  else
Packit Service fdd496
	    {
Packit Service fdd496
	      token->opr.ctx_type = INSIDE_WORD;
Packit Service fdd496
	      tree_first = create_token_tree (dfa, NULL, NULL, token);
Packit Service fdd496
	      token->opr.ctx_type = INSIDE_NOTWORD;
Packit Service fdd496
	    }
Packit Service fdd496
	  tree_last = create_token_tree (dfa, NULL, NULL, token);
Packit Service fdd496
	  tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
Packit Service fdd496
	  if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
Packit Service fdd496
	    {
Packit Service fdd496
	      *err = REG_ESPACE;
Packit Service fdd496
	      return NULL;
Packit Service fdd496
	    }
Packit Service fdd496
	}
Packit Service fdd496
      else
Packit Service fdd496
	{
Packit Service fdd496
	  tree = create_token_tree (dfa, NULL, NULL, token);
Packit Service fdd496
	  if (BE (tree == NULL, 0))
Packit Service fdd496
	    {
Packit Service fdd496
	      *err = REG_ESPACE;
Packit Service fdd496
	      return NULL;
Packit Service fdd496
	    }
Packit Service fdd496
	}
Packit Service fdd496
      /* We must return here, since ANCHORs can't be followed
Packit Service fdd496
	 by repetition operators.
Packit Service fdd496
	 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
Packit Service fdd496
	     it must not be "<ANCHOR(^)><REPEAT(*)>".  */
Packit Service fdd496
      fetch_token (token, regexp, syntax);
Packit Service fdd496
      return tree;
Packit Service fdd496
    case OP_PERIOD:
Packit Service fdd496
      tree = create_token_tree (dfa, NULL, NULL, token);
Packit Service fdd496
      if (BE (tree == NULL, 0))
Packit Service fdd496
	{
Packit Service fdd496
	  *err = REG_ESPACE;
Packit Service fdd496
	  return NULL;
Packit Service fdd496
	}
Packit Service fdd496
      if (dfa->mb_cur_max > 1)
Packit Service fdd496
	dfa->has_mb_node = 1;
Packit Service fdd496
      break;
Packit Service fdd496
    case OP_WORD:
Packit Service fdd496
    case OP_NOTWORD:
Packit Service fdd496
      tree = build_charclass_op (dfa, regexp->trans,
Packit Service fdd496
				 "alnum",
Packit Service fdd496
				 "_",
Packit Service fdd496
				 token->type == OP_NOTWORD, err);
Packit Service fdd496
      if (BE (*err != REG_NOERROR && tree == NULL, 0))
Packit Service fdd496
	return NULL;
Packit Service fdd496
      break;
Packit Service fdd496
    case OP_SPACE:
Packit Service fdd496
    case OP_NOTSPACE:
Packit Service fdd496
      tree = build_charclass_op (dfa, regexp->trans,
Packit Service fdd496
				 "space",
Packit Service fdd496
				 "",
Packit Service fdd496
				 token->type == OP_NOTSPACE, err);
Packit Service fdd496
      if (BE (*err != REG_NOERROR && tree == NULL, 0))
Packit Service fdd496
	return NULL;
Packit Service fdd496
      break;
Packit Service fdd496
    case OP_ALT:
Packit Service fdd496
    case END_OF_RE:
Packit Service fdd496
      return NULL;
Packit Service fdd496
    case BACK_SLASH:
Packit Service fdd496
      *err = REG_EESCAPE;
Packit Service fdd496
      return NULL;
Packit Service fdd496
    default:
Packit Service fdd496
      /* Must not happen?  */
Packit Service fdd496
#ifdef DEBUG
Packit Service fdd496
      assert (0);
Packit Service fdd496
#endif
Packit Service fdd496
      return NULL;
Packit Service fdd496
    }
Packit Service fdd496
  fetch_token (token, regexp, syntax);
Packit Service fdd496
Packit Service fdd496
  while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
Packit Service fdd496
	 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
Packit Service fdd496
    {
Packit Service fdd496
      bin_tree_t *dup_tree = parse_dup_op (tree, regexp, dfa, token,
Packit Service fdd496
					   syntax, err);
Packit Service fdd496
      if (BE (*err != REG_NOERROR && dup_tree == NULL, 0))
Packit Service fdd496
	{
Packit Service fdd496
	  if (tree != NULL)
Packit Service fdd496
	    postorder (tree, free_tree, NULL);
Packit Service fdd496
	  return NULL;
Packit Service fdd496
	}
Packit Service fdd496
      tree = dup_tree;
Packit Service fdd496
      /* In BRE consecutive duplications are not allowed.  */
Packit Service fdd496
      if ((syntax & RE_CONTEXT_INVALID_DUP)
Packit Service fdd496
	  && (token->type == OP_DUP_ASTERISK
Packit Service fdd496
	      || token->type == OP_OPEN_DUP_NUM))
Packit Service fdd496
	{
Packit Service fdd496
	  if (tree != NULL)
Packit Service fdd496
	    postorder (tree, free_tree, NULL);
Packit Service fdd496
	  *err = REG_BADRPT;
Packit Service fdd496
	  return NULL;
Packit Service fdd496
	}
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  return tree;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* This function build the following tree, from regular expression
Packit Service fdd496
   (<reg_exp>):
Packit Service fdd496
	 SUBEXP
Packit Service fdd496
	    |
Packit Service fdd496
	<reg_exp>
Packit Service fdd496
*/
Packit Service fdd496
Packit Service fdd496
static bin_tree_t *
Packit Service fdd496
parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
Packit Service fdd496
	       reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
Packit Service fdd496
{
Packit Service fdd496
  re_dfa_t *dfa = preg->buffer;
Packit Service fdd496
  bin_tree_t *tree;
Packit Service fdd496
  size_t cur_nsub;
Packit Service fdd496
  cur_nsub = preg->re_nsub++;
Packit Service fdd496
Packit Service fdd496
  fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
Packit Service fdd496
Packit Service fdd496
  /* The subexpression may be a null string.  */
Packit Service fdd496
  if (token->type == OP_CLOSE_SUBEXP)
Packit Service fdd496
    tree = NULL;
Packit Service fdd496
  else
Packit Service fdd496
    {
Packit Service fdd496
      tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
Packit Service fdd496
      if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
Packit Service fdd496
	{
Packit Service fdd496
	  if (tree != NULL)
Packit Service fdd496
	    postorder (tree, free_tree, NULL);
Packit Service fdd496
	  *err = REG_EPAREN;
Packit Service fdd496
	}
Packit Service fdd496
      if (BE (*err != REG_NOERROR, 0))
Packit Service fdd496
	return NULL;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  if (cur_nsub <= '9' - '1')
Packit Service fdd496
    dfa->completed_bkref_map |= 1 << cur_nsub;
Packit Service fdd496
Packit Service fdd496
  tree = create_tree (dfa, tree, NULL, SUBEXP);
Packit Service fdd496
  if (BE (tree == NULL, 0))
Packit Service fdd496
    {
Packit Service fdd496
      *err = REG_ESPACE;
Packit Service fdd496
      return NULL;
Packit Service fdd496
    }
Packit Service fdd496
  tree->token.opr.idx = cur_nsub;
Packit Service fdd496
  return tree;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* This function parse repetition operators like "*", "+", "{1,3}" etc.  */
Packit Service fdd496
Packit Service fdd496
static bin_tree_t *
Packit Service fdd496
parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
Packit Service fdd496
	      re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
Packit Service fdd496
{
Packit Service fdd496
  bin_tree_t *tree = NULL, *old_tree = NULL;
Packit Service fdd496
  Idx i, start, end, start_idx = re_string_cur_idx (regexp);
Packit Service fdd496
  re_token_t start_token = *token;
Packit Service fdd496
Packit Service fdd496
  if (token->type == OP_OPEN_DUP_NUM)
Packit Service fdd496
    {
Packit Service fdd496
      end = 0;
Packit Service fdd496
      start = fetch_number (regexp, token, syntax);
Packit Service fdd496
      if (start == -1)
Packit Service fdd496
	{
Packit Service fdd496
	  if (token->type == CHARACTER && token->opr.c == ',')
Packit Service fdd496
	    start = 0; /* We treat "{,m}" as "{0,m}".  */
Packit Service fdd496
	  else
Packit Service fdd496
	    {
Packit Service fdd496
	      *err = REG_BADBR; /* <re>{} is invalid.  */
Packit Service fdd496
	      return NULL;
Packit Service fdd496
	    }
Packit Service fdd496
	}
Packit Service fdd496
      if (BE (start != -2, 1))
Packit Service fdd496
	{
Packit Service fdd496
	  /* We treat "{n}" as "{n,n}".  */
Packit Service fdd496
	  end = ((token->type == OP_CLOSE_DUP_NUM) ? start
Packit Service fdd496
		 : ((token->type == CHARACTER && token->opr.c == ',')
Packit Service fdd496
		    ? fetch_number (regexp, token, syntax) : -2));
Packit Service fdd496
	}
Packit Service fdd496
      if (BE (start == -2 || end == -2, 0))
Packit Service fdd496
	{
Packit Service fdd496
	  /* Invalid sequence.  */
Packit Service fdd496
	  if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
Packit Service fdd496
	    {
Packit Service fdd496
	      if (token->type == END_OF_RE)
Packit Service fdd496
		*err = REG_EBRACE;
Packit Service fdd496
	      else
Packit Service fdd496
		*err = REG_BADBR;
Packit Service fdd496
Packit Service fdd496
	      return NULL;
Packit Service fdd496
	    }
Packit Service fdd496
Packit Service fdd496
	  /* If the syntax bit is set, rollback.  */
Packit Service fdd496
	  re_string_set_index (regexp, start_idx);
Packit Service fdd496
	  *token = start_token;
Packit Service fdd496
	  token->type = CHARACTER;
Packit Service fdd496
	  /* mb_partial and word_char bits should be already initialized by
Packit Service fdd496
	     peek_token.  */
Packit Service fdd496
	  return elem;
Packit Service fdd496
	}
Packit Service fdd496
Packit Service fdd496
      if (BE ((end != -1 && start > end)
Packit Service fdd496
	      || token->type != OP_CLOSE_DUP_NUM, 0))
Packit Service fdd496
	{
Packit Service fdd496
	  /* First number greater than second.  */
Packit Service fdd496
	  *err = REG_BADBR;
Packit Service fdd496
	  return NULL;
Packit Service fdd496
	}
Packit Service fdd496
Packit Service fdd496
      if (BE (RE_DUP_MAX < (end == -1 ? start : end), 0))
Packit Service fdd496
	{
Packit Service fdd496
	  *err = REG_ESIZE;
Packit Service fdd496
	  return NULL;
Packit Service fdd496
	}
Packit Service fdd496
    }
Packit Service fdd496
  else
Packit Service fdd496
    {
Packit Service fdd496
      start = (token->type == OP_DUP_PLUS) ? 1 : 0;
Packit Service fdd496
      end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  fetch_token (token, regexp, syntax);
Packit Service fdd496
Packit Service fdd496
  if (BE (elem == NULL, 0))
Packit Service fdd496
    return NULL;
Packit Service fdd496
  if (BE (start == 0 && end == 0, 0))
Packit Service fdd496
    {
Packit Service fdd496
      postorder (elem, free_tree, NULL);
Packit Service fdd496
      return NULL;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
Packit Service fdd496
  if (BE (start > 0, 0))
Packit Service fdd496
    {
Packit Service fdd496
      tree = elem;
Packit Service fdd496
      for (i = 2; i <= start; ++i)
Packit Service fdd496
	{
Packit Service fdd496
	  elem = duplicate_tree (elem, dfa);
Packit Service fdd496
	  tree = create_tree (dfa, tree, elem, CONCAT);
Packit Service fdd496
	  if (BE (elem == NULL || tree == NULL, 0))
Packit Service fdd496
	    goto parse_dup_op_espace;
Packit Service fdd496
	}
Packit Service fdd496
Packit Service fdd496
      if (start == end)
Packit Service fdd496
	return tree;
Packit Service fdd496
Packit Service fdd496
      /* Duplicate ELEM before it is marked optional.  */
Packit Service fdd496
      elem = duplicate_tree (elem, dfa);
Packit Service fdd496
      if (BE (elem == NULL, 0))
Packit Service fdd496
        goto parse_dup_op_espace;
Packit Service fdd496
      old_tree = tree;
Packit Service fdd496
    }
Packit Service fdd496
  else
Packit Service fdd496
    old_tree = NULL;
Packit Service fdd496
Packit Service fdd496
  if (elem->token.type == SUBEXP)
Packit Service fdd496
    {
Packit Service fdd496
      uintptr_t subidx = elem->token.opr.idx;
Packit Service fdd496
      postorder (elem, mark_opt_subexp, (void *) subidx);
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  tree = create_tree (dfa, elem, NULL,
Packit Service fdd496
		      (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
Packit Service fdd496
  if (BE (tree == NULL, 0))
Packit Service fdd496
    goto parse_dup_op_espace;
Packit Service fdd496
Packit Service fdd496
/* From gnulib's "intprops.h":
Packit Service fdd496
   True if the arithmetic type T is signed.  */
Packit Service fdd496
#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
Packit Service fdd496
Packit Service fdd496
  /* This loop is actually executed only when end != -1,
Packit Service fdd496
     to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?...  We have
Packit Service fdd496
     already created the start+1-th copy.  */
Packit Service fdd496
  if (TYPE_SIGNED (Idx) || end != -1)
Packit Service fdd496
    for (i = start + 2; i <= end; ++i)
Packit Service fdd496
      {
Packit Service fdd496
	elem = duplicate_tree (elem, dfa);
Packit Service fdd496
	tree = create_tree (dfa, tree, elem, CONCAT);
Packit Service fdd496
	if (BE (elem == NULL || tree == NULL, 0))
Packit Service fdd496
	  goto parse_dup_op_espace;
Packit Service fdd496
Packit Service fdd496
	tree = create_tree (dfa, tree, NULL, OP_ALT);
Packit Service fdd496
	if (BE (tree == NULL, 0))
Packit Service fdd496
	  goto parse_dup_op_espace;
Packit Service fdd496
      }
Packit Service fdd496
Packit Service fdd496
  if (old_tree)
Packit Service fdd496
    tree = create_tree (dfa, old_tree, tree, CONCAT);
Packit Service fdd496
Packit Service fdd496
  return tree;
Packit Service fdd496
Packit Service fdd496
 parse_dup_op_espace:
Packit Service fdd496
  *err = REG_ESPACE;
Packit Service fdd496
  return NULL;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Size of the names for collating symbol/equivalence_class/character_class.
Packit Service fdd496
   I'm not sure, but maybe enough.  */
Packit Service fdd496
#define BRACKET_NAME_BUF_SIZE 32
Packit Service fdd496
Packit Service fdd496
#ifndef _LIBC
Packit Service fdd496
Packit Service fdd496
# ifdef RE_ENABLE_I18N
Packit Service fdd496
/* Convert the byte B to the corresponding wide character.  In a
Packit Service fdd496
   unibyte locale, treat B as itself if it is an encoding error.
Packit Service fdd496
   In a multibyte locale, return WEOF if B is an encoding error.  */
Packit Service fdd496
static wint_t
Packit Service fdd496
parse_byte (unsigned char b, re_charset_t *mbcset)
Packit Service fdd496
{
Packit Service fdd496
  wint_t wc = __btowc (b);
Packit Service fdd496
  return wc == WEOF && !mbcset ? b : wc;
Packit Service fdd496
}
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
  /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
Packit Service fdd496
     Build the range expression which starts from START_ELEM, and ends
Packit Service fdd496
     at END_ELEM.  The result are written to MBCSET and SBCSET.
Packit Service fdd496
     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
Packit Service fdd496
     mbcset->range_ends, is a pointer argument since we may
Packit Service fdd496
     update it.  */
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
internal_function
Packit Service fdd496
# ifdef RE_ENABLE_I18N
Packit Service fdd496
build_range_exp (const reg_syntax_t syntax,
Packit Service fdd496
                 bitset_t sbcset,
Packit Service fdd496
                 re_charset_t *mbcset,
Packit Service fdd496
                 Idx *range_alloc,
Packit Service fdd496
                 const bracket_elem_t *start_elem,
Packit Service fdd496
                 const bracket_elem_t *end_elem)
Packit Service fdd496
# else /* not RE_ENABLE_I18N */
Packit Service fdd496
build_range_exp (const reg_syntax_t syntax,
Packit Service fdd496
                 bitset_t sbcset,
Packit Service fdd496
                 const bracket_elem_t *start_elem,
Packit Service fdd496
                 const bracket_elem_t *end_elem)
Packit Service fdd496
# endif /* not RE_ENABLE_I18N */
Packit Service fdd496
{
Packit Service fdd496
  unsigned int start_ch, end_ch;
Packit Service fdd496
  /* Equivalence Classes and Character Classes can't be a range start/end.  */
Packit Service fdd496
  if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
Packit Service fdd496
	  || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
Packit Service fdd496
	  0))
Packit Service fdd496
    return REG_ERANGE;
Packit Service fdd496
Packit Service fdd496
  /* We can handle no multi character collating elements without libc
Packit Service fdd496
     support.  */
Packit Service fdd496
  if (BE ((start_elem->type == COLL_SYM
Packit Service fdd496
	   && strlen ((char *) start_elem->opr.name) > 1)
Packit Service fdd496
	  || (end_elem->type == COLL_SYM
Packit Service fdd496
	      && strlen ((char *) end_elem->opr.name) > 1), 0))
Packit Service fdd496
    return REG_ECOLLATE;
Packit Service fdd496
Packit Service fdd496
# ifdef RE_ENABLE_I18N
Packit Service fdd496
  {
Packit Service fdd496
    wchar_t wc;
Packit Service fdd496
    wint_t start_wc;
Packit Service fdd496
    wint_t end_wc;
Packit Service fdd496
Packit Service fdd496
    start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
Packit Service fdd496
		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
Packit Service fdd496
		   : 0));
Packit Service fdd496
    end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
Packit Service fdd496
	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
Packit Service fdd496
		 : 0));
Packit Service fdd496
    start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
Packit Service fdd496
		? parse_byte (start_ch, mbcset) : start_elem->opr.wch);
Packit Service fdd496
    end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
Packit Service fdd496
	      ? parse_byte (end_ch, mbcset) : end_elem->opr.wch);
Packit Service fdd496
    if (start_wc == WEOF || end_wc == WEOF)
Packit Service fdd496
      return REG_ECOLLATE;
Packit Service fdd496
    else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0))
Packit Service fdd496
      return REG_ERANGE;
Packit Service fdd496
Packit Service fdd496
    /* Got valid collation sequence values, add them as a new entry.
Packit Service fdd496
       However, for !_LIBC we have no collation elements: if the
Packit Service fdd496
       character set is single byte, the single byte character set
Packit Service fdd496
       that we build below suffices.  parse_bracket_exp passes
Packit Service fdd496
       no MBCSET if dfa->mb_cur_max == 1.  */
Packit Service fdd496
    if (mbcset)
Packit Service fdd496
      {
Packit Service fdd496
	/* Check the space of the arrays.  */
Packit Service fdd496
	if (BE (*range_alloc == mbcset->nranges, 0))
Packit Service fdd496
	  {
Packit Service fdd496
	    /* There is not enough space, need realloc.  */
Packit Service fdd496
	    wchar_t *new_array_start, *new_array_end;
Packit Service fdd496
	    Idx new_nranges;
Packit Service fdd496
Packit Service fdd496
	    /* +1 in case of mbcset->nranges is 0.  */
Packit Service fdd496
	    new_nranges = 2 * mbcset->nranges + 1;
Packit Service fdd496
	    /* Use realloc since mbcset->range_starts and mbcset->range_ends
Packit Service fdd496
	       are NULL if *range_alloc == 0.  */
Packit Service fdd496
	    new_array_start = re_realloc (mbcset->range_starts, wchar_t,
Packit Service fdd496
					  new_nranges);
Packit Service fdd496
	    new_array_end = re_realloc (mbcset->range_ends, wchar_t,
Packit Service fdd496
					new_nranges);
Packit Service fdd496
Packit Service fdd496
	    if (BE (new_array_start == NULL || new_array_end == NULL, 0))
Packit Service fdd496
	      {
Packit Service fdd496
		re_free (new_array_start);
Packit Service fdd496
		re_free (new_array_end);
Packit Service fdd496
		return REG_ESPACE;
Packit Service fdd496
	      }
Packit Service fdd496
Packit Service fdd496
	    mbcset->range_starts = new_array_start;
Packit Service fdd496
	    mbcset->range_ends = new_array_end;
Packit Service fdd496
	    *range_alloc = new_nranges;
Packit Service fdd496
	  }
Packit Service fdd496
Packit Service fdd496
	mbcset->range_starts[mbcset->nranges] = start_wc;
Packit Service fdd496
	mbcset->range_ends[mbcset->nranges++] = end_wc;
Packit Service fdd496
      }
Packit Service fdd496
Packit Service fdd496
    /* Build the table for single byte characters.  */
Packit Service fdd496
    for (wc = 0; wc < SBC_MAX; ++wc)
Packit Service fdd496
      {
Packit Service fdd496
	if (start_wc <= wc && wc <= end_wc)
Packit Service fdd496
	  bitset_set (sbcset, wc);
Packit Service fdd496
      }
Packit Service fdd496
  }
Packit Service fdd496
# else /* not RE_ENABLE_I18N */
Packit Service fdd496
  {
Packit Service fdd496
    unsigned int ch;
Packit Service fdd496
    start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
Packit Service fdd496
		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
Packit Service fdd496
		   : 0));
Packit Service fdd496
    end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
Packit Service fdd496
	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
Packit Service fdd496
		 : 0));
Packit Service fdd496
    if (start_ch > end_ch)
Packit Service fdd496
      return REG_ERANGE;
Packit Service fdd496
    /* Build the table for single byte characters.  */
Packit Service fdd496
    for (ch = 0; ch < SBC_MAX; ++ch)
Packit Service fdd496
      if (start_ch <= ch  && ch <= end_ch)
Packit Service fdd496
	bitset_set (sbcset, ch);
Packit Service fdd496
  }
Packit Service fdd496
# endif /* not RE_ENABLE_I18N */
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496
#endif /* not _LIBC */
Packit Service fdd496
Packit Service fdd496
#ifndef _LIBC
Packit Service fdd496
/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
Packit Service fdd496
   Build the collating element which is represented by NAME.
Packit Service fdd496
   The result are written to MBCSET and SBCSET.
Packit Service fdd496
   COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
Packit Service fdd496
   pointer argument since we may update it.  */
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
internal_function
Packit Service fdd496
# ifdef RE_ENABLE_I18N
Packit Service fdd496
build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
Packit Service fdd496
			Idx *coll_sym_alloc, const unsigned char *name)
Packit Service fdd496
# else /* not RE_ENABLE_I18N */
Packit Service fdd496
build_collating_symbol (bitset_t sbcset, const unsigned char *name)
Packit Service fdd496
# endif /* not RE_ENABLE_I18N */
Packit Service fdd496
{
Packit Service fdd496
  size_t name_len = strlen ((const char *) name);
Packit Service fdd496
  if (BE (name_len != 1, 0))
Packit Service fdd496
    return REG_ECOLLATE;
Packit Service fdd496
  else
Packit Service fdd496
    {
Packit Service fdd496
      bitset_set (sbcset, name[0]);
Packit Service fdd496
      return REG_NOERROR;
Packit Service fdd496
    }
Packit Service fdd496
}
Packit Service fdd496
#endif /* not _LIBC */
Packit Service fdd496
Packit Service fdd496
/* This function parse bracket expression like "[abc]", "[a-c]",
Packit Service fdd496
   "[[.a-a.]]" etc.  */
Packit Service fdd496
Packit Service fdd496
static bin_tree_t *
Packit Service fdd496
parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
Packit Service fdd496
		   reg_syntax_t syntax, reg_errcode_t *err)
Packit Service fdd496
{
Packit Service fdd496
#ifdef _LIBC
Packit Service fdd496
  const unsigned char *collseqmb;
Packit Service fdd496
  const char *collseqwc;
Packit Service fdd496
  uint32_t nrules;
Packit Service fdd496
  int32_t table_size;
Packit Service fdd496
  const int32_t *symb_table;
Packit Service fdd496
  const unsigned char *extra;
Packit Service fdd496
Packit Service fdd496
  /* Local function for parse_bracket_exp used in _LIBC environment.
Packit Service fdd496
     Seek the collating symbol entry corresponding to NAME.
Packit Service fdd496
     Return the index of the symbol in the SYMB_TABLE,
Packit Service fdd496
     or -1 if not found.  */
Packit Service fdd496
Packit Service fdd496
  auto inline int32_t
Packit Service fdd496
  __attribute__ ((always_inline))
Packit Service fdd496
  seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
Packit Service fdd496
    {
Packit Service fdd496
      int32_t elem;
Packit Service fdd496
Packit Service fdd496
      for (elem = 0; elem < table_size; elem++)
Packit Service fdd496
	if (symb_table[2 * elem] != 0)
Packit Service fdd496
	  {
Packit Service fdd496
	    int32_t idx = symb_table[2 * elem + 1];
Packit Service fdd496
	    /* Skip the name of collating element name.  */
Packit Service fdd496
	    idx += 1 + extra[idx];
Packit Service fdd496
	    if (/* Compare the length of the name.  */
Packit Service fdd496
		name_len == extra[idx]
Packit Service fdd496
		/* Compare the name.  */
Packit Service fdd496
		&& memcmp (name, &extra[idx + 1], name_len) == 0)
Packit Service fdd496
	      /* Yep, this is the entry.  */
Packit Service fdd496
	      return elem;
Packit Service fdd496
	  }
Packit Service fdd496
      return -1;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  /* Local function for parse_bracket_exp used in _LIBC environment.
Packit Service fdd496
     Look up the collation sequence value of BR_ELEM.
Packit Service fdd496
     Return the value if succeeded, UINT_MAX otherwise.  */
Packit Service fdd496
Packit Service fdd496
  auto inline unsigned int
Packit Service fdd496
  __attribute__ ((always_inline))
Packit Service fdd496
  lookup_collation_sequence_value (bracket_elem_t *br_elem)
Packit Service fdd496
    {
Packit Service fdd496
      if (br_elem->type == SB_CHAR)
Packit Service fdd496
	{
Packit Service fdd496
	  /*
Packit Service fdd496
	  if (MB_CUR_MAX == 1)
Packit Service fdd496
	  */
Packit Service fdd496
	  if (nrules == 0)
Packit Service fdd496
	    return collseqmb[br_elem->opr.ch];
Packit Service fdd496
	  else
Packit Service fdd496
	    {
Packit Service fdd496
	      wint_t wc = __btowc (br_elem->opr.ch);
Packit Service fdd496
	      return __collseq_table_lookup (collseqwc, wc);
Packit Service fdd496
	    }
Packit Service fdd496
	}
Packit Service fdd496
      else if (br_elem->type == MB_CHAR)
Packit Service fdd496
	{
Packit Service fdd496
	  if (nrules != 0)
Packit Service fdd496
	    return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
Packit Service fdd496
	}
Packit Service fdd496
      else if (br_elem->type == COLL_SYM)
Packit Service fdd496
	{
Packit Service fdd496
	  size_t sym_name_len = strlen ((char *) br_elem->opr.name);
Packit Service fdd496
	  if (nrules != 0)
Packit Service fdd496
	    {
Packit Service fdd496
	      int32_t elem, idx;
Packit Service fdd496
	      elem = seek_collating_symbol_entry (br_elem->opr.name,
Packit Service fdd496
						  sym_name_len);
Packit Service fdd496
	      if (elem != -1)
Packit Service fdd496
		{
Packit Service fdd496
		  /* We found the entry.  */
Packit Service fdd496
		  idx = symb_table[2 * elem + 1];
Packit Service fdd496
		  /* Skip the name of collating element name.  */
Packit Service fdd496
		  idx += 1 + extra[idx];
Packit Service fdd496
		  /* Skip the byte sequence of the collating element.  */
Packit Service fdd496
		  idx += 1 + extra[idx];
Packit Service fdd496
		  /* Adjust for the alignment.  */
Packit Service fdd496
		  idx = (idx + 3) & ~3;
Packit Service fdd496
		  /* Skip the multibyte collation sequence value.  */
Packit Service fdd496
		  idx += sizeof (unsigned int);
Packit Service fdd496
		  /* Skip the wide char sequence of the collating element.  */
Packit Service fdd496
		  idx += sizeof (unsigned int) *
Packit Service fdd496
		    (1 + *(unsigned int *) (extra + idx));
Packit Service fdd496
		  /* Return the collation sequence value.  */
Packit Service fdd496
		  return *(unsigned int *) (extra + idx);
Packit Service fdd496
		}
Packit Service fdd496
	      else if (sym_name_len == 1)
Packit Service fdd496
		{
Packit Service fdd496
		  /* No valid character.  Match it as a single byte
Packit Service fdd496
		     character.  */
Packit Service fdd496
		  return collseqmb[br_elem->opr.name[0]];
Packit Service fdd496
		}
Packit Service fdd496
	    }
Packit Service fdd496
	  else if (sym_name_len == 1)
Packit Service fdd496
	    return collseqmb[br_elem->opr.name[0]];
Packit Service fdd496
	}
Packit Service fdd496
      return UINT_MAX;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  /* Local function for parse_bracket_exp used in _LIBC environment.
Packit Service fdd496
     Build the range expression which starts from START_ELEM, and ends
Packit Service fdd496
     at END_ELEM.  The result are written to MBCSET and SBCSET.
Packit Service fdd496
     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
Packit Service fdd496
     mbcset->range_ends, is a pointer argument since we may
Packit Service fdd496
     update it.  */
Packit Service fdd496
Packit Service fdd496
  auto inline reg_errcode_t
Packit Service fdd496
  __attribute__ ((always_inline))
Packit Service fdd496
  build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
Packit Service fdd496
		   bracket_elem_t *start_elem, bracket_elem_t *end_elem)
Packit Service fdd496
    {
Packit Service fdd496
      unsigned int ch;
Packit Service fdd496
      uint32_t start_collseq;
Packit Service fdd496
      uint32_t end_collseq;
Packit Service fdd496
Packit Service fdd496
      /* Equivalence Classes and Character Classes can't be a range
Packit Service fdd496
	 start/end.  */
Packit Service fdd496
      if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
Packit Service fdd496
	      || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
Packit Service fdd496
	      0))
Packit Service fdd496
	return REG_ERANGE;
Packit Service fdd496
Packit Service fdd496
      /* FIXME: Implement rational ranges here, too.  */
Packit Service fdd496
      start_collseq = lookup_collation_sequence_value (start_elem);
Packit Service fdd496
      end_collseq = lookup_collation_sequence_value (end_elem);
Packit Service fdd496
      /* Check start/end collation sequence values.  */
Packit Service fdd496
      if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
Packit Service fdd496
	return REG_ECOLLATE;
Packit Service fdd496
      if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
Packit Service fdd496
	return REG_ERANGE;
Packit Service fdd496
Packit Service fdd496
      /* Got valid collation sequence values, add them as a new entry.
Packit Service fdd496
	 However, if we have no collation elements, and the character set
Packit Service fdd496
	 is single byte, the single byte character set that we
Packit Service fdd496
	 build below suffices. */
Packit Service fdd496
      if (nrules > 0 || dfa->mb_cur_max > 1)
Packit Service fdd496
	{
Packit Service fdd496
	  /* Check the space of the arrays.  */
Packit Service fdd496
	  if (BE (*range_alloc == mbcset->nranges, 0))
Packit Service fdd496
	    {
Packit Service fdd496
	      /* There is not enough space, need realloc.  */
Packit Service fdd496
	      uint32_t *new_array_start;
Packit Service fdd496
	      uint32_t *new_array_end;
Packit Service fdd496
	      Idx new_nranges;
Packit Service fdd496
Packit Service fdd496
	      /* +1 in case of mbcset->nranges is 0.  */
Packit Service fdd496
	      new_nranges = 2 * mbcset->nranges + 1;
Packit Service fdd496
	      new_array_start = re_realloc (mbcset->range_starts, uint32_t,
Packit Service fdd496
					    new_nranges);
Packit Service fdd496
	      new_array_end = re_realloc (mbcset->range_ends, uint32_t,
Packit Service fdd496
					  new_nranges);
Packit Service fdd496
Packit Service fdd496
	      if (BE (new_array_start == NULL || new_array_end == NULL, 0))
Packit Service fdd496
		return REG_ESPACE;
Packit Service fdd496
Packit Service fdd496
	      mbcset->range_starts = new_array_start;
Packit Service fdd496
	      mbcset->range_ends = new_array_end;
Packit Service fdd496
	      *range_alloc = new_nranges;
Packit Service fdd496
	    }
Packit Service fdd496
Packit Service fdd496
	  mbcset->range_starts[mbcset->nranges] = start_collseq;
Packit Service fdd496
	  mbcset->range_ends[mbcset->nranges++] = end_collseq;
Packit Service fdd496
	}
Packit Service fdd496
Packit Service fdd496
      /* Build the table for single byte characters.  */
Packit Service fdd496
      for (ch = 0; ch < SBC_MAX; ch++)
Packit Service fdd496
	{
Packit Service fdd496
	  uint32_t ch_collseq;
Packit Service fdd496
	  /*
Packit Service fdd496
	  if (MB_CUR_MAX == 1)
Packit Service fdd496
	  */
Packit Service fdd496
	  if (nrules == 0)
Packit Service fdd496
	    ch_collseq = collseqmb[ch];
Packit Service fdd496
	  else
Packit Service fdd496
	    ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
Packit Service fdd496
	  if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
Packit Service fdd496
	    bitset_set (sbcset, ch);
Packit Service fdd496
	}
Packit Service fdd496
      return REG_NOERROR;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  /* Local function for parse_bracket_exp used in _LIBC environment.
Packit Service fdd496
     Build the collating element which is represented by NAME.
Packit Service fdd496
     The result are written to MBCSET and SBCSET.
Packit Service fdd496
     COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
Packit Service fdd496
     pointer argument since we may update it.  */
Packit Service fdd496
Packit Service fdd496
  auto inline reg_errcode_t
Packit Service fdd496
  __attribute__ ((always_inline))
Packit Service fdd496
  build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
Packit Service fdd496
			  Idx *coll_sym_alloc, const unsigned char *name)
Packit Service fdd496
    {
Packit Service fdd496
      int32_t elem, idx;
Packit Service fdd496
      size_t name_len = strlen ((const char *) name);
Packit Service fdd496
      if (nrules != 0)
Packit Service fdd496
	{
Packit Service fdd496
	  elem = seek_collating_symbol_entry (name, name_len);
Packit Service fdd496
	  if (elem != -1)
Packit Service fdd496
	    {
Packit Service fdd496
	      /* We found the entry.  */
Packit Service fdd496
	      idx = symb_table[2 * elem + 1];
Packit Service fdd496
	      /* Skip the name of collating element name.  */
Packit Service fdd496
	      idx += 1 + extra[idx];
Packit Service fdd496
	    }
Packit Service fdd496
	  else if (name_len == 1)
Packit Service fdd496
	    {
Packit Service fdd496
	      /* No valid character, treat it as a normal
Packit Service fdd496
		 character.  */
Packit Service fdd496
	      bitset_set (sbcset, name[0]);
Packit Service fdd496
	      return REG_NOERROR;
Packit Service fdd496
	    }
Packit Service fdd496
	  else
Packit Service fdd496
	    return REG_ECOLLATE;
Packit Service fdd496
Packit Service fdd496
	  /* Got valid collation sequence, add it as a new entry.  */
Packit Service fdd496
	  /* Check the space of the arrays.  */
Packit Service fdd496
	  if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
Packit Service fdd496
	    {
Packit Service fdd496
	      /* Not enough, realloc it.  */
Packit Service fdd496
	      /* +1 in case of mbcset->ncoll_syms is 0.  */
Packit Service fdd496
	      Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
Packit Service fdd496
	      /* Use realloc since mbcset->coll_syms is NULL
Packit Service fdd496
		 if *alloc == 0.  */
Packit Service fdd496
	      int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
Packit Service fdd496
						   new_coll_sym_alloc);
Packit Service fdd496
	      if (BE (new_coll_syms == NULL, 0))
Packit Service fdd496
		return REG_ESPACE;
Packit Service fdd496
	      mbcset->coll_syms = new_coll_syms;
Packit Service fdd496
	      *coll_sym_alloc = new_coll_sym_alloc;
Packit Service fdd496
	    }
Packit Service fdd496
	  mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
Packit Service fdd496
	  return REG_NOERROR;
Packit Service fdd496
	}
Packit Service fdd496
      else
Packit Service fdd496
	{
Packit Service fdd496
	  if (BE (name_len != 1, 0))
Packit Service fdd496
	    return REG_ECOLLATE;
Packit Service fdd496
	  else
Packit Service fdd496
	    {
Packit Service fdd496
	      bitset_set (sbcset, name[0]);
Packit Service fdd496
	      return REG_NOERROR;
Packit Service fdd496
	    }
Packit Service fdd496
	}
Packit Service fdd496
    }
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
  re_token_t br_token;
Packit Service fdd496
  re_bitset_ptr_t sbcset;
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  re_charset_t *mbcset;
Packit Service fdd496
  Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
Packit Service fdd496
  Idx equiv_class_alloc = 0, char_class_alloc = 0;
Packit Service fdd496
#endif /* not RE_ENABLE_I18N */
Packit Service fdd496
  bool non_match = false;
Packit Service fdd496
  bin_tree_t *work_tree;
Packit Service fdd496
  int token_len;
Packit Service fdd496
  bool first_round = true;
Packit Service fdd496
#ifdef _LIBC
Packit Service fdd496
  collseqmb = (const unsigned char *)
Packit Service fdd496
    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
Packit Service fdd496
  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
Packit Service fdd496
  if (nrules)
Packit Service fdd496
    {
Packit Service fdd496
      /*
Packit Service fdd496
      if (MB_CUR_MAX > 1)
Packit Service fdd496
      */
Packit Service fdd496
      collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
Packit Service fdd496
      table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
Packit Service fdd496
      symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
Packit Service fdd496
						  _NL_COLLATE_SYMB_TABLEMB);
Packit Service fdd496
      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
Packit Service fdd496
						   _NL_COLLATE_SYMB_EXTRAMB);
Packit Service fdd496
    }
Packit Service fdd496
#endif
Packit Service fdd496
  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  if (BE (sbcset == NULL || mbcset == NULL, 0))
Packit Service fdd496
#else
Packit Service fdd496
  if (BE (sbcset == NULL, 0))
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
    {
Packit Service fdd496
      re_free (sbcset);
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
      re_free (mbcset);
Packit Service fdd496
#endif
Packit Service fdd496
      *err = REG_ESPACE;
Packit Service fdd496
      return NULL;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  token_len = peek_token_bracket (token, regexp, syntax);
Packit Service fdd496
  if (BE (token->type == END_OF_RE, 0))
Packit Service fdd496
    {
Packit Service fdd496
      *err = REG_BADPAT;
Packit Service fdd496
      goto parse_bracket_exp_free_return;
Packit Service fdd496
    }
Packit Service fdd496
  if (token->type == OP_NON_MATCH_LIST)
Packit Service fdd496
    {
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
      mbcset->non_match = 1;
Packit Service fdd496
#endif /* not RE_ENABLE_I18N */
Packit Service fdd496
      non_match = true;
Packit Service fdd496
      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
Packit Service fdd496
	bitset_set (sbcset, '\n');
Packit Service fdd496
      re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
Packit Service fdd496
      token_len = peek_token_bracket (token, regexp, syntax);
Packit Service fdd496
      if (BE (token->type == END_OF_RE, 0))
Packit Service fdd496
	{
Packit Service fdd496
	  *err = REG_BADPAT;
Packit Service fdd496
	  goto parse_bracket_exp_free_return;
Packit Service fdd496
	}
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  /* We treat the first ']' as a normal character.  */
Packit Service fdd496
  if (token->type == OP_CLOSE_BRACKET)
Packit Service fdd496
    token->type = CHARACTER;
Packit Service fdd496
Packit Service fdd496
  while (1)
Packit Service fdd496
    {
Packit Service fdd496
      bracket_elem_t start_elem, end_elem;
Packit Service fdd496
      unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
Packit Service fdd496
      unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
Packit Service fdd496
      reg_errcode_t ret;
Packit Service fdd496
      int token_len2 = 0;
Packit Service fdd496
      bool is_range_exp = false;
Packit Service fdd496
      re_token_t token2;
Packit Service fdd496
Packit Service fdd496
      start_elem.opr.name = start_name_buf;
Packit Service fdd496
      start_elem.type = COLL_SYM;
Packit Service fdd496
      ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
Packit Service fdd496
				   syntax, first_round);
Packit Service fdd496
      if (BE (ret != REG_NOERROR, 0))
Packit Service fdd496
	{
Packit Service fdd496
	  *err = ret;
Packit Service fdd496
	  goto parse_bracket_exp_free_return;
Packit Service fdd496
	}
Packit Service fdd496
      first_round = false;
Packit Service fdd496
Packit Service fdd496
      /* Get information about the next token.  We need it in any case.  */
Packit Service fdd496
      token_len = peek_token_bracket (token, regexp, syntax);
Packit Service fdd496
Packit Service fdd496
      /* Do not check for ranges if we know they are not allowed.  */
Packit Service fdd496
      if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
Packit Service fdd496
	{
Packit Service fdd496
	  if (BE (token->type == END_OF_RE, 0))
Packit Service fdd496
	    {
Packit Service fdd496
	      *err = REG_EBRACK;
Packit Service fdd496
	      goto parse_bracket_exp_free_return;
Packit Service fdd496
	    }
Packit Service fdd496
	  if (token->type == OP_CHARSET_RANGE)
Packit Service fdd496
	    {
Packit Service fdd496
	      re_string_skip_bytes (regexp, token_len); /* Skip '-'.  */
Packit Service fdd496
	      token_len2 = peek_token_bracket (&token2, regexp, syntax);
Packit Service fdd496
	      if (BE (token2.type == END_OF_RE, 0))
Packit Service fdd496
		{
Packit Service fdd496
		  *err = REG_EBRACK;
Packit Service fdd496
		  goto parse_bracket_exp_free_return;
Packit Service fdd496
		}
Packit Service fdd496
	      if (token2.type == OP_CLOSE_BRACKET)
Packit Service fdd496
		{
Packit Service fdd496
		  /* We treat the last '-' as a normal character.  */
Packit Service fdd496
		  re_string_skip_bytes (regexp, -token_len);
Packit Service fdd496
		  token->type = CHARACTER;
Packit Service fdd496
		}
Packit Service fdd496
	      else
Packit Service fdd496
		is_range_exp = true;
Packit Service fdd496
	    }
Packit Service fdd496
	}
Packit Service fdd496
Packit Service fdd496
      if (is_range_exp == true)
Packit Service fdd496
	{
Packit Service fdd496
	  end_elem.opr.name = end_name_buf;
Packit Service fdd496
	  end_elem.type = COLL_SYM;
Packit Service fdd496
	  ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
Packit Service fdd496
				       dfa, syntax, true);
Packit Service fdd496
	  if (BE (ret != REG_NOERROR, 0))
Packit Service fdd496
	    {
Packit Service fdd496
	      *err = ret;
Packit Service fdd496
	      goto parse_bracket_exp_free_return;
Packit Service fdd496
	    }
Packit Service fdd496
Packit Service fdd496
	  token_len = peek_token_bracket (token, regexp, syntax);
Packit Service fdd496
Packit Service fdd496
#ifdef _LIBC
Packit Service fdd496
	  *err = build_range_exp (sbcset, mbcset, &range_alloc,
Packit Service fdd496
				  &start_elem, &end_elem);
Packit Service fdd496
#else
Packit Service fdd496
# ifdef RE_ENABLE_I18N
Packit Service fdd496
	  *err = build_range_exp (syntax, sbcset,
Packit Service fdd496
				  dfa->mb_cur_max > 1 ? mbcset : NULL,
Packit Service fdd496
				  &range_alloc, &start_elem, &end_elem);
Packit Service fdd496
# else
Packit Service fdd496
	  *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem);
Packit Service fdd496
# endif
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
	  if (BE (*err != REG_NOERROR, 0))
Packit Service fdd496
	    goto parse_bracket_exp_free_return;
Packit Service fdd496
	}
Packit Service fdd496
      else
Packit Service fdd496
	{
Packit Service fdd496
	  switch (start_elem.type)
Packit Service fdd496
	    {
Packit Service fdd496
	    case SB_CHAR:
Packit Service fdd496
	      bitset_set (sbcset, start_elem.opr.ch);
Packit Service fdd496
	      break;
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
	    case MB_CHAR:
Packit Service fdd496
	      /* Check whether the array has enough space.  */
Packit Service fdd496
	      if (BE (mbchar_alloc == mbcset->nmbchars, 0))
Packit Service fdd496
		{
Packit Service fdd496
		  wchar_t *new_mbchars;
Packit Service fdd496
		  /* Not enough, realloc it.  */
Packit Service fdd496
		  /* +1 in case of mbcset->nmbchars is 0.  */
Packit Service fdd496
		  mbchar_alloc = 2 * mbcset->nmbchars + 1;
Packit Service fdd496
		  /* Use realloc since array is NULL if *alloc == 0.  */
Packit Service fdd496
		  new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
Packit Service fdd496
					    mbchar_alloc);
Packit Service fdd496
		  if (BE (new_mbchars == NULL, 0))
Packit Service fdd496
		    goto parse_bracket_exp_espace;
Packit Service fdd496
		  mbcset->mbchars = new_mbchars;
Packit Service fdd496
		}
Packit Service fdd496
	      mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
Packit Service fdd496
	      break;
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
	    case EQUIV_CLASS:
Packit Service fdd496
	      *err = build_equiv_class (sbcset,
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
					mbcset, &equiv_class_alloc,
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
					start_elem.opr.name);
Packit Service fdd496
	      if (BE (*err != REG_NOERROR, 0))
Packit Service fdd496
		goto parse_bracket_exp_free_return;
Packit Service fdd496
	      break;
Packit Service fdd496
	    case COLL_SYM:
Packit Service fdd496
	      *err = build_collating_symbol (sbcset,
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
					     mbcset, &coll_sym_alloc,
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
					     start_elem.opr.name);
Packit Service fdd496
	      if (BE (*err != REG_NOERROR, 0))
Packit Service fdd496
		goto parse_bracket_exp_free_return;
Packit Service fdd496
	      break;
Packit Service fdd496
	    case CHAR_CLASS:
Packit Service fdd496
	      *err = build_charclass (regexp->trans, sbcset,
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
				      mbcset, &char_class_alloc,
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
				      (const char *) start_elem.opr.name,
Packit Service fdd496
				      syntax);
Packit Service fdd496
	      if (BE (*err != REG_NOERROR, 0))
Packit Service fdd496
	       goto parse_bracket_exp_free_return;
Packit Service fdd496
	      break;
Packit Service fdd496
	    default:
Packit Service fdd496
	      assert (0);
Packit Service fdd496
	      break;
Packit Service fdd496
	    }
Packit Service fdd496
	}
Packit Service fdd496
      if (BE (token->type == END_OF_RE, 0))
Packit Service fdd496
	{
Packit Service fdd496
	  *err = REG_EBRACK;
Packit Service fdd496
	  goto parse_bracket_exp_free_return;
Packit Service fdd496
	}
Packit Service fdd496
      if (token->type == OP_CLOSE_BRACKET)
Packit Service fdd496
	break;
Packit Service fdd496
    }
Packit Service fdd496
Packit Service fdd496
  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
Packit Service fdd496
Packit Service fdd496
  /* If it is non-matching list.  */
Packit Service fdd496
  if (non_match)
Packit Service fdd496
    bitset_not (sbcset);
Packit Service fdd496
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  /* Ensure only single byte characters are set.  */
Packit Service fdd496
  if (dfa->mb_cur_max > 1)
Packit Service fdd496
    bitset_mask (sbcset, dfa->sb_char);
Packit Service fdd496
Packit Service fdd496
  if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
Packit Service fdd496
      || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
Packit Service fdd496
						     || mbcset->non_match)))
Packit Service fdd496
    {
Packit Service fdd496
      bin_tree_t *mbc_tree;
Packit Service fdd496
      int sbc_idx;
Packit Service fdd496
      /* Build a tree for complex bracket.  */
Packit Service fdd496
      dfa->has_mb_node = 1;
Packit Service fdd496
      br_token.type = COMPLEX_BRACKET;
Packit Service fdd496
      br_token.opr.mbcset = mbcset;
Packit Service fdd496
      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
Packit Service fdd496
      if (BE (mbc_tree == NULL, 0))
Packit Service fdd496
	goto parse_bracket_exp_espace;
Packit Service fdd496
      for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
Packit Service fdd496
	if (sbcset[sbc_idx])
Packit Service fdd496
	  break;
Packit Service fdd496
      /* If there are no bits set in sbcset, there is no point
Packit Service fdd496
	 of having both SIMPLE_BRACKET and COMPLEX_BRACKET.  */
Packit Service fdd496
      if (sbc_idx < BITSET_WORDS)
Packit Service fdd496
	{
Packit Service fdd496
	  /* Build a tree for simple bracket.  */
Packit Service fdd496
	  br_token.type = SIMPLE_BRACKET;
Packit Service fdd496
	  br_token.opr.sbcset = sbcset;
Packit Service fdd496
	  work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
Packit Service fdd496
	  if (BE (work_tree == NULL, 0))
Packit Service fdd496
	    goto parse_bracket_exp_espace;
Packit Service fdd496
Packit Service fdd496
	  /* Then join them by ALT node.  */
Packit Service fdd496
	  work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
Packit Service fdd496
	  if (BE (work_tree == NULL, 0))
Packit Service fdd496
	    goto parse_bracket_exp_espace;
Packit Service fdd496
	}
Packit Service fdd496
      else
Packit Service fdd496
	{
Packit Service fdd496
	  re_free (sbcset);
Packit Service fdd496
	  work_tree = mbc_tree;
Packit Service fdd496
	}
Packit Service fdd496
    }
Packit Service fdd496
  else
Packit Service fdd496
#endif /* not RE_ENABLE_I18N */
Packit Service fdd496
    {
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
      free_charset (mbcset);
Packit Service fdd496
#endif
Packit Service fdd496
      /* Build a tree for simple bracket.  */
Packit Service fdd496
      br_token.type = SIMPLE_BRACKET;
Packit Service fdd496
      br_token.opr.sbcset = sbcset;
Packit Service fdd496
      work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
Packit Service fdd496
      if (BE (work_tree == NULL, 0))
Packit Service fdd496
	goto parse_bracket_exp_espace;
Packit Service fdd496
    }
Packit Service fdd496
  return work_tree;
Packit Service fdd496
Packit Service fdd496
 parse_bracket_exp_espace:
Packit Service fdd496
  *err = REG_ESPACE;
Packit Service fdd496
 parse_bracket_exp_free_return:
Packit Service fdd496
  re_free (sbcset);
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  free_charset (mbcset);
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
  return NULL;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Parse an element in the bracket expression.  */
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
Packit Service fdd496
		       re_token_t *token, int token_len, re_dfa_t *dfa,
Packit Service fdd496
		       reg_syntax_t syntax, bool accept_hyphen)
Packit Service fdd496
{
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  int cur_char_size;
Packit Service fdd496
  cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
Packit Service fdd496
  if (cur_char_size > 1)
Packit Service fdd496
    {
Packit Service fdd496
      elem->type = MB_CHAR;
Packit Service fdd496
      elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
Packit Service fdd496
      re_string_skip_bytes (regexp, cur_char_size);
Packit Service fdd496
      return REG_NOERROR;
Packit Service fdd496
    }
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
Packit Service fdd496
  if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
Packit Service fdd496
      || token->type == OP_OPEN_EQUIV_CLASS)
Packit Service fdd496
    return parse_bracket_symbol (elem, regexp, token);
Packit Service fdd496
  if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
Packit Service fdd496
    {
Packit Service fdd496
      /* A '-' must only appear as anything but a range indicator before
Packit Service fdd496
	 the closing bracket.  Everything else is an error.  */
Packit Service fdd496
      re_token_t token2;
Packit Service fdd496
      (void) peek_token_bracket (&token2, regexp, syntax);
Packit Service fdd496
      if (token2.type != OP_CLOSE_BRACKET)
Packit Service fdd496
	/* The actual error value is not standardized since this whole
Packit Service fdd496
	   case is undefined.  But ERANGE makes good sense.  */
Packit Service fdd496
	return REG_ERANGE;
Packit Service fdd496
    }
Packit Service fdd496
  elem->type = SB_CHAR;
Packit Service fdd496
  elem->opr.ch = token->opr.c;
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Parse a bracket symbol in the bracket expression.  Bracket symbols are
Packit Service fdd496
   such as [:<character_class>:], [.<collating_element>.], and
Packit Service fdd496
   [=<equivalent_class>=].  */
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
Packit Service fdd496
		      re_token_t *token)
Packit Service fdd496
{
Packit Service fdd496
  unsigned char ch, delim = token->opr.c;
Packit Service fdd496
  int i = 0;
Packit Service fdd496
  if (re_string_eoi(regexp))
Packit Service fdd496
    return REG_EBRACK;
Packit Service fdd496
  for (;; ++i)
Packit Service fdd496
    {
Packit Service fdd496
      if (i >= BRACKET_NAME_BUF_SIZE)
Packit Service fdd496
	return REG_EBRACK;
Packit Service fdd496
      if (token->type == OP_OPEN_CHAR_CLASS)
Packit Service fdd496
	ch = re_string_fetch_byte_case (regexp);
Packit Service fdd496
      else
Packit Service fdd496
	ch = re_string_fetch_byte (regexp);
Packit Service fdd496
      if (re_string_eoi(regexp))
Packit Service fdd496
	return REG_EBRACK;
Packit Service fdd496
      if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
Packit Service fdd496
	break;
Packit Service fdd496
      elem->opr.name[i] = ch;
Packit Service fdd496
    }
Packit Service fdd496
  re_string_skip_bytes (regexp, 1);
Packit Service fdd496
  elem->opr.name[i] = '\0';
Packit Service fdd496
  switch (token->type)
Packit Service fdd496
    {
Packit Service fdd496
    case OP_OPEN_COLL_ELEM:
Packit Service fdd496
      elem->type = COLL_SYM;
Packit Service fdd496
      break;
Packit Service fdd496
    case OP_OPEN_EQUIV_CLASS:
Packit Service fdd496
      elem->type = EQUIV_CLASS;
Packit Service fdd496
      break;
Packit Service fdd496
    case OP_OPEN_CHAR_CLASS:
Packit Service fdd496
      elem->type = CHAR_CLASS;
Packit Service fdd496
      break;
Packit Service fdd496
    default:
Packit Service fdd496
      break;
Packit Service fdd496
    }
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
  /* Helper function for parse_bracket_exp.
Packit Service fdd496
     Build the equivalence class which is represented by NAME.
Packit Service fdd496
     The result are written to MBCSET and SBCSET.
Packit Service fdd496
     EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
Packit Service fdd496
     is a pointer argument since we may update it.  */
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
Packit Service fdd496
		   Idx *equiv_class_alloc, const unsigned char *name)
Packit Service fdd496
#else /* not RE_ENABLE_I18N */
Packit Service fdd496
build_equiv_class (bitset_t sbcset, const unsigned char *name)
Packit Service fdd496
#endif /* not RE_ENABLE_I18N */
Packit Service fdd496
{
Packit Service fdd496
#ifdef _LIBC
Packit Service fdd496
  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
Packit Service fdd496
  if (nrules != 0)
Packit Service fdd496
    {
Packit Service fdd496
      const int32_t *table, *indirect;
Packit Service fdd496
      const unsigned char *weights, *extra, *cp;
Packit Service fdd496
      unsigned char char_buf[2];
Packit Service fdd496
      int32_t idx1, idx2;
Packit Service fdd496
      unsigned int ch;
Packit Service fdd496
      size_t len;
Packit Service fdd496
      /* Calculate the index for equivalence class.  */
Packit Service fdd496
      cp = name;
Packit Service fdd496
      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
Packit Service fdd496
      weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
Packit Service fdd496
					       _NL_COLLATE_WEIGHTMB);
Packit Service fdd496
      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
Packit Service fdd496
						   _NL_COLLATE_EXTRAMB);
Packit Service fdd496
      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
Packit Service fdd496
						_NL_COLLATE_INDIRECTMB);
Packit Service fdd496
      idx1 = findidx (table, indirect, extra, &cp, -1);
Packit Service fdd496
      if (BE (idx1 == 0 || *cp != '\0', 0))
Packit Service fdd496
	/* This isn't a valid character.  */
Packit Service fdd496
	return REG_ECOLLATE;
Packit Service fdd496
Packit Service fdd496
      /* Build single byte matching table for this equivalence class.  */
Packit Service fdd496
      len = weights[idx1 & 0xffffff];
Packit Service fdd496
      for (ch = 0; ch < SBC_MAX; ++ch)
Packit Service fdd496
	{
Packit Service fdd496
	  char_buf[0] = ch;
Packit Service fdd496
	  cp = char_buf;
Packit Service fdd496
	  idx2 = findidx (table, indirect, extra, &cp, 1);
Packit Service fdd496
/*
Packit Service fdd496
	  idx2 = table[ch];
Packit Service fdd496
*/
Packit Service fdd496
	  if (idx2 == 0)
Packit Service fdd496
	    /* This isn't a valid character.  */
Packit Service fdd496
	    continue;
Packit Service fdd496
	  /* Compare only if the length matches and the collation rule
Packit Service fdd496
	     index is the same.  */
Packit Service fdd496
	  if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
Packit Service fdd496
	    {
Packit Service fdd496
	      int cnt = 0;
Packit Service fdd496
Packit Service fdd496
	      while (cnt <= len &&
Packit Service fdd496
		     weights[(idx1 & 0xffffff) + 1 + cnt]
Packit Service fdd496
		     == weights[(idx2 & 0xffffff) + 1 + cnt])
Packit Service fdd496
		++cnt;
Packit Service fdd496
Packit Service fdd496
	      if (cnt > len)
Packit Service fdd496
		bitset_set (sbcset, ch);
Packit Service fdd496
	    }
Packit Service fdd496
	}
Packit Service fdd496
      /* Check whether the array has enough space.  */
Packit Service fdd496
      if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
Packit Service fdd496
	{
Packit Service fdd496
	  /* Not enough, realloc it.  */
Packit Service fdd496
	  /* +1 in case of mbcset->nequiv_classes is 0.  */
Packit Service fdd496
	  Idx new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
Packit Service fdd496
	  /* Use realloc since the array is NULL if *alloc == 0.  */
Packit Service fdd496
	  int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
Packit Service fdd496
						   int32_t,
Packit Service fdd496
						   new_equiv_class_alloc);
Packit Service fdd496
	  if (BE (new_equiv_classes == NULL, 0))
Packit Service fdd496
	    return REG_ESPACE;
Packit Service fdd496
	  mbcset->equiv_classes = new_equiv_classes;
Packit Service fdd496
	  *equiv_class_alloc = new_equiv_class_alloc;
Packit Service fdd496
	}
Packit Service fdd496
      mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
Packit Service fdd496
    }
Packit Service fdd496
  else
Packit Service fdd496
#endif /* _LIBC */
Packit Service fdd496
    {
Packit Service fdd496
      if (BE (strlen ((const char *) name) != 1, 0))
Packit Service fdd496
	return REG_ECOLLATE;
Packit Service fdd496
      bitset_set (sbcset, *name);
Packit Service fdd496
    }
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
  /* Helper function for parse_bracket_exp.
Packit Service fdd496
     Build the character class which is represented by NAME.
Packit Service fdd496
     The result are written to MBCSET and SBCSET.
Packit Service fdd496
     CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
Packit Service fdd496
     is a pointer argument since we may update it.  */
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
Packit Service fdd496
		 re_charset_t *mbcset, Idx *char_class_alloc,
Packit Service fdd496
		 const char *class_name, reg_syntax_t syntax)
Packit Service fdd496
#else /* not RE_ENABLE_I18N */
Packit Service fdd496
build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
Packit Service fdd496
		 const char *class_name, reg_syntax_t syntax)
Packit Service fdd496
#endif /* not RE_ENABLE_I18N */
Packit Service fdd496
{
Packit Service fdd496
  int i;
Packit Service fdd496
  const char *name = class_name;
Packit Service fdd496
Packit Service fdd496
  /* In case of REG_ICASE "upper" and "lower" match the both of
Packit Service fdd496
     upper and lower cases.  */
Packit Service fdd496
  if ((syntax & RE_ICASE)
Packit Service fdd496
      && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
Packit Service fdd496
    name = "alpha";
Packit Service fdd496
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  /* Check the space of the arrays.  */
Packit Service fdd496
  if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
Packit Service fdd496
    {
Packit Service fdd496
      /* Not enough, realloc it.  */
Packit Service fdd496
      /* +1 in case of mbcset->nchar_classes is 0.  */
Packit Service fdd496
      Idx new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
Packit Service fdd496
      /* Use realloc since array is NULL if *alloc == 0.  */
Packit Service fdd496
      wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
Packit Service fdd496
					       new_char_class_alloc);
Packit Service fdd496
      if (BE (new_char_classes == NULL, 0))
Packit Service fdd496
	return REG_ESPACE;
Packit Service fdd496
      mbcset->char_classes = new_char_classes;
Packit Service fdd496
      *char_class_alloc = new_char_class_alloc;
Packit Service fdd496
    }
Packit Service fdd496
  mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
Packit Service fdd496
#define BUILD_CHARCLASS_LOOP(ctype_func)	\
Packit Service fdd496
  do {						\
Packit Service fdd496
    if (BE (trans != NULL, 0))			\
Packit Service fdd496
      {						\
Packit Service fdd496
	for (i = 0; i < SBC_MAX; ++i)		\
Packit Service fdd496
	  if (ctype_func (i))			\
Packit Service fdd496
	    bitset_set (sbcset, trans[i]);	\
Packit Service fdd496
      }						\
Packit Service fdd496
    else					\
Packit Service fdd496
      {						\
Packit Service fdd496
	for (i = 0; i < SBC_MAX; ++i)		\
Packit Service fdd496
	  if (ctype_func (i))			\
Packit Service fdd496
	    bitset_set (sbcset, i);		\
Packit Service fdd496
      }						\
Packit Service fdd496
  } while (0)
Packit Service fdd496
Packit Service fdd496
  if (strcmp (name, "alnum") == 0)
Packit Service fdd496
    BUILD_CHARCLASS_LOOP (isalnum);
Packit Service fdd496
  else if (strcmp (name, "cntrl") == 0)
Packit Service fdd496
    BUILD_CHARCLASS_LOOP (iscntrl);
Packit Service fdd496
  else if (strcmp (name, "lower") == 0)
Packit Service fdd496
    BUILD_CHARCLASS_LOOP (islower);
Packit Service fdd496
  else if (strcmp (name, "space") == 0)
Packit Service fdd496
    BUILD_CHARCLASS_LOOP (isspace);
Packit Service fdd496
  else if (strcmp (name, "alpha") == 0)
Packit Service fdd496
    BUILD_CHARCLASS_LOOP (isalpha);
Packit Service fdd496
  else if (strcmp (name, "digit") == 0)
Packit Service fdd496
    BUILD_CHARCLASS_LOOP (isdigit);
Packit Service fdd496
  else if (strcmp (name, "print") == 0)
Packit Service fdd496
    BUILD_CHARCLASS_LOOP (isprint);
Packit Service fdd496
  else if (strcmp (name, "upper") == 0)
Packit Service fdd496
    BUILD_CHARCLASS_LOOP (isupper);
Packit Service fdd496
  else if (strcmp (name, "blank") == 0)
Packit Service fdd496
    BUILD_CHARCLASS_LOOP (isblank);
Packit Service fdd496
  else if (strcmp (name, "graph") == 0)
Packit Service fdd496
    BUILD_CHARCLASS_LOOP (isgraph);
Packit Service fdd496
  else if (strcmp (name, "punct") == 0)
Packit Service fdd496
    BUILD_CHARCLASS_LOOP (ispunct);
Packit Service fdd496
  else if (strcmp (name, "xdigit") == 0)
Packit Service fdd496
    BUILD_CHARCLASS_LOOP (isxdigit);
Packit Service fdd496
  else
Packit Service fdd496
    return REG_ECTYPE;
Packit Service fdd496
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
static bin_tree_t *
Packit Service fdd496
build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
Packit Service fdd496
		    const char *class_name,
Packit Service fdd496
		    const char *extra, bool non_match,
Packit Service fdd496
		    reg_errcode_t *err)
Packit Service fdd496
{
Packit Service fdd496
  re_bitset_ptr_t sbcset;
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  re_charset_t *mbcset;
Packit Service fdd496
  Idx alloc = 0;
Packit Service fdd496
#endif /* not RE_ENABLE_I18N */
Packit Service fdd496
  reg_errcode_t ret;
Packit Service fdd496
  re_token_t br_token;
Packit Service fdd496
  bin_tree_t *tree;
Packit Service fdd496
Packit Service fdd496
  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
Packit Service fdd496
  if (BE (sbcset == NULL, 0))
Packit Service fdd496
    {
Packit Service fdd496
      *err = REG_ESPACE;
Packit Service fdd496
      return NULL;
Packit Service fdd496
    }
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
Packit Service fdd496
  if (BE (mbcset == NULL, 0))
Packit Service fdd496
    {
Packit Service fdd496
      re_free (sbcset);
Packit Service fdd496
      *err = REG_ESPACE;
Packit Service fdd496
      return NULL;
Packit Service fdd496
    }
Packit Service fdd496
  mbcset->non_match = non_match;
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
Packit Service fdd496
  /* We don't care the syntax in this case.  */
Packit Service fdd496
  ret = build_charclass (trans, sbcset,
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
			 mbcset, &alloc,
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
			 class_name, 0);
Packit Service fdd496
Packit Service fdd496
  if (BE (ret != REG_NOERROR, 0))
Packit Service fdd496
    {
Packit Service fdd496
      re_free (sbcset);
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
      free_charset (mbcset);
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
      *err = ret;
Packit Service fdd496
      return NULL;
Packit Service fdd496
    }
Packit Service fdd496
  /* \w match '_' also.  */
Packit Service fdd496
  for (; *extra; extra++)
Packit Service fdd496
    bitset_set (sbcset, *extra);
Packit Service fdd496
Packit Service fdd496
  /* If it is non-matching list.  */
Packit Service fdd496
  if (non_match)
Packit Service fdd496
    bitset_not (sbcset);
Packit Service fdd496
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  /* Ensure only single byte characters are set.  */
Packit Service fdd496
  if (dfa->mb_cur_max > 1)
Packit Service fdd496
    bitset_mask (sbcset, dfa->sb_char);
Packit Service fdd496
#endif
Packit Service fdd496
Packit Service fdd496
  /* Build a tree for simple bracket.  */
Packit Service fdd496
#if defined GCC_LINT || defined lint
Packit Service fdd496
  memset (&br_token, 0, sizeof br_token);
Packit Service fdd496
#endif
Packit Service fdd496
  br_token.type = SIMPLE_BRACKET;
Packit Service fdd496
  br_token.opr.sbcset = sbcset;
Packit Service fdd496
  tree = create_token_tree (dfa, NULL, NULL, &br_token);
Packit Service fdd496
  if (BE (tree == NULL, 0))
Packit Service fdd496
    goto build_word_op_espace;
Packit Service fdd496
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  if (dfa->mb_cur_max > 1)
Packit Service fdd496
    {
Packit Service fdd496
      bin_tree_t *mbc_tree;
Packit Service fdd496
      /* Build a tree for complex bracket.  */
Packit Service fdd496
      br_token.type = COMPLEX_BRACKET;
Packit Service fdd496
      br_token.opr.mbcset = mbcset;
Packit Service fdd496
      dfa->has_mb_node = 1;
Packit Service fdd496
      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
Packit Service fdd496
      if (BE (mbc_tree == NULL, 0))
Packit Service fdd496
	goto build_word_op_espace;
Packit Service fdd496
      /* Then join them by ALT node.  */
Packit Service fdd496
      tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
Packit Service fdd496
      if (BE (mbc_tree != NULL, 1))
Packit Service fdd496
	return tree;
Packit Service fdd496
    }
Packit Service fdd496
  else
Packit Service fdd496
    {
Packit Service fdd496
      free_charset (mbcset);
Packit Service fdd496
      return tree;
Packit Service fdd496
    }
Packit Service fdd496
#else /* not RE_ENABLE_I18N */
Packit Service fdd496
  return tree;
Packit Service fdd496
#endif /* not RE_ENABLE_I18N */
Packit Service fdd496
Packit Service fdd496
 build_word_op_espace:
Packit Service fdd496
  re_free (sbcset);
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  free_charset (mbcset);
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
  *err = REG_ESPACE;
Packit Service fdd496
  return NULL;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* This is intended for the expressions like "a{1,3}".
Packit Service fdd496
   Fetch a number from 'input', and return the number.
Packit Service fdd496
   Return -1 if the number field is empty like "{,1}".
Packit Service fdd496
   Return RE_DUP_MAX + 1 if the number field is too large.
Packit Service fdd496
   Return -2 if an error occurred.  */
Packit Service fdd496
Packit Service fdd496
static Idx
Packit Service fdd496
fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
Packit Service fdd496
{
Packit Service fdd496
  Idx num = -1;
Packit Service fdd496
  unsigned char c;
Packit Service fdd496
  while (1)
Packit Service fdd496
    {
Packit Service fdd496
      fetch_token (token, input, syntax);
Packit Service fdd496
      c = token->opr.c;
Packit Service fdd496
      if (BE (token->type == END_OF_RE, 0))
Packit Service fdd496
	return -2;
Packit Service fdd496
      if (token->type == OP_CLOSE_DUP_NUM || c == ',')
Packit Service fdd496
	break;
Packit Service fdd496
      num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
Packit Service fdd496
	     ? -2
Packit Service fdd496
	     : num == -1
Packit Service fdd496
	     ? c - '0'
Packit Service fdd496
	     : MIN (RE_DUP_MAX + 1, num * 10 + c - '0'));
Packit Service fdd496
    }
Packit Service fdd496
  return num;
Packit Service fdd496
}
Packit Service fdd496

Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
static void
Packit Service fdd496
free_charset (re_charset_t *cset)
Packit Service fdd496
{
Packit Service fdd496
  re_free (cset->mbchars);
Packit Service fdd496
# ifdef _LIBC
Packit Service fdd496
  re_free (cset->coll_syms);
Packit Service fdd496
  re_free (cset->equiv_classes);
Packit Service fdd496
  re_free (cset->range_starts);
Packit Service fdd496
  re_free (cset->range_ends);
Packit Service fdd496
# endif
Packit Service fdd496
  re_free (cset->char_classes);
Packit Service fdd496
  re_free (cset);
Packit Service fdd496
}
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496

Packit Service fdd496
/* Functions for binary tree operation.  */
Packit Service fdd496
Packit Service fdd496
/* Create a tree node.  */
Packit Service fdd496
Packit Service fdd496
static bin_tree_t *
Packit Service fdd496
create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
Packit Service fdd496
	     re_token_type_t type)
Packit Service fdd496
{
Packit Service fdd496
  re_token_t t;
Packit Service fdd496
#if defined GCC_LINT || defined lint
Packit Service fdd496
  memset (&t, 0, sizeof t);
Packit Service fdd496
#endif
Packit Service fdd496
  t.type = type;
Packit Service fdd496
  return create_token_tree (dfa, left, right, &t);
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
static bin_tree_t *
Packit Service fdd496
create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
Packit Service fdd496
		   const re_token_t *token)
Packit Service fdd496
{
Packit Service fdd496
  bin_tree_t *tree;
Packit Service fdd496
  if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
Packit Service fdd496
    {
Packit Service fdd496
      bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
Packit Service fdd496
Packit Service fdd496
      if (storage == NULL)
Packit Service fdd496
	return NULL;
Packit Service fdd496
      storage->next = dfa->str_tree_storage;
Packit Service fdd496
      dfa->str_tree_storage = storage;
Packit Service fdd496
      dfa->str_tree_storage_idx = 0;
Packit Service fdd496
    }
Packit Service fdd496
  tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
Packit Service fdd496
Packit Service fdd496
  tree->parent = NULL;
Packit Service fdd496
  tree->left = left;
Packit Service fdd496
  tree->right = right;
Packit Service fdd496
  tree->token = *token;
Packit Service fdd496
  tree->token.duplicated = 0;
Packit Service fdd496
  tree->token.opt_subexp = 0;
Packit Service fdd496
  tree->first = NULL;
Packit Service fdd496
  tree->next = NULL;
Packit Service fdd496
  tree->node_idx = -1;
Packit Service fdd496
Packit Service fdd496
  if (left != NULL)
Packit Service fdd496
    left->parent = tree;
Packit Service fdd496
  if (right != NULL)
Packit Service fdd496
    right->parent = tree;
Packit Service fdd496
  return tree;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Mark the tree SRC as an optional subexpression.
Packit Service fdd496
   To be called from preorder or postorder.  */
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
mark_opt_subexp (void *extra, bin_tree_t *node)
Packit Service fdd496
{
Packit Service fdd496
  Idx idx = (uintptr_t) extra;
Packit Service fdd496
  if (node->token.type == SUBEXP && node->token.opr.idx == idx)
Packit Service fdd496
    node->token.opt_subexp = 1;
Packit Service fdd496
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Free the allocated memory inside NODE. */
Packit Service fdd496
Packit Service fdd496
static void
Packit Service fdd496
free_token (re_token_t *node)
Packit Service fdd496
{
Packit Service fdd496
#ifdef RE_ENABLE_I18N
Packit Service fdd496
  if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
Packit Service fdd496
    free_charset (node->opr.mbcset);
Packit Service fdd496
  else
Packit Service fdd496
#endif /* RE_ENABLE_I18N */
Packit Service fdd496
    if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
Packit Service fdd496
      re_free (node->opr.sbcset);
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
/* Worker function for tree walking.  Free the allocated memory inside NODE
Packit Service fdd496
   and its children. */
Packit Service fdd496
Packit Service fdd496
static reg_errcode_t
Packit Service fdd496
free_tree (void *extra, bin_tree_t *node)
Packit Service fdd496
{
Packit Service fdd496
  free_token (&node->token);
Packit Service fdd496
  return REG_NOERROR;
Packit Service fdd496
}
Packit Service fdd496
Packit Service fdd496
Packit Service fdd496
/* Duplicate the node SRC, and return new node.  This is a preorder
Packit Service fdd496
   visit similar to the one implemented by the generic visitor, but
Packit Service fdd496
   we need more infrastructure to maintain two parallel trees --- so,
Packit Service fdd496
   it's easier to duplicate.  */
Packit Service fdd496
Packit Service fdd496
static bin_tree_t *
Packit Service fdd496
duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
Packit Service fdd496
{
Packit Service fdd496
  const bin_tree_t *node;
Packit Service fdd496
  bin_tree_t *dup_root;
Packit Service fdd496
  bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
Packit Service fdd496
Packit Service fdd496
  for (node = root; ; )
Packit Service fdd496
    {
Packit Service fdd496
      /* Create a new tree and link it back to the current parent.  */
Packit Service fdd496
      *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
Packit Service fdd496
      if (*p_new == NULL)
Packit Service fdd496
	return NULL;
Packit Service fdd496
      (*p_new)->parent = dup_node;
Packit Service fdd496
      (*p_new)->token.duplicated = 1;
Packit Service fdd496
      dup_node = *p_new;
Packit Service fdd496
Packit Service fdd496
      /* Go to the left node, or up and to the right.  */
Packit Service fdd496
      if (node->left)
Packit Service fdd496
	{
Packit Service fdd496
	  node = node->left;
Packit Service fdd496
	  p_new = &dup_node->left;
Packit Service fdd496
	}
Packit Service fdd496
      else
Packit Service fdd496
	{
Packit Service fdd496
	  const bin_tree_t *prev = NULL;
Packit Service fdd496
	  while (node->right == prev || node->right == NULL)
Packit Service fdd496
	    {
Packit Service fdd496
	      prev = node;
Packit Service fdd496
	      node = node->parent;
Packit Service fdd496
	      dup_node = dup_node->parent;
Packit Service fdd496
	      if (!node)
Packit Service fdd496
		return dup_root;
Packit Service fdd496
	    }
Packit Service fdd496
	  node = node->right;
Packit Service fdd496
	  p_new = &dup_node->right;
Packit Service fdd496
	}
Packit Service fdd496
    }
Packit Service fdd496
}