Blame lib/regcomp.c

Packit 709fb3
/* Extended regular expression matching and search library.
Packit 709fb3
   Copyright (C) 2002-2017 Free Software Foundation, Inc.
Packit 709fb3
   This file is part of the GNU C Library.
Packit 709fb3
   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
Packit 709fb3
Packit 709fb3
   The GNU C Library is free software; you can redistribute it and/or
Packit 709fb3
   modify it under the terms of the GNU General Public
Packit 709fb3
   License as published by the Free Software Foundation; either
Packit 709fb3
   version 3 of the License, or (at your option) any later version.
Packit 709fb3
Packit 709fb3
   The GNU C Library is distributed in the hope that it will be useful,
Packit 709fb3
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 709fb3
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 709fb3
   General Public License for more details.
Packit 709fb3
Packit 709fb3
   You should have received a copy of the GNU General Public
Packit 709fb3
   License along with the GNU C Library; if not, see
Packit 709fb3
   <http://www.gnu.org/licenses/>.  */
Packit 709fb3
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
# include <locale/weight.h>
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
Packit 709fb3
					  size_t length, reg_syntax_t syntax);
Packit 709fb3
static void re_compile_fastmap_iter (regex_t *bufp,
Packit 709fb3
				     const re_dfastate_t *init_state,
Packit 709fb3
				     char *fastmap);
Packit 709fb3
static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
static void free_charset (re_charset_t *cset);
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
static void free_workarea_compile (regex_t *preg);
Packit 709fb3
static reg_errcode_t create_initial_state (re_dfa_t *dfa);
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
static void optimize_utf8 (re_dfa_t *dfa);
Packit 709fb3
#endif
Packit 709fb3
static reg_errcode_t analyze (regex_t *preg);
Packit 709fb3
static reg_errcode_t preorder (bin_tree_t *root,
Packit 709fb3
			       reg_errcode_t (fn (void *, bin_tree_t *)),
Packit 709fb3
			       void *extra);
Packit 709fb3
static reg_errcode_t postorder (bin_tree_t *root,
Packit 709fb3
				reg_errcode_t (fn (void *, bin_tree_t *)),
Packit 709fb3
				void *extra);
Packit 709fb3
static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
Packit 709fb3
static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
Packit 709fb3
static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
Packit 709fb3
				 bin_tree_t *node);
Packit 709fb3
static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
Packit 709fb3
static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
Packit 709fb3
static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
Packit 709fb3
static Idx duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint);
Packit 709fb3
static Idx search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
Packit 709fb3
				   unsigned int constraint);
Packit 709fb3
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
Packit 709fb3
static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
Packit 709fb3
					 Idx node, bool root);
Packit 709fb3
static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
Packit 709fb3
static Idx fetch_number (re_string_t *input, re_token_t *token,
Packit 709fb3
			 reg_syntax_t syntax);
Packit 709fb3
static int peek_token (re_token_t *token, re_string_t *input,
Packit 709fb3
			reg_syntax_t syntax) internal_function;
Packit 709fb3
static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
Packit 709fb3
			  reg_syntax_t syntax, reg_errcode_t *err);
Packit 709fb3
static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
Packit 709fb3
				  re_token_t *token, reg_syntax_t syntax,
Packit 709fb3
				  Idx nest, reg_errcode_t *err);
Packit 709fb3
static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
Packit 709fb3
				 re_token_t *token, reg_syntax_t syntax,
Packit 709fb3
				 Idx nest, reg_errcode_t *err);
Packit 709fb3
static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
Packit 709fb3
				     re_token_t *token, reg_syntax_t syntax,
Packit 709fb3
				     Idx nest, reg_errcode_t *err);
Packit 709fb3
static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
Packit 709fb3
				  re_token_t *token, reg_syntax_t syntax,
Packit 709fb3
				  Idx nest, reg_errcode_t *err);
Packit 709fb3
static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
Packit 709fb3
				 re_dfa_t *dfa, re_token_t *token,
Packit 709fb3
				 reg_syntax_t syntax, reg_errcode_t *err);
Packit 709fb3
static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
Packit 709fb3
				      re_token_t *token, reg_syntax_t syntax,
Packit 709fb3
				      reg_errcode_t *err);
Packit 709fb3
static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
Packit 709fb3
					    re_string_t *regexp,
Packit 709fb3
					    re_token_t *token, int token_len,
Packit 709fb3
					    re_dfa_t *dfa,
Packit 709fb3
					    reg_syntax_t syntax,
Packit 709fb3
					    bool accept_hyphen);
Packit 709fb3
static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
Packit 709fb3
					  re_string_t *regexp,
Packit 709fb3
					  re_token_t *token);
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
static reg_errcode_t build_equiv_class (bitset_t sbcset,
Packit 709fb3
					re_charset_t *mbcset,
Packit 709fb3
					Idx *equiv_class_alloc,
Packit 709fb3
					const unsigned char *name);
Packit 709fb3
static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
Packit 709fb3
				      bitset_t sbcset,
Packit 709fb3
				      re_charset_t *mbcset,
Packit 709fb3
				      Idx *char_class_alloc,
Packit 709fb3
				      const char *class_name,
Packit 709fb3
				      reg_syntax_t syntax);
Packit 709fb3
#else  /* not RE_ENABLE_I18N */
Packit 709fb3
static reg_errcode_t build_equiv_class (bitset_t sbcset,
Packit 709fb3
					const unsigned char *name);
Packit 709fb3
static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
Packit 709fb3
				      bitset_t sbcset,
Packit 709fb3
				      const char *class_name,
Packit 709fb3
				      reg_syntax_t syntax);
Packit 709fb3
#endif /* not RE_ENABLE_I18N */
Packit 709fb3
static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
Packit 709fb3
				       RE_TRANSLATE_TYPE trans,
Packit 709fb3
				       const char *class_name,
Packit 709fb3
				       const char *extra,
Packit 709fb3
				       bool non_match, reg_errcode_t *err);
Packit 709fb3
static bin_tree_t *create_tree (re_dfa_t *dfa,
Packit 709fb3
				bin_tree_t *left, bin_tree_t *right,
Packit 709fb3
				re_token_type_t type);
Packit 709fb3
static bin_tree_t *create_token_tree (re_dfa_t *dfa,
Packit 709fb3
				      bin_tree_t *left, bin_tree_t *right,
Packit 709fb3
				      const re_token_t *token);
Packit 709fb3
static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
Packit 709fb3
static void free_token (re_token_t *node);
Packit 709fb3
static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
Packit 709fb3
static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
Packit 709fb3

Packit 709fb3
/* This table gives an error message for each of the error codes listed
Packit 709fb3
   in regex.h.  Obviously the order here has to be same as there.
Packit 709fb3
   POSIX doesn't require that we do anything for REG_NOERROR,
Packit 709fb3
   but why not be nice?  */
Packit 709fb3
Packit 709fb3
static const char __re_error_msgid[] =
Packit 709fb3
  {
Packit 709fb3
#define REG_NOERROR_IDX	0
Packit 709fb3
    gettext_noop ("Success")	/* REG_NOERROR */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
Packit 709fb3
    gettext_noop ("No match")	/* REG_NOMATCH */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
Packit 709fb3
    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
Packit 709fb3
    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
Packit 709fb3
    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
Packit 709fb3
    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
Packit 709fb3
    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
Packit 709fb3
    gettext_noop ("Unmatched [, [^, [:, [., or [=")	/* REG_EBRACK */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [, [^, [:, [., or [=")
Packit 709fb3
    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
Packit 709fb3
    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
Packit 709fb3
    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
Packit 709fb3
    gettext_noop ("Invalid range end")	/* REG_ERANGE */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
Packit 709fb3
    gettext_noop ("Memory exhausted") /* REG_ESPACE */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
Packit 709fb3
    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
Packit 709fb3
    gettext_noop ("Premature end of regular expression") /* REG_EEND */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
Packit 709fb3
    gettext_noop ("Regular expression too big") /* REG_ESIZE */
Packit 709fb3
    "\0"
Packit 709fb3
#define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
Packit 709fb3
    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
Packit 709fb3
  };
Packit 709fb3
Packit 709fb3
static const size_t __re_error_msgid_idx[] =
Packit 709fb3
  {
Packit 709fb3
    REG_NOERROR_IDX,
Packit 709fb3
    REG_NOMATCH_IDX,
Packit 709fb3
    REG_BADPAT_IDX,
Packit 709fb3
    REG_ECOLLATE_IDX,
Packit 709fb3
    REG_ECTYPE_IDX,
Packit 709fb3
    REG_EESCAPE_IDX,
Packit 709fb3
    REG_ESUBREG_IDX,
Packit 709fb3
    REG_EBRACK_IDX,
Packit 709fb3
    REG_EPAREN_IDX,
Packit 709fb3
    REG_EBRACE_IDX,
Packit 709fb3
    REG_BADBR_IDX,
Packit 709fb3
    REG_ERANGE_IDX,
Packit 709fb3
    REG_ESPACE_IDX,
Packit 709fb3
    REG_BADRPT_IDX,
Packit 709fb3
    REG_EEND_IDX,
Packit 709fb3
    REG_ESIZE_IDX,
Packit 709fb3
    REG_ERPAREN_IDX
Packit 709fb3
  };
Packit 709fb3

Packit 709fb3
/* Entry points for GNU code.  */
Packit 709fb3
Packit 709fb3
/* re_compile_pattern is the GNU regular expression compiler: it
Packit 709fb3
   compiles PATTERN (of length LENGTH) and puts the result in BUFP.
Packit 709fb3
   Returns 0 if the pattern was valid, otherwise an error string.
Packit 709fb3
Packit 709fb3
   Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields
Packit 709fb3
   are set in BUFP on entry.  */
Packit 709fb3
Packit 709fb3
const char *
Packit 709fb3
re_compile_pattern (const char *pattern, size_t length,
Packit 709fb3
		    struct re_pattern_buffer *bufp)
Packit 709fb3
{
Packit 709fb3
  reg_errcode_t ret;
Packit 709fb3
Packit 709fb3
  /* And GNU code determines whether or not to get register information
Packit 709fb3
     by passing null for the REGS argument to re_match, etc., not by
Packit 709fb3
     setting no_sub, unless RE_NO_SUB is set.  */
Packit 709fb3
  bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
Packit 709fb3
Packit 709fb3
  /* Match anchors at newline.  */
Packit 709fb3
  bufp->newline_anchor = 1;
Packit 709fb3
Packit 709fb3
  ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
Packit 709fb3
Packit 709fb3
  if (!ret)
Packit 709fb3
    return NULL;
Packit 709fb3
  return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
Packit 709fb3
}
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
weak_alias (__re_compile_pattern, re_compile_pattern)
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
/* Set by 're_set_syntax' to the current regexp syntax to recognize.  Can
Packit 709fb3
   also be assigned to arbitrarily: each pattern buffer stores its own
Packit 709fb3
   syntax, so it can be changed between regex compilations.  */
Packit 709fb3
/* This has no initializer because initialized variables in Emacs
Packit 709fb3
   become read-only after dumping.  */
Packit 709fb3
reg_syntax_t re_syntax_options;
Packit 709fb3
Packit 709fb3
Packit 709fb3
/* Specify the precise syntax of regexps for compilation.  This provides
Packit 709fb3
   for compatibility for various utilities which historically have
Packit 709fb3
   different, incompatible syntaxes.
Packit 709fb3
Packit 709fb3
   The argument SYNTAX is a bit mask comprised of the various bits
Packit 709fb3
   defined in regex.h.  We return the old syntax.  */
Packit 709fb3
Packit 709fb3
reg_syntax_t
Packit 709fb3
re_set_syntax (reg_syntax_t syntax)
Packit 709fb3
{
Packit 709fb3
  reg_syntax_t ret = re_syntax_options;
Packit 709fb3
Packit 709fb3
  re_syntax_options = syntax;
Packit 709fb3
  return ret;
Packit 709fb3
}
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
weak_alias (__re_set_syntax, re_set_syntax)
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
int
Packit 709fb3
re_compile_fastmap (struct re_pattern_buffer *bufp)
Packit 709fb3
{
Packit 709fb3
  re_dfa_t *dfa = bufp->buffer;
Packit 709fb3
  char *fastmap = bufp->fastmap;
Packit 709fb3
Packit 709fb3
  memset (fastmap, '\0', sizeof (char) * SBC_MAX);
Packit 709fb3
  re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
Packit 709fb3
  if (dfa->init_state != dfa->init_state_word)
Packit 709fb3
    re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
Packit 709fb3
  if (dfa->init_state != dfa->init_state_nl)
Packit 709fb3
    re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
Packit 709fb3
  if (dfa->init_state != dfa->init_state_begbuf)
Packit 709fb3
    re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
Packit 709fb3
  bufp->fastmap_accurate = 1;
Packit 709fb3
  return 0;
Packit 709fb3
}
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
weak_alias (__re_compile_fastmap, re_compile_fastmap)
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
static inline void
Packit 709fb3
__attribute__ ((always_inline))
Packit 709fb3
re_set_fastmap (char *fastmap, bool icase, int ch)
Packit 709fb3
{
Packit 709fb3
  fastmap[ch] = 1;
Packit 709fb3
  if (icase)
Packit 709fb3
    fastmap[tolower (ch)] = 1;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Helper function for re_compile_fastmap.
Packit 709fb3
   Compile fastmap for the initial_state INIT_STATE.  */
Packit 709fb3
Packit 709fb3
static void
Packit 709fb3
re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
Packit 709fb3
			 char *fastmap)
Packit 709fb3
{
Packit 709fb3
  re_dfa_t *dfa = bufp->buffer;
Packit 709fb3
  Idx node_cnt;
Packit 709fb3
  bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
Packit 709fb3
  for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
Packit 709fb3
    {
Packit 709fb3
      Idx node = init_state->nodes.elems[node_cnt];
Packit 709fb3
      re_token_type_t type = dfa->nodes[node].type;
Packit 709fb3
Packit 709fb3
      if (type == CHARACTER)
Packit 709fb3
	{
Packit 709fb3
	  re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
	  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
Packit 709fb3
	    {
Packit 709fb3
	      unsigned char buf[MB_LEN_MAX];
Packit 709fb3
	      unsigned char *p;
Packit 709fb3
	      wchar_t wc;
Packit 709fb3
	      mbstate_t state;
Packit 709fb3
Packit 709fb3
	      p = buf;
Packit 709fb3
	      *p++ = dfa->nodes[node].opr.c;
Packit 709fb3
	      while (++node < dfa->nodes_len
Packit 709fb3
		     &&	dfa->nodes[node].type == CHARACTER
Packit 709fb3
		     && dfa->nodes[node].mb_partial)
Packit 709fb3
		*p++ = dfa->nodes[node].opr.c;
Packit 709fb3
	      memset (&state, '\0', sizeof (state));
Packit 709fb3
	      if (__mbrtowc (&wc, (const char *) buf, p - buf,
Packit 709fb3
			     &state) == p - buf
Packit 709fb3
		  && (__wcrtomb ((char *) buf, __towlower (wc), &state)
Packit 709fb3
		      != (size_t) -1))
Packit 709fb3
		re_set_fastmap (fastmap, false, buf[0]);
Packit 709fb3
	    }
Packit 709fb3
#endif
Packit 709fb3
	}
Packit 709fb3
      else if (type == SIMPLE_BRACKET)
Packit 709fb3
	{
Packit 709fb3
	  int i, ch;
Packit 709fb3
	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
Packit 709fb3
	    {
Packit 709fb3
	      int j;
Packit 709fb3
	      bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
Packit 709fb3
	      for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
Packit 709fb3
		if (w & ((bitset_word_t) 1 << j))
Packit 709fb3
		  re_set_fastmap (fastmap, icase, ch);
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
      else if (type == COMPLEX_BRACKET)
Packit 709fb3
	{
Packit 709fb3
	  re_charset_t *cset = dfa->nodes[node].opr.mbcset;
Packit 709fb3
	  Idx i;
Packit 709fb3
Packit 709fb3
# ifdef _LIBC
Packit 709fb3
	  /* See if we have to try all bytes which start multiple collation
Packit 709fb3
	     elements.
Packit 709fb3
	     e.g. In da_DK, we want to catch 'a' since "aa" is a valid
Packit 709fb3
		  collation element, and don't catch 'b' since 'b' is
Packit 709fb3
		  the only collation element which starts from 'b' (and
Packit 709fb3
		  it is caught by SIMPLE_BRACKET).  */
Packit 709fb3
	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
Packit 709fb3
		  && (cset->ncoll_syms || cset->nranges))
Packit 709fb3
		{
Packit 709fb3
		  const int32_t *table = (const int32_t *)
Packit 709fb3
		    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
Packit 709fb3
		  for (i = 0; i < SBC_MAX; ++i)
Packit 709fb3
		    if (table[i] < 0)
Packit 709fb3
		      re_set_fastmap (fastmap, icase, i);
Packit 709fb3
		}
Packit 709fb3
# endif /* _LIBC */
Packit 709fb3
Packit 709fb3
	  /* See if we have to start the match at all multibyte characters,
Packit 709fb3
	     i.e. where we would not find an invalid sequence.  This only
Packit 709fb3
	     applies to multibyte character sets; for single byte character
Packit 709fb3
	     sets, the SIMPLE_BRACKET again suffices.  */
Packit 709fb3
	  if (dfa->mb_cur_max > 1
Packit 709fb3
	      && (cset->nchar_classes || cset->non_match || cset->nranges
Packit 709fb3
# ifdef _LIBC
Packit 709fb3
		  || cset->nequiv_classes
Packit 709fb3
# endif /* _LIBC */
Packit 709fb3
		 ))
Packit 709fb3
	    {
Packit 709fb3
	      unsigned char c = 0;
Packit 709fb3
	      do
Packit 709fb3
		{
Packit 709fb3
		  mbstate_t mbs;
Packit 709fb3
		  memset (&mbs, 0, sizeof (mbs));
Packit 709fb3
		  if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
Packit 709fb3
		    re_set_fastmap (fastmap, false, (int) c);
Packit 709fb3
		}
Packit 709fb3
	      while (++c != 0);
Packit 709fb3
	    }
Packit 709fb3
Packit 709fb3
	  else
Packit 709fb3
	    {
Packit 709fb3
	      /* ... Else catch all bytes which can start the mbchars.  */
Packit 709fb3
	      for (i = 0; i < cset->nmbchars; ++i)
Packit 709fb3
		{
Packit 709fb3
		  char buf[256];
Packit 709fb3
		  mbstate_t state;
Packit 709fb3
		  memset (&state, '\0', sizeof (state));
Packit 709fb3
		  if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
Packit 709fb3
		    re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
Packit 709fb3
		  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
Packit 709fb3
		    {
Packit 709fb3
		      if (__wcrtomb (buf, __towlower (cset->mbchars[i]), &state)
Packit 709fb3
			  != (size_t) -1)
Packit 709fb3
			re_set_fastmap (fastmap, false, *(unsigned char *) buf);
Packit 709fb3
		    }
Packit 709fb3
		}
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
      else if (type == OP_PERIOD
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
	       || type == OP_UTF8_PERIOD
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
	       || type == END_OF_RE)
Packit 709fb3
	{
Packit 709fb3
	  memset (fastmap, '\1', sizeof (char) * SBC_MAX);
Packit 709fb3
	  if (type == END_OF_RE)
Packit 709fb3
	    bufp->can_be_null = 1;
Packit 709fb3
	  return;
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
}
Packit 709fb3

Packit 709fb3
/* Entry point for POSIX code.  */
Packit 709fb3
/* regcomp takes a regular expression as a string and compiles it.
Packit 709fb3
Packit 709fb3
   PREG is a regex_t *.  We do not expect any fields to be initialized,
Packit 709fb3
   since POSIX says we shouldn't.  Thus, we set
Packit 709fb3
Packit 709fb3
     'buffer' to the compiled pattern;
Packit 709fb3
     'used' to the length of the compiled pattern;
Packit 709fb3
     'syntax' to RE_SYNTAX_POSIX_EXTENDED if the
Packit 709fb3
       REG_EXTENDED bit in CFLAGS is set; otherwise, to
Packit 709fb3
       RE_SYNTAX_POSIX_BASIC;
Packit 709fb3
     'newline_anchor' to REG_NEWLINE being set in CFLAGS;
Packit 709fb3
     'fastmap' to an allocated space for the fastmap;
Packit 709fb3
     'fastmap_accurate' to zero;
Packit 709fb3
     're_nsub' to the number of subexpressions in PATTERN.
Packit 709fb3
Packit 709fb3
   PATTERN is the address of the pattern string.
Packit 709fb3
Packit 709fb3
   CFLAGS is a series of bits which affect compilation.
Packit 709fb3
Packit 709fb3
     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
Packit 709fb3
     use POSIX basic syntax.
Packit 709fb3
Packit 709fb3
     If REG_NEWLINE is set, then . and [^...] don't match newline.
Packit 709fb3
     Also, regexec will try a match beginning after every newline.
Packit 709fb3
Packit 709fb3
     If REG_ICASE is set, then we considers upper- and lowercase
Packit 709fb3
     versions of letters to be equivalent when matching.
Packit 709fb3
Packit 709fb3
     If REG_NOSUB is set, then when PREG is passed to regexec, that
Packit 709fb3
     routine will report only success or failure, and nothing about the
Packit 709fb3
     registers.
Packit 709fb3
Packit 709fb3
   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
Packit 709fb3
   the return codes and their meanings.)  */
Packit 709fb3
Packit 709fb3
int
Packit 709fb3
regcomp (regex_t *_Restrict_ preg, const char *_Restrict_ pattern, int cflags)
Packit 709fb3
{
Packit 709fb3
  reg_errcode_t ret;
Packit 709fb3
  reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
Packit 709fb3
			 : RE_SYNTAX_POSIX_BASIC);
Packit 709fb3
Packit 709fb3
  preg->buffer = NULL;
Packit 709fb3
  preg->allocated = 0;
Packit 709fb3
  preg->used = 0;
Packit 709fb3
Packit 709fb3
  /* Try to allocate space for the fastmap.  */
Packit 709fb3
  preg->fastmap = re_malloc (char, SBC_MAX);
Packit 709fb3
  if (BE (preg->fastmap == NULL, 0))
Packit 709fb3
    return REG_ESPACE;
Packit 709fb3
Packit 709fb3
  syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
Packit 709fb3
Packit 709fb3
  /* If REG_NEWLINE is set, newlines are treated differently.  */
Packit 709fb3
  if (cflags & REG_NEWLINE)
Packit 709fb3
    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
Packit 709fb3
      syntax &= ~RE_DOT_NEWLINE;
Packit 709fb3
      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
Packit 709fb3
      /* It also changes the matching behavior.  */
Packit 709fb3
      preg->newline_anchor = 1;
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
    preg->newline_anchor = 0;
Packit 709fb3
  preg->no_sub = !!(cflags & REG_NOSUB);
Packit 709fb3
  preg->translate = NULL;
Packit 709fb3
Packit 709fb3
  ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
Packit 709fb3
Packit 709fb3
  /* POSIX doesn't distinguish between an unmatched open-group and an
Packit 709fb3
     unmatched close-group: both are REG_EPAREN.  */
Packit 709fb3
  if (ret == REG_ERPAREN)
Packit 709fb3
    ret = REG_EPAREN;
Packit 709fb3
Packit 709fb3
  /* We have already checked preg->fastmap != NULL.  */
Packit 709fb3
  if (BE (ret == REG_NOERROR, 1))
Packit 709fb3
    /* Compute the fastmap now, since regexec cannot modify the pattern
Packit 709fb3
       buffer.  This function never fails in this implementation.  */
Packit 709fb3
    (void) re_compile_fastmap (preg);
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      /* Some error occurred while compiling the expression.  */
Packit 709fb3
      re_free (preg->fastmap);
Packit 709fb3
      preg->fastmap = NULL;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  return (int) ret;
Packit 709fb3
}
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
weak_alias (__regcomp, regcomp)
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
/* Returns a message corresponding to an error code, ERRCODE, returned
Packit 709fb3
   from either regcomp or regexec.   We don't use PREG here.  */
Packit 709fb3
Packit 709fb3
size_t
Packit 709fb3
regerror (int errcode, const regex_t *_Restrict_ preg, char *_Restrict_ errbuf,
Packit 709fb3
	  size_t errbuf_size)
Packit 709fb3
{
Packit 709fb3
  const char *msg;
Packit 709fb3
  size_t msg_size;
Packit 709fb3
Packit 709fb3
  if (BE (errcode < 0
Packit 709fb3
	  || errcode >= (int) (sizeof (__re_error_msgid_idx)
Packit 709fb3
			       / sizeof (__re_error_msgid_idx[0])), 0))
Packit 709fb3
    /* Only error codes returned by the rest of the code should be passed
Packit 709fb3
       to this routine.  If we are given anything else, or if other regex
Packit 709fb3
       code generates an invalid error code, then the program has a bug.
Packit 709fb3
       Dump core so we can fix it.  */
Packit 709fb3
    abort ();
Packit 709fb3
Packit 709fb3
  msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
Packit 709fb3
Packit 709fb3
  msg_size = strlen (msg) + 1; /* Includes the null.  */
Packit 709fb3
Packit 709fb3
  if (BE (errbuf_size != 0, 1))
Packit 709fb3
    {
Packit 709fb3
      size_t cpy_size = msg_size;
Packit 709fb3
      if (BE (msg_size > errbuf_size, 0))
Packit 709fb3
	{
Packit 709fb3
	  cpy_size = errbuf_size - 1;
Packit 709fb3
	  errbuf[cpy_size] = '\0';
Packit 709fb3
	}
Packit 709fb3
      memcpy (errbuf, msg, cpy_size);
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  return msg_size;
Packit 709fb3
}
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
weak_alias (__regerror, regerror)
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
/* This static array is used for the map to single-byte characters when
Packit 709fb3
   UTF-8 is used.  Otherwise we would allocate memory just to initialize
Packit 709fb3
   it the same all the time.  UTF-8 is the preferred encoding so this is
Packit 709fb3
   a worthwhile optimization.  */
Packit 709fb3
static const bitset_t utf8_sb_map =
Packit 709fb3
{
Packit 709fb3
  /* Set the first 128 bits.  */
Packit 709fb3
# if defined __GNUC__ && !defined __STRICT_ANSI__
Packit 709fb3
  [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
Packit 709fb3
# else
Packit 709fb3
#  if 4 * BITSET_WORD_BITS < ASCII_CHARS
Packit 709fb3
#   error "bitset_word_t is narrower than 32 bits"
Packit 709fb3
#  elif 3 * BITSET_WORD_BITS < ASCII_CHARS
Packit 709fb3
  BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,
Packit 709fb3
#  elif 2 * BITSET_WORD_BITS < ASCII_CHARS
Packit 709fb3
  BITSET_WORD_MAX, BITSET_WORD_MAX,
Packit 709fb3
#  elif 1 * BITSET_WORD_BITS < ASCII_CHARS
Packit 709fb3
  BITSET_WORD_MAX,
Packit 709fb3
#  endif
Packit 709fb3
  (BITSET_WORD_MAX
Packit 709fb3
   >> (SBC_MAX % BITSET_WORD_BITS == 0
Packit 709fb3
       ? 0
Packit 709fb3
       : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
Packit 709fb3
# endif
Packit 709fb3
};
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
Packit 709fb3
static void
Packit 709fb3
free_dfa_content (re_dfa_t *dfa)
Packit 709fb3
{
Packit 709fb3
  Idx i, j;
Packit 709fb3
Packit 709fb3
  if (dfa->nodes)
Packit 709fb3
    for (i = 0; i < dfa->nodes_len; ++i)
Packit 709fb3
      free_token (dfa->nodes + i);
Packit 709fb3
  re_free (dfa->nexts);
Packit 709fb3
  for (i = 0; i < dfa->nodes_len; ++i)
Packit 709fb3
    {
Packit 709fb3
      if (dfa->eclosures != NULL)
Packit 709fb3
	re_node_set_free (dfa->eclosures + i);
Packit 709fb3
      if (dfa->inveclosures != NULL)
Packit 709fb3
	re_node_set_free (dfa->inveclosures + i);
Packit 709fb3
      if (dfa->edests != NULL)
Packit 709fb3
	re_node_set_free (dfa->edests + i);
Packit 709fb3
    }
Packit 709fb3
  re_free (dfa->edests);
Packit 709fb3
  re_free (dfa->eclosures);
Packit 709fb3
  re_free (dfa->inveclosures);
Packit 709fb3
  re_free (dfa->nodes);
Packit 709fb3
Packit 709fb3
  if (dfa->state_table)
Packit 709fb3
    for (i = 0; i <= dfa->state_hash_mask; ++i)
Packit 709fb3
      {
Packit 709fb3
	struct re_state_table_entry *entry = dfa->state_table + i;
Packit 709fb3
	for (j = 0; j < entry->num; ++j)
Packit 709fb3
	  {
Packit 709fb3
	    re_dfastate_t *state = entry->array[j];
Packit 709fb3
	    free_state (state);
Packit 709fb3
	  }
Packit 709fb3
	re_free (entry->array);
Packit 709fb3
      }
Packit 709fb3
  re_free (dfa->state_table);
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  if (dfa->sb_char != utf8_sb_map)
Packit 709fb3
    re_free (dfa->sb_char);
Packit 709fb3
#endif
Packit 709fb3
  re_free (dfa->subexp_map);
Packit 709fb3
#ifdef DEBUG
Packit 709fb3
  re_free (dfa->re_str);
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
  re_free (dfa);
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
Packit 709fb3
/* Free dynamically allocated space used by PREG.  */
Packit 709fb3
Packit 709fb3
void
Packit 709fb3
regfree (regex_t *preg)
Packit 709fb3
{
Packit 709fb3
  re_dfa_t *dfa = preg->buffer;
Packit 709fb3
  if (BE (dfa != NULL, 1))
Packit 709fb3
    {
Packit 709fb3
      lock_fini (dfa->lock);
Packit 709fb3
      free_dfa_content (dfa);
Packit 709fb3
    }
Packit 709fb3
  preg->buffer = NULL;
Packit 709fb3
  preg->allocated = 0;
Packit 709fb3
Packit 709fb3
  re_free (preg->fastmap);
Packit 709fb3
  preg->fastmap = NULL;
Packit 709fb3
Packit 709fb3
  re_free (preg->translate);
Packit 709fb3
  preg->translate = NULL;
Packit 709fb3
}
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
weak_alias (__regfree, regfree)
Packit 709fb3
#endif
Packit 709fb3

Packit 709fb3
/* Entry points compatible with 4.2 BSD regex library.  We don't define
Packit 709fb3
   them unless specifically requested.  */
Packit 709fb3
Packit 709fb3
#if defined _REGEX_RE_COMP || defined _LIBC
Packit 709fb3
Packit 709fb3
/* BSD has one and only one pattern buffer.  */
Packit 709fb3
static struct re_pattern_buffer re_comp_buf;
Packit 709fb3
Packit 709fb3
char *
Packit 709fb3
# ifdef _LIBC
Packit 709fb3
/* Make these definitions weak in libc, so POSIX programs can redefine
Packit 709fb3
   these names if they don't use our functions, and still use
Packit 709fb3
   regcomp/regexec above without link errors.  */
Packit 709fb3
weak_function
Packit 709fb3
# endif
Packit 709fb3
re_comp (const char *s)
Packit 709fb3
{
Packit 709fb3
  reg_errcode_t ret;
Packit 709fb3
  char *fastmap;
Packit 709fb3
Packit 709fb3
  if (!s)
Packit 709fb3
    {
Packit 709fb3
      if (!re_comp_buf.buffer)
Packit 709fb3
	return gettext ("No previous regular expression");
Packit 709fb3
      return 0;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  if (re_comp_buf.buffer)
Packit 709fb3
    {
Packit 709fb3
      fastmap = re_comp_buf.fastmap;
Packit 709fb3
      re_comp_buf.fastmap = NULL;
Packit 709fb3
      __regfree (&re_comp_buf);
Packit 709fb3
      memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
Packit 709fb3
      re_comp_buf.fastmap = fastmap;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  if (re_comp_buf.fastmap == NULL)
Packit 709fb3
    {
Packit 709fb3
      re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
Packit 709fb3
      if (re_comp_buf.fastmap == NULL)
Packit 709fb3
	return (char *) gettext (__re_error_msgid
Packit 709fb3
				 + __re_error_msgid_idx[(int) REG_ESPACE]);
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Since 're_exec' always passes NULL for the 'regs' argument, we
Packit 709fb3
     don't need to initialize the pattern buffer fields which affect it.  */
Packit 709fb3
Packit 709fb3
  /* Match anchors at newlines.  */
Packit 709fb3
  re_comp_buf.newline_anchor = 1;
Packit 709fb3
Packit 709fb3
  ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
Packit 709fb3
Packit 709fb3
  if (!ret)
Packit 709fb3
    return NULL;
Packit 709fb3
Packit 709fb3
  /* Yes, we're discarding 'const' here if !HAVE_LIBINTL.  */
Packit 709fb3
  return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
libc_freeres_fn (free_mem)
Packit 709fb3
{
Packit 709fb3
  __regfree (&re_comp_buf);
Packit 709fb3
}
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
#endif /* _REGEX_RE_COMP */
Packit 709fb3

Packit 709fb3
/* Internal entry point.
Packit 709fb3
   Compile the regular expression PATTERN, whose length is LENGTH.
Packit 709fb3
   SYNTAX indicate regular expression's syntax.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
re_compile_internal (regex_t *preg, const char * pattern, size_t length,
Packit 709fb3
		     reg_syntax_t syntax)
Packit 709fb3
{
Packit 709fb3
  reg_errcode_t err = REG_NOERROR;
Packit 709fb3
  re_dfa_t *dfa;
Packit 709fb3
  re_string_t regexp;
Packit 709fb3
Packit 709fb3
  /* Initialize the pattern buffer.  */
Packit 709fb3
  preg->fastmap_accurate = 0;
Packit 709fb3
  preg->syntax = syntax;
Packit 709fb3
  preg->not_bol = preg->not_eol = 0;
Packit 709fb3
  preg->used = 0;
Packit 709fb3
  preg->re_nsub = 0;
Packit 709fb3
  preg->can_be_null = 0;
Packit 709fb3
  preg->regs_allocated = REGS_UNALLOCATED;
Packit 709fb3
Packit 709fb3
  /* Initialize the dfa.  */
Packit 709fb3
  dfa = preg->buffer;
Packit 709fb3
  if (BE (preg->allocated < sizeof (re_dfa_t), 0))
Packit 709fb3
    {
Packit 709fb3
      /* If zero allocated, but buffer is non-null, try to realloc
Packit 709fb3
	 enough space.  This loses if buffer's address is bogus, but
Packit 709fb3
	 that is the user's responsibility.  If ->buffer is NULL this
Packit 709fb3
	 is a simple allocation.  */
Packit 709fb3
      dfa = re_realloc (preg->buffer, re_dfa_t, 1);
Packit 709fb3
      if (dfa == NULL)
Packit 709fb3
	return REG_ESPACE;
Packit 709fb3
      preg->allocated = sizeof (re_dfa_t);
Packit 709fb3
      preg->buffer = dfa;
Packit 709fb3
    }
Packit 709fb3
  preg->used = sizeof (re_dfa_t);
Packit 709fb3
Packit 709fb3
  err = init_dfa (dfa, length);
Packit 709fb3
  if (BE (err == REG_NOERROR && lock_init (dfa->lock) != 0, 0))
Packit 709fb3
    err = REG_ESPACE;
Packit 709fb3
  if (BE (err != REG_NOERROR, 0))
Packit 709fb3
    {
Packit 709fb3
      free_dfa_content (dfa);
Packit 709fb3
      preg->buffer = NULL;
Packit 709fb3
      preg->allocated = 0;
Packit 709fb3
      return err;
Packit 709fb3
    }
Packit 709fb3
#ifdef DEBUG
Packit 709fb3
  /* Note: length+1 will not overflow since it is checked in init_dfa.  */
Packit 709fb3
  dfa->re_str = re_malloc (char, length + 1);
Packit 709fb3
  strncpy (dfa->re_str, pattern, length + 1);
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
  err = re_string_construct (&regexp, pattern, length, preg->translate,
Packit 709fb3
			     (syntax & RE_ICASE) != 0, dfa);
Packit 709fb3
  if (BE (err != REG_NOERROR, 0))
Packit 709fb3
    {
Packit 709fb3
    re_compile_internal_free_return:
Packit 709fb3
      free_workarea_compile (preg);
Packit 709fb3
      re_string_destruct (&regexp);
Packit 709fb3
      lock_fini (dfa->lock);
Packit 709fb3
      free_dfa_content (dfa);
Packit 709fb3
      preg->buffer = NULL;
Packit 709fb3
      preg->allocated = 0;
Packit 709fb3
      return err;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Parse the regular expression, and build a structure tree.  */
Packit 709fb3
  preg->re_nsub = 0;
Packit 709fb3
  dfa->str_tree = parse (&regexp, preg, syntax, &err;;
Packit 709fb3
  if (BE (dfa->str_tree == NULL, 0))
Packit 709fb3
    goto re_compile_internal_free_return;
Packit 709fb3
Packit 709fb3
  /* Analyze the tree and create the nfa.  */
Packit 709fb3
  err = analyze (preg);
Packit 709fb3
  if (BE (err != REG_NOERROR, 0))
Packit 709fb3
    goto re_compile_internal_free_return;
Packit 709fb3
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  /* If possible, do searching in single byte encoding to speed things up.  */
Packit 709fb3
  if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
Packit 709fb3
    optimize_utf8 (dfa);
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
  /* Then create the initial state of the dfa.  */
Packit 709fb3
  err = create_initial_state (dfa);
Packit 709fb3
Packit 709fb3
  /* Release work areas.  */
Packit 709fb3
  free_workarea_compile (preg);
Packit 709fb3
  re_string_destruct (&regexp);
Packit 709fb3
Packit 709fb3
  if (BE (err != REG_NOERROR, 0))
Packit 709fb3
    {
Packit 709fb3
      lock_fini (dfa->lock);
Packit 709fb3
      free_dfa_content (dfa);
Packit 709fb3
      preg->buffer = NULL;
Packit 709fb3
      preg->allocated = 0;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  return err;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Initialize DFA.  We use the length of the regular expression PAT_LEN
Packit 709fb3
   as the initial length of some arrays.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
init_dfa (re_dfa_t *dfa, size_t pat_len)
Packit 709fb3
{
Packit 709fb3
  __re_size_t table_size;
Packit 709fb3
#ifndef _LIBC
Packit 709fb3
  const char *codeset_name;
Packit 709fb3
#endif
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
Packit 709fb3
#else
Packit 709fb3
  size_t max_i18n_object_size = 0;
Packit 709fb3
#endif
Packit 709fb3
  size_t max_object_size =
Packit 709fb3
    MAX (sizeof (struct re_state_table_entry),
Packit 709fb3
	 MAX (sizeof (re_token_t),
Packit 709fb3
	      MAX (sizeof (re_node_set),
Packit 709fb3
		   MAX (sizeof (regmatch_t),
Packit 709fb3
			max_i18n_object_size))));
Packit 709fb3
Packit 709fb3
  memset (dfa, '\0', sizeof (re_dfa_t));
Packit 709fb3
Packit 709fb3
  /* Force allocation of str_tree_storage the first time.  */
Packit 709fb3
  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
Packit 709fb3
Packit 709fb3
  /* Avoid overflows.  The extra "/ 2" is for the table_size doubling
Packit 709fb3
     calculation below, and for similar doubling calculations
Packit 709fb3
     elsewhere.  And it's <= rather than <, because some of the
Packit 709fb3
     doubling calculations add 1 afterwards.  */
Packit 709fb3
  if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) / 2 <= pat_len, 0))
Packit 709fb3
    return REG_ESPACE;
Packit 709fb3
Packit 709fb3
  dfa->nodes_alloc = pat_len + 1;
Packit 709fb3
  dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
Packit 709fb3
Packit 709fb3
  /*  table_size = 2 ^ ceil(log pat_len) */
Packit 709fb3
  for (table_size = 1; ; table_size <<= 1)
Packit 709fb3
    if (table_size > pat_len)
Packit 709fb3
      break;
Packit 709fb3
Packit 709fb3
  dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
Packit 709fb3
  dfa->state_hash_mask = table_size - 1;
Packit 709fb3
Packit 709fb3
  dfa->mb_cur_max = MB_CUR_MAX;
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
  if (dfa->mb_cur_max == 6
Packit 709fb3
      && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
Packit 709fb3
    dfa->is_utf8 = 1;
Packit 709fb3
  dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
Packit 709fb3
		       != 0);
Packit 709fb3
#else
Packit 709fb3
  codeset_name = nl_langinfo (CODESET);
Packit 709fb3
  if ((codeset_name[0] == 'U' || codeset_name[0] == 'u')
Packit 709fb3
      && (codeset_name[1] == 'T' || codeset_name[1] == 't')
Packit 709fb3
      && (codeset_name[2] == 'F' || codeset_name[2] == 'f')
Packit 709fb3
      && strcmp (codeset_name + 3 + (codeset_name[3] == '-'), "8") == 0)
Packit 709fb3
    dfa->is_utf8 = 1;
Packit 709fb3
Packit 709fb3
  /* We check exhaustively in the loop below if this charset is a
Packit 709fb3
     superset of ASCII.  */
Packit 709fb3
  dfa->map_notascii = 0;
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  if (dfa->mb_cur_max > 1)
Packit 709fb3
    {
Packit 709fb3
      if (dfa->is_utf8)
Packit 709fb3
	dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
Packit 709fb3
      else
Packit 709fb3
	{
Packit 709fb3
	  int i, j, ch;
Packit 709fb3
Packit 709fb3
	  dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
Packit 709fb3
	  if (BE (dfa->sb_char == NULL, 0))
Packit 709fb3
	    return REG_ESPACE;
Packit 709fb3
Packit 709fb3
	  /* Set the bits corresponding to single byte chars.  */
Packit 709fb3
	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
Packit 709fb3
	    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
Packit 709fb3
	      {
Packit 709fb3
		wint_t wch = __btowc (ch);
Packit 709fb3
		if (wch != WEOF)
Packit 709fb3
		  dfa->sb_char[i] |= (bitset_word_t) 1 << j;
Packit 709fb3
# ifndef _LIBC
Packit 709fb3
		if (isascii (ch) && wch != ch)
Packit 709fb3
		  dfa->map_notascii = 1;
Packit 709fb3
# endif
Packit 709fb3
	      }
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
  if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
Packit 709fb3
    return REG_ESPACE;
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Initialize WORD_CHAR table, which indicate which character is
Packit 709fb3
   "word".  In this case "word" means that it is the word construction
Packit 709fb3
   character used by some operators like "\<", "\>", etc.  */
Packit 709fb3
Packit 709fb3
static void
Packit 709fb3
internal_function
Packit 709fb3
init_word_char (re_dfa_t *dfa)
Packit 709fb3
{
Packit 709fb3
  int i = 0;
Packit 709fb3
  int j;
Packit 709fb3
  int ch = 0;
Packit 709fb3
  dfa->word_ops_used = 1;
Packit 709fb3
  if (BE (dfa->map_notascii == 0, 1))
Packit 709fb3
    {
Packit 709fb3
      bitset_word_t bits0 = 0x00000000;
Packit 709fb3
      bitset_word_t bits1 = 0x03ff0000;
Packit 709fb3
      bitset_word_t bits2 = 0x87fffffe;
Packit 709fb3
      bitset_word_t bits3 = 0x07fffffe;
Packit 709fb3
      if (BITSET_WORD_BITS == 64)
Packit 709fb3
	{
Packit 709fb3
	  dfa->word_char[0] = bits1 << 31 << 1 | bits0;
Packit 709fb3
	  dfa->word_char[1] = bits3 << 31 << 1 | bits2;
Packit 709fb3
	  i = 2;
Packit 709fb3
	}
Packit 709fb3
      else if (BITSET_WORD_BITS == 32)
Packit 709fb3
	{
Packit 709fb3
	  dfa->word_char[0] = bits0;
Packit 709fb3
	  dfa->word_char[1] = bits1;
Packit 709fb3
	  dfa->word_char[2] = bits2;
Packit 709fb3
	  dfa->word_char[3] = bits3;
Packit 709fb3
	  i = 4;
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
        goto general_case;
Packit 709fb3
      ch = 128;
Packit 709fb3
Packit 709fb3
      if (BE (dfa->is_utf8, 1))
Packit 709fb3
	{
Packit 709fb3
	  memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8);
Packit 709fb3
	  return;
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
 general_case:
Packit 709fb3
  for (; i < BITSET_WORDS; ++i)
Packit 709fb3
    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
Packit 709fb3
      if (isalnum (ch) || ch == '_')
Packit 709fb3
	dfa->word_char[i] |= (bitset_word_t) 1 << j;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Free the work area which are only used while compiling.  */
Packit 709fb3
Packit 709fb3
static void
Packit 709fb3
free_workarea_compile (regex_t *preg)
Packit 709fb3
{
Packit 709fb3
  re_dfa_t *dfa = preg->buffer;
Packit 709fb3
  bin_tree_storage_t *storage, *next;
Packit 709fb3
  for (storage = dfa->str_tree_storage; storage; storage = next)
Packit 709fb3
    {
Packit 709fb3
      next = storage->next;
Packit 709fb3
      re_free (storage);
Packit 709fb3
    }
Packit 709fb3
  dfa->str_tree_storage = NULL;
Packit 709fb3
  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
Packit 709fb3
  dfa->str_tree = NULL;
Packit 709fb3
  re_free (dfa->org_indices);
Packit 709fb3
  dfa->org_indices = NULL;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Create initial states for all contexts.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
create_initial_state (re_dfa_t *dfa)
Packit 709fb3
{
Packit 709fb3
  Idx first, i;
Packit 709fb3
  reg_errcode_t err;
Packit 709fb3
  re_node_set init_nodes;
Packit 709fb3
Packit 709fb3
  /* Initial states have the epsilon closure of the node which is
Packit 709fb3
     the first node of the regular expression.  */
Packit 709fb3
  first = dfa->str_tree->first->node_idx;
Packit 709fb3
  dfa->init_node = first;
Packit 709fb3
  err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
Packit 709fb3
  if (BE (err != REG_NOERROR, 0))
Packit 709fb3
    return err;
Packit 709fb3
Packit 709fb3
  /* The back-references which are in initial states can epsilon transit,
Packit 709fb3
     since in this case all of the subexpressions can be null.
Packit 709fb3
     Then we add epsilon closures of the nodes which are the next nodes of
Packit 709fb3
     the back-references.  */
Packit 709fb3
  if (dfa->nbackref > 0)
Packit 709fb3
    for (i = 0; i < init_nodes.nelem; ++i)
Packit 709fb3
      {
Packit 709fb3
	Idx node_idx = init_nodes.elems[i];
Packit 709fb3
	re_token_type_t type = dfa->nodes[node_idx].type;
Packit 709fb3
Packit 709fb3
	Idx clexp_idx;
Packit 709fb3
	if (type != OP_BACK_REF)
Packit 709fb3
	  continue;
Packit 709fb3
	for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
Packit 709fb3
	  {
Packit 709fb3
	    re_token_t *clexp_node;
Packit 709fb3
	    clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
Packit 709fb3
	    if (clexp_node->type == OP_CLOSE_SUBEXP
Packit 709fb3
		&& clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
Packit 709fb3
	      break;
Packit 709fb3
	  }
Packit 709fb3
	if (clexp_idx == init_nodes.nelem)
Packit 709fb3
	  continue;
Packit 709fb3
Packit 709fb3
	if (type == OP_BACK_REF)
Packit 709fb3
	  {
Packit 709fb3
	    Idx dest_idx = dfa->edests[node_idx].elems[0];
Packit 709fb3
	    if (!re_node_set_contains (&init_nodes, dest_idx))
Packit 709fb3
	      {
Packit 709fb3
		reg_errcode_t merge_err
Packit 709fb3
                  = re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
Packit 709fb3
		if (merge_err != REG_NOERROR)
Packit 709fb3
		  return merge_err;
Packit 709fb3
		i = 0;
Packit 709fb3
	      }
Packit 709fb3
	  }
Packit 709fb3
      }
Packit 709fb3
Packit 709fb3
  /* It must be the first time to invoke acquire_state.  */
Packit 709fb3
  dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
Packit 709fb3
  /* We don't check ERR here, since the initial state must not be NULL.  */
Packit 709fb3
  if (BE (dfa->init_state == NULL, 0))
Packit 709fb3
    return err;
Packit 709fb3
  if (dfa->init_state->has_constraint)
Packit 709fb3
    {
Packit 709fb3
      dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
Packit 709fb3
						       CONTEXT_WORD);
Packit 709fb3
      dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
Packit 709fb3
						     CONTEXT_NEWLINE);
Packit 709fb3
      dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
Packit 709fb3
							 &init_nodes,
Packit 709fb3
							 CONTEXT_NEWLINE
Packit 709fb3
							 | CONTEXT_BEGBUF);
Packit 709fb3
      if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
Packit 709fb3
	      || dfa->init_state_begbuf == NULL, 0))
Packit 709fb3
	return err;
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
    dfa->init_state_word = dfa->init_state_nl
Packit 709fb3
      = dfa->init_state_begbuf = dfa->init_state;
Packit 709fb3
Packit 709fb3
  re_node_set_free (&init_nodes);
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3

Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
/* If it is possible to do searching in single byte encoding instead of UTF-8
Packit 709fb3
   to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
Packit 709fb3
   DFA nodes where needed.  */
Packit 709fb3
Packit 709fb3
static void
Packit 709fb3
optimize_utf8 (re_dfa_t *dfa)
Packit 709fb3
{
Packit 709fb3
  Idx node;
Packit 709fb3
  int i;
Packit 709fb3
  bool mb_chars = false;
Packit 709fb3
  bool has_period = false;
Packit 709fb3
Packit 709fb3
  for (node = 0; node < dfa->nodes_len; ++node)
Packit 709fb3
    switch (dfa->nodes[node].type)
Packit 709fb3
      {
Packit 709fb3
      case CHARACTER:
Packit 709fb3
	if (dfa->nodes[node].opr.c >= ASCII_CHARS)
Packit 709fb3
	  mb_chars = true;
Packit 709fb3
	break;
Packit 709fb3
      case ANCHOR:
Packit 709fb3
	switch (dfa->nodes[node].opr.ctx_type)
Packit 709fb3
	  {
Packit 709fb3
	  case LINE_FIRST:
Packit 709fb3
	  case LINE_LAST:
Packit 709fb3
	  case BUF_FIRST:
Packit 709fb3
	  case BUF_LAST:
Packit 709fb3
	    break;
Packit 709fb3
	  default:
Packit 709fb3
	    /* Word anchors etc. cannot be handled.  It's okay to test
Packit 709fb3
	       opr.ctx_type since constraints (for all DFA nodes) are
Packit 709fb3
	       created by ORing one or more opr.ctx_type values.  */
Packit 709fb3
	    return;
Packit 709fb3
	  }
Packit 709fb3
	break;
Packit 709fb3
      case OP_PERIOD:
Packit 709fb3
	has_period = true;
Packit 709fb3
	break;
Packit 709fb3
      case OP_BACK_REF:
Packit 709fb3
      case OP_ALT:
Packit 709fb3
      case END_OF_RE:
Packit 709fb3
      case OP_DUP_ASTERISK:
Packit 709fb3
      case OP_OPEN_SUBEXP:
Packit 709fb3
      case OP_CLOSE_SUBEXP:
Packit 709fb3
	break;
Packit 709fb3
      case COMPLEX_BRACKET:
Packit 709fb3
	return;
Packit 709fb3
      case SIMPLE_BRACKET:
Packit 709fb3
	/* Just double check.  */
Packit 709fb3
	{
Packit 709fb3
	  int rshift = (ASCII_CHARS % BITSET_WORD_BITS == 0
Packit 709fb3
			? 0
Packit 709fb3
			: BITSET_WORD_BITS - ASCII_CHARS % BITSET_WORD_BITS);
Packit 709fb3
	  for (i = ASCII_CHARS / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
Packit 709fb3
	    {
Packit 709fb3
	      if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0)
Packit 709fb3
		return;
Packit 709fb3
	      rshift = 0;
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
	break;
Packit 709fb3
      default:
Packit 709fb3
	abort ();
Packit 709fb3
      }
Packit 709fb3
Packit 709fb3
  if (mb_chars || has_period)
Packit 709fb3
    for (node = 0; node < dfa->nodes_len; ++node)
Packit 709fb3
      {
Packit 709fb3
	if (dfa->nodes[node].type == CHARACTER
Packit 709fb3
	    && dfa->nodes[node].opr.c >= ASCII_CHARS)
Packit 709fb3
	  dfa->nodes[node].mb_partial = 0;
Packit 709fb3
	else if (dfa->nodes[node].type == OP_PERIOD)
Packit 709fb3
	  dfa->nodes[node].type = OP_UTF8_PERIOD;
Packit 709fb3
      }
Packit 709fb3
Packit 709fb3
  /* The search can be in single byte locale.  */
Packit 709fb3
  dfa->mb_cur_max = 1;
Packit 709fb3
  dfa->is_utf8 = 0;
Packit 709fb3
  dfa->has_mb_node = dfa->nbackref > 0 || has_period;
Packit 709fb3
}
Packit 709fb3
#endif
Packit 709fb3

Packit 709fb3
/* Analyze the structure tree, and calculate "first", "next", "edest",
Packit 709fb3
   "eclosure", and "inveclosure".  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
analyze (regex_t *preg)
Packit 709fb3
{
Packit 709fb3
  re_dfa_t *dfa = preg->buffer;
Packit 709fb3
  reg_errcode_t ret;
Packit 709fb3
Packit 709fb3
  /* Allocate arrays.  */
Packit 709fb3
  dfa->nexts = re_malloc (Idx, dfa->nodes_alloc);
Packit 709fb3
  dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc);
Packit 709fb3
  dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
Packit 709fb3
  dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
Packit 709fb3
  if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
Packit 709fb3
	  || dfa->eclosures == NULL, 0))
Packit 709fb3
    return REG_ESPACE;
Packit 709fb3
Packit 709fb3
  dfa->subexp_map = re_malloc (Idx, preg->re_nsub);
Packit 709fb3
  if (dfa->subexp_map != NULL)
Packit 709fb3
    {
Packit 709fb3
      Idx i;
Packit 709fb3
      for (i = 0; i < preg->re_nsub; i++)
Packit 709fb3
	dfa->subexp_map[i] = i;
Packit 709fb3
      preorder (dfa->str_tree, optimize_subexps, dfa);
Packit 709fb3
      for (i = 0; i < preg->re_nsub; i++)
Packit 709fb3
	if (dfa->subexp_map[i] != i)
Packit 709fb3
	  break;
Packit 709fb3
      if (i == preg->re_nsub)
Packit 709fb3
	{
Packit 709fb3
	  free (dfa->subexp_map);
Packit 709fb3
	  dfa->subexp_map = NULL;
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  ret = postorder (dfa->str_tree, lower_subexps, preg);
Packit 709fb3
  if (BE (ret != REG_NOERROR, 0))
Packit 709fb3
    return ret;
Packit 709fb3
  ret = postorder (dfa->str_tree, calc_first, dfa);
Packit 709fb3
  if (BE (ret != REG_NOERROR, 0))
Packit 709fb3
    return ret;
Packit 709fb3
  preorder (dfa->str_tree, calc_next, dfa);
Packit 709fb3
  ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
Packit 709fb3
  if (BE (ret != REG_NOERROR, 0))
Packit 709fb3
    return ret;
Packit 709fb3
  ret = calc_eclosure (dfa);
Packit 709fb3
  if (BE (ret != REG_NOERROR, 0))
Packit 709fb3
    return ret;
Packit 709fb3
Packit 709fb3
  /* We only need this during the prune_impossible_nodes pass in regexec.c;
Packit 709fb3
     skip it if p_i_n will not run, as calc_inveclosure can be quadratic.  */
Packit 709fb3
  if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
Packit 709fb3
      || dfa->nbackref)
Packit 709fb3
    {
Packit 709fb3
      dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
Packit 709fb3
      if (BE (dfa->inveclosures == NULL, 0))
Packit 709fb3
	return REG_ESPACE;
Packit 709fb3
      ret = calc_inveclosure (dfa);
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  return ret;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Our parse trees are very unbalanced, so we cannot use a stack to
Packit 709fb3
   implement parse tree visits.  Instead, we use parent pointers and
Packit 709fb3
   some hairy code in these two functions.  */
Packit 709fb3
static reg_errcode_t
Packit 709fb3
postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
Packit 709fb3
	   void *extra)
Packit 709fb3
{
Packit 709fb3
  bin_tree_t *node, *prev;
Packit 709fb3
Packit 709fb3
  for (node = root; ; )
Packit 709fb3
    {
Packit 709fb3
      /* Descend down the tree, preferably to the left (or to the right
Packit 709fb3
	 if that's the only child).  */
Packit 709fb3
      while (node->left || node->right)
Packit 709fb3
	if (node->left)
Packit 709fb3
	  node = node->left;
Packit 709fb3
	else
Packit 709fb3
	  node = node->right;
Packit 709fb3
Packit 709fb3
      do
Packit 709fb3
	{
Packit 709fb3
	  reg_errcode_t err = fn (extra, node);
Packit 709fb3
	  if (BE (err != REG_NOERROR, 0))
Packit 709fb3
	    return err;
Packit 709fb3
	  if (node->parent == NULL)
Packit 709fb3
	    return REG_NOERROR;
Packit 709fb3
	  prev = node;
Packit 709fb3
	  node = node->parent;
Packit 709fb3
	}
Packit 709fb3
      /* Go up while we have a node that is reached from the right.  */
Packit 709fb3
      while (node->right == prev || node->right == NULL);
Packit 709fb3
      node = node->right;
Packit 709fb3
    }
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
Packit 709fb3
	  void *extra)
Packit 709fb3
{
Packit 709fb3
  bin_tree_t *node;
Packit 709fb3
Packit 709fb3
  for (node = root; ; )
Packit 709fb3
    {
Packit 709fb3
      reg_errcode_t err = fn (extra, node);
Packit 709fb3
      if (BE (err != REG_NOERROR, 0))
Packit 709fb3
	return err;
Packit 709fb3
Packit 709fb3
      /* Go to the left node, or up and to the right.  */
Packit 709fb3
      if (node->left)
Packit 709fb3
	node = node->left;
Packit 709fb3
      else
Packit 709fb3
	{
Packit 709fb3
	  bin_tree_t *prev = NULL;
Packit 709fb3
	  while (node->right == prev || node->right == NULL)
Packit 709fb3
	    {
Packit 709fb3
	      prev = node;
Packit 709fb3
	      node = node->parent;
Packit 709fb3
	      if (!node)
Packit 709fb3
		return REG_NOERROR;
Packit 709fb3
	    }
Packit 709fb3
	  node = node->right;
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
Packit 709fb3
   re_search_internal to map the inner one's opr.idx to this one's.  Adjust
Packit 709fb3
   backreferences as well.  Requires a preorder visit.  */
Packit 709fb3
static reg_errcode_t
Packit 709fb3
optimize_subexps (void *extra, bin_tree_t *node)
Packit 709fb3
{
Packit 709fb3
  re_dfa_t *dfa = (re_dfa_t *) extra;
Packit 709fb3
Packit 709fb3
  if (node->token.type == OP_BACK_REF && dfa->subexp_map)
Packit 709fb3
    {
Packit 709fb3
      int idx = node->token.opr.idx;
Packit 709fb3
      node->token.opr.idx = dfa->subexp_map[idx];
Packit 709fb3
      dfa->used_bkref_map |= 1 << node->token.opr.idx;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  else if (node->token.type == SUBEXP
Packit 709fb3
	   && node->left && node->left->token.type == SUBEXP)
Packit 709fb3
    {
Packit 709fb3
      Idx other_idx = node->left->token.opr.idx;
Packit 709fb3
Packit 709fb3
      node->left = node->left->left;
Packit 709fb3
      if (node->left)
Packit 709fb3
	node->left->parent = node;
Packit 709fb3
Packit 709fb3
      dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
Packit 709fb3
      if (other_idx < BITSET_WORD_BITS)
Packit 709fb3
	dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
Packit 709fb3
   of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP.  */
Packit 709fb3
static reg_errcode_t
Packit 709fb3
lower_subexps (void *extra, bin_tree_t *node)
Packit 709fb3
{
Packit 709fb3
  regex_t *preg = (regex_t *) extra;
Packit 709fb3
  reg_errcode_t err = REG_NOERROR;
Packit 709fb3
Packit 709fb3
  if (node->left && node->left->token.type == SUBEXP)
Packit 709fb3
    {
Packit 709fb3
      node->left = lower_subexp (&err, preg, node->left);
Packit 709fb3
      if (node->left)
Packit 709fb3
	node->left->parent = node;
Packit 709fb3
    }
Packit 709fb3
  if (node->right && node->right->token.type == SUBEXP)
Packit 709fb3
    {
Packit 709fb3
      node->right = lower_subexp (&err, preg, node->right);
Packit 709fb3
      if (node->right)
Packit 709fb3
	node->right->parent = node;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  return err;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static bin_tree_t *
Packit 709fb3
lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
Packit 709fb3
{
Packit 709fb3
  re_dfa_t *dfa = preg->buffer;
Packit 709fb3
  bin_tree_t *body = node->left;
Packit 709fb3
  bin_tree_t *op, *cls, *tree1, *tree;
Packit 709fb3
Packit 709fb3
  if (preg->no_sub
Packit 709fb3
      /* We do not optimize empty subexpressions, because otherwise we may
Packit 709fb3
	 have bad CONCAT nodes with NULL children.  This is obviously not
Packit 709fb3
	 very common, so we do not lose much.  An example that triggers
Packit 709fb3
	 this case is the sed "script" /\(\)/x.  */
Packit 709fb3
      && node->left != NULL
Packit 709fb3
      && (node->token.opr.idx >= BITSET_WORD_BITS
Packit 709fb3
	  || !(dfa->used_bkref_map
Packit 709fb3
	       & ((bitset_word_t) 1 << node->token.opr.idx))))
Packit 709fb3
    return node->left;
Packit 709fb3
Packit 709fb3
  /* Convert the SUBEXP node to the concatenation of an
Packit 709fb3
     OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP.  */
Packit 709fb3
  op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
Packit 709fb3
  cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
Packit 709fb3
  tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
Packit 709fb3
  tree = create_tree (dfa, op, tree1, CONCAT);
Packit 709fb3
  if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
Packit 709fb3
    {
Packit 709fb3
      *err = REG_ESPACE;
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
Packit 709fb3
  op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
Packit 709fb3
  return tree;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
Packit 709fb3
   nodes.  Requires a postorder visit.  */
Packit 709fb3
static reg_errcode_t
Packit 709fb3
calc_first (void *extra, bin_tree_t *node)
Packit 709fb3
{
Packit 709fb3
  re_dfa_t *dfa = (re_dfa_t *) extra;
Packit 709fb3
  if (node->token.type == CONCAT)
Packit 709fb3
    {
Packit 709fb3
      node->first = node->left->first;
Packit 709fb3
      node->node_idx = node->left->node_idx;
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      node->first = node;
Packit 709fb3
      node->node_idx = re_dfa_add_node (dfa, node->token);
Packit 709fb3
      if (BE (node->node_idx == -1, 0))
Packit 709fb3
	return REG_ESPACE;
Packit 709fb3
      if (node->token.type == ANCHOR)
Packit 709fb3
	dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
Packit 709fb3
    }
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Pass 2: compute NEXT on the tree.  Preorder visit.  */
Packit 709fb3
static reg_errcode_t
Packit 709fb3
calc_next (void *extra, bin_tree_t *node)
Packit 709fb3
{
Packit 709fb3
  switch (node->token.type)
Packit 709fb3
    {
Packit 709fb3
    case OP_DUP_ASTERISK:
Packit 709fb3
      node->left->next = node;
Packit 709fb3
      break;
Packit 709fb3
    case CONCAT:
Packit 709fb3
      node->left->next = node->right->first;
Packit 709fb3
      node->right->next = node->next;
Packit 709fb3
      break;
Packit 709fb3
    default:
Packit 709fb3
      if (node->left)
Packit 709fb3
	node->left->next = node->next;
Packit 709fb3
      if (node->right)
Packit 709fb3
	node->right->next = node->next;
Packit 709fb3
      break;
Packit 709fb3
    }
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Pass 3: link all DFA nodes to their NEXT node (any order will do).  */
Packit 709fb3
static reg_errcode_t
Packit 709fb3
link_nfa_nodes (void *extra, bin_tree_t *node)
Packit 709fb3
{
Packit 709fb3
  re_dfa_t *dfa = (re_dfa_t *) extra;
Packit 709fb3
  Idx idx = node->node_idx;
Packit 709fb3
  reg_errcode_t err = REG_NOERROR;
Packit 709fb3
Packit 709fb3
  switch (node->token.type)
Packit 709fb3
    {
Packit 709fb3
    case CONCAT:
Packit 709fb3
      break;
Packit 709fb3
Packit 709fb3
    case END_OF_RE:
Packit 709fb3
      assert (node->next == NULL);
Packit 709fb3
      break;
Packit 709fb3
Packit 709fb3
    case OP_DUP_ASTERISK:
Packit 709fb3
    case OP_ALT:
Packit 709fb3
      {
Packit 709fb3
	Idx left, right;
Packit 709fb3
	dfa->has_plural_match = 1;
Packit 709fb3
	if (node->left != NULL)
Packit 709fb3
	  left = node->left->first->node_idx;
Packit 709fb3
	else
Packit 709fb3
	  left = node->next->node_idx;
Packit 709fb3
	if (node->right != NULL)
Packit 709fb3
	  right = node->right->first->node_idx;
Packit 709fb3
	else
Packit 709fb3
	  right = node->next->node_idx;
Packit 709fb3
	assert (left > -1);
Packit 709fb3
	assert (right > -1);
Packit 709fb3
	err = re_node_set_init_2 (dfa->edests + idx, left, right);
Packit 709fb3
      }
Packit 709fb3
      break;
Packit 709fb3
Packit 709fb3
    case ANCHOR:
Packit 709fb3
    case OP_OPEN_SUBEXP:
Packit 709fb3
    case OP_CLOSE_SUBEXP:
Packit 709fb3
      err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
Packit 709fb3
      break;
Packit 709fb3
Packit 709fb3
    case OP_BACK_REF:
Packit 709fb3
      dfa->nexts[idx] = node->next->node_idx;
Packit 709fb3
      if (node->token.type == OP_BACK_REF)
Packit 709fb3
	err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
Packit 709fb3
      break;
Packit 709fb3
Packit 709fb3
    default:
Packit 709fb3
      assert (!IS_EPSILON_NODE (node->token.type));
Packit 709fb3
      dfa->nexts[idx] = node->next->node_idx;
Packit 709fb3
      break;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  return err;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Duplicate the epsilon closure of the node ROOT_NODE.
Packit 709fb3
   Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
Packit 709fb3
   to their own constraint.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
internal_function
Packit 709fb3
duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
Packit 709fb3
			Idx root_node, unsigned int init_constraint)
Packit 709fb3
{
Packit 709fb3
  Idx org_node, clone_node;
Packit 709fb3
  bool ok;
Packit 709fb3
  unsigned int constraint = init_constraint;
Packit 709fb3
  for (org_node = top_org_node, clone_node = top_clone_node;;)
Packit 709fb3
    {
Packit 709fb3
      Idx org_dest, clone_dest;
Packit 709fb3
      if (dfa->nodes[org_node].type == OP_BACK_REF)
Packit 709fb3
	{
Packit 709fb3
	  /* If the back reference epsilon-transit, its destination must
Packit 709fb3
	     also have the constraint.  Then duplicate the epsilon closure
Packit 709fb3
	     of the destination of the back reference, and store it in
Packit 709fb3
	     edests of the back reference.  */
Packit 709fb3
	  org_dest = dfa->nexts[org_node];
Packit 709fb3
	  re_node_set_empty (dfa->edests + clone_node);
Packit 709fb3
	  clone_dest = duplicate_node (dfa, org_dest, constraint);
Packit 709fb3
	  if (BE (clone_dest == -1, 0))
Packit 709fb3
	    return REG_ESPACE;
Packit 709fb3
	  dfa->nexts[clone_node] = dfa->nexts[org_node];
Packit 709fb3
	  ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
Packit 709fb3
	  if (BE (! ok, 0))
Packit 709fb3
	    return REG_ESPACE;
Packit 709fb3
	}
Packit 709fb3
      else if (dfa->edests[org_node].nelem == 0)
Packit 709fb3
	{
Packit 709fb3
	  /* In case of the node can't epsilon-transit, don't duplicate the
Packit 709fb3
	     destination and store the original destination as the
Packit 709fb3
	     destination of the node.  */
Packit 709fb3
	  dfa->nexts[clone_node] = dfa->nexts[org_node];
Packit 709fb3
	  break;
Packit 709fb3
	}
Packit 709fb3
      else if (dfa->edests[org_node].nelem == 1)
Packit 709fb3
	{
Packit 709fb3
	  /* In case of the node can epsilon-transit, and it has only one
Packit 709fb3
	     destination.  */
Packit 709fb3
	  org_dest = dfa->edests[org_node].elems[0];
Packit 709fb3
	  re_node_set_empty (dfa->edests + clone_node);
Packit 709fb3
	  /* If the node is root_node itself, it means the epsilon closure
Packit 709fb3
	     has a loop.  Then tie it to the destination of the root_node.  */
Packit 709fb3
	  if (org_node == root_node && clone_node != org_node)
Packit 709fb3
	    {
Packit 709fb3
	      ok = re_node_set_insert (dfa->edests + clone_node, org_dest);
Packit 709fb3
	      if (BE (! ok, 0))
Packit 709fb3
	        return REG_ESPACE;
Packit 709fb3
	      break;
Packit 709fb3
	    }
Packit 709fb3
	  /* In case the node has another constraint, append it.  */
Packit 709fb3
	  constraint |= dfa->nodes[org_node].constraint;
Packit 709fb3
	  clone_dest = duplicate_node (dfa, org_dest, constraint);
Packit 709fb3
	  if (BE (clone_dest == -1, 0))
Packit 709fb3
	    return REG_ESPACE;
Packit 709fb3
	  ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
Packit 709fb3
	  if (BE (! ok, 0))
Packit 709fb3
	    return REG_ESPACE;
Packit 709fb3
	}
Packit 709fb3
      else /* dfa->edests[org_node].nelem == 2 */
Packit 709fb3
	{
Packit 709fb3
	  /* In case of the node can epsilon-transit, and it has two
Packit 709fb3
	     destinations. In the bin_tree_t and DFA, that's '|' and '*'.   */
Packit 709fb3
	  org_dest = dfa->edests[org_node].elems[0];
Packit 709fb3
	  re_node_set_empty (dfa->edests + clone_node);
Packit 709fb3
	  /* Search for a duplicated node which satisfies the constraint.  */
Packit 709fb3
	  clone_dest = search_duplicated_node (dfa, org_dest, constraint);
Packit 709fb3
	  if (clone_dest == -1)
Packit 709fb3
	    {
Packit 709fb3
	      /* There is no such duplicated node, create a new one.  */
Packit 709fb3
	      reg_errcode_t err;
Packit 709fb3
	      clone_dest = duplicate_node (dfa, org_dest, constraint);
Packit 709fb3
	      if (BE (clone_dest == -1, 0))
Packit 709fb3
		return REG_ESPACE;
Packit 709fb3
	      ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
Packit 709fb3
	      if (BE (! ok, 0))
Packit 709fb3
		return REG_ESPACE;
Packit 709fb3
	      err = duplicate_node_closure (dfa, org_dest, clone_dest,
Packit 709fb3
					    root_node, constraint);
Packit 709fb3
	      if (BE (err != REG_NOERROR, 0))
Packit 709fb3
		return err;
Packit 709fb3
	    }
Packit 709fb3
	  else
Packit 709fb3
	    {
Packit 709fb3
	      /* There is a duplicated node which satisfies the constraint,
Packit 709fb3
		 use it to avoid infinite loop.  */
Packit 709fb3
	      ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
Packit 709fb3
	      if (BE (! ok, 0))
Packit 709fb3
		return REG_ESPACE;
Packit 709fb3
	    }
Packit 709fb3
Packit 709fb3
	  org_dest = dfa->edests[org_node].elems[1];
Packit 709fb3
	  clone_dest = duplicate_node (dfa, org_dest, constraint);
Packit 709fb3
	  if (BE (clone_dest == -1, 0))
Packit 709fb3
	    return REG_ESPACE;
Packit 709fb3
	  ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
Packit 709fb3
	  if (BE (! ok, 0))
Packit 709fb3
	    return REG_ESPACE;
Packit 709fb3
	}
Packit 709fb3
      org_node = org_dest;
Packit 709fb3
      clone_node = clone_dest;
Packit 709fb3
    }
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Search for a node which is duplicated from the node ORG_NODE, and
Packit 709fb3
   satisfies the constraint CONSTRAINT.  */
Packit 709fb3
Packit 709fb3
static Idx
Packit 709fb3
search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
Packit 709fb3
			unsigned int constraint)
Packit 709fb3
{
Packit 709fb3
  Idx idx;
Packit 709fb3
  for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
Packit 709fb3
    {
Packit 709fb3
      if (org_node == dfa->org_indices[idx]
Packit 709fb3
	  && constraint == dfa->nodes[idx].constraint)
Packit 709fb3
	return idx; /* Found.  */
Packit 709fb3
    }
Packit 709fb3
  return -1; /* Not found.  */
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
Packit 709fb3
   Return the index of the new node, or -1 if insufficient storage is
Packit 709fb3
   available.  */
Packit 709fb3
Packit 709fb3
static Idx
Packit 709fb3
duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint)
Packit 709fb3
{
Packit 709fb3
  Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
Packit 709fb3
  if (BE (dup_idx != -1, 1))
Packit 709fb3
    {
Packit 709fb3
      dfa->nodes[dup_idx].constraint = constraint;
Packit 709fb3
      dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
Packit 709fb3
      dfa->nodes[dup_idx].duplicated = 1;
Packit 709fb3
Packit 709fb3
      /* Store the index of the original node.  */
Packit 709fb3
      dfa->org_indices[dup_idx] = org_idx;
Packit 709fb3
    }
Packit 709fb3
  return dup_idx;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
calc_inveclosure (re_dfa_t *dfa)
Packit 709fb3
{
Packit 709fb3
  Idx src, idx;
Packit 709fb3
  bool ok;
Packit 709fb3
  for (idx = 0; idx < dfa->nodes_len; ++idx)
Packit 709fb3
    re_node_set_init_empty (dfa->inveclosures + idx);
Packit 709fb3
Packit 709fb3
  for (src = 0; src < dfa->nodes_len; ++src)
Packit 709fb3
    {
Packit 709fb3
      Idx *elems = dfa->eclosures[src].elems;
Packit 709fb3
      for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
Packit 709fb3
	{
Packit 709fb3
	  ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
Packit 709fb3
	  if (BE (! ok, 0))
Packit 709fb3
	    return REG_ESPACE;
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Calculate "eclosure" for all the node in DFA.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
calc_eclosure (re_dfa_t *dfa)
Packit 709fb3
{
Packit 709fb3
  Idx node_idx;
Packit 709fb3
  bool incomplete;
Packit 709fb3
#ifdef DEBUG
Packit 709fb3
  assert (dfa->nodes_len > 0);
Packit 709fb3
#endif
Packit 709fb3
  incomplete = false;
Packit 709fb3
  /* For each nodes, calculate epsilon closure.  */
Packit 709fb3
  for (node_idx = 0; ; ++node_idx)
Packit 709fb3
    {
Packit 709fb3
      reg_errcode_t err;
Packit 709fb3
      re_node_set eclosure_elem;
Packit 709fb3
      if (node_idx == dfa->nodes_len)
Packit 709fb3
	{
Packit 709fb3
	  if (!incomplete)
Packit 709fb3
	    break;
Packit 709fb3
	  incomplete = false;
Packit 709fb3
	  node_idx = 0;
Packit 709fb3
	}
Packit 709fb3
Packit 709fb3
#ifdef DEBUG
Packit 709fb3
      assert (dfa->eclosures[node_idx].nelem != -1);
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
      /* If we have already calculated, skip it.  */
Packit 709fb3
      if (dfa->eclosures[node_idx].nelem != 0)
Packit 709fb3
	continue;
Packit 709fb3
      /* Calculate epsilon closure of 'node_idx'.  */
Packit 709fb3
      err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);
Packit 709fb3
      if (BE (err != REG_NOERROR, 0))
Packit 709fb3
	return err;
Packit 709fb3
Packit 709fb3
      if (dfa->eclosures[node_idx].nelem == 0)
Packit 709fb3
	{
Packit 709fb3
	  incomplete = true;
Packit 709fb3
	  re_node_set_free (&eclosure_elem);
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Calculate epsilon closure of NODE.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
Packit 709fb3
{
Packit 709fb3
  reg_errcode_t err;
Packit 709fb3
  Idx i;
Packit 709fb3
  re_node_set eclosure;
Packit 709fb3
  bool ok;
Packit 709fb3
  bool incomplete = false;
Packit 709fb3
  err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
Packit 709fb3
  if (BE (err != REG_NOERROR, 0))
Packit 709fb3
    return err;
Packit 709fb3
Packit 709fb3
  /* This indicates that we are calculating this node now.
Packit 709fb3
     We reference this value to avoid infinite loop.  */
Packit 709fb3
  dfa->eclosures[node].nelem = -1;
Packit 709fb3
Packit 709fb3
  /* If the current node has constraints, duplicate all nodes
Packit 709fb3
     since they must inherit the constraints.  */
Packit 709fb3
  if (dfa->nodes[node].constraint
Packit 709fb3
      && dfa->edests[node].nelem
Packit 709fb3
      && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
Packit 709fb3
    {
Packit 709fb3
      err = duplicate_node_closure (dfa, node, node, node,
Packit 709fb3
				    dfa->nodes[node].constraint);
Packit 709fb3
      if (BE (err != REG_NOERROR, 0))
Packit 709fb3
	return err;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Expand each epsilon destination nodes.  */
Packit 709fb3
  if (IS_EPSILON_NODE(dfa->nodes[node].type))
Packit 709fb3
    for (i = 0; i < dfa->edests[node].nelem; ++i)
Packit 709fb3
      {
Packit 709fb3
	re_node_set eclosure_elem;
Packit 709fb3
	Idx edest = dfa->edests[node].elems[i];
Packit 709fb3
	/* If calculating the epsilon closure of 'edest' is in progress,
Packit 709fb3
	   return intermediate result.  */
Packit 709fb3
	if (dfa->eclosures[edest].nelem == -1)
Packit 709fb3
	  {
Packit 709fb3
	    incomplete = true;
Packit 709fb3
	    continue;
Packit 709fb3
	  }
Packit 709fb3
	/* If we haven't calculated the epsilon closure of 'edest' yet,
Packit 709fb3
	   calculate now. Otherwise use calculated epsilon closure.  */
Packit 709fb3
	if (dfa->eclosures[edest].nelem == 0)
Packit 709fb3
	  {
Packit 709fb3
	    err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false);
Packit 709fb3
	    if (BE (err != REG_NOERROR, 0))
Packit 709fb3
	      return err;
Packit 709fb3
	  }
Packit 709fb3
	else
Packit 709fb3
	  eclosure_elem = dfa->eclosures[edest];
Packit 709fb3
	/* Merge the epsilon closure of 'edest'.  */
Packit 709fb3
	err = re_node_set_merge (&eclosure, &eclosure_elem);
Packit 709fb3
	if (BE (err != REG_NOERROR, 0))
Packit 709fb3
	  return err;
Packit 709fb3
	/* If the epsilon closure of 'edest' is incomplete,
Packit 709fb3
	   the epsilon closure of this node is also incomplete.  */
Packit 709fb3
	if (dfa->eclosures[edest].nelem == 0)
Packit 709fb3
	  {
Packit 709fb3
	    incomplete = true;
Packit 709fb3
	    re_node_set_free (&eclosure_elem);
Packit 709fb3
	  }
Packit 709fb3
      }
Packit 709fb3
Packit 709fb3
  /* An epsilon closure includes itself.  */
Packit 709fb3
  ok = re_node_set_insert (&eclosure, node);
Packit 709fb3
  if (BE (! ok, 0))
Packit 709fb3
    return REG_ESPACE;
Packit 709fb3
  if (incomplete && !root)
Packit 709fb3
    dfa->eclosures[node].nelem = 0;
Packit 709fb3
  else
Packit 709fb3
    dfa->eclosures[node] = eclosure;
Packit 709fb3
  *new_set = eclosure;
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3

Packit 709fb3
/* Functions for token which are used in the parser.  */
Packit 709fb3
Packit 709fb3
/* Fetch a token from INPUT.
Packit 709fb3
   We must not use this function inside bracket expressions.  */
Packit 709fb3
Packit 709fb3
static void
Packit 709fb3
internal_function
Packit 709fb3
fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
Packit 709fb3
{
Packit 709fb3
  re_string_skip_bytes (input, peek_token (result, input, syntax));
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Peek a token from INPUT, and return the length of the token.
Packit 709fb3
   We must not use this function inside bracket expressions.  */
Packit 709fb3
Packit 709fb3
static int
Packit 709fb3
internal_function
Packit 709fb3
peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
Packit 709fb3
{
Packit 709fb3
  unsigned char c;
Packit 709fb3
Packit 709fb3
  if (re_string_eoi (input))
Packit 709fb3
    {
Packit 709fb3
      token->type = END_OF_RE;
Packit 709fb3
      return 0;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  c = re_string_peek_byte (input, 0);
Packit 709fb3
  token->opr.c = c;
Packit 709fb3
Packit 709fb3
  token->word_char = 0;
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  token->mb_partial = 0;
Packit 709fb3
  if (input->mb_cur_max > 1 &&
Packit 709fb3
      !re_string_first_byte (input, re_string_cur_idx (input)))
Packit 709fb3
    {
Packit 709fb3
      token->type = CHARACTER;
Packit 709fb3
      token->mb_partial = 1;
Packit 709fb3
      return 1;
Packit 709fb3
    }
Packit 709fb3
#endif
Packit 709fb3
  if (c == '\\')
Packit 709fb3
    {
Packit 709fb3
      unsigned char c2;
Packit 709fb3
      if (re_string_cur_idx (input) + 1 >= re_string_length (input))
Packit 709fb3
	{
Packit 709fb3
	  token->type = BACK_SLASH;
Packit 709fb3
	  return 1;
Packit 709fb3
	}
Packit 709fb3
Packit 709fb3
      c2 = re_string_peek_byte_case (input, 1);
Packit 709fb3
      token->opr.c = c2;
Packit 709fb3
      token->type = CHARACTER;
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
      if (input->mb_cur_max > 1)
Packit 709fb3
	{
Packit 709fb3
	  wint_t wc = re_string_wchar_at (input,
Packit 709fb3
					  re_string_cur_idx (input) + 1);
Packit 709fb3
	  token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
#endif
Packit 709fb3
	token->word_char = IS_WORD_CHAR (c2) != 0;
Packit 709fb3
Packit 709fb3
      switch (c2)
Packit 709fb3
	{
Packit 709fb3
	case '|':
Packit 709fb3
	  if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
Packit 709fb3
	    token->type = OP_ALT;
Packit 709fb3
	  break;
Packit 709fb3
	case '1': case '2': case '3': case '4': case '5':
Packit 709fb3
	case '6': case '7': case '8': case '9':
Packit 709fb3
	  if (!(syntax & RE_NO_BK_REFS))
Packit 709fb3
	    {
Packit 709fb3
	      token->type = OP_BACK_REF;
Packit 709fb3
	      token->opr.idx = c2 - '1';
Packit 709fb3
	    }
Packit 709fb3
	  break;
Packit 709fb3
	case '<':
Packit 709fb3
	  if (!(syntax & RE_NO_GNU_OPS))
Packit 709fb3
	    {
Packit 709fb3
	      token->type = ANCHOR;
Packit 709fb3
	      token->opr.ctx_type = WORD_FIRST;
Packit 709fb3
	    }
Packit 709fb3
	  break;
Packit 709fb3
	case '>':
Packit 709fb3
	  if (!(syntax & RE_NO_GNU_OPS))
Packit 709fb3
	    {
Packit 709fb3
	      token->type = ANCHOR;
Packit 709fb3
	      token->opr.ctx_type = WORD_LAST;
Packit 709fb3
	    }
Packit 709fb3
	  break;
Packit 709fb3
	case 'b':
Packit 709fb3
	  if (!(syntax & RE_NO_GNU_OPS))
Packit 709fb3
	    {
Packit 709fb3
	      token->type = ANCHOR;
Packit 709fb3
	      token->opr.ctx_type = WORD_DELIM;
Packit 709fb3
	    }
Packit 709fb3
	  break;
Packit 709fb3
	case 'B':
Packit 709fb3
	  if (!(syntax & RE_NO_GNU_OPS))
Packit 709fb3
	    {
Packit 709fb3
	      token->type = ANCHOR;
Packit 709fb3
	      token->opr.ctx_type = NOT_WORD_DELIM;
Packit 709fb3
	    }
Packit 709fb3
	  break;
Packit 709fb3
	case 'w':
Packit 709fb3
	  if (!(syntax & RE_NO_GNU_OPS))
Packit 709fb3
	    token->type = OP_WORD;
Packit 709fb3
	  break;
Packit 709fb3
	case 'W':
Packit 709fb3
	  if (!(syntax & RE_NO_GNU_OPS))
Packit 709fb3
	    token->type = OP_NOTWORD;
Packit 709fb3
	  break;
Packit 709fb3
	case 's':
Packit 709fb3
	  if (!(syntax & RE_NO_GNU_OPS))
Packit 709fb3
	    token->type = OP_SPACE;
Packit 709fb3
	  break;
Packit 709fb3
	case 'S':
Packit 709fb3
	  if (!(syntax & RE_NO_GNU_OPS))
Packit 709fb3
	    token->type = OP_NOTSPACE;
Packit 709fb3
	  break;
Packit 709fb3
	case '`':
Packit 709fb3
	  if (!(syntax & RE_NO_GNU_OPS))
Packit 709fb3
	    {
Packit 709fb3
	      token->type = ANCHOR;
Packit 709fb3
	      token->opr.ctx_type = BUF_FIRST;
Packit 709fb3
	    }
Packit 709fb3
	  break;
Packit 709fb3
	case '\'':
Packit 709fb3
	  if (!(syntax & RE_NO_GNU_OPS))
Packit 709fb3
	    {
Packit 709fb3
	      token->type = ANCHOR;
Packit 709fb3
	      token->opr.ctx_type = BUF_LAST;
Packit 709fb3
	    }
Packit 709fb3
	  break;
Packit 709fb3
	case '(':
Packit 709fb3
	  if (!(syntax & RE_NO_BK_PARENS))
Packit 709fb3
	    token->type = OP_OPEN_SUBEXP;
Packit 709fb3
	  break;
Packit 709fb3
	case ')':
Packit 709fb3
	  if (!(syntax & RE_NO_BK_PARENS))
Packit 709fb3
	    token->type = OP_CLOSE_SUBEXP;
Packit 709fb3
	  break;
Packit 709fb3
	case '+':
Packit 709fb3
	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
Packit 709fb3
	    token->type = OP_DUP_PLUS;
Packit 709fb3
	  break;
Packit 709fb3
	case '?':
Packit 709fb3
	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
Packit 709fb3
	    token->type = OP_DUP_QUESTION;
Packit 709fb3
	  break;
Packit 709fb3
	case '{':
Packit 709fb3
	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
Packit 709fb3
	    token->type = OP_OPEN_DUP_NUM;
Packit 709fb3
	  break;
Packit 709fb3
	case '}':
Packit 709fb3
	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
Packit 709fb3
	    token->type = OP_CLOSE_DUP_NUM;
Packit 709fb3
	  break;
Packit 709fb3
	default:
Packit 709fb3
	  break;
Packit 709fb3
	}
Packit 709fb3
      return 2;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  token->type = CHARACTER;
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  if (input->mb_cur_max > 1)
Packit 709fb3
    {
Packit 709fb3
      wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
Packit 709fb3
      token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
#endif
Packit 709fb3
    token->word_char = IS_WORD_CHAR (token->opr.c);
Packit 709fb3
Packit 709fb3
  switch (c)
Packit 709fb3
    {
Packit 709fb3
    case '\n':
Packit 709fb3
      if (syntax & RE_NEWLINE_ALT)
Packit 709fb3
	token->type = OP_ALT;
Packit 709fb3
      break;
Packit 709fb3
    case '|':
Packit 709fb3
      if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
Packit 709fb3
	token->type = OP_ALT;
Packit 709fb3
      break;
Packit 709fb3
    case '*':
Packit 709fb3
      token->type = OP_DUP_ASTERISK;
Packit 709fb3
      break;
Packit 709fb3
    case '+':
Packit 709fb3
      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
Packit 709fb3
	token->type = OP_DUP_PLUS;
Packit 709fb3
      break;
Packit 709fb3
    case '?':
Packit 709fb3
      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
Packit 709fb3
	token->type = OP_DUP_QUESTION;
Packit 709fb3
      break;
Packit 709fb3
    case '{':
Packit 709fb3
      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
Packit 709fb3
	token->type = OP_OPEN_DUP_NUM;
Packit 709fb3
      break;
Packit 709fb3
    case '}':
Packit 709fb3
      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
Packit 709fb3
	token->type = OP_CLOSE_DUP_NUM;
Packit 709fb3
      break;
Packit 709fb3
    case '(':
Packit 709fb3
      if (syntax & RE_NO_BK_PARENS)
Packit 709fb3
	token->type = OP_OPEN_SUBEXP;
Packit 709fb3
      break;
Packit 709fb3
    case ')':
Packit 709fb3
      if (syntax & RE_NO_BK_PARENS)
Packit 709fb3
	token->type = OP_CLOSE_SUBEXP;
Packit 709fb3
      break;
Packit 709fb3
    case '[':
Packit 709fb3
      token->type = OP_OPEN_BRACKET;
Packit 709fb3
      break;
Packit 709fb3
    case '.':
Packit 709fb3
      token->type = OP_PERIOD;
Packit 709fb3
      break;
Packit 709fb3
    case '^':
Packit 709fb3
      if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
Packit 709fb3
	  re_string_cur_idx (input) != 0)
Packit 709fb3
	{
Packit 709fb3
	  char prev = re_string_peek_byte (input, -1);
Packit 709fb3
	  if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
Packit 709fb3
	    break;
Packit 709fb3
	}
Packit 709fb3
      token->type = ANCHOR;
Packit 709fb3
      token->opr.ctx_type = LINE_FIRST;
Packit 709fb3
      break;
Packit 709fb3
    case '$':
Packit 709fb3
      if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
Packit 709fb3
	  re_string_cur_idx (input) + 1 != re_string_length (input))
Packit 709fb3
	{
Packit 709fb3
	  re_token_t next;
Packit 709fb3
	  re_string_skip_bytes (input, 1);
Packit 709fb3
	  peek_token (&next, input, syntax);
Packit 709fb3
	  re_string_skip_bytes (input, -1);
Packit 709fb3
	  if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
Packit 709fb3
	    break;
Packit 709fb3
	}
Packit 709fb3
      token->type = ANCHOR;
Packit 709fb3
      token->opr.ctx_type = LINE_LAST;
Packit 709fb3
      break;
Packit 709fb3
    default:
Packit 709fb3
      break;
Packit 709fb3
    }
Packit 709fb3
  return 1;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Peek a token from INPUT, and return the length of the token.
Packit 709fb3
   We must not use this function out of bracket expressions.  */
Packit 709fb3
Packit 709fb3
static int
Packit 709fb3
internal_function
Packit 709fb3
peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
Packit 709fb3
{
Packit 709fb3
  unsigned char c;
Packit 709fb3
  if (re_string_eoi (input))
Packit 709fb3
    {
Packit 709fb3
      token->type = END_OF_RE;
Packit 709fb3
      return 0;
Packit 709fb3
    }
Packit 709fb3
  c = re_string_peek_byte (input, 0);
Packit 709fb3
  token->opr.c = c;
Packit 709fb3
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  if (input->mb_cur_max > 1 &&
Packit 709fb3
      !re_string_first_byte (input, re_string_cur_idx (input)))
Packit 709fb3
    {
Packit 709fb3
      token->type = CHARACTER;
Packit 709fb3
      return 1;
Packit 709fb3
    }
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
Packit 709fb3
  if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
Packit 709fb3
      && re_string_cur_idx (input) + 1 < re_string_length (input))
Packit 709fb3
    {
Packit 709fb3
      /* In this case, '\' escape a character.  */
Packit 709fb3
      unsigned char c2;
Packit 709fb3
      re_string_skip_bytes (input, 1);
Packit 709fb3
      c2 = re_string_peek_byte (input, 0);
Packit 709fb3
      token->opr.c = c2;
Packit 709fb3
      token->type = CHARACTER;
Packit 709fb3
      return 1;
Packit 709fb3
    }
Packit 709fb3
  if (c == '[') /* '[' is a special char in a bracket exps.  */
Packit 709fb3
    {
Packit 709fb3
      unsigned char c2;
Packit 709fb3
      int token_len;
Packit 709fb3
      if (re_string_cur_idx (input) + 1 < re_string_length (input))
Packit 709fb3
	c2 = re_string_peek_byte (input, 1);
Packit 709fb3
      else
Packit 709fb3
	c2 = 0;
Packit 709fb3
      token->opr.c = c2;
Packit 709fb3
      token_len = 2;
Packit 709fb3
      switch (c2)
Packit 709fb3
	{
Packit 709fb3
	case '.':
Packit 709fb3
	  token->type = OP_OPEN_COLL_ELEM;
Packit 709fb3
	  break;
Packit 709fb3
	case '=':
Packit 709fb3
	  token->type = OP_OPEN_EQUIV_CLASS;
Packit 709fb3
	  break;
Packit 709fb3
	case ':':
Packit 709fb3
	  if (syntax & RE_CHAR_CLASSES)
Packit 709fb3
	    {
Packit 709fb3
	      token->type = OP_OPEN_CHAR_CLASS;
Packit 709fb3
	      break;
Packit 709fb3
	    }
Packit 709fb3
	  /* else fall through.  */
Packit 709fb3
	default:
Packit 709fb3
	  token->type = CHARACTER;
Packit 709fb3
	  token->opr.c = c;
Packit 709fb3
	  token_len = 1;
Packit 709fb3
	  break;
Packit 709fb3
	}
Packit 709fb3
      return token_len;
Packit 709fb3
    }
Packit 709fb3
  switch (c)
Packit 709fb3
    {
Packit 709fb3
    case '-':
Packit 709fb3
      token->type = OP_CHARSET_RANGE;
Packit 709fb3
      break;
Packit 709fb3
    case ']':
Packit 709fb3
      token->type = OP_CLOSE_BRACKET;
Packit 709fb3
      break;
Packit 709fb3
    case '^':
Packit 709fb3
      token->type = OP_NON_MATCH_LIST;
Packit 709fb3
      break;
Packit 709fb3
    default:
Packit 709fb3
      token->type = CHARACTER;
Packit 709fb3
    }
Packit 709fb3
  return 1;
Packit 709fb3
}
Packit 709fb3

Packit 709fb3
/* Functions for parser.  */
Packit 709fb3
Packit 709fb3
/* Entry point of the parser.
Packit 709fb3
   Parse the regular expression REGEXP and return the structure tree.
Packit 709fb3
   If an error occurs, ERR is set by error code, and return NULL.
Packit 709fb3
   This function build the following tree, from regular expression <reg_exp>:
Packit 709fb3
	   CAT
Packit 709fb3
	   / \
Packit 709fb3
	  /   \
Packit 709fb3
   <reg_exp>  EOR
Packit 709fb3
Packit 709fb3
   CAT means concatenation.
Packit 709fb3
   EOR means end of regular expression.  */
Packit 709fb3
Packit 709fb3
static bin_tree_t *
Packit 709fb3
parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
Packit 709fb3
       reg_errcode_t *err)
Packit 709fb3
{
Packit 709fb3
  re_dfa_t *dfa = preg->buffer;
Packit 709fb3
  bin_tree_t *tree, *eor, *root;
Packit 709fb3
  re_token_t current_token;
Packit 709fb3
  dfa->syntax = syntax;
Packit 709fb3
  fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE);
Packit 709fb3
  tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
Packit 709fb3
  if (BE (*err != REG_NOERROR && tree == NULL, 0))
Packit 709fb3
    return NULL;
Packit 709fb3
  eor = create_tree (dfa, NULL, NULL, END_OF_RE);
Packit 709fb3
  if (tree != NULL)
Packit 709fb3
    root = create_tree (dfa, tree, eor, CONCAT);
Packit 709fb3
  else
Packit 709fb3
    root = eor;
Packit 709fb3
  if (BE (eor == NULL || root == NULL, 0))
Packit 709fb3
    {
Packit 709fb3
      *err = REG_ESPACE;
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
  return root;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* This function build the following tree, from regular expression
Packit 709fb3
   <branch1>|<branch2>:
Packit 709fb3
	   ALT
Packit 709fb3
	   / \
Packit 709fb3
	  /   \
Packit 709fb3
   <branch1> <branch2>
Packit 709fb3
Packit 709fb3
   ALT means alternative, which represents the operator '|'.  */
Packit 709fb3
Packit 709fb3
static bin_tree_t *
Packit 709fb3
parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
Packit 709fb3
	       reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
Packit 709fb3
{
Packit 709fb3
  re_dfa_t *dfa = preg->buffer;
Packit 709fb3
  bin_tree_t *tree, *branch = NULL;
Packit 709fb3
  bitset_word_t initial_bkref_map = dfa->completed_bkref_map;
Packit 709fb3
  tree = parse_branch (regexp, preg, token, syntax, nest, err);
Packit 709fb3
  if (BE (*err != REG_NOERROR && tree == NULL, 0))
Packit 709fb3
    return NULL;
Packit 709fb3
Packit 709fb3
  while (token->type == OP_ALT)
Packit 709fb3
    {
Packit 709fb3
      fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
Packit 709fb3
      if (token->type != OP_ALT && token->type != END_OF_RE
Packit 709fb3
	  && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
Packit 709fb3
	{
Packit 709fb3
	  bitset_word_t accumulated_bkref_map = dfa->completed_bkref_map;
Packit 709fb3
	  dfa->completed_bkref_map = initial_bkref_map;
Packit 709fb3
	  branch = parse_branch (regexp, preg, token, syntax, nest, err);
Packit 709fb3
	  if (BE (*err != REG_NOERROR && branch == NULL, 0))
Packit 709fb3
	    {
Packit 709fb3
	      if (tree != NULL)
Packit 709fb3
		postorder (tree, free_tree, NULL);
Packit 709fb3
	      return NULL;
Packit 709fb3
	    }
Packit 709fb3
	  dfa->completed_bkref_map |= accumulated_bkref_map;
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
	branch = NULL;
Packit 709fb3
      tree = create_tree (dfa, tree, branch, OP_ALT);
Packit 709fb3
      if (BE (tree == NULL, 0))
Packit 709fb3
	{
Packit 709fb3
	  *err = REG_ESPACE;
Packit 709fb3
	  return NULL;
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
  return tree;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* This function build the following tree, from regular expression
Packit 709fb3
   <exp1><exp2>:
Packit 709fb3
	CAT
Packit 709fb3
	/ \
Packit 709fb3
       /   \
Packit 709fb3
   <exp1> <exp2>
Packit 709fb3
Packit 709fb3
   CAT means concatenation.  */
Packit 709fb3
Packit 709fb3
static bin_tree_t *
Packit 709fb3
parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
Packit 709fb3
	      reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
Packit 709fb3
{
Packit 709fb3
  bin_tree_t *tree, *expr;
Packit 709fb3
  re_dfa_t *dfa = preg->buffer;
Packit 709fb3
  tree = parse_expression (regexp, preg, token, syntax, nest, err);
Packit 709fb3
  if (BE (*err != REG_NOERROR && tree == NULL, 0))
Packit 709fb3
    return NULL;
Packit 709fb3
Packit 709fb3
  while (token->type != OP_ALT && token->type != END_OF_RE
Packit 709fb3
	 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
Packit 709fb3
    {
Packit 709fb3
      expr = parse_expression (regexp, preg, token, syntax, nest, err);
Packit 709fb3
      if (BE (*err != REG_NOERROR && expr == NULL, 0))
Packit 709fb3
	{
Packit 709fb3
	  if (tree != NULL)
Packit 709fb3
	    postorder (tree, free_tree, NULL);
Packit 709fb3
	  return NULL;
Packit 709fb3
	}
Packit 709fb3
      if (tree != NULL && expr != NULL)
Packit 709fb3
	{
Packit 709fb3
	  bin_tree_t *newtree = create_tree (dfa, tree, expr, CONCAT);
Packit 709fb3
	  if (newtree == NULL)
Packit 709fb3
	    {
Packit 709fb3
	      postorder (expr, free_tree, NULL);
Packit 709fb3
	      postorder (tree, free_tree, NULL);
Packit 709fb3
	      *err = REG_ESPACE;
Packit 709fb3
	      return NULL;
Packit 709fb3
	    }
Packit 709fb3
	  tree = newtree;
Packit 709fb3
	}
Packit 709fb3
      else if (tree == NULL)
Packit 709fb3
	tree = expr;
Packit 709fb3
      /* Otherwise expr == NULL, we don't need to create new tree.  */
Packit 709fb3
    }
Packit 709fb3
  return tree;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* This function build the following tree, from regular expression a*:
Packit 709fb3
	 *
Packit 709fb3
	 |
Packit 709fb3
	 a
Packit 709fb3
*/
Packit 709fb3
Packit 709fb3
static bin_tree_t *
Packit 709fb3
parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
Packit 709fb3
		  reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
Packit 709fb3
{
Packit 709fb3
  re_dfa_t *dfa = preg->buffer;
Packit 709fb3
  bin_tree_t *tree;
Packit 709fb3
  switch (token->type)
Packit 709fb3
    {
Packit 709fb3
    case CHARACTER:
Packit 709fb3
      tree = create_token_tree (dfa, NULL, NULL, token);
Packit 709fb3
      if (BE (tree == NULL, 0))
Packit 709fb3
	{
Packit 709fb3
	  *err = REG_ESPACE;
Packit 709fb3
	  return NULL;
Packit 709fb3
	}
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
      if (dfa->mb_cur_max > 1)
Packit 709fb3
	{
Packit 709fb3
	  while (!re_string_eoi (regexp)
Packit 709fb3
		 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
Packit 709fb3
	    {
Packit 709fb3
	      bin_tree_t *mbc_remain;
Packit 709fb3
	      fetch_token (token, regexp, syntax);
Packit 709fb3
	      mbc_remain = create_token_tree (dfa, NULL, NULL, token);
Packit 709fb3
	      tree = create_tree (dfa, tree, mbc_remain, CONCAT);
Packit 709fb3
	      if (BE (mbc_remain == NULL || tree == NULL, 0))
Packit 709fb3
		{
Packit 709fb3
		  *err = REG_ESPACE;
Packit 709fb3
		  return NULL;
Packit 709fb3
		}
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
#endif
Packit 709fb3
      break;
Packit 709fb3
    case OP_OPEN_SUBEXP:
Packit 709fb3
      tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
Packit 709fb3
      if (BE (*err != REG_NOERROR && tree == NULL, 0))
Packit 709fb3
	return NULL;
Packit 709fb3
      break;
Packit 709fb3
    case OP_OPEN_BRACKET:
Packit 709fb3
      tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
Packit 709fb3
      if (BE (*err != REG_NOERROR && tree == NULL, 0))
Packit 709fb3
	return NULL;
Packit 709fb3
      break;
Packit 709fb3
    case OP_BACK_REF:
Packit 709fb3
      if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
Packit 709fb3
	{
Packit 709fb3
	  *err = REG_ESUBREG;
Packit 709fb3
	  return NULL;
Packit 709fb3
	}
Packit 709fb3
      dfa->used_bkref_map |= 1 << token->opr.idx;
Packit 709fb3
      tree = create_token_tree (dfa, NULL, NULL, token);
Packit 709fb3
      if (BE (tree == NULL, 0))
Packit 709fb3
	{
Packit 709fb3
	  *err = REG_ESPACE;
Packit 709fb3
	  return NULL;
Packit 709fb3
	}
Packit 709fb3
      ++dfa->nbackref;
Packit 709fb3
      dfa->has_mb_node = 1;
Packit 709fb3
      break;
Packit 709fb3
    case OP_OPEN_DUP_NUM:
Packit 709fb3
      if (syntax & RE_CONTEXT_INVALID_DUP)
Packit 709fb3
	{
Packit 709fb3
	  *err = REG_BADRPT;
Packit 709fb3
	  return NULL;
Packit 709fb3
	}
Packit 709fb3
      /* FALLTHROUGH */
Packit 709fb3
    case OP_DUP_ASTERISK:
Packit 709fb3
    case OP_DUP_PLUS:
Packit 709fb3
    case OP_DUP_QUESTION:
Packit 709fb3
      if (syntax & RE_CONTEXT_INVALID_OPS)
Packit 709fb3
	{
Packit 709fb3
	  *err = REG_BADRPT;
Packit 709fb3
	  return NULL;
Packit 709fb3
	}
Packit 709fb3
      else if (syntax & RE_CONTEXT_INDEP_OPS)
Packit 709fb3
	{
Packit 709fb3
	  fetch_token (token, regexp, syntax);
Packit 709fb3
	  return parse_expression (regexp, preg, token, syntax, nest, err);
Packit 709fb3
	}
Packit 709fb3
      /* else fall through  */
Packit 709fb3
    case OP_CLOSE_SUBEXP:
Packit 709fb3
      if ((token->type == OP_CLOSE_SUBEXP) &&
Packit 709fb3
	  !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
Packit 709fb3
	{
Packit 709fb3
	  *err = REG_ERPAREN;
Packit 709fb3
	  return NULL;
Packit 709fb3
	}
Packit 709fb3
      /* else fall through  */
Packit 709fb3
    case OP_CLOSE_DUP_NUM:
Packit 709fb3
      /* We treat it as a normal character.  */
Packit 709fb3
Packit 709fb3
      /* Then we can these characters as normal characters.  */
Packit 709fb3
      token->type = CHARACTER;
Packit 709fb3
      /* mb_partial and word_char bits should be initialized already
Packit 709fb3
	 by peek_token.  */
Packit 709fb3
      tree = create_token_tree (dfa, NULL, NULL, token);
Packit 709fb3
      if (BE (tree == NULL, 0))
Packit 709fb3
	{
Packit 709fb3
	  *err = REG_ESPACE;
Packit 709fb3
	  return NULL;
Packit 709fb3
	}
Packit 709fb3
      break;
Packit 709fb3
    case ANCHOR:
Packit 709fb3
      if ((token->opr.ctx_type
Packit 709fb3
	   & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
Packit 709fb3
	  && dfa->word_ops_used == 0)
Packit 709fb3
	init_word_char (dfa);
Packit 709fb3
      if (token->opr.ctx_type == WORD_DELIM
Packit 709fb3
	  || token->opr.ctx_type == NOT_WORD_DELIM)
Packit 709fb3
	{
Packit 709fb3
	  bin_tree_t *tree_first, *tree_last;
Packit 709fb3
	  if (token->opr.ctx_type == WORD_DELIM)
Packit 709fb3
	    {
Packit 709fb3
	      token->opr.ctx_type = WORD_FIRST;
Packit 709fb3
	      tree_first = create_token_tree (dfa, NULL, NULL, token);
Packit 709fb3
	      token->opr.ctx_type = WORD_LAST;
Packit 709fb3
	    }
Packit 709fb3
	  else
Packit 709fb3
	    {
Packit 709fb3
	      token->opr.ctx_type = INSIDE_WORD;
Packit 709fb3
	      tree_first = create_token_tree (dfa, NULL, NULL, token);
Packit 709fb3
	      token->opr.ctx_type = INSIDE_NOTWORD;
Packit 709fb3
	    }
Packit 709fb3
	  tree_last = create_token_tree (dfa, NULL, NULL, token);
Packit 709fb3
	  tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
Packit 709fb3
	  if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
Packit 709fb3
	    {
Packit 709fb3
	      *err = REG_ESPACE;
Packit 709fb3
	      return NULL;
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
	{
Packit 709fb3
	  tree = create_token_tree (dfa, NULL, NULL, token);
Packit 709fb3
	  if (BE (tree == NULL, 0))
Packit 709fb3
	    {
Packit 709fb3
	      *err = REG_ESPACE;
Packit 709fb3
	      return NULL;
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
      /* We must return here, since ANCHORs can't be followed
Packit 709fb3
	 by repetition operators.
Packit 709fb3
	 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
Packit 709fb3
	     it must not be "<ANCHOR(^)><REPEAT(*)>".  */
Packit 709fb3
      fetch_token (token, regexp, syntax);
Packit 709fb3
      return tree;
Packit 709fb3
    case OP_PERIOD:
Packit 709fb3
      tree = create_token_tree (dfa, NULL, NULL, token);
Packit 709fb3
      if (BE (tree == NULL, 0))
Packit 709fb3
	{
Packit 709fb3
	  *err = REG_ESPACE;
Packit 709fb3
	  return NULL;
Packit 709fb3
	}
Packit 709fb3
      if (dfa->mb_cur_max > 1)
Packit 709fb3
	dfa->has_mb_node = 1;
Packit 709fb3
      break;
Packit 709fb3
    case OP_WORD:
Packit 709fb3
    case OP_NOTWORD:
Packit 709fb3
      tree = build_charclass_op (dfa, regexp->trans,
Packit 709fb3
				 "alnum",
Packit 709fb3
				 "_",
Packit 709fb3
				 token->type == OP_NOTWORD, err);
Packit 709fb3
      if (BE (*err != REG_NOERROR && tree == NULL, 0))
Packit 709fb3
	return NULL;
Packit 709fb3
      break;
Packit 709fb3
    case OP_SPACE:
Packit 709fb3
    case OP_NOTSPACE:
Packit 709fb3
      tree = build_charclass_op (dfa, regexp->trans,
Packit 709fb3
				 "space",
Packit 709fb3
				 "",
Packit 709fb3
				 token->type == OP_NOTSPACE, err);
Packit 709fb3
      if (BE (*err != REG_NOERROR && tree == NULL, 0))
Packit 709fb3
	return NULL;
Packit 709fb3
      break;
Packit 709fb3
    case OP_ALT:
Packit 709fb3
    case END_OF_RE:
Packit 709fb3
      return NULL;
Packit 709fb3
    case BACK_SLASH:
Packit 709fb3
      *err = REG_EESCAPE;
Packit 709fb3
      return NULL;
Packit 709fb3
    default:
Packit 709fb3
      /* Must not happen?  */
Packit 709fb3
#ifdef DEBUG
Packit 709fb3
      assert (0);
Packit 709fb3
#endif
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
  fetch_token (token, regexp, syntax);
Packit 709fb3
Packit 709fb3
  while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
Packit 709fb3
	 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
Packit 709fb3
    {
Packit 709fb3
      bin_tree_t *dup_tree = parse_dup_op (tree, regexp, dfa, token,
Packit 709fb3
					   syntax, err);
Packit 709fb3
      if (BE (*err != REG_NOERROR && dup_tree == NULL, 0))
Packit 709fb3
	{
Packit 709fb3
	  if (tree != NULL)
Packit 709fb3
	    postorder (tree, free_tree, NULL);
Packit 709fb3
	  return NULL;
Packit 709fb3
	}
Packit 709fb3
      tree = dup_tree;
Packit 709fb3
      /* In BRE consecutive duplications are not allowed.  */
Packit 709fb3
      if ((syntax & RE_CONTEXT_INVALID_DUP)
Packit 709fb3
	  && (token->type == OP_DUP_ASTERISK
Packit 709fb3
	      || token->type == OP_OPEN_DUP_NUM))
Packit 709fb3
	{
Packit 709fb3
	  if (tree != NULL)
Packit 709fb3
	    postorder (tree, free_tree, NULL);
Packit 709fb3
	  *err = REG_BADRPT;
Packit 709fb3
	  return NULL;
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  return tree;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* This function build the following tree, from regular expression
Packit 709fb3
   (<reg_exp>):
Packit 709fb3
	 SUBEXP
Packit 709fb3
	    |
Packit 709fb3
	<reg_exp>
Packit 709fb3
*/
Packit 709fb3
Packit 709fb3
static bin_tree_t *
Packit 709fb3
parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
Packit 709fb3
	       reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
Packit 709fb3
{
Packit 709fb3
  re_dfa_t *dfa = preg->buffer;
Packit 709fb3
  bin_tree_t *tree;
Packit 709fb3
  size_t cur_nsub;
Packit 709fb3
  cur_nsub = preg->re_nsub++;
Packit 709fb3
Packit 709fb3
  fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
Packit 709fb3
Packit 709fb3
  /* The subexpression may be a null string.  */
Packit 709fb3
  if (token->type == OP_CLOSE_SUBEXP)
Packit 709fb3
    tree = NULL;
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
Packit 709fb3
      if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
Packit 709fb3
	{
Packit 709fb3
	  if (tree != NULL)
Packit 709fb3
	    postorder (tree, free_tree, NULL);
Packit 709fb3
	  *err = REG_EPAREN;
Packit 709fb3
	}
Packit 709fb3
      if (BE (*err != REG_NOERROR, 0))
Packit 709fb3
	return NULL;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  if (cur_nsub <= '9' - '1')
Packit 709fb3
    dfa->completed_bkref_map |= 1 << cur_nsub;
Packit 709fb3
Packit 709fb3
  tree = create_tree (dfa, tree, NULL, SUBEXP);
Packit 709fb3
  if (BE (tree == NULL, 0))
Packit 709fb3
    {
Packit 709fb3
      *err = REG_ESPACE;
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
  tree->token.opr.idx = cur_nsub;
Packit 709fb3
  return tree;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* This function parse repetition operators like "*", "+", "{1,3}" etc.  */
Packit 709fb3
Packit 709fb3
static bin_tree_t *
Packit 709fb3
parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
Packit 709fb3
	      re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
Packit 709fb3
{
Packit 709fb3
  bin_tree_t *tree = NULL, *old_tree = NULL;
Packit 709fb3
  Idx i, start, end, start_idx = re_string_cur_idx (regexp);
Packit 709fb3
  re_token_t start_token = *token;
Packit 709fb3
Packit 709fb3
  if (token->type == OP_OPEN_DUP_NUM)
Packit 709fb3
    {
Packit 709fb3
      end = 0;
Packit 709fb3
      start = fetch_number (regexp, token, syntax);
Packit 709fb3
      if (start == -1)
Packit 709fb3
	{
Packit 709fb3
	  if (token->type == CHARACTER && token->opr.c == ',')
Packit 709fb3
	    start = 0; /* We treat "{,m}" as "{0,m}".  */
Packit 709fb3
	  else
Packit 709fb3
	    {
Packit 709fb3
	      *err = REG_BADBR; /* <re>{} is invalid.  */
Packit 709fb3
	      return NULL;
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
      if (BE (start != -2, 1))
Packit 709fb3
	{
Packit 709fb3
	  /* We treat "{n}" as "{n,n}".  */
Packit 709fb3
	  end = ((token->type == OP_CLOSE_DUP_NUM) ? start
Packit 709fb3
		 : ((token->type == CHARACTER && token->opr.c == ',')
Packit 709fb3
		    ? fetch_number (regexp, token, syntax) : -2));
Packit 709fb3
	}
Packit 709fb3
      if (BE (start == -2 || end == -2, 0))
Packit 709fb3
	{
Packit 709fb3
	  /* Invalid sequence.  */
Packit 709fb3
	  if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
Packit 709fb3
	    {
Packit 709fb3
	      if (token->type == END_OF_RE)
Packit 709fb3
		*err = REG_EBRACE;
Packit 709fb3
	      else
Packit 709fb3
		*err = REG_BADBR;
Packit 709fb3
Packit 709fb3
	      return NULL;
Packit 709fb3
	    }
Packit 709fb3
Packit 709fb3
	  /* If the syntax bit is set, rollback.  */
Packit 709fb3
	  re_string_set_index (regexp, start_idx);
Packit 709fb3
	  *token = start_token;
Packit 709fb3
	  token->type = CHARACTER;
Packit 709fb3
	  /* mb_partial and word_char bits should be already initialized by
Packit 709fb3
	     peek_token.  */
Packit 709fb3
	  return elem;
Packit 709fb3
	}
Packit 709fb3
Packit 709fb3
      if (BE ((end != -1 && start > end)
Packit 709fb3
	      || token->type != OP_CLOSE_DUP_NUM, 0))
Packit 709fb3
	{
Packit 709fb3
	  /* First number greater than second.  */
Packit 709fb3
	  *err = REG_BADBR;
Packit 709fb3
	  return NULL;
Packit 709fb3
	}
Packit 709fb3
Packit 709fb3
      if (BE (RE_DUP_MAX < (end == -1 ? start : end), 0))
Packit 709fb3
	{
Packit 709fb3
	  *err = REG_ESIZE;
Packit 709fb3
	  return NULL;
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      start = (token->type == OP_DUP_PLUS) ? 1 : 0;
Packit 709fb3
      end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  fetch_token (token, regexp, syntax);
Packit 709fb3
Packit 709fb3
  if (BE (elem == NULL, 0))
Packit 709fb3
    return NULL;
Packit 709fb3
  if (BE (start == 0 && end == 0, 0))
Packit 709fb3
    {
Packit 709fb3
      postorder (elem, free_tree, NULL);
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
Packit 709fb3
  if (BE (start > 0, 0))
Packit 709fb3
    {
Packit 709fb3
      tree = elem;
Packit 709fb3
      for (i = 2; i <= start; ++i)
Packit 709fb3
	{
Packit 709fb3
	  elem = duplicate_tree (elem, dfa);
Packit 709fb3
	  tree = create_tree (dfa, tree, elem, CONCAT);
Packit 709fb3
	  if (BE (elem == NULL || tree == NULL, 0))
Packit 709fb3
	    goto parse_dup_op_espace;
Packit 709fb3
	}
Packit 709fb3
Packit 709fb3
      if (start == end)
Packit 709fb3
	return tree;
Packit 709fb3
Packit 709fb3
      /* Duplicate ELEM before it is marked optional.  */
Packit 709fb3
      elem = duplicate_tree (elem, dfa);
Packit 709fb3
      if (BE (elem == NULL, 0))
Packit 709fb3
        goto parse_dup_op_espace;
Packit 709fb3
      old_tree = tree;
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
    old_tree = NULL;
Packit 709fb3
Packit 709fb3
  if (elem->token.type == SUBEXP)
Packit 709fb3
    {
Packit 709fb3
      uintptr_t subidx = elem->token.opr.idx;
Packit 709fb3
      postorder (elem, mark_opt_subexp, (void *) subidx);
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  tree = create_tree (dfa, elem, NULL,
Packit 709fb3
		      (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
Packit 709fb3
  if (BE (tree == NULL, 0))
Packit 709fb3
    goto parse_dup_op_espace;
Packit 709fb3
Packit 709fb3
/* From gnulib's "intprops.h":
Packit 709fb3
   True if the arithmetic type T is signed.  */
Packit 709fb3
#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
Packit 709fb3
Packit 709fb3
  /* This loop is actually executed only when end != -1,
Packit 709fb3
     to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?...  We have
Packit 709fb3
     already created the start+1-th copy.  */
Packit 709fb3
  if (TYPE_SIGNED (Idx) || end != -1)
Packit 709fb3
    for (i = start + 2; i <= end; ++i)
Packit 709fb3
      {
Packit 709fb3
	elem = duplicate_tree (elem, dfa);
Packit 709fb3
	tree = create_tree (dfa, tree, elem, CONCAT);
Packit 709fb3
	if (BE (elem == NULL || tree == NULL, 0))
Packit 709fb3
	  goto parse_dup_op_espace;
Packit 709fb3
Packit 709fb3
	tree = create_tree (dfa, tree, NULL, OP_ALT);
Packit 709fb3
	if (BE (tree == NULL, 0))
Packit 709fb3
	  goto parse_dup_op_espace;
Packit 709fb3
      }
Packit 709fb3
Packit 709fb3
  if (old_tree)
Packit 709fb3
    tree = create_tree (dfa, old_tree, tree, CONCAT);
Packit 709fb3
Packit 709fb3
  return tree;
Packit 709fb3
Packit 709fb3
 parse_dup_op_espace:
Packit 709fb3
  *err = REG_ESPACE;
Packit 709fb3
  return NULL;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Size of the names for collating symbol/equivalence_class/character_class.
Packit 709fb3
   I'm not sure, but maybe enough.  */
Packit 709fb3
#define BRACKET_NAME_BUF_SIZE 32
Packit 709fb3
Packit 709fb3
#ifndef _LIBC
Packit 709fb3
Packit 709fb3
# ifdef RE_ENABLE_I18N
Packit 709fb3
/* Convert the byte B to the corresponding wide character.  In a
Packit 709fb3
   unibyte locale, treat B as itself if it is an encoding error.
Packit 709fb3
   In a multibyte locale, return WEOF if B is an encoding error.  */
Packit 709fb3
static wint_t
Packit 709fb3
parse_byte (unsigned char b, re_charset_t *mbcset)
Packit 709fb3
{
Packit 709fb3
  wint_t wc = __btowc (b);
Packit 709fb3
  return wc == WEOF && !mbcset ? b : wc;
Packit 709fb3
}
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
  /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
Packit 709fb3
     Build the range expression which starts from START_ELEM, and ends
Packit 709fb3
     at END_ELEM.  The result are written to MBCSET and SBCSET.
Packit 709fb3
     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
Packit 709fb3
     mbcset->range_ends, is a pointer argument since we may
Packit 709fb3
     update it.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
internal_function
Packit 709fb3
# ifdef RE_ENABLE_I18N
Packit 709fb3
build_range_exp (const reg_syntax_t syntax,
Packit 709fb3
                 bitset_t sbcset,
Packit 709fb3
                 re_charset_t *mbcset,
Packit 709fb3
                 Idx *range_alloc,
Packit 709fb3
                 const bracket_elem_t *start_elem,
Packit 709fb3
                 const bracket_elem_t *end_elem)
Packit 709fb3
# else /* not RE_ENABLE_I18N */
Packit 709fb3
build_range_exp (const reg_syntax_t syntax,
Packit 709fb3
                 bitset_t sbcset,
Packit 709fb3
                 const bracket_elem_t *start_elem,
Packit 709fb3
                 const bracket_elem_t *end_elem)
Packit 709fb3
# endif /* not RE_ENABLE_I18N */
Packit 709fb3
{
Packit 709fb3
  unsigned int start_ch, end_ch;
Packit 709fb3
  /* Equivalence Classes and Character Classes can't be a range start/end.  */
Packit 709fb3
  if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
Packit 709fb3
	  || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
Packit 709fb3
	  0))
Packit 709fb3
    return REG_ERANGE;
Packit 709fb3
Packit 709fb3
  /* We can handle no multi character collating elements without libc
Packit 709fb3
     support.  */
Packit 709fb3
  if (BE ((start_elem->type == COLL_SYM
Packit 709fb3
	   && strlen ((char *) start_elem->opr.name) > 1)
Packit 709fb3
	  || (end_elem->type == COLL_SYM
Packit 709fb3
	      && strlen ((char *) end_elem->opr.name) > 1), 0))
Packit 709fb3
    return REG_ECOLLATE;
Packit 709fb3
Packit 709fb3
# ifdef RE_ENABLE_I18N
Packit 709fb3
  {
Packit 709fb3
    wchar_t wc;
Packit 709fb3
    wint_t start_wc;
Packit 709fb3
    wint_t end_wc;
Packit 709fb3
Packit 709fb3
    start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
Packit 709fb3
		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
Packit 709fb3
		   : 0));
Packit 709fb3
    end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
Packit 709fb3
	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
Packit 709fb3
		 : 0));
Packit 709fb3
    start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
Packit 709fb3
		? parse_byte (start_ch, mbcset) : start_elem->opr.wch);
Packit 709fb3
    end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
Packit 709fb3
	      ? parse_byte (end_ch, mbcset) : end_elem->opr.wch);
Packit 709fb3
    if (start_wc == WEOF || end_wc == WEOF)
Packit 709fb3
      return REG_ECOLLATE;
Packit 709fb3
    else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0))
Packit 709fb3
      return REG_ERANGE;
Packit 709fb3
Packit 709fb3
    /* Got valid collation sequence values, add them as a new entry.
Packit 709fb3
       However, for !_LIBC we have no collation elements: if the
Packit 709fb3
       character set is single byte, the single byte character set
Packit 709fb3
       that we build below suffices.  parse_bracket_exp passes
Packit 709fb3
       no MBCSET if dfa->mb_cur_max == 1.  */
Packit 709fb3
    if (mbcset)
Packit 709fb3
      {
Packit 709fb3
	/* Check the space of the arrays.  */
Packit 709fb3
	if (BE (*range_alloc == mbcset->nranges, 0))
Packit 709fb3
	  {
Packit 709fb3
	    /* There is not enough space, need realloc.  */
Packit 709fb3
	    wchar_t *new_array_start, *new_array_end;
Packit 709fb3
	    Idx new_nranges;
Packit 709fb3
Packit 709fb3
	    /* +1 in case of mbcset->nranges is 0.  */
Packit 709fb3
	    new_nranges = 2 * mbcset->nranges + 1;
Packit 709fb3
	    /* Use realloc since mbcset->range_starts and mbcset->range_ends
Packit 709fb3
	       are NULL if *range_alloc == 0.  */
Packit 709fb3
	    new_array_start = re_realloc (mbcset->range_starts, wchar_t,
Packit 709fb3
					  new_nranges);
Packit 709fb3
	    new_array_end = re_realloc (mbcset->range_ends, wchar_t,
Packit 709fb3
					new_nranges);
Packit 709fb3
Packit 709fb3
	    if (BE (new_array_start == NULL || new_array_end == NULL, 0))
Packit 709fb3
	      {
Packit 709fb3
		re_free (new_array_start);
Packit 709fb3
		re_free (new_array_end);
Packit 709fb3
		return REG_ESPACE;
Packit 709fb3
	      }
Packit 709fb3
Packit 709fb3
	    mbcset->range_starts = new_array_start;
Packit 709fb3
	    mbcset->range_ends = new_array_end;
Packit 709fb3
	    *range_alloc = new_nranges;
Packit 709fb3
	  }
Packit 709fb3
Packit 709fb3
	mbcset->range_starts[mbcset->nranges] = start_wc;
Packit 709fb3
	mbcset->range_ends[mbcset->nranges++] = end_wc;
Packit 709fb3
      }
Packit 709fb3
Packit 709fb3
    /* Build the table for single byte characters.  */
Packit 709fb3
    for (wc = 0; wc < SBC_MAX; ++wc)
Packit 709fb3
      {
Packit 709fb3
	if (start_wc <= wc && wc <= end_wc)
Packit 709fb3
	  bitset_set (sbcset, wc);
Packit 709fb3
      }
Packit 709fb3
  }
Packit 709fb3
# else /* not RE_ENABLE_I18N */
Packit 709fb3
  {
Packit 709fb3
    unsigned int ch;
Packit 709fb3
    start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
Packit 709fb3
		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
Packit 709fb3
		   : 0));
Packit 709fb3
    end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
Packit 709fb3
	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
Packit 709fb3
		 : 0));
Packit 709fb3
    if (start_ch > end_ch)
Packit 709fb3
      return REG_ERANGE;
Packit 709fb3
    /* Build the table for single byte characters.  */
Packit 709fb3
    for (ch = 0; ch < SBC_MAX; ++ch)
Packit 709fb3
      if (start_ch <= ch  && ch <= end_ch)
Packit 709fb3
	bitset_set (sbcset, ch);
Packit 709fb3
  }
Packit 709fb3
# endif /* not RE_ENABLE_I18N */
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
#endif /* not _LIBC */
Packit 709fb3
Packit 709fb3
#ifndef _LIBC
Packit 709fb3
/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
Packit 709fb3
   Build the collating element which is represented by NAME.
Packit 709fb3
   The result are written to MBCSET and SBCSET.
Packit 709fb3
   COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
Packit 709fb3
   pointer argument since we may update it.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
internal_function
Packit 709fb3
# ifdef RE_ENABLE_I18N
Packit 709fb3
build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
Packit 709fb3
			Idx *coll_sym_alloc, const unsigned char *name)
Packit 709fb3
# else /* not RE_ENABLE_I18N */
Packit 709fb3
build_collating_symbol (bitset_t sbcset, const unsigned char *name)
Packit 709fb3
# endif /* not RE_ENABLE_I18N */
Packit 709fb3
{
Packit 709fb3
  size_t name_len = strlen ((const char *) name);
Packit 709fb3
  if (BE (name_len != 1, 0))
Packit 709fb3
    return REG_ECOLLATE;
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      bitset_set (sbcset, name[0]);
Packit 709fb3
      return REG_NOERROR;
Packit 709fb3
    }
Packit 709fb3
}
Packit 709fb3
#endif /* not _LIBC */
Packit 709fb3
Packit 709fb3
/* This function parse bracket expression like "[abc]", "[a-c]",
Packit 709fb3
   "[[.a-a.]]" etc.  */
Packit 709fb3
Packit 709fb3
static bin_tree_t *
Packit 709fb3
parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
Packit 709fb3
		   reg_syntax_t syntax, reg_errcode_t *err)
Packit 709fb3
{
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
  const unsigned char *collseqmb;
Packit 709fb3
  const char *collseqwc;
Packit 709fb3
  uint32_t nrules;
Packit 709fb3
  int32_t table_size;
Packit 709fb3
  const int32_t *symb_table;
Packit 709fb3
  const unsigned char *extra;
Packit 709fb3
Packit 709fb3
  /* Local function for parse_bracket_exp used in _LIBC environment.
Packit 709fb3
     Seek the collating symbol entry corresponding to NAME.
Packit 709fb3
     Return the index of the symbol in the SYMB_TABLE,
Packit 709fb3
     or -1 if not found.  */
Packit 709fb3
Packit 709fb3
  auto inline int32_t
Packit 709fb3
  __attribute__ ((always_inline))
Packit 709fb3
  seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
Packit 709fb3
    {
Packit 709fb3
      int32_t elem;
Packit 709fb3
Packit 709fb3
      for (elem = 0; elem < table_size; elem++)
Packit 709fb3
	if (symb_table[2 * elem] != 0)
Packit 709fb3
	  {
Packit 709fb3
	    int32_t idx = symb_table[2 * elem + 1];
Packit 709fb3
	    /* Skip the name of collating element name.  */
Packit 709fb3
	    idx += 1 + extra[idx];
Packit 709fb3
	    if (/* Compare the length of the name.  */
Packit 709fb3
		name_len == extra[idx]
Packit 709fb3
		/* Compare the name.  */
Packit 709fb3
		&& memcmp (name, &extra[idx + 1], name_len) == 0)
Packit 709fb3
	      /* Yep, this is the entry.  */
Packit 709fb3
	      return elem;
Packit 709fb3
	  }
Packit 709fb3
      return -1;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Local function for parse_bracket_exp used in _LIBC environment.
Packit 709fb3
     Look up the collation sequence value of BR_ELEM.
Packit 709fb3
     Return the value if succeeded, UINT_MAX otherwise.  */
Packit 709fb3
Packit 709fb3
  auto inline unsigned int
Packit 709fb3
  __attribute__ ((always_inline))
Packit 709fb3
  lookup_collation_sequence_value (bracket_elem_t *br_elem)
Packit 709fb3
    {
Packit 709fb3
      if (br_elem->type == SB_CHAR)
Packit 709fb3
	{
Packit 709fb3
	  /*
Packit 709fb3
	  if (MB_CUR_MAX == 1)
Packit 709fb3
	  */
Packit 709fb3
	  if (nrules == 0)
Packit 709fb3
	    return collseqmb[br_elem->opr.ch];
Packit 709fb3
	  else
Packit 709fb3
	    {
Packit 709fb3
	      wint_t wc = __btowc (br_elem->opr.ch);
Packit 709fb3
	      return __collseq_table_lookup (collseqwc, wc);
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
      else if (br_elem->type == MB_CHAR)
Packit 709fb3
	{
Packit 709fb3
	  if (nrules != 0)
Packit 709fb3
	    return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
Packit 709fb3
	}
Packit 709fb3
      else if (br_elem->type == COLL_SYM)
Packit 709fb3
	{
Packit 709fb3
	  size_t sym_name_len = strlen ((char *) br_elem->opr.name);
Packit 709fb3
	  if (nrules != 0)
Packit 709fb3
	    {
Packit 709fb3
	      int32_t elem, idx;
Packit 709fb3
	      elem = seek_collating_symbol_entry (br_elem->opr.name,
Packit 709fb3
						  sym_name_len);
Packit 709fb3
	      if (elem != -1)
Packit 709fb3
		{
Packit 709fb3
		  /* We found the entry.  */
Packit 709fb3
		  idx = symb_table[2 * elem + 1];
Packit 709fb3
		  /* Skip the name of collating element name.  */
Packit 709fb3
		  idx += 1 + extra[idx];
Packit 709fb3
		  /* Skip the byte sequence of the collating element.  */
Packit 709fb3
		  idx += 1 + extra[idx];
Packit 709fb3
		  /* Adjust for the alignment.  */
Packit 709fb3
		  idx = (idx + 3) & ~3;
Packit 709fb3
		  /* Skip the multibyte collation sequence value.  */
Packit 709fb3
		  idx += sizeof (unsigned int);
Packit 709fb3
		  /* Skip the wide char sequence of the collating element.  */
Packit 709fb3
		  idx += sizeof (unsigned int) *
Packit 709fb3
		    (1 + *(unsigned int *) (extra + idx));
Packit 709fb3
		  /* Return the collation sequence value.  */
Packit 709fb3
		  return *(unsigned int *) (extra + idx);
Packit 709fb3
		}
Packit 709fb3
	      else if (sym_name_len == 1)
Packit 709fb3
		{
Packit 709fb3
		  /* No valid character.  Match it as a single byte
Packit 709fb3
		     character.  */
Packit 709fb3
		  return collseqmb[br_elem->opr.name[0]];
Packit 709fb3
		}
Packit 709fb3
	    }
Packit 709fb3
	  else if (sym_name_len == 1)
Packit 709fb3
	    return collseqmb[br_elem->opr.name[0]];
Packit 709fb3
	}
Packit 709fb3
      return UINT_MAX;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Local function for parse_bracket_exp used in _LIBC environment.
Packit 709fb3
     Build the range expression which starts from START_ELEM, and ends
Packit 709fb3
     at END_ELEM.  The result are written to MBCSET and SBCSET.
Packit 709fb3
     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
Packit 709fb3
     mbcset->range_ends, is a pointer argument since we may
Packit 709fb3
     update it.  */
Packit 709fb3
Packit 709fb3
  auto inline reg_errcode_t
Packit 709fb3
  __attribute__ ((always_inline))
Packit 709fb3
  build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
Packit 709fb3
		   bracket_elem_t *start_elem, bracket_elem_t *end_elem)
Packit 709fb3
    {
Packit 709fb3
      unsigned int ch;
Packit 709fb3
      uint32_t start_collseq;
Packit 709fb3
      uint32_t end_collseq;
Packit 709fb3
Packit 709fb3
      /* Equivalence Classes and Character Classes can't be a range
Packit 709fb3
	 start/end.  */
Packit 709fb3
      if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
Packit 709fb3
	      || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
Packit 709fb3
	      0))
Packit 709fb3
	return REG_ERANGE;
Packit 709fb3
Packit 709fb3
      /* FIXME: Implement rational ranges here, too.  */
Packit 709fb3
      start_collseq = lookup_collation_sequence_value (start_elem);
Packit 709fb3
      end_collseq = lookup_collation_sequence_value (end_elem);
Packit 709fb3
      /* Check start/end collation sequence values.  */
Packit 709fb3
      if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
Packit 709fb3
	return REG_ECOLLATE;
Packit 709fb3
      if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
Packit 709fb3
	return REG_ERANGE;
Packit 709fb3
Packit 709fb3
      /* Got valid collation sequence values, add them as a new entry.
Packit 709fb3
	 However, if we have no collation elements, and the character set
Packit 709fb3
	 is single byte, the single byte character set that we
Packit 709fb3
	 build below suffices. */
Packit 709fb3
      if (nrules > 0 || dfa->mb_cur_max > 1)
Packit 709fb3
	{
Packit 709fb3
	  /* Check the space of the arrays.  */
Packit 709fb3
	  if (BE (*range_alloc == mbcset->nranges, 0))
Packit 709fb3
	    {
Packit 709fb3
	      /* There is not enough space, need realloc.  */
Packit 709fb3
	      uint32_t *new_array_start;
Packit 709fb3
	      uint32_t *new_array_end;
Packit 709fb3
	      Idx new_nranges;
Packit 709fb3
Packit 709fb3
	      /* +1 in case of mbcset->nranges is 0.  */
Packit 709fb3
	      new_nranges = 2 * mbcset->nranges + 1;
Packit 709fb3
	      new_array_start = re_realloc (mbcset->range_starts, uint32_t,
Packit 709fb3
					    new_nranges);
Packit 709fb3
	      new_array_end = re_realloc (mbcset->range_ends, uint32_t,
Packit 709fb3
					  new_nranges);
Packit 709fb3
Packit 709fb3
	      if (BE (new_array_start == NULL || new_array_end == NULL, 0))
Packit 709fb3
		return REG_ESPACE;
Packit 709fb3
Packit 709fb3
	      mbcset->range_starts = new_array_start;
Packit 709fb3
	      mbcset->range_ends = new_array_end;
Packit 709fb3
	      *range_alloc = new_nranges;
Packit 709fb3
	    }
Packit 709fb3
Packit 709fb3
	  mbcset->range_starts[mbcset->nranges] = start_collseq;
Packit 709fb3
	  mbcset->range_ends[mbcset->nranges++] = end_collseq;
Packit 709fb3
	}
Packit 709fb3
Packit 709fb3
      /* Build the table for single byte characters.  */
Packit 709fb3
      for (ch = 0; ch < SBC_MAX; ch++)
Packit 709fb3
	{
Packit 709fb3
	  uint32_t ch_collseq;
Packit 709fb3
	  /*
Packit 709fb3
	  if (MB_CUR_MAX == 1)
Packit 709fb3
	  */
Packit 709fb3
	  if (nrules == 0)
Packit 709fb3
	    ch_collseq = collseqmb[ch];
Packit 709fb3
	  else
Packit 709fb3
	    ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
Packit 709fb3
	  if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
Packit 709fb3
	    bitset_set (sbcset, ch);
Packit 709fb3
	}
Packit 709fb3
      return REG_NOERROR;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Local function for parse_bracket_exp used in _LIBC environment.
Packit 709fb3
     Build the collating element which is represented by NAME.
Packit 709fb3
     The result are written to MBCSET and SBCSET.
Packit 709fb3
     COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
Packit 709fb3
     pointer argument since we may update it.  */
Packit 709fb3
Packit 709fb3
  auto inline reg_errcode_t
Packit 709fb3
  __attribute__ ((always_inline))
Packit 709fb3
  build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
Packit 709fb3
			  Idx *coll_sym_alloc, const unsigned char *name)
Packit 709fb3
    {
Packit 709fb3
      int32_t elem, idx;
Packit 709fb3
      size_t name_len = strlen ((const char *) name);
Packit 709fb3
      if (nrules != 0)
Packit 709fb3
	{
Packit 709fb3
	  elem = seek_collating_symbol_entry (name, name_len);
Packit 709fb3
	  if (elem != -1)
Packit 709fb3
	    {
Packit 709fb3
	      /* We found the entry.  */
Packit 709fb3
	      idx = symb_table[2 * elem + 1];
Packit 709fb3
	      /* Skip the name of collating element name.  */
Packit 709fb3
	      idx += 1 + extra[idx];
Packit 709fb3
	    }
Packit 709fb3
	  else if (name_len == 1)
Packit 709fb3
	    {
Packit 709fb3
	      /* No valid character, treat it as a normal
Packit 709fb3
		 character.  */
Packit 709fb3
	      bitset_set (sbcset, name[0]);
Packit 709fb3
	      return REG_NOERROR;
Packit 709fb3
	    }
Packit 709fb3
	  else
Packit 709fb3
	    return REG_ECOLLATE;
Packit 709fb3
Packit 709fb3
	  /* Got valid collation sequence, add it as a new entry.  */
Packit 709fb3
	  /* Check the space of the arrays.  */
Packit 709fb3
	  if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
Packit 709fb3
	    {
Packit 709fb3
	      /* Not enough, realloc it.  */
Packit 709fb3
	      /* +1 in case of mbcset->ncoll_syms is 0.  */
Packit 709fb3
	      Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
Packit 709fb3
	      /* Use realloc since mbcset->coll_syms is NULL
Packit 709fb3
		 if *alloc == 0.  */
Packit 709fb3
	      int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
Packit 709fb3
						   new_coll_sym_alloc);
Packit 709fb3
	      if (BE (new_coll_syms == NULL, 0))
Packit 709fb3
		return REG_ESPACE;
Packit 709fb3
	      mbcset->coll_syms = new_coll_syms;
Packit 709fb3
	      *coll_sym_alloc = new_coll_sym_alloc;
Packit 709fb3
	    }
Packit 709fb3
	  mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
Packit 709fb3
	  return REG_NOERROR;
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
	{
Packit 709fb3
	  if (BE (name_len != 1, 0))
Packit 709fb3
	    return REG_ECOLLATE;
Packit 709fb3
	  else
Packit 709fb3
	    {
Packit 709fb3
	      bitset_set (sbcset, name[0]);
Packit 709fb3
	      return REG_NOERROR;
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
  re_token_t br_token;
Packit 709fb3
  re_bitset_ptr_t sbcset;
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  re_charset_t *mbcset;
Packit 709fb3
  Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
Packit 709fb3
  Idx equiv_class_alloc = 0, char_class_alloc = 0;
Packit 709fb3
#endif /* not RE_ENABLE_I18N */
Packit 709fb3
  bool non_match = false;
Packit 709fb3
  bin_tree_t *work_tree;
Packit 709fb3
  int token_len;
Packit 709fb3
  bool first_round = true;
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
  collseqmb = (const unsigned char *)
Packit 709fb3
    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
Packit 709fb3
  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
Packit 709fb3
  if (nrules)
Packit 709fb3
    {
Packit 709fb3
      /*
Packit 709fb3
      if (MB_CUR_MAX > 1)
Packit 709fb3
      */
Packit 709fb3
      collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
Packit 709fb3
      table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
Packit 709fb3
      symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
Packit 709fb3
						  _NL_COLLATE_SYMB_TABLEMB);
Packit 709fb3
      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
Packit 709fb3
						   _NL_COLLATE_SYMB_EXTRAMB);
Packit 709fb3
    }
Packit 709fb3
#endif
Packit 709fb3
  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  if (BE (sbcset == NULL || mbcset == NULL, 0))
Packit 709fb3
#else
Packit 709fb3
  if (BE (sbcset == NULL, 0))
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
    {
Packit 709fb3
      re_free (sbcset);
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
      re_free (mbcset);
Packit 709fb3
#endif
Packit 709fb3
      *err = REG_ESPACE;
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  token_len = peek_token_bracket (token, regexp, syntax);
Packit 709fb3
  if (BE (token->type == END_OF_RE, 0))
Packit 709fb3
    {
Packit 709fb3
      *err = REG_BADPAT;
Packit 709fb3
      goto parse_bracket_exp_free_return;
Packit 709fb3
    }
Packit 709fb3
  if (token->type == OP_NON_MATCH_LIST)
Packit 709fb3
    {
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
      mbcset->non_match = 1;
Packit 709fb3
#endif /* not RE_ENABLE_I18N */
Packit 709fb3
      non_match = true;
Packit 709fb3
      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
Packit 709fb3
	bitset_set (sbcset, '\n');
Packit 709fb3
      re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
Packit 709fb3
      token_len = peek_token_bracket (token, regexp, syntax);
Packit 709fb3
      if (BE (token->type == END_OF_RE, 0))
Packit 709fb3
	{
Packit 709fb3
	  *err = REG_BADPAT;
Packit 709fb3
	  goto parse_bracket_exp_free_return;
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* We treat the first ']' as a normal character.  */
Packit 709fb3
  if (token->type == OP_CLOSE_BRACKET)
Packit 709fb3
    token->type = CHARACTER;
Packit 709fb3
Packit 709fb3
  while (1)
Packit 709fb3
    {
Packit 709fb3
      bracket_elem_t start_elem, end_elem;
Packit 709fb3
      unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
Packit 709fb3
      unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
Packit 709fb3
      reg_errcode_t ret;
Packit 709fb3
      int token_len2 = 0;
Packit 709fb3
      bool is_range_exp = false;
Packit 709fb3
      re_token_t token2;
Packit 709fb3
Packit 709fb3
      start_elem.opr.name = start_name_buf;
Packit 709fb3
      start_elem.type = COLL_SYM;
Packit 709fb3
      ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
Packit 709fb3
				   syntax, first_round);
Packit 709fb3
      if (BE (ret != REG_NOERROR, 0))
Packit 709fb3
	{
Packit 709fb3
	  *err = ret;
Packit 709fb3
	  goto parse_bracket_exp_free_return;
Packit 709fb3
	}
Packit 709fb3
      first_round = false;
Packit 709fb3
Packit 709fb3
      /* Get information about the next token.  We need it in any case.  */
Packit 709fb3
      token_len = peek_token_bracket (token, regexp, syntax);
Packit 709fb3
Packit 709fb3
      /* Do not check for ranges if we know they are not allowed.  */
Packit 709fb3
      if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
Packit 709fb3
	{
Packit 709fb3
	  if (BE (token->type == END_OF_RE, 0))
Packit 709fb3
	    {
Packit 709fb3
	      *err = REG_EBRACK;
Packit 709fb3
	      goto parse_bracket_exp_free_return;
Packit 709fb3
	    }
Packit 709fb3
	  if (token->type == OP_CHARSET_RANGE)
Packit 709fb3
	    {
Packit 709fb3
	      re_string_skip_bytes (regexp, token_len); /* Skip '-'.  */
Packit 709fb3
	      token_len2 = peek_token_bracket (&token2, regexp, syntax);
Packit 709fb3
	      if (BE (token2.type == END_OF_RE, 0))
Packit 709fb3
		{
Packit 709fb3
		  *err = REG_EBRACK;
Packit 709fb3
		  goto parse_bracket_exp_free_return;
Packit 709fb3
		}
Packit 709fb3
	      if (token2.type == OP_CLOSE_BRACKET)
Packit 709fb3
		{
Packit 709fb3
		  /* We treat the last '-' as a normal character.  */
Packit 709fb3
		  re_string_skip_bytes (regexp, -token_len);
Packit 709fb3
		  token->type = CHARACTER;
Packit 709fb3
		}
Packit 709fb3
	      else
Packit 709fb3
		is_range_exp = true;
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
Packit 709fb3
      if (is_range_exp == true)
Packit 709fb3
	{
Packit 709fb3
	  end_elem.opr.name = end_name_buf;
Packit 709fb3
	  end_elem.type = COLL_SYM;
Packit 709fb3
	  ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
Packit 709fb3
				       dfa, syntax, true);
Packit 709fb3
	  if (BE (ret != REG_NOERROR, 0))
Packit 709fb3
	    {
Packit 709fb3
	      *err = ret;
Packit 709fb3
	      goto parse_bracket_exp_free_return;
Packit 709fb3
	    }
Packit 709fb3
Packit 709fb3
	  token_len = peek_token_bracket (token, regexp, syntax);
Packit 709fb3
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
	  *err = build_range_exp (sbcset, mbcset, &range_alloc,
Packit 709fb3
				  &start_elem, &end_elem);
Packit 709fb3
#else
Packit 709fb3
# ifdef RE_ENABLE_I18N
Packit 709fb3
	  *err = build_range_exp (syntax, sbcset,
Packit 709fb3
				  dfa->mb_cur_max > 1 ? mbcset : NULL,
Packit 709fb3
				  &range_alloc, &start_elem, &end_elem);
Packit 709fb3
# else
Packit 709fb3
	  *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem);
Packit 709fb3
# endif
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
	  if (BE (*err != REG_NOERROR, 0))
Packit 709fb3
	    goto parse_bracket_exp_free_return;
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
	{
Packit 709fb3
	  switch (start_elem.type)
Packit 709fb3
	    {
Packit 709fb3
	    case SB_CHAR:
Packit 709fb3
	      bitset_set (sbcset, start_elem.opr.ch);
Packit 709fb3
	      break;
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
	    case MB_CHAR:
Packit 709fb3
	      /* Check whether the array has enough space.  */
Packit 709fb3
	      if (BE (mbchar_alloc == mbcset->nmbchars, 0))
Packit 709fb3
		{
Packit 709fb3
		  wchar_t *new_mbchars;
Packit 709fb3
		  /* Not enough, realloc it.  */
Packit 709fb3
		  /* +1 in case of mbcset->nmbchars is 0.  */
Packit 709fb3
		  mbchar_alloc = 2 * mbcset->nmbchars + 1;
Packit 709fb3
		  /* Use realloc since array is NULL if *alloc == 0.  */
Packit 709fb3
		  new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
Packit 709fb3
					    mbchar_alloc);
Packit 709fb3
		  if (BE (new_mbchars == NULL, 0))
Packit 709fb3
		    goto parse_bracket_exp_espace;
Packit 709fb3
		  mbcset->mbchars = new_mbchars;
Packit 709fb3
		}
Packit 709fb3
	      mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
Packit 709fb3
	      break;
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
	    case EQUIV_CLASS:
Packit 709fb3
	      *err = build_equiv_class (sbcset,
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
					mbcset, &equiv_class_alloc,
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
					start_elem.opr.name);
Packit 709fb3
	      if (BE (*err != REG_NOERROR, 0))
Packit 709fb3
		goto parse_bracket_exp_free_return;
Packit 709fb3
	      break;
Packit 709fb3
	    case COLL_SYM:
Packit 709fb3
	      *err = build_collating_symbol (sbcset,
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
					     mbcset, &coll_sym_alloc,
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
					     start_elem.opr.name);
Packit 709fb3
	      if (BE (*err != REG_NOERROR, 0))
Packit 709fb3
		goto parse_bracket_exp_free_return;
Packit 709fb3
	      break;
Packit 709fb3
	    case CHAR_CLASS:
Packit 709fb3
	      *err = build_charclass (regexp->trans, sbcset,
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
				      mbcset, &char_class_alloc,
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
				      (const char *) start_elem.opr.name,
Packit 709fb3
				      syntax);
Packit 709fb3
	      if (BE (*err != REG_NOERROR, 0))
Packit 709fb3
	       goto parse_bracket_exp_free_return;
Packit 709fb3
	      break;
Packit 709fb3
	    default:
Packit 709fb3
	      assert (0);
Packit 709fb3
	      break;
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
      if (BE (token->type == END_OF_RE, 0))
Packit 709fb3
	{
Packit 709fb3
	  *err = REG_EBRACK;
Packit 709fb3
	  goto parse_bracket_exp_free_return;
Packit 709fb3
	}
Packit 709fb3
      if (token->type == OP_CLOSE_BRACKET)
Packit 709fb3
	break;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
Packit 709fb3
Packit 709fb3
  /* If it is non-matching list.  */
Packit 709fb3
  if (non_match)
Packit 709fb3
    bitset_not (sbcset);
Packit 709fb3
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  /* Ensure only single byte characters are set.  */
Packit 709fb3
  if (dfa->mb_cur_max > 1)
Packit 709fb3
    bitset_mask (sbcset, dfa->sb_char);
Packit 709fb3
Packit 709fb3
  if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
Packit 709fb3
      || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
Packit 709fb3
						     || mbcset->non_match)))
Packit 709fb3
    {
Packit 709fb3
      bin_tree_t *mbc_tree;
Packit 709fb3
      int sbc_idx;
Packit 709fb3
      /* Build a tree for complex bracket.  */
Packit 709fb3
      dfa->has_mb_node = 1;
Packit 709fb3
      br_token.type = COMPLEX_BRACKET;
Packit 709fb3
      br_token.opr.mbcset = mbcset;
Packit 709fb3
      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
Packit 709fb3
      if (BE (mbc_tree == NULL, 0))
Packit 709fb3
	goto parse_bracket_exp_espace;
Packit 709fb3
      for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
Packit 709fb3
	if (sbcset[sbc_idx])
Packit 709fb3
	  break;
Packit 709fb3
      /* If there are no bits set in sbcset, there is no point
Packit 709fb3
	 of having both SIMPLE_BRACKET and COMPLEX_BRACKET.  */
Packit 709fb3
      if (sbc_idx < BITSET_WORDS)
Packit 709fb3
	{
Packit 709fb3
	  /* Build a tree for simple bracket.  */
Packit 709fb3
	  br_token.type = SIMPLE_BRACKET;
Packit 709fb3
	  br_token.opr.sbcset = sbcset;
Packit 709fb3
	  work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
Packit 709fb3
	  if (BE (work_tree == NULL, 0))
Packit 709fb3
	    goto parse_bracket_exp_espace;
Packit 709fb3
Packit 709fb3
	  /* Then join them by ALT node.  */
Packit 709fb3
	  work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
Packit 709fb3
	  if (BE (work_tree == NULL, 0))
Packit 709fb3
	    goto parse_bracket_exp_espace;
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
	{
Packit 709fb3
	  re_free (sbcset);
Packit 709fb3
	  work_tree = mbc_tree;
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
#endif /* not RE_ENABLE_I18N */
Packit 709fb3
    {
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
      free_charset (mbcset);
Packit 709fb3
#endif
Packit 709fb3
      /* Build a tree for simple bracket.  */
Packit 709fb3
      br_token.type = SIMPLE_BRACKET;
Packit 709fb3
      br_token.opr.sbcset = sbcset;
Packit 709fb3
      work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
Packit 709fb3
      if (BE (work_tree == NULL, 0))
Packit 709fb3
	goto parse_bracket_exp_espace;
Packit 709fb3
    }
Packit 709fb3
  return work_tree;
Packit 709fb3
Packit 709fb3
 parse_bracket_exp_espace:
Packit 709fb3
  *err = REG_ESPACE;
Packit 709fb3
 parse_bracket_exp_free_return:
Packit 709fb3
  re_free (sbcset);
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  free_charset (mbcset);
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
  return NULL;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Parse an element in the bracket expression.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
Packit 709fb3
		       re_token_t *token, int token_len, re_dfa_t *dfa,
Packit 709fb3
		       reg_syntax_t syntax, bool accept_hyphen)
Packit 709fb3
{
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  int cur_char_size;
Packit 709fb3
  cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
Packit 709fb3
  if (cur_char_size > 1)
Packit 709fb3
    {
Packit 709fb3
      elem->type = MB_CHAR;
Packit 709fb3
      elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
Packit 709fb3
      re_string_skip_bytes (regexp, cur_char_size);
Packit 709fb3
      return REG_NOERROR;
Packit 709fb3
    }
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
Packit 709fb3
  if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
Packit 709fb3
      || token->type == OP_OPEN_EQUIV_CLASS)
Packit 709fb3
    return parse_bracket_symbol (elem, regexp, token);
Packit 709fb3
  if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
Packit 709fb3
    {
Packit 709fb3
      /* A '-' must only appear as anything but a range indicator before
Packit 709fb3
	 the closing bracket.  Everything else is an error.  */
Packit 709fb3
      re_token_t token2;
Packit 709fb3
      (void) peek_token_bracket (&token2, regexp, syntax);
Packit 709fb3
      if (token2.type != OP_CLOSE_BRACKET)
Packit 709fb3
	/* The actual error value is not standardized since this whole
Packit 709fb3
	   case is undefined.  But ERANGE makes good sense.  */
Packit 709fb3
	return REG_ERANGE;
Packit 709fb3
    }
Packit 709fb3
  elem->type = SB_CHAR;
Packit 709fb3
  elem->opr.ch = token->opr.c;
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Parse a bracket symbol in the bracket expression.  Bracket symbols are
Packit 709fb3
   such as [:<character_class>:], [.<collating_element>.], and
Packit 709fb3
   [=<equivalent_class>=].  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
Packit 709fb3
		      re_token_t *token)
Packit 709fb3
{
Packit 709fb3
  unsigned char ch, delim = token->opr.c;
Packit 709fb3
  int i = 0;
Packit 709fb3
  if (re_string_eoi(regexp))
Packit 709fb3
    return REG_EBRACK;
Packit 709fb3
  for (;; ++i)
Packit 709fb3
    {
Packit 709fb3
      if (i >= BRACKET_NAME_BUF_SIZE)
Packit 709fb3
	return REG_EBRACK;
Packit 709fb3
      if (token->type == OP_OPEN_CHAR_CLASS)
Packit 709fb3
	ch = re_string_fetch_byte_case (regexp);
Packit 709fb3
      else
Packit 709fb3
	ch = re_string_fetch_byte (regexp);
Packit 709fb3
      if (re_string_eoi(regexp))
Packit 709fb3
	return REG_EBRACK;
Packit 709fb3
      if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
Packit 709fb3
	break;
Packit 709fb3
      elem->opr.name[i] = ch;
Packit 709fb3
    }
Packit 709fb3
  re_string_skip_bytes (regexp, 1);
Packit 709fb3
  elem->opr.name[i] = '\0';
Packit 709fb3
  switch (token->type)
Packit 709fb3
    {
Packit 709fb3
    case OP_OPEN_COLL_ELEM:
Packit 709fb3
      elem->type = COLL_SYM;
Packit 709fb3
      break;
Packit 709fb3
    case OP_OPEN_EQUIV_CLASS:
Packit 709fb3
      elem->type = EQUIV_CLASS;
Packit 709fb3
      break;
Packit 709fb3
    case OP_OPEN_CHAR_CLASS:
Packit 709fb3
      elem->type = CHAR_CLASS;
Packit 709fb3
      break;
Packit 709fb3
    default:
Packit 709fb3
      break;
Packit 709fb3
    }
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
  /* Helper function for parse_bracket_exp.
Packit 709fb3
     Build the equivalence class which is represented by NAME.
Packit 709fb3
     The result are written to MBCSET and SBCSET.
Packit 709fb3
     EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
Packit 709fb3
     is a pointer argument since we may update it.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
Packit 709fb3
		   Idx *equiv_class_alloc, const unsigned char *name)
Packit 709fb3
#else /* not RE_ENABLE_I18N */
Packit 709fb3
build_equiv_class (bitset_t sbcset, const unsigned char *name)
Packit 709fb3
#endif /* not RE_ENABLE_I18N */
Packit 709fb3
{
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
Packit 709fb3
  if (nrules != 0)
Packit 709fb3
    {
Packit 709fb3
      const int32_t *table, *indirect;
Packit 709fb3
      const unsigned char *weights, *extra, *cp;
Packit 709fb3
      unsigned char char_buf[2];
Packit 709fb3
      int32_t idx1, idx2;
Packit 709fb3
      unsigned int ch;
Packit 709fb3
      size_t len;
Packit 709fb3
      /* Calculate the index for equivalence class.  */
Packit 709fb3
      cp = name;
Packit 709fb3
      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
Packit 709fb3
      weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
Packit 709fb3
					       _NL_COLLATE_WEIGHTMB);
Packit 709fb3
      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
Packit 709fb3
						   _NL_COLLATE_EXTRAMB);
Packit 709fb3
      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
Packit 709fb3
						_NL_COLLATE_INDIRECTMB);
Packit 709fb3
      idx1 = findidx (table, indirect, extra, &cp, -1);
Packit 709fb3
      if (BE (idx1 == 0 || *cp != '\0', 0))
Packit 709fb3
	/* This isn't a valid character.  */
Packit 709fb3
	return REG_ECOLLATE;
Packit 709fb3
Packit 709fb3
      /* Build single byte matching table for this equivalence class.  */
Packit 709fb3
      len = weights[idx1 & 0xffffff];
Packit 709fb3
      for (ch = 0; ch < SBC_MAX; ++ch)
Packit 709fb3
	{
Packit 709fb3
	  char_buf[0] = ch;
Packit 709fb3
	  cp = char_buf;
Packit 709fb3
	  idx2 = findidx (table, indirect, extra, &cp, 1);
Packit 709fb3
/*
Packit 709fb3
	  idx2 = table[ch];
Packit 709fb3
*/
Packit 709fb3
	  if (idx2 == 0)
Packit 709fb3
	    /* This isn't a valid character.  */
Packit 709fb3
	    continue;
Packit 709fb3
	  /* Compare only if the length matches and the collation rule
Packit 709fb3
	     index is the same.  */
Packit 709fb3
	  if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
Packit 709fb3
	    {
Packit 709fb3
	      int cnt = 0;
Packit 709fb3
Packit 709fb3
	      while (cnt <= len &&
Packit 709fb3
		     weights[(idx1 & 0xffffff) + 1 + cnt]
Packit 709fb3
		     == weights[(idx2 & 0xffffff) + 1 + cnt])
Packit 709fb3
		++cnt;
Packit 709fb3
Packit 709fb3
	      if (cnt > len)
Packit 709fb3
		bitset_set (sbcset, ch);
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
      /* Check whether the array has enough space.  */
Packit 709fb3
      if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
Packit 709fb3
	{
Packit 709fb3
	  /* Not enough, realloc it.  */
Packit 709fb3
	  /* +1 in case of mbcset->nequiv_classes is 0.  */
Packit 709fb3
	  Idx new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
Packit 709fb3
	  /* Use realloc since the array is NULL if *alloc == 0.  */
Packit 709fb3
	  int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
Packit 709fb3
						   int32_t,
Packit 709fb3
						   new_equiv_class_alloc);
Packit 709fb3
	  if (BE (new_equiv_classes == NULL, 0))
Packit 709fb3
	    return REG_ESPACE;
Packit 709fb3
	  mbcset->equiv_classes = new_equiv_classes;
Packit 709fb3
	  *equiv_class_alloc = new_equiv_class_alloc;
Packit 709fb3
	}
Packit 709fb3
      mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
#endif /* _LIBC */
Packit 709fb3
    {
Packit 709fb3
      if (BE (strlen ((const char *) name) != 1, 0))
Packit 709fb3
	return REG_ECOLLATE;
Packit 709fb3
      bitset_set (sbcset, *name);
Packit 709fb3
    }
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
  /* Helper function for parse_bracket_exp.
Packit 709fb3
     Build the character class which is represented by NAME.
Packit 709fb3
     The result are written to MBCSET and SBCSET.
Packit 709fb3
     CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
Packit 709fb3
     is a pointer argument since we may update it.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
Packit 709fb3
		 re_charset_t *mbcset, Idx *char_class_alloc,
Packit 709fb3
		 const char *class_name, reg_syntax_t syntax)
Packit 709fb3
#else /* not RE_ENABLE_I18N */
Packit 709fb3
build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
Packit 709fb3
		 const char *class_name, reg_syntax_t syntax)
Packit 709fb3
#endif /* not RE_ENABLE_I18N */
Packit 709fb3
{
Packit 709fb3
  int i;
Packit 709fb3
  const char *name = class_name;
Packit 709fb3
Packit 709fb3
  /* In case of REG_ICASE "upper" and "lower" match the both of
Packit 709fb3
     upper and lower cases.  */
Packit 709fb3
  if ((syntax & RE_ICASE)
Packit 709fb3
      && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
Packit 709fb3
    name = "alpha";
Packit 709fb3
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  /* Check the space of the arrays.  */
Packit 709fb3
  if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
Packit 709fb3
    {
Packit 709fb3
      /* Not enough, realloc it.  */
Packit 709fb3
      /* +1 in case of mbcset->nchar_classes is 0.  */
Packit 709fb3
      Idx new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
Packit 709fb3
      /* Use realloc since array is NULL if *alloc == 0.  */
Packit 709fb3
      wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
Packit 709fb3
					       new_char_class_alloc);
Packit 709fb3
      if (BE (new_char_classes == NULL, 0))
Packit 709fb3
	return REG_ESPACE;
Packit 709fb3
      mbcset->char_classes = new_char_classes;
Packit 709fb3
      *char_class_alloc = new_char_class_alloc;
Packit 709fb3
    }
Packit 709fb3
  mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
Packit 709fb3
#define BUILD_CHARCLASS_LOOP(ctype_func)	\
Packit 709fb3
  do {						\
Packit 709fb3
    if (BE (trans != NULL, 0))			\
Packit 709fb3
      {						\
Packit 709fb3
	for (i = 0; i < SBC_MAX; ++i)		\
Packit 709fb3
	  if (ctype_func (i))			\
Packit 709fb3
	    bitset_set (sbcset, trans[i]);	\
Packit 709fb3
      }						\
Packit 709fb3
    else					\
Packit 709fb3
      {						\
Packit 709fb3
	for (i = 0; i < SBC_MAX; ++i)		\
Packit 709fb3
	  if (ctype_func (i))			\
Packit 709fb3
	    bitset_set (sbcset, i);		\
Packit 709fb3
      }						\
Packit 709fb3
  } while (0)
Packit 709fb3
Packit 709fb3
  if (strcmp (name, "alnum") == 0)
Packit 709fb3
    BUILD_CHARCLASS_LOOP (isalnum);
Packit 709fb3
  else if (strcmp (name, "cntrl") == 0)
Packit 709fb3
    BUILD_CHARCLASS_LOOP (iscntrl);
Packit 709fb3
  else if (strcmp (name, "lower") == 0)
Packit 709fb3
    BUILD_CHARCLASS_LOOP (islower);
Packit 709fb3
  else if (strcmp (name, "space") == 0)
Packit 709fb3
    BUILD_CHARCLASS_LOOP (isspace);
Packit 709fb3
  else if (strcmp (name, "alpha") == 0)
Packit 709fb3
    BUILD_CHARCLASS_LOOP (isalpha);
Packit 709fb3
  else if (strcmp (name, "digit") == 0)
Packit 709fb3
    BUILD_CHARCLASS_LOOP (isdigit);
Packit 709fb3
  else if (strcmp (name, "print") == 0)
Packit 709fb3
    BUILD_CHARCLASS_LOOP (isprint);
Packit 709fb3
  else if (strcmp (name, "upper") == 0)
Packit 709fb3
    BUILD_CHARCLASS_LOOP (isupper);
Packit 709fb3
  else if (strcmp (name, "blank") == 0)
Packit 709fb3
    BUILD_CHARCLASS_LOOP (isblank);
Packit 709fb3
  else if (strcmp (name, "graph") == 0)
Packit 709fb3
    BUILD_CHARCLASS_LOOP (isgraph);
Packit 709fb3
  else if (strcmp (name, "punct") == 0)
Packit 709fb3
    BUILD_CHARCLASS_LOOP (ispunct);
Packit 709fb3
  else if (strcmp (name, "xdigit") == 0)
Packit 709fb3
    BUILD_CHARCLASS_LOOP (isxdigit);
Packit 709fb3
  else
Packit 709fb3
    return REG_ECTYPE;
Packit 709fb3
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static bin_tree_t *
Packit 709fb3
build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
Packit 709fb3
		    const char *class_name,
Packit 709fb3
		    const char *extra, bool non_match,
Packit 709fb3
		    reg_errcode_t *err)
Packit 709fb3
{
Packit 709fb3
  re_bitset_ptr_t sbcset;
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  re_charset_t *mbcset;
Packit 709fb3
  Idx alloc = 0;
Packit 709fb3
#endif /* not RE_ENABLE_I18N */
Packit 709fb3
  reg_errcode_t ret;
Packit 709fb3
  re_token_t br_token;
Packit 709fb3
  bin_tree_t *tree;
Packit 709fb3
Packit 709fb3
  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
Packit 709fb3
  if (BE (sbcset == NULL, 0))
Packit 709fb3
    {
Packit 709fb3
      *err = REG_ESPACE;
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
Packit 709fb3
  if (BE (mbcset == NULL, 0))
Packit 709fb3
    {
Packit 709fb3
      re_free (sbcset);
Packit 709fb3
      *err = REG_ESPACE;
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
  mbcset->non_match = non_match;
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
Packit 709fb3
  /* We don't care the syntax in this case.  */
Packit 709fb3
  ret = build_charclass (trans, sbcset,
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
			 mbcset, &alloc,
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
			 class_name, 0);
Packit 709fb3
Packit 709fb3
  if (BE (ret != REG_NOERROR, 0))
Packit 709fb3
    {
Packit 709fb3
      re_free (sbcset);
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
      free_charset (mbcset);
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
      *err = ret;
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
  /* \w match '_' also.  */
Packit 709fb3
  for (; *extra; extra++)
Packit 709fb3
    bitset_set (sbcset, *extra);
Packit 709fb3
Packit 709fb3
  /* If it is non-matching list.  */
Packit 709fb3
  if (non_match)
Packit 709fb3
    bitset_not (sbcset);
Packit 709fb3
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  /* Ensure only single byte characters are set.  */
Packit 709fb3
  if (dfa->mb_cur_max > 1)
Packit 709fb3
    bitset_mask (sbcset, dfa->sb_char);
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
  /* Build a tree for simple bracket.  */
Packit 709fb3
#if defined GCC_LINT || defined lint
Packit 709fb3
  memset (&br_token, 0, sizeof br_token);
Packit 709fb3
#endif
Packit 709fb3
  br_token.type = SIMPLE_BRACKET;
Packit 709fb3
  br_token.opr.sbcset = sbcset;
Packit 709fb3
  tree = create_token_tree (dfa, NULL, NULL, &br_token);
Packit 709fb3
  if (BE (tree == NULL, 0))
Packit 709fb3
    goto build_word_op_espace;
Packit 709fb3
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  if (dfa->mb_cur_max > 1)
Packit 709fb3
    {
Packit 709fb3
      bin_tree_t *mbc_tree;
Packit 709fb3
      /* Build a tree for complex bracket.  */
Packit 709fb3
      br_token.type = COMPLEX_BRACKET;
Packit 709fb3
      br_token.opr.mbcset = mbcset;
Packit 709fb3
      dfa->has_mb_node = 1;
Packit 709fb3
      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
Packit 709fb3
      if (BE (mbc_tree == NULL, 0))
Packit 709fb3
	goto build_word_op_espace;
Packit 709fb3
      /* Then join them by ALT node.  */
Packit 709fb3
      tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
Packit 709fb3
      if (BE (mbc_tree != NULL, 1))
Packit 709fb3
	return tree;
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      free_charset (mbcset);
Packit 709fb3
      return tree;
Packit 709fb3
    }
Packit 709fb3
#else /* not RE_ENABLE_I18N */
Packit 709fb3
  return tree;
Packit 709fb3
#endif /* not RE_ENABLE_I18N */
Packit 709fb3
Packit 709fb3
 build_word_op_espace:
Packit 709fb3
  re_free (sbcset);
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  free_charset (mbcset);
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
  *err = REG_ESPACE;
Packit 709fb3
  return NULL;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* This is intended for the expressions like "a{1,3}".
Packit 709fb3
   Fetch a number from 'input', and return the number.
Packit 709fb3
   Return -1 if the number field is empty like "{,1}".
Packit 709fb3
   Return RE_DUP_MAX + 1 if the number field is too large.
Packit 709fb3
   Return -2 if an error occurred.  */
Packit 709fb3
Packit 709fb3
static Idx
Packit 709fb3
fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
Packit 709fb3
{
Packit 709fb3
  Idx num = -1;
Packit 709fb3
  unsigned char c;
Packit 709fb3
  while (1)
Packit 709fb3
    {
Packit 709fb3
      fetch_token (token, input, syntax);
Packit 709fb3
      c = token->opr.c;
Packit 709fb3
      if (BE (token->type == END_OF_RE, 0))
Packit 709fb3
	return -2;
Packit 709fb3
      if (token->type == OP_CLOSE_DUP_NUM || c == ',')
Packit 709fb3
	break;
Packit 709fb3
      num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
Packit 709fb3
	     ? -2
Packit 709fb3
	     : num == -1
Packit 709fb3
	     ? c - '0'
Packit 709fb3
	     : MIN (RE_DUP_MAX + 1, num * 10 + c - '0'));
Packit 709fb3
    }
Packit 709fb3
  return num;
Packit 709fb3
}
Packit 709fb3

Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
static void
Packit 709fb3
free_charset (re_charset_t *cset)
Packit 709fb3
{
Packit 709fb3
  re_free (cset->mbchars);
Packit 709fb3
# ifdef _LIBC
Packit 709fb3
  re_free (cset->coll_syms);
Packit 709fb3
  re_free (cset->equiv_classes);
Packit 709fb3
  re_free (cset->range_starts);
Packit 709fb3
  re_free (cset->range_ends);
Packit 709fb3
# endif
Packit 709fb3
  re_free (cset->char_classes);
Packit 709fb3
  re_free (cset);
Packit 709fb3
}
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3

Packit 709fb3
/* Functions for binary tree operation.  */
Packit 709fb3
Packit 709fb3
/* Create a tree node.  */
Packit 709fb3
Packit 709fb3
static bin_tree_t *
Packit 709fb3
create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
Packit 709fb3
	     re_token_type_t type)
Packit 709fb3
{
Packit 709fb3
  re_token_t t;
Packit 709fb3
#if defined GCC_LINT || defined lint
Packit 709fb3
  memset (&t, 0, sizeof t);
Packit 709fb3
#endif
Packit 709fb3
  t.type = type;
Packit 709fb3
  return create_token_tree (dfa, left, right, &t);
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static bin_tree_t *
Packit 709fb3
create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
Packit 709fb3
		   const re_token_t *token)
Packit 709fb3
{
Packit 709fb3
  bin_tree_t *tree;
Packit 709fb3
  if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
Packit 709fb3
    {
Packit 709fb3
      bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
Packit 709fb3
Packit 709fb3
      if (storage == NULL)
Packit 709fb3
	return NULL;
Packit 709fb3
      storage->next = dfa->str_tree_storage;
Packit 709fb3
      dfa->str_tree_storage = storage;
Packit 709fb3
      dfa->str_tree_storage_idx = 0;
Packit 709fb3
    }
Packit 709fb3
  tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
Packit 709fb3
Packit 709fb3
  tree->parent = NULL;
Packit 709fb3
  tree->left = left;
Packit 709fb3
  tree->right = right;
Packit 709fb3
  tree->token = *token;
Packit 709fb3
  tree->token.duplicated = 0;
Packit 709fb3
  tree->token.opt_subexp = 0;
Packit 709fb3
  tree->first = NULL;
Packit 709fb3
  tree->next = NULL;
Packit 709fb3
  tree->node_idx = -1;
Packit 709fb3
Packit 709fb3
  if (left != NULL)
Packit 709fb3
    left->parent = tree;
Packit 709fb3
  if (right != NULL)
Packit 709fb3
    right->parent = tree;
Packit 709fb3
  return tree;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Mark the tree SRC as an optional subexpression.
Packit 709fb3
   To be called from preorder or postorder.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
mark_opt_subexp (void *extra, bin_tree_t *node)
Packit 709fb3
{
Packit 709fb3
  Idx idx = (uintptr_t) extra;
Packit 709fb3
  if (node->token.type == SUBEXP && node->token.opr.idx == idx)
Packit 709fb3
    node->token.opt_subexp = 1;
Packit 709fb3
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Free the allocated memory inside NODE. */
Packit 709fb3
Packit 709fb3
static void
Packit 709fb3
free_token (re_token_t *node)
Packit 709fb3
{
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
Packit 709fb3
    free_charset (node->opr.mbcset);
Packit 709fb3
  else
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
    if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
Packit 709fb3
      re_free (node->opr.sbcset);
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Worker function for tree walking.  Free the allocated memory inside NODE
Packit 709fb3
   and its children. */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
free_tree (void *extra, bin_tree_t *node)
Packit 709fb3
{
Packit 709fb3
  free_token (&node->token);
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
Packit 709fb3
/* Duplicate the node SRC, and return new node.  This is a preorder
Packit 709fb3
   visit similar to the one implemented by the generic visitor, but
Packit 709fb3
   we need more infrastructure to maintain two parallel trees --- so,
Packit 709fb3
   it's easier to duplicate.  */
Packit 709fb3
Packit 709fb3
static bin_tree_t *
Packit 709fb3
duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
Packit 709fb3
{
Packit 709fb3
  const bin_tree_t *node;
Packit 709fb3
  bin_tree_t *dup_root;
Packit 709fb3
  bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
Packit 709fb3
Packit 709fb3
  for (node = root; ; )
Packit 709fb3
    {
Packit 709fb3
      /* Create a new tree and link it back to the current parent.  */
Packit 709fb3
      *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
Packit 709fb3
      if (*p_new == NULL)
Packit 709fb3
	return NULL;
Packit 709fb3
      (*p_new)->parent = dup_node;
Packit 709fb3
      (*p_new)->token.duplicated = 1;
Packit 709fb3
      dup_node = *p_new;
Packit 709fb3
Packit 709fb3
      /* Go to the left node, or up and to the right.  */
Packit 709fb3
      if (node->left)
Packit 709fb3
	{
Packit 709fb3
	  node = node->left;
Packit 709fb3
	  p_new = &dup_node->left;
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
	{
Packit 709fb3
	  const bin_tree_t *prev = NULL;
Packit 709fb3
	  while (node->right == prev || node->right == NULL)
Packit 709fb3
	    {
Packit 709fb3
	      prev = node;
Packit 709fb3
	      node = node->parent;
Packit 709fb3
	      dup_node = dup_node->parent;
Packit 709fb3
	      if (!node)
Packit 709fb3
		return dup_root;
Packit 709fb3
	    }
Packit 709fb3
	  node = node->right;
Packit 709fb3
	  p_new = &dup_node->right;
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
}