Blame posix/regex_internal.c

Packit 6c4009
/* Extended regular expression matching and search library.
Packit 6c4009
   Copyright (C) 2002-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <https://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
static void re_string_construct_common (const char *str, Idx len,
Packit 6c4009
					re_string_t *pstr,
Packit 6c4009
					RE_TRANSLATE_TYPE trans, bool icase,
Packit 6c4009
					const re_dfa_t *dfa);
Packit 6c4009
static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
Packit 6c4009
					  const re_node_set *nodes,
Packit 6c4009
					  re_hashval_t hash);
Packit 6c4009
static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
Packit 6c4009
					  const re_node_set *nodes,
Packit 6c4009
					  unsigned int context,
Packit 6c4009
					  re_hashval_t hash);
Packit 6c4009
static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
Packit 6c4009
						Idx new_buf_len);
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
static void build_wcs_buffer (re_string_t *pstr);
Packit 6c4009
static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr);
Packit 6c4009
#endif /* RE_ENABLE_I18N */
Packit 6c4009
static void build_upper_buffer (re_string_t *pstr);
Packit 6c4009
static void re_string_translate_buffer (re_string_t *pstr);
Packit 6c4009
static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
Packit 6c4009
					  int eflags) __attribute__ ((pure));
Packit 6c4009

Packit 6c4009
/* Functions for string operations.  */
Packit 6c4009
Packit 6c4009
/* This function allocate the buffers.  It is necessary to call
Packit 6c4009
   re_string_reconstruct before using the object.  */
Packit 6c4009
Packit 6c4009
static reg_errcode_t
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
Packit 6c4009
		    RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
Packit 6c4009
{
Packit 6c4009
  reg_errcode_t ret;
Packit 6c4009
  Idx init_buf_len;
Packit 6c4009
Packit 6c4009
  /* Ensure at least one character fits into the buffers.  */
Packit 6c4009
  if (init_len < dfa->mb_cur_max)
Packit 6c4009
    init_len = dfa->mb_cur_max;
Packit 6c4009
  init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
Packit 6c4009
  re_string_construct_common (str, len, pstr, trans, icase, dfa);
Packit 6c4009
Packit 6c4009
  ret = re_string_realloc_buffers (pstr, init_buf_len);
Packit 6c4009
  if (BE (ret != REG_NOERROR, 0))
Packit 6c4009
    return ret;
Packit 6c4009
Packit 6c4009
  pstr->word_char = dfa->word_char;
Packit 6c4009
  pstr->word_ops_used = dfa->word_ops_used;
Packit 6c4009
  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
Packit 6c4009
  pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
Packit 6c4009
  pstr->valid_raw_len = pstr->valid_len;
Packit 6c4009
  return REG_NOERROR;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* This function allocate the buffers, and initialize them.  */
Packit 6c4009
Packit 6c4009
static reg_errcode_t
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
re_string_construct (re_string_t *pstr, const char *str, Idx len,
Packit 6c4009
		     RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
Packit 6c4009
{
Packit 6c4009
  reg_errcode_t ret;
Packit 6c4009
  memset (pstr, '\0', sizeof (re_string_t));
Packit 6c4009
  re_string_construct_common (str, len, pstr, trans, icase, dfa);
Packit 6c4009
Packit 6c4009
  if (len > 0)
Packit 6c4009
    {
Packit 6c4009
      ret = re_string_realloc_buffers (pstr, len + 1);
Packit 6c4009
      if (BE (ret != REG_NOERROR, 0))
Packit 6c4009
	return ret;
Packit 6c4009
    }
Packit 6c4009
  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
Packit 6c4009
Packit 6c4009
  if (icase)
Packit 6c4009
    {
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
      if (dfa->mb_cur_max > 1)
Packit 6c4009
	{
Packit 6c4009
	  while (1)
Packit 6c4009
	    {
Packit 6c4009
	      ret = build_wcs_upper_buffer (pstr);
Packit 6c4009
	      if (BE (ret != REG_NOERROR, 0))
Packit 6c4009
		return ret;
Packit 6c4009
	      if (pstr->valid_raw_len >= len)
Packit 6c4009
		break;
Packit 6c4009
	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
Packit 6c4009
		break;
Packit 6c4009
	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
Packit 6c4009
	      if (BE (ret != REG_NOERROR, 0))
Packit 6c4009
		return ret;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
#endif /* RE_ENABLE_I18N  */
Packit 6c4009
	build_upper_buffer (pstr);
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
      if (dfa->mb_cur_max > 1)
Packit 6c4009
	build_wcs_buffer (pstr);
Packit 6c4009
      else
Packit 6c4009
#endif /* RE_ENABLE_I18N  */
Packit 6c4009
	{
Packit 6c4009
	  if (trans != NULL)
Packit 6c4009
	    re_string_translate_buffer (pstr);
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      pstr->valid_len = pstr->bufs_len;
Packit 6c4009
	      pstr->valid_raw_len = pstr->bufs_len;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  return REG_NOERROR;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Helper functions for re_string_allocate, and re_string_construct.  */
Packit 6c4009
Packit 6c4009
static reg_errcode_t
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
Packit 6c4009
{
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
  if (pstr->mb_cur_max > 1)
Packit 6c4009
    {
Packit 6c4009
      wint_t *new_wcs;
Packit 6c4009
Packit 6c4009
      /* Avoid overflow in realloc.  */
Packit 6c4009
      const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
Packit 6c4009
      if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_buf_len, 0))
Packit 6c4009
	return REG_ESPACE;
Packit 6c4009
Packit 6c4009
      new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
Packit 6c4009
      if (BE (new_wcs == NULL, 0))
Packit 6c4009
	return REG_ESPACE;
Packit 6c4009
      pstr->wcs = new_wcs;
Packit 6c4009
      if (pstr->offsets != NULL)
Packit 6c4009
	{
Packit 6c4009
	  Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
Packit 6c4009
	  if (BE (new_offsets == NULL, 0))
Packit 6c4009
	    return REG_ESPACE;
Packit 6c4009
	  pstr->offsets = new_offsets;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
#endif /* RE_ENABLE_I18N  */
Packit 6c4009
  if (pstr->mbs_allocated)
Packit 6c4009
    {
Packit 6c4009
      unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
Packit 6c4009
					   new_buf_len);
Packit 6c4009
      if (BE (new_mbs == NULL, 0))
Packit 6c4009
	return REG_ESPACE;
Packit 6c4009
      pstr->mbs = new_mbs;
Packit 6c4009
    }
Packit 6c4009
  pstr->bufs_len = new_buf_len;
Packit 6c4009
  return REG_NOERROR;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
static void
Packit 6c4009
re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
Packit 6c4009
			    RE_TRANSLATE_TYPE trans, bool icase,
Packit 6c4009
			    const re_dfa_t *dfa)
Packit 6c4009
{
Packit 6c4009
  pstr->raw_mbs = (const unsigned char *) str;
Packit 6c4009
  pstr->len = len;
Packit 6c4009
  pstr->raw_len = len;
Packit 6c4009
  pstr->trans = trans;
Packit 6c4009
  pstr->icase = icase;
Packit 6c4009
  pstr->mbs_allocated = (trans != NULL || icase);
Packit 6c4009
  pstr->mb_cur_max = dfa->mb_cur_max;
Packit 6c4009
  pstr->is_utf8 = dfa->is_utf8;
Packit 6c4009
  pstr->map_notascii = dfa->map_notascii;
Packit 6c4009
  pstr->stop = pstr->len;
Packit 6c4009
  pstr->raw_stop = pstr->stop;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
Packit 6c4009
/* Build wide character buffer PSTR->WCS.
Packit 6c4009
   If the byte sequence of the string are:
Packit 6c4009
     <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
Packit 6c4009
   Then wide character buffer will be:
Packit 6c4009
     <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
Packit 6c4009
   We use WEOF for padding, they indicate that the position isn't
Packit 6c4009
   a first byte of a multibyte character.
Packit 6c4009
Packit 6c4009
   Note that this function assumes PSTR->VALID_LEN elements are already
Packit 6c4009
   built and starts from PSTR->VALID_LEN.  */
Packit 6c4009
Packit 6c4009
static void
Packit 6c4009
build_wcs_buffer (re_string_t *pstr)
Packit 6c4009
{
Packit 6c4009
#ifdef _LIBC
Packit 6c4009
  unsigned char buf[MB_LEN_MAX];
Packit 6c4009
  assert (MB_LEN_MAX >= pstr->mb_cur_max);
Packit 6c4009
#else
Packit 6c4009
  unsigned char buf[64];
Packit 6c4009
#endif
Packit 6c4009
  mbstate_t prev_st;
Packit 6c4009
  Idx byte_idx, end_idx, remain_len;
Packit 6c4009
  size_t mbclen;
Packit 6c4009
Packit 6c4009
  /* Build the buffers from pstr->valid_len to either pstr->len or
Packit 6c4009
     pstr->bufs_len.  */
Packit 6c4009
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit 6c4009
  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
Packit 6c4009
    {
Packit 6c4009
      wchar_t wc;
Packit 6c4009
      const char *p;
Packit 6c4009
Packit 6c4009
      remain_len = end_idx - byte_idx;
Packit 6c4009
      prev_st = pstr->cur_state;
Packit 6c4009
      /* Apply the translation if we need.  */
Packit 6c4009
      if (BE (pstr->trans != NULL, 0))
Packit 6c4009
	{
Packit 6c4009
	  int i, ch;
Packit 6c4009
Packit 6c4009
	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
Packit 6c4009
	    {
Packit 6c4009
	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
Packit 6c4009
	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
Packit 6c4009
	    }
Packit 6c4009
	  p = (const char *) buf;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
Packit 6c4009
      mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
Packit 6c4009
      if (BE (mbclen == (size_t) -1 || mbclen == 0
Packit 6c4009
	      || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
Packit 6c4009
	{
Packit 6c4009
	  /* We treat these cases as a singlebyte character.  */
Packit 6c4009
	  mbclen = 1;
Packit 6c4009
	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
Packit 6c4009
	  if (BE (pstr->trans != NULL, 0))
Packit 6c4009
	    wc = pstr->trans[wc];
Packit 6c4009
	  pstr->cur_state = prev_st;
Packit 6c4009
	}
Packit 6c4009
      else if (BE (mbclen == (size_t) -2, 0))
Packit 6c4009
	{
Packit 6c4009
	  /* The buffer doesn't have enough space, finish to build.  */
Packit 6c4009
	  pstr->cur_state = prev_st;
Packit 6c4009
	  break;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      /* Write wide character and padding.  */
Packit 6c4009
      pstr->wcs[byte_idx++] = wc;
Packit 6c4009
      /* Write paddings.  */
Packit 6c4009
      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
Packit 6c4009
	pstr->wcs[byte_idx++] = WEOF;
Packit 6c4009
    }
Packit 6c4009
  pstr->valid_len = byte_idx;
Packit 6c4009
  pstr->valid_raw_len = byte_idx;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
   but for REG_ICASE: every character is converted with __towupper and
   the resulting bytes are stored in PSTR->MBS.

   Conversion resumes at PSTR->VALID_LEN and runs up to the smaller of
   PSTR->LEN and PSTR->BUFS_LEN.  When the upper case form of a character
   is encoded with a different number of bytes than the original,
   PSTR->OFFSETS is allocated if necessary and filled so that buffer
   positions map back to raw string positions, and PSTR->LEN and
   PSTR->STOP are adjusted by the length difference.

   Return REG_NOERROR, or REG_ESPACE if PSTR->OFFSETS cannot be
   allocated.  On a REG_NOERROR return PSTR->VALID_LEN holds the buffer
   position reached and PSTR->VALID_RAW_LEN the raw position reached.  */
Packit 6c4009
Packit 6c4009
static reg_errcode_t
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
build_wcs_upper_buffer (re_string_t *pstr)
Packit 6c4009
{
Packit 6c4009
  mbstate_t prev_st;
Packit 6c4009
  Idx src_idx, byte_idx, end_idx, remain_len;
Packit 6c4009
  size_t mbclen;
Packit 6c4009
#ifdef _LIBC
Packit 6c4009
  char buf[MB_LEN_MAX];
Packit 6c4009
  assert (MB_LEN_MAX >= pstr->mb_cur_max);
Packit 6c4009
#else
Packit 6c4009
  char buf[64];
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
  /* BYTE_IDX indexes the converted buffers (MBS, WCS); SRC_IDX, used
     only in the general path below, indexes the raw string.  */
  byte_idx = pstr->valid_len;
Packit 6c4009
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit 6c4009
Packit 6c4009
  /* The following optimization assumes that ASCII characters can be
     mapped to wide characters with a simple cast.  It is taken only
     while raw and converted positions coincide (no translation table
     and no offsets in use).  */
Packit 6c4009
  if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
Packit 6c4009
    {
Packit 6c4009
      while (byte_idx < end_idx)
Packit 6c4009
	{
Packit 6c4009
	  wchar_t wc;
Packit 6c4009
Packit 6c4009
	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
Packit 6c4009
	      && mbsinit (&pstr->cur_state))
Packit 6c4009
	    {
Packit 6c4009
	      /* In case of a singlebyte character.  */
Packit 6c4009
	      pstr->mbs[byte_idx]
Packit 6c4009
		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
Packit 6c4009
	      /* The next step uses the assumption that wchar_t is encoded
		 ASCII-safe: all ASCII values can be converted like this.  */
Packit 6c4009
	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
Packit 6c4009
	      ++byte_idx;
Packit 6c4009
	      continue;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  remain_len = end_idx - byte_idx;
Packit 6c4009
	  /* Remember the shift state so that it can be restored if the
	     character turns out to be invalid or cut off.  */
	  prev_st = pstr->cur_state;
Packit 6c4009
	  mbclen = __mbrtowc (&wc,
Packit 6c4009
			      ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
Packit 6c4009
			       + byte_idx), remain_len, &pstr->cur_state);
Packit Service d59610
	  /* A complete, valid, non-null character of MBCLEN bytes.  */
	  if (BE (0 < mbclen && mbclen < (size_t) -2, 1))
Packit 6c4009
	    {
Packit 6c4009
	      wchar_t wcu = __towupper (wc);
Packit 6c4009
	      if (wcu != wc)
Packit 6c4009
		{
Packit 6c4009
		  size_t mbcdlen;
Packit 6c4009
Packit 6c4009
		  /* Re-encode the upper case character, starting from the
		     shift state saved before decoding.  */
		  mbcdlen = __wcrtomb (buf, wcu, &prev_st);
Packit 6c4009
		  if (BE (mbclen == mbcdlen, 1))
Packit 6c4009
		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
Packit 6c4009
		  else
Packit 6c4009
		    {
Packit 6c4009
		      /* The upper case form does not have the same length
			 (or could not be encoded): leave the fast path and
			 continue in the general loop below, which starts
			 over at this position.  */
		      src_idx = byte_idx;
Packit 6c4009
		      goto offsets_needed;
Packit 6c4009
		    }
Packit 6c4009
		}
Packit 6c4009
	      else
Packit 6c4009
		memcpy (pstr->mbs + byte_idx,
Packit 6c4009
			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
Packit 6c4009
	      pstr->wcs[byte_idx++] = wcu;
Packit 6c4009
	      /* Write paddings.  */
Packit 6c4009
	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
Packit 6c4009
		pstr->wcs[byte_idx++] = WEOF;
Packit 6c4009
	    }
Packit 6c4009
	  else if (mbclen == (size_t) -1 || mbclen == 0
Packit 6c4009
		   || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
Packit 6c4009
	    {
Packit 6c4009
	      /* It is an invalid character, an incomplete character
		 at the end of the string, or '\0'.  Just use the byte.  */
Packit 6c4009
	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
Packit 6c4009
	      pstr->mbs[byte_idx] = ch;
Packit 6c4009
	      /* And also cast it to wide char.  */
Packit 6c4009
	      pstr->wcs[byte_idx++] = (wchar_t) ch;
Packit 6c4009
	      if (BE (mbclen == (size_t) -1, 0))
Packit 6c4009
		pstr->cur_state = prev_st;
Packit 6c4009
	    }
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      /* The buffer doesn't have enough space, finish to build.  */
Packit 6c4009
	      pstr->cur_state = prev_st;
Packit 6c4009
	      break;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
      pstr->valid_len = byte_idx;
Packit 6c4009
      pstr->valid_raw_len = byte_idx;
Packit 6c4009
      return REG_NOERROR;
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    /* General path: raw and converted positions may differ, so the raw
       string is read at SRC_IDX and the buffers written at BYTE_IDX.  */
    for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
Packit 6c4009
      {
Packit 6c4009
	wchar_t wc;
Packit 6c4009
	const char *p;
Packit 6c4009
      /* Also entered by a goto from the fast path above, with SRC_IDX
	 set equal to BYTE_IDX.  */
      offsets_needed:
Packit 6c4009
	remain_len = end_idx - byte_idx;
Packit 6c4009
	prev_st = pstr->cur_state;
Packit 6c4009
	if (BE (pstr->trans != NULL, 0))
Packit 6c4009
	  {
Packit 6c4009
	    int i, ch;
Packit 6c4009
Packit 6c4009
	    /* Decode from a translated copy of the next bytes.  */
	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
Packit 6c4009
	      {
Packit 6c4009
		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
Packit 6c4009
		buf[i] = pstr->trans[ch];
Packit 6c4009
	      }
Packit 6c4009
	    p = (const char *) buf;
Packit 6c4009
	  }
Packit 6c4009
	else
Packit 6c4009
	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
Packit 6c4009
	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
Packit Service d59610
	/* A complete, valid, non-null character of MBCLEN bytes.  */
	if (BE (0 < mbclen && mbclen < (size_t) -2, 1))
Packit 6c4009
	  {
Packit 6c4009
	    wchar_t wcu = __towupper (wc);
Packit 6c4009
	    if (wcu != wc)
Packit 6c4009
	      {
Packit 6c4009
		size_t mbcdlen;
Packit 6c4009
Packit 6c4009
		/* Re-encode the upper case character into BUF.  */
		mbcdlen = __wcrtomb ((char *) buf, wcu, &prev_st);
Packit 6c4009
		if (BE (mbclen == mbcdlen, 1))
Packit 6c4009
		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
Packit 6c4009
		else if (mbcdlen != (size_t) -1)
Packit 6c4009
		  {
Packit 6c4009
		    /* The upper case form was encoded, but with a different
		       number of bytes (MBCDLEN) than the original.  */
		    size_t i;
Packit 6c4009
Packit 6c4009
		    /* No room left in the buffers for the converted
		       character: stop before it.  */
		    if (byte_idx + mbcdlen > pstr->bufs_len)
Packit 6c4009
		      {
Packit 6c4009
			pstr->cur_state = prev_st;
Packit 6c4009
			break;
Packit 6c4009
		      }
Packit 6c4009
Packit 6c4009
		    if (pstr->offsets == NULL)
Packit 6c4009
		      {
Packit 6c4009
			pstr->offsets = re_malloc (Idx, pstr->bufs_len);
Packit 6c4009
Packit 6c4009
			if (pstr->offsets == NULL)
Packit 6c4009
			  return REG_ESPACE;
Packit 6c4009
		      }
Packit 6c4009
		    /* First length change: every position converted so far
		       maps to the same raw position.  */
		    if (!pstr->offsets_needed)
Packit 6c4009
		      {
Packit 6c4009
			for (i = 0; i < (size_t) byte_idx; ++i)
Packit 6c4009
			  pstr->offsets[i] = i;
Packit 6c4009
			pstr->offsets_needed = 1;
Packit 6c4009
		      }
Packit 6c4009
Packit 6c4009
		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
Packit 6c4009
		    pstr->wcs[byte_idx] = wcu;
Packit 6c4009
		    pstr->offsets[byte_idx] = src_idx;
Packit 6c4009
		    /* Map the remaining converted bytes onto the raw bytes
		       of the character, clamping to its last raw byte, and
		       pad the wide character buffer.  */
		    for (i = 1; i < mbcdlen; ++i)
Packit 6c4009
		      {
Packit 6c4009
			pstr->offsets[byte_idx + i]
Packit 6c4009
			  = src_idx + (i < mbclen ? i : mbclen - 1);
Packit 6c4009
			pstr->wcs[byte_idx + i] = WEOF;
Packit 6c4009
		      }
Packit 6c4009
		    /* The converted string changes length by the
		       difference; so does the stop position when it lies
		       beyond this character.  */
		    pstr->len += mbcdlen - mbclen;
Packit 6c4009
		    if (pstr->raw_stop > src_idx)
Packit 6c4009
		      pstr->stop += mbcdlen - mbclen;
Packit 6c4009
		    end_idx = (pstr->bufs_len > pstr->len)
Packit 6c4009
			      ? pstr->len : pstr->bufs_len;
Packit 6c4009
		    byte_idx += mbcdlen;
Packit 6c4009
		    src_idx += mbclen;
Packit 6c4009
		    continue;
Packit 6c4009
		  }
Packit 6c4009
		else
Packit 6c4009
		  /* __wcrtomb failed: copy MBCLEN bytes from P instead.  */
		  memcpy (pstr->mbs + byte_idx, p, mbclen);
Packit 6c4009
	      }
Packit 6c4009
	    else
Packit 6c4009
	      memcpy (pstr->mbs + byte_idx, p, mbclen);
Packit 6c4009
Packit 6c4009
	    /* Same length as the original: offsets advance in step.  */
	    if (BE (pstr->offsets_needed != 0, 0))
Packit 6c4009
	      {
Packit 6c4009
		size_t i;
Packit 6c4009
		for (i = 0; i < mbclen; ++i)
Packit 6c4009
		  pstr->offsets[byte_idx + i] = src_idx + i;
Packit 6c4009
	      }
Packit 6c4009
	    src_idx += mbclen;
Packit 6c4009
Packit 6c4009
	    pstr->wcs[byte_idx++] = wcu;
Packit 6c4009
	    /* Write paddings.  */
Packit 6c4009
	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
Packit 6c4009
	      pstr->wcs[byte_idx++] = WEOF;
Packit 6c4009
	  }
Packit 6c4009
	else if (mbclen == (size_t) -1 || mbclen == 0
Packit 6c4009
		 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
Packit 6c4009
	  {
Packit 6c4009
	    /* It is an invalid character, an incomplete character at the
	       end of the string, or '\0'.  Just use the byte.  */
Packit 6c4009
	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
Packit 6c4009
Packit 6c4009
	    if (BE (pstr->trans != NULL, 0))
Packit 6c4009
	      ch = pstr->trans [ch];
Packit 6c4009
	    pstr->mbs[byte_idx] = ch;
Packit 6c4009
Packit 6c4009
	    if (BE (pstr->offsets_needed != 0, 0))
Packit 6c4009
	      pstr->offsets[byte_idx] = src_idx;
Packit 6c4009
	    ++src_idx;
Packit 6c4009
Packit 6c4009
	    /* And also cast it to wide char.  */
Packit 6c4009
	    pstr->wcs[byte_idx++] = (wchar_t) ch;
Packit 6c4009
	    if (BE (mbclen == (size_t) -1, 0))
Packit 6c4009
	      pstr->cur_state = prev_st;
Packit 6c4009
	  }
Packit 6c4009
	else
Packit 6c4009
	  {
Packit 6c4009
	    /* The buffer doesn't have enough space, finish to build.  */
Packit 6c4009
	    pstr->cur_state = prev_st;
Packit 6c4009
	    break;
Packit 6c4009
	  }
Packit 6c4009
      }
Packit 6c4009
  pstr->valid_len = byte_idx;
Packit 6c4009
  pstr->valid_raw_len = src_idx;
Packit 6c4009
  return REG_NOERROR;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Skip characters until the index becomes greater than NEW_RAW_IDX.
Packit 6c4009
   Return the index.  */
Packit 6c4009
Packit 6c4009
static Idx
Packit 6c4009
re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
Packit 6c4009
{
Packit 6c4009
  mbstate_t prev_st;
Packit 6c4009
  Idx rawbuf_idx;
Packit 6c4009
  size_t mbclen;
Packit 6c4009
  wint_t wc = WEOF;
Packit 6c4009
Packit 6c4009
  /* Skip the characters which are not necessary to check.  */
Packit 6c4009
  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
Packit 6c4009
       rawbuf_idx < new_raw_idx;)
Packit 6c4009
    {
Packit 6c4009
      wchar_t wc2;
Packit 6c4009
      Idx remain_len = pstr->raw_len - rawbuf_idx;
Packit 6c4009
      prev_st = pstr->cur_state;
Packit 6c4009
      mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
Packit 6c4009
			  remain_len, &pstr->cur_state);
Packit 6c4009
      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
Packit 6c4009
	{
Packit 6c4009
	  /* We treat these cases as a single byte character.  */
Packit 6c4009
	  if (mbclen == 0 || remain_len == 0)
Packit 6c4009
	    wc = L'\0';
Packit 6c4009
	  else
Packit 6c4009
	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
Packit 6c4009
	  mbclen = 1;
Packit 6c4009
	  pstr->cur_state = prev_st;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	wc = wc2;
Packit 6c4009
      /* Then proceed the next character.  */
Packit 6c4009
      rawbuf_idx += mbclen;
Packit 6c4009
    }
Packit 6c4009
  *last_wc = wc;
Packit 6c4009
  return rawbuf_idx;
Packit 6c4009
}
Packit 6c4009
#endif /* RE_ENABLE_I18N  */
Packit 6c4009
Packit 6c4009
/* Build the buffer PSTR->MBS, and apply the translation if we need.
Packit 6c4009
   This function is used in case of REG_ICASE.  */
Packit 6c4009
Packit 6c4009
static void
Packit 6c4009
build_upper_buffer (re_string_t *pstr)
Packit 6c4009
{
Packit 6c4009
  Idx char_idx, end_idx;
Packit 6c4009
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit 6c4009
Packit 6c4009
  for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
Packit 6c4009
    {
Packit 6c4009
      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
Packit 6c4009
      if (BE (pstr->trans != NULL, 0))
Packit 6c4009
	ch = pstr->trans[ch];
Packit 6c4009
      pstr->mbs[char_idx] = toupper (ch);
Packit 6c4009
    }
Packit 6c4009
  pstr->valid_len = char_idx;
Packit 6c4009
  pstr->valid_raw_len = char_idx;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Apply TRANS to the buffer in PSTR.  */
Packit 6c4009
Packit 6c4009
static void
Packit 6c4009
re_string_translate_buffer (re_string_t *pstr)
Packit 6c4009
{
Packit 6c4009
  Idx buf_idx, end_idx;
Packit 6c4009
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit 6c4009
Packit 6c4009
  for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
Packit 6c4009
    {
Packit 6c4009
      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
Packit 6c4009
      pstr->mbs[buf_idx] = pstr->trans[ch];
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  pstr->valid_len = buf_idx;
Packit 6c4009
  pstr->valid_raw_len = buf_idx;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* This function reconstructs the buffers.
   Concretely, it converts to wide characters if pstr->mb_cur_max > 1,
   converts to upper case if REG_ICASE is in effect, and applies the
   translation.  */
Packit 6c4009
Packit 6c4009
static reg_errcode_t
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
Packit 6c4009
{
Packit 6c4009
  Idx offset;
Packit 6c4009
Packit 6c4009
  if (BE (pstr->raw_mbs_idx <= idx, 0))
Packit 6c4009
    offset = idx - pstr->raw_mbs_idx;
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      /* Reset buffer.  */
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
      if (pstr->mb_cur_max > 1)
Packit 6c4009
	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
Packit 6c4009
#endif /* RE_ENABLE_I18N */
Packit 6c4009
      pstr->len = pstr->raw_len;
Packit 6c4009
      pstr->stop = pstr->raw_stop;
Packit 6c4009
      pstr->valid_len = 0;
Packit 6c4009
      pstr->raw_mbs_idx = 0;
Packit 6c4009
      pstr->valid_raw_len = 0;
Packit 6c4009
      pstr->offsets_needed = 0;
Packit 6c4009
      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
Packit 6c4009
			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
Packit 6c4009
      if (!pstr->mbs_allocated)
Packit 6c4009
	pstr->mbs = (unsigned char *) pstr->raw_mbs;
Packit 6c4009
      offset = idx;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (BE (offset != 0, 1))
Packit 6c4009
    {
Packit 6c4009
      /* Should the already checked characters be kept?  */
Packit 6c4009
      if (BE (offset < pstr->valid_raw_len, 1))
Packit 6c4009
	{
Packit 6c4009
	  /* Yes, move them to the front of the buffer.  */
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
	  if (BE (pstr->offsets_needed, 0))
Packit 6c4009
	    {
Packit 6c4009
	      Idx low = 0, high = pstr->valid_len, mid;
Packit 6c4009
	      do
Packit 6c4009
		{
Packit 6c4009
		  mid = (high + low) / 2;
Packit 6c4009
		  if (pstr->offsets[mid] > offset)
Packit 6c4009
		    high = mid;
Packit 6c4009
		  else if (pstr->offsets[mid] < offset)
Packit 6c4009
		    low = mid + 1;
Packit 6c4009
		  else
Packit 6c4009
		    break;
Packit 6c4009
		}
Packit 6c4009
	      while (low < high);
Packit 6c4009
	      if (pstr->offsets[mid] < offset)
Packit 6c4009
		++mid;
Packit 6c4009
	      pstr->tip_context = re_string_context_at (pstr, mid - 1,
Packit 6c4009
							eflags);
Packit 6c4009
	      /* This can be quite complicated, so handle specially
Packit 6c4009
		 only the common and easy case where the character with
Packit 6c4009
		 different length representation of lower and upper
Packit 6c4009
		 case is present at or after offset.  */
Packit 6c4009
	      if (pstr->valid_len > offset
Packit 6c4009
		  && mid == offset && pstr->offsets[mid] == offset)
Packit 6c4009
		{
Packit 6c4009
		  memmove (pstr->wcs, pstr->wcs + offset,
Packit 6c4009
			   (pstr->valid_len - offset) * sizeof (wint_t));
Packit 6c4009
		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
Packit 6c4009
		  pstr->valid_len -= offset;
Packit 6c4009
		  pstr->valid_raw_len -= offset;
Packit 6c4009
		  for (low = 0; low < pstr->valid_len; low++)
Packit 6c4009
		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;
Packit 6c4009
		}
Packit 6c4009
	      else
Packit 6c4009
		{
Packit 6c4009
		  /* Otherwise, just find out how long the partial multibyte
Packit 6c4009
		     character at offset is and fill it with WEOF/255.  */
Packit 6c4009
		  pstr->len = pstr->raw_len - idx + offset;
Packit 6c4009
		  pstr->stop = pstr->raw_stop - idx + offset;
Packit 6c4009
		  pstr->offsets_needed = 0;
Packit 6c4009
		  while (mid > 0 && pstr->offsets[mid - 1] == offset)
Packit 6c4009
		    --mid;
Packit 6c4009
		  while (mid < pstr->valid_len)
Packit 6c4009
		    if (pstr->wcs[mid] != WEOF)
Packit 6c4009
		      break;
Packit 6c4009
		    else
Packit 6c4009
		      ++mid;
Packit 6c4009
		  if (mid == pstr->valid_len)
Packit 6c4009
		    pstr->valid_len = 0;
Packit 6c4009
		  else
Packit 6c4009
		    {
Packit 6c4009
		      pstr->valid_len = pstr->offsets[mid] - offset;
Packit 6c4009
		      if (pstr->valid_len)
Packit 6c4009
			{
Packit 6c4009
			  for (low = 0; low < pstr->valid_len; ++low)
Packit 6c4009
			    pstr->wcs[low] = WEOF;
Packit 6c4009
			  memset (pstr->mbs, 255, pstr->valid_len);
Packit 6c4009
			}
Packit 6c4009
		    }
Packit 6c4009
		  pstr->valid_raw_len = pstr->valid_len;
Packit 6c4009
		}
Packit 6c4009
	    }
Packit 6c4009
	  else
Packit 6c4009
#endif
Packit 6c4009
	    {
Packit 6c4009
	      pstr->tip_context = re_string_context_at (pstr, offset - 1,
Packit 6c4009
							eflags);
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
	      if (pstr->mb_cur_max > 1)
Packit 6c4009
		memmove (pstr->wcs, pstr->wcs + offset,
Packit 6c4009
			 (pstr->valid_len - offset) * sizeof (wint_t));
Packit 6c4009
#endif /* RE_ENABLE_I18N */
Packit 6c4009
	      if (BE (pstr->mbs_allocated, 0))
Packit 6c4009
		memmove (pstr->mbs, pstr->mbs + offset,
Packit 6c4009
			 pstr->valid_len - offset);
Packit 6c4009
	      pstr->valid_len -= offset;
Packit 6c4009
	      pstr->valid_raw_len -= offset;
Packit 6c4009
#if defined DEBUG && DEBUG
Packit 6c4009
	      assert (pstr->valid_len > 0);
Packit 6c4009
#endif
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
	  /* No, skip all characters until IDX.  */
Packit 6c4009
	  Idx prev_valid_len = pstr->valid_len;
Packit 6c4009
Packit 6c4009
	  if (BE (pstr->offsets_needed, 0))
Packit 6c4009
	    {
Packit 6c4009
	      pstr->len = pstr->raw_len - idx + offset;
Packit 6c4009
	      pstr->stop = pstr->raw_stop - idx + offset;
Packit 6c4009
	      pstr->offsets_needed = 0;
Packit 6c4009
	    }
Packit 6c4009
#endif
Packit 6c4009
	  pstr->valid_len = 0;
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
	  if (pstr->mb_cur_max > 1)
Packit 6c4009
	    {
Packit 6c4009
	      Idx wcs_idx;
Packit 6c4009
	      wint_t wc = WEOF;
Packit 6c4009
Packit 6c4009
	      if (pstr->is_utf8)
Packit 6c4009
		{
Packit 6c4009
		  const unsigned char *raw, *p, *end;
Packit 6c4009
Packit 6c4009
		  /* Special case UTF-8.  Multi-byte chars start with any
Packit 6c4009
		     byte other than 0x80 - 0xbf.  */
Packit 6c4009
		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
Packit 6c4009
		  end = raw + (offset - pstr->mb_cur_max);
Packit 6c4009
		  if (end < pstr->raw_mbs)
Packit 6c4009
		    end = pstr->raw_mbs;
Packit 6c4009
		  p = raw + offset - 1;
Packit 6c4009
#ifdef _LIBC
Packit 6c4009
		  /* We know the wchar_t encoding is UCS4, so for the simple
Packit 6c4009
		     case, ASCII characters, skip the conversion step.  */
Packit 6c4009
		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
Packit 6c4009
		    {
Packit 6c4009
		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
Packit 6c4009
		      /* pstr->valid_len = 0; */
Packit 6c4009
		      wc = (wchar_t) *p;
Packit 6c4009
		    }
Packit 6c4009
		  else
Packit 6c4009
#endif
Packit 6c4009
		    for (; p >= end; --p)
Packit 6c4009
		      if ((*p & 0xc0) != 0x80)
Packit 6c4009
			{
Packit 6c4009
			  mbstate_t cur_state;
Packit 6c4009
			  wchar_t wc2;
Packit 6c4009
			  Idx mlen = raw + pstr->len - p;
Packit 6c4009
			  unsigned char buf[6];
Packit 6c4009
			  size_t mbclen;
Packit 6c4009
Packit 6c4009
			  const unsigned char *pp = p;
Packit 6c4009
			  if (BE (pstr->trans != NULL, 0))
Packit 6c4009
			    {
Packit 6c4009
			      int i = mlen < 6 ? mlen : 6;
Packit 6c4009
			      while (--i >= 0)
Packit 6c4009
				buf[i] = pstr->trans[p[i]];
Packit 6c4009
			      pp = buf;
Packit 6c4009
			    }
Packit 6c4009
			  /* XXX Don't use mbrtowc, we know which conversion
Packit 6c4009
			     to use (UTF-8 -> UCS4).  */
Packit 6c4009
			  memset (&cur_state, 0, sizeof (cur_state));
Packit 6c4009
			  mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
Packit 6c4009
					      &cur_state);
Packit 6c4009
			  if (raw + offset - p <= mbclen
Packit 6c4009
			      && mbclen < (size_t) -2)
Packit 6c4009
			    {
Packit 6c4009
			      memset (&pstr->cur_state, '\0',
Packit 6c4009
				      sizeof (mbstate_t));
Packit 6c4009
			      pstr->valid_len = mbclen - (raw + offset - p);
Packit 6c4009
			      wc = wc2;
Packit 6c4009
			    }
Packit 6c4009
			  break;
Packit 6c4009
			}
Packit 6c4009
		}
Packit 6c4009
Packit 6c4009
	      if (wc == WEOF)
Packit 6c4009
		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
Packit 6c4009
	      if (wc == WEOF)
Packit 6c4009
		pstr->tip_context
Packit 6c4009
		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);
Packit 6c4009
	      else
Packit 6c4009
		pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
Packit 6c4009
				      && IS_WIDE_WORD_CHAR (wc))
Packit 6c4009
				     ? CONTEXT_WORD
Packit 6c4009
				     : ((IS_WIDE_NEWLINE (wc)
Packit 6c4009
					 && pstr->newline_anchor)
Packit 6c4009
					? CONTEXT_NEWLINE : 0));
Packit 6c4009
	      if (BE (pstr->valid_len, 0))
Packit 6c4009
		{
Packit 6c4009
		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
Packit 6c4009
		    pstr->wcs[wcs_idx] = WEOF;
Packit 6c4009
		  if (pstr->mbs_allocated)
Packit 6c4009
		    memset (pstr->mbs, 255, pstr->valid_len);
Packit 6c4009
		}
Packit 6c4009
	      pstr->valid_raw_len = pstr->valid_len;
Packit 6c4009
	    }
Packit 6c4009
	  else
Packit 6c4009
#endif /* RE_ENABLE_I18N */
Packit 6c4009
	    {
Packit 6c4009
	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
Packit 6c4009
	      pstr->valid_raw_len = 0;
Packit 6c4009
	      if (pstr->trans)
Packit 6c4009
		c = pstr->trans[c];
Packit 6c4009
	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
Packit 6c4009
				   ? CONTEXT_WORD
Packit 6c4009
				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
Packit 6c4009
				      ? CONTEXT_NEWLINE : 0));
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
      if (!BE (pstr->mbs_allocated, 0))
Packit 6c4009
	pstr->mbs += offset;
Packit 6c4009
    }
Packit 6c4009
  pstr->raw_mbs_idx = idx;
Packit 6c4009
  pstr->len -= offset;
Packit 6c4009
  pstr->stop -= offset;
Packit 6c4009
Packit 6c4009
  /* Then build the buffers.  */
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
  if (pstr->mb_cur_max > 1)
Packit 6c4009
    {
Packit 6c4009
      if (pstr->icase)
Packit 6c4009
	{
Packit 6c4009
	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
Packit 6c4009
	  if (BE (ret != REG_NOERROR, 0))
Packit 6c4009
	    return ret;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	build_wcs_buffer (pstr);
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
#endif /* RE_ENABLE_I18N */
Packit 6c4009
    if (BE (pstr->mbs_allocated, 0))
Packit 6c4009
      {
Packit 6c4009
	if (pstr->icase)
Packit 6c4009
	  build_upper_buffer (pstr);
Packit 6c4009
	else if (pstr->trans != NULL)
Packit 6c4009
	  re_string_translate_buffer (pstr);
Packit 6c4009
      }
Packit 6c4009
    else
Packit 6c4009
      pstr->valid_len = pstr->len;
Packit 6c4009
Packit 6c4009
  pstr->cur_idx = 0;
Packit 6c4009
  return REG_NOERROR;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
static unsigned char
Packit 6c4009
__attribute__ ((pure))
Packit 6c4009
re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
Packit 6c4009
{
Packit 6c4009
  int ch;
Packit 6c4009
  Idx off;
Packit 6c4009
Packit 6c4009
  /* Handle the common (easiest) cases first.  */
Packit 6c4009
  if (BE (!pstr->mbs_allocated, 1))
Packit 6c4009
    return re_string_peek_byte (pstr, idx);
Packit 6c4009
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
  if (pstr->mb_cur_max > 1
Packit 6c4009
      && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
Packit 6c4009
    return re_string_peek_byte (pstr, idx);
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
  off = pstr->cur_idx + idx;
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
  if (pstr->offsets_needed)
Packit 6c4009
    off = pstr->offsets[off];
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
  ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
Packit 6c4009
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
  /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
Packit 6c4009
     this function returns CAPITAL LETTER I instead of first byte of
Packit 6c4009
     DOTLESS SMALL LETTER I.  The latter would confuse the parser,
Packit 6c4009
     since peek_byte_case doesn't advance cur_idx in any way.  */
Packit 6c4009
  if (pstr->offsets_needed && !isascii (ch))
Packit 6c4009
    return re_string_peek_byte (pstr, idx);
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
  return ch;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
static unsigned char
Packit 6c4009
re_string_fetch_byte_case (re_string_t *pstr)
Packit 6c4009
{
Packit 6c4009
  if (BE (!pstr->mbs_allocated, 1))
Packit 6c4009
    return re_string_fetch_byte (pstr);
Packit 6c4009
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
  if (pstr->offsets_needed)
Packit 6c4009
    {
Packit 6c4009
      Idx off;
Packit 6c4009
      int ch;
Packit 6c4009
Packit 6c4009
      /* For tr_TR.UTF-8 [[:islower:]] there is
Packit 6c4009
	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
Packit 6c4009
	 in that case the whole multi-byte character and return
Packit 6c4009
	 the original letter.  On the other side, with
Packit 6c4009
	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
Packit 6c4009
	 anything else would complicate things too much.  */
Packit 6c4009
Packit 6c4009
      if (!re_string_first_byte (pstr, pstr->cur_idx))
Packit 6c4009
	return re_string_fetch_byte (pstr);
Packit 6c4009
Packit 6c4009
      off = pstr->offsets[pstr->cur_idx];
Packit 6c4009
      ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
Packit 6c4009
Packit 6c4009
      if (! isascii (ch))
Packit 6c4009
	return re_string_fetch_byte (pstr);
Packit 6c4009
Packit 6c4009
      re_string_skip_bytes (pstr,
Packit 6c4009
			    re_string_char_size_at (pstr, pstr->cur_idx));
Packit 6c4009
      return ch;
Packit 6c4009
    }
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
  return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
static void
Packit 6c4009
re_string_destruct (re_string_t *pstr)
Packit 6c4009
{
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
  re_free (pstr->wcs);
Packit 6c4009
  re_free (pstr->offsets);
Packit 6c4009
#endif /* RE_ENABLE_I18N  */
Packit 6c4009
  if (pstr->mbs_allocated)
Packit 6c4009
    re_free (pstr->mbs);
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Return the context at IDX in INPUT.  */
Packit 6c4009
Packit 6c4009
static unsigned int
Packit 6c4009
re_string_context_at (const re_string_t *input, Idx idx, int eflags)
Packit 6c4009
{
Packit 6c4009
  int c;
Packit 6c4009
  if (BE (idx < 0, 0))
Packit 6c4009
    /* In this case, we use the value stored in input->tip_context,
Packit 6c4009
       since we can't know the character in input->mbs[-1] here.  */
Packit 6c4009
    return input->tip_context;
Packit 6c4009
  if (BE (idx == input->len, 0))
Packit 6c4009
    return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
Packit 6c4009
	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
  if (input->mb_cur_max > 1)
Packit 6c4009
    {
Packit 6c4009
      wint_t wc;
Packit 6c4009
      Idx wc_idx = idx;
Packit 6c4009
      while(input->wcs[wc_idx] == WEOF)
Packit 6c4009
	{
Packit 6c4009
#if defined DEBUG && DEBUG
Packit 6c4009
	  /* It must not happen.  */
Packit 6c4009
	  assert (wc_idx >= 0);
Packit 6c4009
#endif
Packit 6c4009
	  --wc_idx;
Packit 6c4009
	  if (wc_idx < 0)
Packit 6c4009
	    return input->tip_context;
Packit 6c4009
	}
Packit 6c4009
      wc = input->wcs[wc_idx];
Packit 6c4009
      if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
Packit 6c4009
	return CONTEXT_WORD;
Packit 6c4009
      return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
Packit 6c4009
	      ? CONTEXT_NEWLINE : 0);
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
#endif
Packit 6c4009
    {
Packit 6c4009
      c = re_string_byte_at (input, idx);
Packit 6c4009
      if (bitset_contain (input->word_char, c))
Packit 6c4009
	return CONTEXT_WORD;
Packit 6c4009
      return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
Packit 6c4009
    }
Packit 6c4009
}
Packit 6c4009

Packit 6c4009
/* Functions for set operation.  */
Packit 6c4009
Packit 6c4009
static reg_errcode_t
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
re_node_set_alloc (re_node_set *set, Idx size)
Packit 6c4009
{
Packit 6c4009
  set->alloc = size;
Packit 6c4009
  set->nelem = 0;
Packit 6c4009
  set->elems = re_malloc (Idx, size);
Packit 6c4009
  if (BE (set->elems == NULL, 0) && (MALLOC_0_IS_NONNULL || size != 0))
Packit 6c4009
    return REG_ESPACE;
Packit 6c4009
  return REG_NOERROR;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
static reg_errcode_t
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
re_node_set_init_1 (re_node_set *set, Idx elem)
Packit 6c4009
{
Packit 6c4009
  set->alloc = 1;
Packit 6c4009
  set->nelem = 1;
Packit 6c4009
  set->elems = re_malloc (Idx, 1);
Packit 6c4009
  if (BE (set->elems == NULL, 0))
Packit 6c4009
    {
Packit 6c4009
      set->alloc = set->nelem = 0;
Packit 6c4009
      return REG_ESPACE;
Packit 6c4009
    }
Packit 6c4009
  set->elems[0] = elem;
Packit 6c4009
  return REG_NOERROR;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
static reg_errcode_t
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
Packit 6c4009
{
Packit 6c4009
  set->alloc = 2;
Packit 6c4009
  set->elems = re_malloc (Idx, 2);
Packit 6c4009
  if (BE (set->elems == NULL, 0))
Packit 6c4009
    return REG_ESPACE;
Packit 6c4009
  if (elem1 == elem2)
Packit 6c4009
    {
Packit 6c4009
      set->nelem = 1;
Packit 6c4009
      set->elems[0] = elem1;
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      set->nelem = 2;
Packit 6c4009
      if (elem1 < elem2)
Packit 6c4009
	{
Packit 6c4009
	  set->elems[0] = elem1;
Packit 6c4009
	  set->elems[1] = elem2;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  set->elems[0] = elem2;
Packit 6c4009
	  set->elems[1] = elem1;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
  return REG_NOERROR;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
static reg_errcode_t
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
Packit 6c4009
{
Packit 6c4009
  dest->nelem = src->nelem;
Packit 6c4009
  if (src->nelem > 0)
Packit 6c4009
    {
Packit 6c4009
      dest->alloc = dest->nelem;
Packit 6c4009
      dest->elems = re_malloc (Idx, dest->alloc);
Packit 6c4009
      if (BE (dest->elems == NULL, 0))
Packit 6c4009
	{
Packit 6c4009
	  dest->alloc = dest->nelem = 0;
Packit 6c4009
	  return REG_ESPACE;
Packit 6c4009
	}
Packit 6c4009
      memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    re_node_set_init_empty (dest);
Packit 6c4009
  return REG_NOERROR;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
Packit 6c4009
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.
Packit 6c4009
   Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
Packit 6c4009
Packit 6c4009
static reg_errcode_t
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
Packit 6c4009
			   const re_node_set *src2)
Packit 6c4009
{
Packit 6c4009
  Idx i1, i2, is, id, delta, sbase;
Packit 6c4009
  if (src1->nelem == 0 || src2->nelem == 0)
Packit 6c4009
    return REG_NOERROR;
Packit 6c4009
Packit 6c4009
  /* We need dest->nelem + 2 * elems_in_intersection; this is a
Packit 6c4009
     conservative estimate.  */
Packit 6c4009
  if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
Packit 6c4009
    {
Packit 6c4009
      Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
Packit 6c4009
      Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
Packit 6c4009
      if (BE (new_elems == NULL, 0))
Packit 6c4009
	return REG_ESPACE;
Packit 6c4009
      dest->elems = new_elems;
Packit 6c4009
      dest->alloc = new_alloc;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Find the items in the intersection of SRC1 and SRC2, and copy
Packit 6c4009
     into the top of DEST those that are not already in DEST itself.  */
Packit 6c4009
  sbase = dest->nelem + src1->nelem + src2->nelem;
Packit 6c4009
  i1 = src1->nelem - 1;
Packit 6c4009
  i2 = src2->nelem - 1;
Packit 6c4009
  id = dest->nelem - 1;
Packit 6c4009
  for (;;)
Packit 6c4009
    {
Packit 6c4009
      if (src1->elems[i1] == src2->elems[i2])
Packit 6c4009
	{
Packit 6c4009
	  /* Try to find the item in DEST.  Maybe we could binary search?  */
Packit 6c4009
	  while (id >= 0 && dest->elems[id] > src1->elems[i1])
Packit 6c4009
	    --id;
Packit 6c4009
Packit 6c4009
	  if (id < 0 || dest->elems[id] != src1->elems[i1])
Packit 6c4009
            dest->elems[--sbase] = src1->elems[i1];
Packit 6c4009
Packit 6c4009
	  if (--i1 < 0 || --i2 < 0)
Packit 6c4009
	    break;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      /* Lower the highest of the two items.  */
Packit 6c4009
      else if (src1->elems[i1] < src2->elems[i2])
Packit 6c4009
	{
Packit 6c4009
	  if (--i2 < 0)
Packit 6c4009
	    break;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  if (--i1 < 0)
Packit 6c4009
	    break;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  id = dest->nelem - 1;
Packit 6c4009
  is = dest->nelem + src1->nelem + src2->nelem - 1;
Packit 6c4009
  delta = is - sbase + 1;
Packit 6c4009
Packit 6c4009
  /* Now copy.  When DELTA becomes zero, the remaining
Packit 6c4009
     DEST elements are already in place; this is more or
Packit 6c4009
     less the same loop that is in re_node_set_merge.  */
Packit 6c4009
  dest->nelem += delta;
Packit 6c4009
  if (delta > 0 && id >= 0)
Packit 6c4009
    for (;;)
Packit 6c4009
      {
Packit 6c4009
	if (dest->elems[is] > dest->elems[id])
Packit 6c4009
	  {
Packit 6c4009
	    /* Copy from the top.  */
Packit 6c4009
	    dest->elems[id + delta--] = dest->elems[is--];
Packit 6c4009
	    if (delta == 0)
Packit 6c4009
	      break;
Packit 6c4009
	  }
Packit 6c4009
	else
Packit 6c4009
	  {
Packit 6c4009
	    /* Slide from the bottom.  */
Packit 6c4009
	    dest->elems[id + delta] = dest->elems[id];
Packit 6c4009
	    if (--id < 0)
Packit 6c4009
	      break;
Packit 6c4009
	  }
Packit 6c4009
      }
Packit 6c4009
Packit 6c4009
  /* Copy remaining SRC elements.  */
Packit 6c4009
  memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
Packit 6c4009
Packit 6c4009
  return REG_NOERROR;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Calculate the union set of the sets SRC1 and SRC2. And store it to
Packit 6c4009
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
Packit 6c4009
Packit 6c4009
static reg_errcode_t
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
Packit 6c4009
			const re_node_set *src2)
Packit 6c4009
{
Packit 6c4009
  Idx i1, i2, id;
Packit 6c4009
  if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
Packit 6c4009
    {
Packit 6c4009
      dest->alloc = src1->nelem + src2->nelem;
Packit 6c4009
      dest->elems = re_malloc (Idx, dest->alloc);
Packit 6c4009
      if (BE (dest->elems == NULL, 0))
Packit 6c4009
	return REG_ESPACE;
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      if (src1 != NULL && src1->nelem > 0)
Packit 6c4009
	return re_node_set_init_copy (dest, src1);
Packit 6c4009
      else if (src2 != NULL && src2->nelem > 0)
Packit 6c4009
	return re_node_set_init_copy (dest, src2);
Packit 6c4009
      else
Packit 6c4009
	re_node_set_init_empty (dest);
Packit 6c4009
      return REG_NOERROR;
Packit 6c4009
    }
Packit 6c4009
  for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
Packit 6c4009
    {
Packit 6c4009
      if (src1->elems[i1] > src2->elems[i2])
Packit 6c4009
	{
Packit 6c4009
	  dest->elems[id++] = src2->elems[i2++];
Packit 6c4009
	  continue;
Packit 6c4009
	}
Packit 6c4009
      if (src1->elems[i1] == src2->elems[i2])
Packit 6c4009
	++i2;
Packit 6c4009
      dest->elems[id++] = src1->elems[i1++];
Packit 6c4009
    }
Packit 6c4009
  if (i1 < src1->nelem)
Packit 6c4009
    {
Packit 6c4009
      memcpy (dest->elems + id, src1->elems + i1,
Packit 6c4009
	     (src1->nelem - i1) * sizeof (Idx));
Packit 6c4009
      id += src1->nelem - i1;
Packit 6c4009
    }
Packit 6c4009
  else if (i2 < src2->nelem)
Packit 6c4009
    {
Packit 6c4009
      memcpy (dest->elems + id, src2->elems + i2,
Packit 6c4009
	     (src2->nelem - i2) * sizeof (Idx));
Packit 6c4009
      id += src2->nelem - i2;
Packit 6c4009
    }
Packit 6c4009
  dest->nelem = id;
Packit 6c4009
  return REG_NOERROR;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Calculate the union set of the sets DEST and SRC. And store it to
Packit 6c4009
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
Packit 6c4009
Packit 6c4009
static reg_errcode_t
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
re_node_set_merge (re_node_set *dest, const re_node_set *src)
Packit 6c4009
{
Packit 6c4009
  Idx is, id, sbase, delta;
Packit 6c4009
  if (src == NULL || src->nelem == 0)
Packit 6c4009
    return REG_NOERROR;
Packit 6c4009
  if (dest->alloc < 2 * src->nelem + dest->nelem)
Packit 6c4009
    {
Packit 6c4009
      Idx new_alloc = 2 * (src->nelem + dest->alloc);
Packit 6c4009
      Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
Packit 6c4009
      if (BE (new_buffer == NULL, 0))
Packit 6c4009
	return REG_ESPACE;
Packit 6c4009
      dest->elems = new_buffer;
Packit 6c4009
      dest->alloc = new_alloc;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (BE (dest->nelem == 0, 0))
Packit 6c4009
    {
Packit 6c4009
      dest->nelem = src->nelem;
Packit 6c4009
      memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
Packit 6c4009
      return REG_NOERROR;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Copy into the top of DEST the items of SRC that are not
Packit 6c4009
     found in DEST.  Maybe we could binary search in DEST?  */
Packit 6c4009
  for (sbase = dest->nelem + 2 * src->nelem,
Packit 6c4009
       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
Packit 6c4009
    {
Packit 6c4009
      if (dest->elems[id] == src->elems[is])
Packit 6c4009
	is--, id--;
Packit 6c4009
      else if (dest->elems[id] < src->elems[is])
Packit 6c4009
	dest->elems[--sbase] = src->elems[is--];
Packit 6c4009
      else /* if (dest->elems[id] > src->elems[is]) */
Packit 6c4009
	--id;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (is >= 0)
Packit 6c4009
    {
Packit 6c4009
      /* If DEST is exhausted, the remaining items of SRC must be unique.  */
Packit 6c4009
      sbase -= is + 1;
Packit 6c4009
      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  id = dest->nelem - 1;
Packit 6c4009
  is = dest->nelem + 2 * src->nelem - 1;
Packit 6c4009
  delta = is - sbase + 1;
Packit 6c4009
  if (delta == 0)
Packit 6c4009
    return REG_NOERROR;
Packit 6c4009
Packit 6c4009
  /* Now copy.  When DELTA becomes zero, the remaining
Packit 6c4009
     DEST elements are already in place.  */
Packit 6c4009
  dest->nelem += delta;
Packit 6c4009
  for (;;)
Packit 6c4009
    {
Packit 6c4009
      if (dest->elems[is] > dest->elems[id])
Packit 6c4009
	{
Packit 6c4009
	  /* Copy from the top.  */
Packit 6c4009
	  dest->elems[id + delta--] = dest->elems[is--];
Packit 6c4009
	  if (delta == 0)
Packit 6c4009
	    break;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  /* Slide from the bottom.  */
Packit 6c4009
	  dest->elems[id + delta] = dest->elems[id];
Packit 6c4009
	  if (--id < 0)
Packit 6c4009
	    {
Packit 6c4009
	      /* Copy remaining SRC elements.  */
Packit 6c4009
	      memcpy (dest->elems, dest->elems + sbase,
Packit 6c4009
		      delta * sizeof (Idx));
Packit 6c4009
	      break;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  return REG_NOERROR;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Insert the new element ELEM to the re_node_set* SET.
Packit 6c4009
   SET should not already have ELEM.
Packit 6c4009
   Return true if successful.  */
Packit 6c4009
Packit 6c4009
static bool
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
re_node_set_insert (re_node_set *set, Idx elem)
Packit 6c4009
{
Packit 6c4009
  Idx idx;
Packit 6c4009
  /* In case the set is empty.  */
Packit 6c4009
  if (set->alloc == 0)
Packit 6c4009
    return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
Packit 6c4009
Packit 6c4009
  if (BE (set->nelem, 0) == 0)
Packit 6c4009
    {
Packit 6c4009
      /* We already guaranteed above that set->alloc != 0.  */
Packit 6c4009
      set->elems[0] = elem;
Packit 6c4009
      ++set->nelem;
Packit 6c4009
      return true;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Realloc if we need.  */
Packit 6c4009
  if (set->alloc == set->nelem)
Packit 6c4009
    {
Packit 6c4009
      Idx *new_elems;
Packit 6c4009
      set->alloc = set->alloc * 2;
Packit 6c4009
      new_elems = re_realloc (set->elems, Idx, set->alloc);
Packit 6c4009
      if (BE (new_elems == NULL, 0))
Packit 6c4009
	return false;
Packit 6c4009
      set->elems = new_elems;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Move the elements which follows the new element.  Test the
Packit 6c4009
     first element separately to skip a check in the inner loop.  */
Packit 6c4009
  if (elem < set->elems[0])
Packit 6c4009
    {
Packit 6c4009
      idx = 0;
Packit 6c4009
      for (idx = set->nelem; idx > 0; idx--)
Packit 6c4009
	set->elems[idx] = set->elems[idx - 1];
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
Packit 6c4009
	set->elems[idx] = set->elems[idx - 1];
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Insert the new element.  */
Packit 6c4009
  set->elems[idx] = elem;
Packit 6c4009
  ++set->nelem;
Packit 6c4009
  return true;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Insert the new element ELEM to the re_node_set* SET.
Packit 6c4009
   SET should not already have any element greater than or equal to ELEM.
Packit 6c4009
   Return true if successful.  */
Packit 6c4009
Packit 6c4009
static bool
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
re_node_set_insert_last (re_node_set *set, Idx elem)
Packit 6c4009
{
Packit 6c4009
  /* Realloc if we need.  */
Packit 6c4009
  if (set->alloc == set->nelem)
Packit 6c4009
    {
Packit 6c4009
      Idx *new_elems;
Packit 6c4009
      set->alloc = (set->alloc + 1) * 2;
Packit 6c4009
      new_elems = re_realloc (set->elems, Idx, set->alloc);
Packit 6c4009
      if (BE (new_elems == NULL, 0))
Packit 6c4009
	return false;
Packit 6c4009
      set->elems = new_elems;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Insert the new element.  */
Packit 6c4009
  set->elems[set->nelem++] = elem;
Packit 6c4009
  return true;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Compare two node sets SET1 and SET2.
Packit 6c4009
   Return true if SET1 and SET2 are equivalent.  */
Packit 6c4009
Packit 6c4009
static bool
Packit 6c4009
__attribute__ ((pure))
Packit 6c4009
re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
Packit 6c4009
{
Packit 6c4009
  Idx i;
Packit 6c4009
  if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
Packit 6c4009
    return false;
Packit 6c4009
  for (i = set1->nelem ; --i >= 0 ; )
Packit 6c4009
    if (set1->elems[i] != set2->elems[i])
Packit 6c4009
      return false;
Packit 6c4009
  return true;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
Packit 6c4009
Packit 6c4009
static Idx
Packit 6c4009
__attribute__ ((pure))
Packit 6c4009
re_node_set_contains (const re_node_set *set, Idx elem)
Packit 6c4009
{
Packit 6c4009
  __re_size_t idx, right, mid;
Packit 6c4009
  if (set->nelem <= 0)
Packit 6c4009
    return 0;
Packit 6c4009
Packit 6c4009
  /* Binary search the element.  */
Packit 6c4009
  idx = 0;
Packit 6c4009
  right = set->nelem - 1;
Packit 6c4009
  while (idx < right)
Packit 6c4009
    {
Packit 6c4009
      mid = (idx + right) / 2;
Packit 6c4009
      if (set->elems[mid] < elem)
Packit 6c4009
	idx = mid + 1;
Packit 6c4009
      else
Packit 6c4009
	right = mid;
Packit 6c4009
    }
Packit 6c4009
  return set->elems[idx] == elem ? idx + 1 : 0;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
static void
Packit 6c4009
re_node_set_remove_at (re_node_set *set, Idx idx)
Packit 6c4009
{
Packit 6c4009
  if (idx < 0 || idx >= set->nelem)
Packit 6c4009
    return;
Packit 6c4009
  --set->nelem;
Packit 6c4009
  for (; idx < set->nelem; idx++)
Packit 6c4009
    set->elems[idx] = set->elems[idx + 1];
Packit 6c4009
}
Packit 6c4009

Packit 6c4009
Packit 6c4009
/* Add the token TOKEN to dfa->nodes, and return the index of the token.
Packit 6c4009
   Or return -1 if an error occurred.  */
Packit 6c4009
Packit 6c4009
static Idx
Packit 6c4009
re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
Packit 6c4009
{
Packit 6c4009
  if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
Packit 6c4009
    {
Packit 6c4009
      size_t new_nodes_alloc = dfa->nodes_alloc * 2;
Packit 6c4009
      Idx *new_nexts, *new_indices;
Packit 6c4009
      re_node_set *new_edests, *new_eclosures;
Packit 6c4009
      re_token_t *new_nodes;
Packit 6c4009
Packit 6c4009
      /* Avoid overflows in realloc.  */
Packit 6c4009
      const size_t max_object_size = MAX (sizeof (re_token_t),
Packit 6c4009
					  MAX (sizeof (re_node_set),
Packit 6c4009
					       sizeof (Idx)));
Packit 6c4009
      if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_nodes_alloc, 0))
Packit 6c4009
	return -1;
Packit 6c4009
Packit 6c4009
      new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
Packit 6c4009
      if (BE (new_nodes == NULL, 0))
Packit 6c4009
	return -1;
Packit 6c4009
      dfa->nodes = new_nodes;
Packit 6c4009
      new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
Packit 6c4009
      new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
Packit 6c4009
      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
Packit 6c4009
      new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
Packit 6c4009
      if (BE (new_nexts == NULL || new_indices == NULL
Packit 6c4009
	      || new_edests == NULL || new_eclosures == NULL, 0))
Packit 6c4009
	{
Packit 6c4009
	   re_free (new_nexts);
Packit 6c4009
	   re_free (new_indices);
Packit 6c4009
	   re_free (new_edests);
Packit 6c4009
	   re_free (new_eclosures);
Packit 6c4009
	   return -1;
Packit 6c4009
	}
Packit 6c4009
      dfa->nexts = new_nexts;
Packit 6c4009
      dfa->org_indices = new_indices;
Packit 6c4009
      dfa->edests = new_edests;
Packit 6c4009
      dfa->eclosures = new_eclosures;
Packit 6c4009
      dfa->nodes_alloc = new_nodes_alloc;
Packit 6c4009
    }
Packit 6c4009
  dfa->nodes[dfa->nodes_len] = token;
Packit 6c4009
  dfa->nodes[dfa->nodes_len].constraint = 0;
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
  dfa->nodes[dfa->nodes_len].accept_mb =
Packit 6c4009
    ((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
Packit 6c4009
     || token.type == COMPLEX_BRACKET);
Packit 6c4009
#endif
Packit 6c4009
  dfa->nexts[dfa->nodes_len] = -1;
Packit 6c4009
  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
Packit 6c4009
  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
Packit 6c4009
  return dfa->nodes_len++;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
static re_hashval_t
Packit 6c4009
calc_state_hash (const re_node_set *nodes, unsigned int context)
Packit 6c4009
{
Packit 6c4009
  re_hashval_t hash = nodes->nelem + context;
Packit 6c4009
  Idx i;
Packit 6c4009
  for (i = 0 ; i < nodes->nelem ; i++)
Packit 6c4009
    hash += nodes->elems[i];
Packit 6c4009
  return hash;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Search for the state whose node_set is equivalent to NODES.
Packit 6c4009
   Return the pointer to the state, if we found it in the DFA.
Packit 6c4009
   Otherwise create the new one and return it.  In case of an error
Packit 6c4009
   return NULL and set the error code in ERR.
Packit 6c4009
   Note: - We assume NULL as the invalid state, then it is possible that
Packit 6c4009
	   return value is NULL and ERR is REG_NOERROR.
Packit 6c4009
	 - We never return non-NULL value in case of any errors, it is for
Packit 6c4009
	   optimization.  */
Packit 6c4009
Packit 6c4009
static re_dfastate_t *
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
Packit 6c4009
		  const re_node_set *nodes)
Packit 6c4009
{
Packit 6c4009
  re_hashval_t hash;
Packit 6c4009
  re_dfastate_t *new_state;
Packit 6c4009
  struct re_state_table_entry *spot;
Packit 6c4009
  Idx i;
Packit 6c4009
#if defined GCC_LINT || defined lint
Packit 6c4009
  /* Suppress bogus uninitialized-variable warnings.  */
Packit 6c4009
  *err = REG_NOERROR;
Packit 6c4009
#endif
Packit 6c4009
  if (BE (nodes->nelem == 0, 0))
Packit 6c4009
    {
Packit 6c4009
      *err = REG_NOERROR;
Packit 6c4009
      return NULL;
Packit 6c4009
    }
Packit 6c4009
  hash = calc_state_hash (nodes, 0);
Packit 6c4009
  spot = dfa->state_table + (hash & dfa->state_hash_mask);
Packit 6c4009
Packit 6c4009
  for (i = 0 ; i < spot->num ; i++)
Packit 6c4009
    {
Packit 6c4009
      re_dfastate_t *state = spot->array[i];
Packit 6c4009
      if (hash != state->hash)
Packit 6c4009
	continue;
Packit 6c4009
      if (re_node_set_compare (&state->nodes, nodes))
Packit 6c4009
	return state;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* There are no appropriate state in the dfa, create the new one.  */
Packit 6c4009
  new_state = create_ci_newstate (dfa, nodes, hash);
Packit 6c4009
  if (BE (new_state == NULL, 0))
Packit 6c4009
    *err = REG_ESPACE;
Packit 6c4009
Packit 6c4009
  return new_state;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Search for the state whose node_set is equivalent to NODES and
Packit 6c4009
   whose context is equivalent to CONTEXT.
Packit 6c4009
   Return the pointer to the state, if we found it in the DFA.
Packit 6c4009
   Otherwise create the new one and return it.  In case of an error
Packit 6c4009
   return NULL and set the error code in ERR.
Packit 6c4009
   Note: - We assume NULL as the invalid state, then it is possible that
Packit 6c4009
	   return value is NULL and ERR is REG_NOERROR.
Packit 6c4009
	 - We never return non-NULL value in case of any errors, it is for
Packit 6c4009
	   optimization.  */
Packit 6c4009
Packit 6c4009
static re_dfastate_t *
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
Packit 6c4009
			  const re_node_set *nodes, unsigned int context)
Packit 6c4009
{
Packit 6c4009
  re_hashval_t hash;
Packit 6c4009
  re_dfastate_t *new_state;
Packit 6c4009
  struct re_state_table_entry *spot;
Packit 6c4009
  Idx i;
Packit 6c4009
#if defined GCC_LINT || defined lint
Packit 6c4009
  /* Suppress bogus uninitialized-variable warnings.  */
Packit 6c4009
  *err = REG_NOERROR;
Packit 6c4009
#endif
Packit 6c4009
  if (nodes->nelem == 0)
Packit 6c4009
    {
Packit 6c4009
      *err = REG_NOERROR;
Packit 6c4009
      return NULL;
Packit 6c4009
    }
Packit 6c4009
  hash = calc_state_hash (nodes, context);
Packit 6c4009
  spot = dfa->state_table + (hash & dfa->state_hash_mask);
Packit 6c4009
Packit 6c4009
  for (i = 0 ; i < spot->num ; i++)
Packit 6c4009
    {
Packit 6c4009
      re_dfastate_t *state = spot->array[i];
Packit 6c4009
      if (state->hash == hash
Packit 6c4009
	  && state->context == context
Packit 6c4009
	  && re_node_set_compare (state->entrance_nodes, nodes))
Packit 6c4009
	return state;
Packit 6c4009
    }
Packit 6c4009
  /* There are no appropriate state in 'dfa', create the new one.  */
Packit 6c4009
  new_state = create_cd_newstate (dfa, nodes, context, hash);
Packit 6c4009
  if (BE (new_state == NULL, 0))
Packit 6c4009
    *err = REG_ESPACE;
Packit 6c4009
Packit 6c4009
  return new_state;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Finish initialization of the new state NEWSTATE, and using its hash value
Packit 6c4009
   HASH put in the appropriate bucket of DFA's state table.  Return value
Packit 6c4009
   indicates the error code if failed.  */
Packit 6c4009
Packit 6c4009
static reg_errcode_t
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
Packit 6c4009
		re_hashval_t hash)
Packit 6c4009
{
Packit 6c4009
  struct re_state_table_entry *spot;
Packit 6c4009
  reg_errcode_t err;
Packit 6c4009
  Idx i;
Packit 6c4009
Packit 6c4009
  newstate->hash = hash;
Packit 6c4009
  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
Packit 6c4009
  if (BE (err != REG_NOERROR, 0))
Packit 6c4009
    return REG_ESPACE;
Packit 6c4009
  for (i = 0; i < newstate->nodes.nelem; i++)
Packit 6c4009
    {
Packit 6c4009
      Idx elem = newstate->nodes.elems[i];
Packit 6c4009
      if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
Packit 6c4009
	if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
Packit 6c4009
	  return REG_ESPACE;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  spot = dfa->state_table + (hash & dfa->state_hash_mask);
Packit 6c4009
  if (BE (spot->alloc <= spot->num, 0))
Packit 6c4009
    {
Packit 6c4009
      Idx new_alloc = 2 * spot->num + 2;
Packit 6c4009
      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
Packit 6c4009
					      new_alloc);
Packit 6c4009
      if (BE (new_array == NULL, 0))
Packit 6c4009
	return REG_ESPACE;
Packit 6c4009
      spot->array = new_array;
Packit 6c4009
      spot->alloc = new_alloc;
Packit 6c4009
    }
Packit 6c4009
  spot->array[spot->num++] = newstate;
Packit 6c4009
  return REG_NOERROR;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
static void
Packit 6c4009
free_state (re_dfastate_t *state)
Packit 6c4009
{
Packit 6c4009
  re_node_set_free (&state->non_eps_nodes);
Packit 6c4009
  re_node_set_free (&state->inveclosure);
Packit 6c4009
  if (state->entrance_nodes != &state->nodes)
Packit 6c4009
    {
Packit 6c4009
      re_node_set_free (state->entrance_nodes);
Packit 6c4009
      re_free (state->entrance_nodes);
Packit 6c4009
    }
Packit 6c4009
  re_node_set_free (&state->nodes);
Packit 6c4009
  re_free (state->word_trtable);
Packit 6c4009
  re_free (state->trtable);
Packit 6c4009
  re_free (state);
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Create the new state which is independent of contexts.
Packit 6c4009
   Return the new state if succeeded, otherwise return NULL.  */
Packit 6c4009
Packit 6c4009
static re_dfastate_t *
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
Packit 6c4009
		    re_hashval_t hash)
Packit 6c4009
{
Packit 6c4009
  Idx i;
Packit 6c4009
  reg_errcode_t err;
Packit 6c4009
  re_dfastate_t *newstate;
Packit 6c4009
Packit 6c4009
  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
Packit 6c4009
  if (BE (newstate == NULL, 0))
Packit 6c4009
    return NULL;
Packit 6c4009
  err = re_node_set_init_copy (&newstate->nodes, nodes);
Packit 6c4009
  if (BE (err != REG_NOERROR, 0))
Packit 6c4009
    {
Packit 6c4009
      re_free (newstate);
Packit 6c4009
      return NULL;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  newstate->entrance_nodes = &newstate->nodes;
Packit 6c4009
  for (i = 0 ; i < nodes->nelem ; i++)
Packit 6c4009
    {
Packit 6c4009
      re_token_t *node = dfa->nodes + nodes->elems[i];
Packit 6c4009
      re_token_type_t type = node->type;
Packit 6c4009
      if (type == CHARACTER && !node->constraint)
Packit 6c4009
	continue;
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
      newstate->accept_mb |= node->accept_mb;
Packit 6c4009
#endif /* RE_ENABLE_I18N */
Packit 6c4009
Packit 6c4009
      /* If the state has the halt node, the state is a halt state.  */
Packit 6c4009
      if (type == END_OF_RE)
Packit 6c4009
	newstate->halt = 1;
Packit 6c4009
      else if (type == OP_BACK_REF)
Packit 6c4009
	newstate->has_backref = 1;
Packit 6c4009
      else if (type == ANCHOR || node->constraint)
Packit 6c4009
	newstate->has_constraint = 1;
Packit 6c4009
    }
Packit 6c4009
  err = register_state (dfa, newstate, hash);
Packit 6c4009
  if (BE (err != REG_NOERROR, 0))
Packit 6c4009
    {
Packit 6c4009
      free_state (newstate);
Packit 6c4009
      newstate = NULL;
Packit 6c4009
    }
Packit 6c4009
  return newstate;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Create the new state which is depend on the context CONTEXT.
Packit 6c4009
   Return the new state if succeeded, otherwise return NULL.  */
Packit 6c4009
Packit 6c4009
static re_dfastate_t *
Packit 6c4009
__attribute_warn_unused_result__
Packit 6c4009
create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
Packit 6c4009
		    unsigned int context, re_hashval_t hash)
Packit 6c4009
{
Packit 6c4009
  Idx i, nctx_nodes = 0;
Packit 6c4009
  reg_errcode_t err;
Packit 6c4009
  re_dfastate_t *newstate;
Packit 6c4009
Packit 6c4009
  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
Packit 6c4009
  if (BE (newstate == NULL, 0))
Packit 6c4009
    return NULL;
Packit 6c4009
  err = re_node_set_init_copy (&newstate->nodes, nodes);
Packit 6c4009
  if (BE (err != REG_NOERROR, 0))
Packit 6c4009
    {
Packit 6c4009
      re_free (newstate);
Packit 6c4009
      return NULL;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  newstate->context = context;
Packit 6c4009
  newstate->entrance_nodes = &newstate->nodes;
Packit 6c4009
Packit 6c4009
  for (i = 0 ; i < nodes->nelem ; i++)
Packit 6c4009
    {
Packit 6c4009
      re_token_t *node = dfa->nodes + nodes->elems[i];
Packit 6c4009
      re_token_type_t type = node->type;
Packit 6c4009
      unsigned int constraint = node->constraint;
Packit 6c4009
Packit 6c4009
      if (type == CHARACTER && !constraint)
Packit 6c4009
	continue;
Packit 6c4009
#ifdef RE_ENABLE_I18N
Packit 6c4009
      newstate->accept_mb |= node->accept_mb;
Packit 6c4009
#endif /* RE_ENABLE_I18N */
Packit 6c4009
Packit 6c4009
      /* If the state has the halt node, the state is a halt state.  */
Packit 6c4009
      if (type == END_OF_RE)
Packit 6c4009
	newstate->halt = 1;
Packit 6c4009
      else if (type == OP_BACK_REF)
Packit 6c4009
	newstate->has_backref = 1;
Packit 6c4009
Packit 6c4009
      if (constraint)
Packit 6c4009
	{
Packit 6c4009
	  if (newstate->entrance_nodes == &newstate->nodes)
Packit 6c4009
	    {
Packit 6c4009
	      newstate->entrance_nodes = re_malloc (re_node_set, 1);
Packit 6c4009
	      if (BE (newstate->entrance_nodes == NULL, 0))
Packit 6c4009
		{
Packit 6c4009
		  free_state (newstate);
Packit 6c4009
		  return NULL;
Packit 6c4009
		}
Packit 6c4009
	      if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
Packit 6c4009
		  != REG_NOERROR)
Packit 6c4009
		return NULL;
Packit 6c4009
	      nctx_nodes = 0;
Packit 6c4009
	      newstate->has_constraint = 1;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
Packit 6c4009
	    {
Packit 6c4009
	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
Packit 6c4009
	      ++nctx_nodes;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
  err = register_state (dfa, newstate, hash);
Packit 6c4009
  if (BE (err != REG_NOERROR, 0))
Packit 6c4009
    {
Packit 6c4009
      free_state (newstate);
Packit 6c4009
      newstate = NULL;
Packit 6c4009
    }
Packit 6c4009
  return  newstate;
Packit 6c4009
}