Blame lib/regex_internal.c

Packit 33f14e
/* Extended regular expression matching and search library.
Packit 33f14e
   Copyright (C) 2002-2017 Free Software Foundation, Inc.
Packit 33f14e
   This file is part of the GNU C Library.
Packit 33f14e
   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
Packit 33f14e
Packit 33f14e
   The GNU C Library is free software; you can redistribute it and/or
Packit 33f14e
   modify it under the terms of the GNU General Public
Packit 33f14e
   License as published by the Free Software Foundation; either
Packit 33f14e
   version 3 of the License, or (at your option) any later version.
Packit 33f14e
Packit 33f14e
   The GNU C Library is distributed in the hope that it will be useful,
Packit 33f14e
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 33f14e
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 33f14e
   General Public License for more details.
Packit 33f14e
Packit 33f14e
   You should have received a copy of the GNU General Public
Packit 33f14e
   License along with the GNU C Library; if not, see
Packit 33f14e
   <http://www.gnu.org/licenses/>.  */
Packit 33f14e
Packit 33f14e
/* Forward declaration: stores STR, LEN, TRANS, ICASE and the
   locale-related fields of DFA into PSTR (see the definition below).  */
static void re_string_construct_common (const char *str, Idx len,
Packit 33f14e
					re_string_t *pstr,
Packit 33f14e
					RE_TRANSLATE_TYPE trans, bool icase,
Packit 33f14e
					const re_dfa_t *dfa) internal_function;
Packit 33f14e
/* Forward declaration; the definition is not visible in this part of
   the file.  NOTE(review): presumably builds a context-independent DFA
   state for NODES -- confirm against the definition.  */
static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
Packit 33f14e
					  const re_node_set *nodes,
Packit 33f14e
					  re_hashval_t hash) internal_function;
Packit 33f14e
/* Forward declaration; the definition is not visible in this part of
   the file.  NOTE(review): presumably the context-dependent counterpart
   of create_ci_newstate, with CONTEXT selecting the context -- confirm
   against the definition.  */
static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
Packit 33f14e
					  const re_node_set *nodes,
Packit 33f14e
					  unsigned int context,
Packit 33f14e
					  re_hashval_t hash) internal_function;
Packit 33f14e

Packit 33f14e
/* Functions for string operations.  */
Packit 33f14e
Packit 33f14e
/* This function allocate the buffers.  It is necessary to call
Packit 33f14e
   re_string_reconstruct before using the object.  */
Packit 33f14e
Packit 33f14e
static reg_errcode_t
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
Packit 33f14e
		    RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
Packit 33f14e
{
Packit 33f14e
  reg_errcode_t ret;
Packit 33f14e
  Idx init_buf_len;
Packit 33f14e
Packit 33f14e
  /* Ensure at least one character fits into the buffers.  */
Packit 33f14e
  if (init_len < dfa->mb_cur_max)
Packit 33f14e
    init_len = dfa->mb_cur_max;
Packit 33f14e
  init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
Packit 33f14e
  re_string_construct_common (str, len, pstr, trans, icase, dfa);
Packit 33f14e
Packit 33f14e
  ret = re_string_realloc_buffers (pstr, init_buf_len);
Packit 33f14e
  if (BE (ret != REG_NOERROR, 0))
Packit 33f14e
    return ret;
Packit 33f14e
Packit 33f14e
  pstr->word_char = dfa->word_char;
Packit 33f14e
  pstr->word_ops_used = dfa->word_ops_used;
Packit 33f14e
  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
Packit 33f14e
  pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
Packit 33f14e
  pstr->valid_raw_len = pstr->valid_len;
Packit 33f14e
  return REG_NOERROR;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* This function allocate the buffers, and initialize them.  */
Packit 33f14e
Packit 33f14e
static reg_errcode_t
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
re_string_construct (re_string_t *pstr, const char *str, Idx len,
Packit 33f14e
		     RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
Packit 33f14e
{
Packit 33f14e
  reg_errcode_t ret;
Packit 33f14e
  memset (pstr, '\0', sizeof (re_string_t));
Packit 33f14e
  re_string_construct_common (str, len, pstr, trans, icase, dfa);
Packit 33f14e
Packit 33f14e
  if (len > 0)
Packit 33f14e
    {
Packit 33f14e
      ret = re_string_realloc_buffers (pstr, len + 1);
Packit 33f14e
      if (BE (ret != REG_NOERROR, 0))
Packit 33f14e
	return ret;
Packit 33f14e
    }
Packit 33f14e
  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
Packit 33f14e
Packit 33f14e
  if (icase)
Packit 33f14e
    {
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
      if (dfa->mb_cur_max > 1)
Packit 33f14e
	{
Packit 33f14e
	  while (1)
Packit 33f14e
	    {
Packit 33f14e
	      ret = build_wcs_upper_buffer (pstr);
Packit 33f14e
	      if (BE (ret != REG_NOERROR, 0))
Packit 33f14e
		return ret;
Packit 33f14e
	      if (pstr->valid_raw_len >= len)
Packit 33f14e
		break;
Packit 33f14e
	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
Packit 33f14e
		break;
Packit 33f14e
	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
Packit 33f14e
	      if (BE (ret != REG_NOERROR, 0))
Packit 33f14e
		return ret;
Packit 33f14e
	    }
Packit 33f14e
	}
Packit 33f14e
      else
Packit 33f14e
#endif /* RE_ENABLE_I18N  */
Packit 33f14e
	build_upper_buffer (pstr);
Packit 33f14e
    }
Packit 33f14e
  else
Packit 33f14e
    {
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
      if (dfa->mb_cur_max > 1)
Packit 33f14e
	build_wcs_buffer (pstr);
Packit 33f14e
      else
Packit 33f14e
#endif /* RE_ENABLE_I18N  */
Packit 33f14e
	{
Packit 33f14e
	  if (trans != NULL)
Packit 33f14e
	    re_string_translate_buffer (pstr);
Packit 33f14e
	  else
Packit 33f14e
	    {
Packit 33f14e
	      pstr->valid_len = pstr->bufs_len;
Packit 33f14e
	      pstr->valid_raw_len = pstr->bufs_len;
Packit 33f14e
	    }
Packit 33f14e
	}
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  return REG_NOERROR;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Helper functions for re_string_allocate and re_string_construct.  */
Packit 33f14e
Packit 33f14e
static reg_errcode_t
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
Packit 33f14e
{
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
  if (pstr->mb_cur_max > 1)
Packit 33f14e
    {
Packit 33f14e
      wint_t *new_wcs;
Packit 33f14e
Packit 33f14e
      /* Avoid overflow in realloc.  */
Packit 33f14e
      const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
Packit 33f14e
      if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_buf_len, 0))
Packit 33f14e
	return REG_ESPACE;
Packit 33f14e
Packit 33f14e
      new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
Packit 33f14e
      if (BE (new_wcs == NULL, 0))
Packit 33f14e
	return REG_ESPACE;
Packit 33f14e
      pstr->wcs = new_wcs;
Packit 33f14e
      if (pstr->offsets != NULL)
Packit 33f14e
	{
Packit 33f14e
	  Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
Packit 33f14e
	  if (BE (new_offsets == NULL, 0))
Packit 33f14e
	    return REG_ESPACE;
Packit 33f14e
	  pstr->offsets = new_offsets;
Packit 33f14e
	}
Packit 33f14e
    }
Packit 33f14e
#endif /* RE_ENABLE_I18N  */
Packit 33f14e
  if (pstr->mbs_allocated)
Packit 33f14e
    {
Packit 33f14e
      unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
Packit 33f14e
					   new_buf_len);
Packit 33f14e
      if (BE (new_mbs == NULL, 0))
Packit 33f14e
	return REG_ESPACE;
Packit 33f14e
      pstr->mbs = new_mbs;
Packit 33f14e
    }
Packit 33f14e
  pstr->bufs_len = new_buf_len;
Packit 33f14e
  return REG_NOERROR;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
Packit 33f14e
static void
Packit 33f14e
internal_function
Packit 33f14e
re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
Packit 33f14e
			    RE_TRANSLATE_TYPE trans, bool icase,
Packit 33f14e
			    const re_dfa_t *dfa)
Packit 33f14e
{
Packit 33f14e
  pstr->raw_mbs = (const unsigned char *) str;
Packit 33f14e
  pstr->len = len;
Packit 33f14e
  pstr->raw_len = len;
Packit 33f14e
  pstr->trans = trans;
Packit 33f14e
  pstr->icase = icase;
Packit 33f14e
  pstr->mbs_allocated = (trans != NULL || icase);
Packit 33f14e
  pstr->mb_cur_max = dfa->mb_cur_max;
Packit 33f14e
  pstr->is_utf8 = dfa->is_utf8;
Packit 33f14e
  pstr->map_notascii = dfa->map_notascii;
Packit 33f14e
  pstr->stop = pstr->len;
Packit 33f14e
  pstr->raw_stop = pstr->stop;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
Packit 33f14e
/* Build wide character buffer PSTR->WCS.
Packit 33f14e
   If the byte sequence of the string are:
Packit 33f14e
     <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
Packit 33f14e
   Then wide character buffer will be:
Packit 33f14e
     <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
Packit 33f14e
   We use WEOF for padding, they indicate that the position isn't
Packit 33f14e
   a first byte of a multibyte character.
Packit 33f14e
Packit 33f14e
   Note that this function assumes PSTR->VALID_LEN elements are already
Packit 33f14e
   built and starts from PSTR->VALID_LEN.  */
Packit 33f14e
Packit 33f14e
static void
Packit 33f14e
internal_function
Packit 33f14e
build_wcs_buffer (re_string_t *pstr)
Packit 33f14e
{
Packit 33f14e
#ifdef _LIBC
Packit 33f14e
  unsigned char buf[MB_LEN_MAX];
Packit 33f14e
  assert (MB_LEN_MAX >= pstr->mb_cur_max);
Packit 33f14e
#else
Packit 33f14e
  unsigned char buf[64];
Packit 33f14e
#endif
Packit 33f14e
  mbstate_t prev_st;
Packit 33f14e
  Idx byte_idx, end_idx, remain_len;
Packit 33f14e
  size_t mbclen;
Packit 33f14e
Packit 33f14e
  /* Build the buffers from pstr->valid_len to either pstr->len or
Packit 33f14e
     pstr->bufs_len.  */
Packit 33f14e
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit 33f14e
  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
Packit 33f14e
    {
Packit 33f14e
      wchar_t wc;
Packit 33f14e
      const char *p;
Packit 33f14e
Packit 33f14e
      remain_len = end_idx - byte_idx;
Packit 33f14e
      prev_st = pstr->cur_state;
Packit 33f14e
      /* Apply the translation if we need.  */
Packit 33f14e
      if (BE (pstr->trans != NULL, 0))
Packit 33f14e
	{
Packit 33f14e
	  int i, ch;
Packit 33f14e
Packit 33f14e
	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
Packit 33f14e
	    {
Packit 33f14e
	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
Packit 33f14e
	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
Packit 33f14e
	    }
Packit 33f14e
	  p = (const char *) buf;
Packit 33f14e
	}
Packit 33f14e
      else
Packit 33f14e
	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
Packit 33f14e
      mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
Packit 33f14e
      if (BE (mbclen == (size_t) -1 || mbclen == 0
Packit 33f14e
	      || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
Packit 33f14e
	{
Packit 33f14e
	  /* We treat these cases as a singlebyte character.  */
Packit 33f14e
	  mbclen = 1;
Packit 33f14e
	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
Packit 33f14e
	  if (BE (pstr->trans != NULL, 0))
Packit 33f14e
	    wc = pstr->trans[wc];
Packit 33f14e
	  pstr->cur_state = prev_st;
Packit 33f14e
	}
Packit 33f14e
      else if (BE (mbclen == (size_t) -2, 0))
Packit 33f14e
	{
Packit 33f14e
	  /* The buffer doesn't have enough space, finish to build.  */
Packit 33f14e
	  pstr->cur_state = prev_st;
Packit 33f14e
	  break;
Packit 33f14e
	}
Packit 33f14e
Packit 33f14e
      /* Write wide character and padding.  */
Packit 33f14e
      pstr->wcs[byte_idx++] = wc;
Packit 33f14e
      /* Write paddings.  */
Packit 33f14e
      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
Packit 33f14e
	pstr->wcs[byte_idx++] = WEOF;
Packit 33f14e
    }
Packit 33f14e
  pstr->valid_len = byte_idx;
Packit 33f14e
  pstr->valid_raw_len = byte_idx;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
Packit 33f14e
   but for REG_ICASE.  */
Packit 33f14e
Packit 33f14e
/* Details: starting at PSTR->VALID_LEN, convert the string up to the
   smaller of PSTR->LEN and PSTR->BUFS_LEN.  The upper-cased bytes are
   stored in PSTR->MBS and the upper-cased wide characters, padded with
   WEOF, in PSTR->WCS.  When an upper-cased character needs a different
   number of bytes than the original, PSTR->OFFSETS is allocated (if
   necessary) to map buffer positions back to raw positions, and
   PSTR->LEN and PSTR->STOP are adjusted.  On exit PSTR->VALID_LEN and
   PSTR->VALID_RAW_LEN give the buffer and raw positions reached.
   Return REG_NOERROR, or REG_ESPACE if PSTR->OFFSETS cannot be
   allocated.  */
static reg_errcode_t
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
build_wcs_upper_buffer (re_string_t *pstr)
Packit 33f14e
{
Packit 33f14e
  mbstate_t prev_st;
Packit 33f14e
  Idx src_idx, byte_idx, end_idx, remain_len;
Packit 33f14e
  size_t mbclen;
Packit 33f14e
#ifdef _LIBC
Packit 33f14e
  char buf[MB_LEN_MAX];
Packit 33f14e
  assert (MB_LEN_MAX >= pstr->mb_cur_max);
Packit 33f14e
#else
Packit 33f14e
  char buf[64];
Packit 33f14e
#endif
Packit 33f14e
Packit 33f14e
  /* Resume where the previous call stopped, and go no further than the
     end of the string or of the buffers, whichever comes first.  */
  byte_idx = pstr->valid_len;
Packit 33f14e
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit 33f14e
Packit 33f14e
  /* The following optimization assumes that ASCII characters can be
Packit 33f14e
     mapped to wide characters with a simple cast.  */
Packit 33f14e
  /* Fast path: taken only when there is no translation table and no
     offset table is in use, so raw and buffer positions coincide.  */
  if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
Packit 33f14e
    {
Packit 33f14e
      while (byte_idx < end_idx)
Packit 33f14e
	{
Packit 33f14e
	  wchar_t wc;
Packit 33f14e
Packit 33f14e
	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
Packit 33f14e
	      && mbsinit (&pstr->cur_state))
Packit 33f14e
	    {
Packit 33f14e
	      /* In case of a singlebyte character.  */
Packit 33f14e
	      pstr->mbs[byte_idx]
Packit 33f14e
		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
Packit 33f14e
	      /* The next step uses the assumption that wchar_t is encoded
Packit 33f14e
		 ASCII-safe: all ASCII values can be converted like this.  */
Packit 33f14e
	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
Packit 33f14e
	      ++byte_idx;
Packit 33f14e
	      continue;
Packit 33f14e
	    }
Packit 33f14e
Packit 33f14e
	  /* Otherwise decode one multibyte character, saving the shift
	     state first so that it can be restored if needed.  */
	  remain_len = end_idx - byte_idx;
Packit 33f14e
	  prev_st = pstr->cur_state;
Packit 33f14e
	  mbclen = __mbrtowc (&wc,
Packit 33f14e
			      ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
Packit 33f14e
			       + byte_idx), remain_len, &pstr->cur_state);
Packit 33f14e
	  if (BE (mbclen < (size_t) -2, 1))
Packit 33f14e
	    {
Packit 33f14e
	      wchar_t wcu = __towupper (wc);
Packit 33f14e
	      if (wcu != wc)
Packit 33f14e
		{
Packit 33f14e
		  size_t mbcdlen;
Packit 33f14e
Packit 33f14e
		  /* Re-encode the upper-case character, using the saved
		     copy PREV_ST as the conversion state.  */
		  mbcdlen = __wcrtomb (buf, wcu, &prev_st);
Packit 33f14e
		  if (BE (mbclen == mbcdlen, 1))
Packit 33f14e
		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
Packit 33f14e
		  else
Packit 33f14e
		    {
Packit 33f14e
		      /* The byte length differs (or the character could
			 not be encoded): redo this character in the
			 general loop below, starting with the raw
			 position equal to the buffer position.
			 NOTE(review): PSTR->CUR_STATE has already been
			 updated by the __mbrtowc call above when this
			 jump is taken.  */
		      src_idx = byte_idx;
Packit 33f14e
		      goto offsets_needed;
Packit 33f14e
		    }
Packit 33f14e
		}
Packit 33f14e
	      else
Packit 33f14e
		memcpy (pstr->mbs + byte_idx,
Packit 33f14e
			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
Packit 33f14e
	      pstr->wcs[byte_idx++] = wcu;
Packit 33f14e
	      /* Write paddings.  */
Packit 33f14e
	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
Packit 33f14e
		pstr->wcs[byte_idx++] = WEOF;
Packit 33f14e
	    }
Packit 33f14e
	  else if (mbclen == (size_t) -1 || mbclen == 0
Packit 33f14e
		   || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
Packit 33f14e
	    {
Packit 33f14e
	      /* It is an invalid character, an incomplete character
Packit 33f14e
		 at the end of the string, or '\0'.  Just use the byte.  */
Packit 33f14e
	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
Packit 33f14e
	      pstr->mbs[byte_idx] = ch;
Packit 33f14e
	      /* And also cast it to wide char.  */
Packit 33f14e
	      pstr->wcs[byte_idx++] = (wchar_t) ch;
Packit 33f14e
	      if (BE (mbclen == (size_t) -1, 0))
Packit 33f14e
		pstr->cur_state = prev_st;
Packit 33f14e
	    }
Packit 33f14e
	  else
Packit 33f14e
	    {
Packit 33f14e
	      /* The buffer doesn't have enough space, finish to build.  */
Packit 33f14e
	      pstr->cur_state = prev_st;
Packit 33f14e
	      break;
Packit 33f14e
	    }
Packit 33f14e
	}
Packit 33f14e
      /* On the fast path buffer and raw positions are the same.  */
      pstr->valid_len = byte_idx;
Packit 33f14e
      pstr->valid_raw_len = byte_idx;
Packit 33f14e
      return REG_NOERROR;
Packit 33f14e
    }
Packit 33f14e
  else
Packit 33f14e
    /* General path: the raw position SRC_IDX and the buffer position
       BYTE_IDX are tracked separately.  */
    for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
Packit 33f14e
      {
Packit 33f14e
	wchar_t wc;
Packit 33f14e
	const char *p;
Packit 33f14e
      /* Reached from the loop header, or by the jump out of the fast
	 path above.  */
      offsets_needed:
Packit 33f14e
	remain_len = end_idx - byte_idx;
Packit 33f14e
	prev_st = pstr->cur_state;
Packit 33f14e
	if (BE (pstr->trans != NULL, 0))
Packit 33f14e
	  {
Packit 33f14e
	    int i, ch;
Packit 33f14e
Packit 33f14e
	    /* Translate up to one character's worth of raw bytes into
	       the scratch buffer and decode from there.  */
	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
Packit 33f14e
	      {
Packit 33f14e
		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
Packit 33f14e
		buf[i] = pstr->trans[ch];
Packit 33f14e
	      }
Packit 33f14e
	    p = (const char *) buf;
Packit 33f14e
	  }
Packit 33f14e
	else
Packit 33f14e
	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
Packit 33f14e
	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
Packit 33f14e
	if (BE (mbclen < (size_t) -2, 1))
Packit 33f14e
	  {
Packit 33f14e
	    wchar_t wcu = __towupper (wc);
Packit 33f14e
	    if (wcu != wc)
Packit 33f14e
	      {
Packit 33f14e
		size_t mbcdlen;
Packit 33f14e
Packit 33f14e
		/* Re-encode the upper-case character into BUF, using the
		   saved copy PREV_ST as the conversion state.  */
		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
Packit 33f14e
		if (BE (mbclen == mbcdlen, 1))
Packit 33f14e
		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
Packit 33f14e
		else if (mbcdlen != (size_t) -1)
Packit 33f14e
		  {
Packit 33f14e
		    size_t i;
Packit 33f14e
Packit 33f14e
		    /* The re-encoded character does not fit in the
		       buffers: restore the state and stop here.  */
		    if (byte_idx + mbcdlen > pstr->bufs_len)
Packit 33f14e
		      {
Packit 33f14e
			pstr->cur_state = prev_st;
Packit 33f14e
			break;
Packit 33f14e
		      }
Packit 33f14e
Packit 33f14e
		    /* Allocate the buffer-to-raw offset table on first
		       use.  */
		    if (pstr->offsets == NULL)
Packit 33f14e
		      {
Packit 33f14e
			pstr->offsets = re_malloc (Idx, pstr->bufs_len);
Packit 33f14e
Packit 33f14e
			if (pstr->offsets == NULL)
Packit 33f14e
			  return REG_ESPACE;
Packit 33f14e
		      }
Packit 33f14e
		    /* Everything built so far maps one-to-one onto the
		       raw string.  */
		    if (!pstr->offsets_needed)
Packit 33f14e
		      {
Packit 33f14e
			for (i = 0; i < (size_t) byte_idx; ++i)
Packit 33f14e
			  pstr->offsets[i] = i;
Packit 33f14e
			pstr->offsets_needed = 1;
Packit 33f14e
		      }
Packit 33f14e
Packit 33f14e
		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
Packit 33f14e
		    pstr->wcs[byte_idx] = wcu;
Packit 33f14e
		    pstr->offsets[byte_idx] = src_idx;
Packit 33f14e
		    /* Remaining bytes of the converted character: WEOF
		       padding in WCS, and an offset that is clamped to
		       the last raw byte of the source character.  */
		    for (i = 1; i < mbcdlen; ++i)
Packit 33f14e
		      {
Packit 33f14e
			pstr->offsets[byte_idx + i]
Packit 33f14e
			  = src_idx + (i < mbclen ? i : mbclen - 1);
Packit 33f14e
			pstr->wcs[byte_idx + i] = WEOF;
Packit 33f14e
		      }
Packit 33f14e
		    /* The buffer string changes length by the difference
		       in byte counts; STOP moves too when the raw stop
		       position lies beyond the start of this character.
		       END_IDX is then recomputed from the new length.  */
		    pstr->len += mbcdlen - mbclen;
Packit 33f14e
		    if (pstr->raw_stop > src_idx)
Packit 33f14e
		      pstr->stop += mbcdlen - mbclen;
Packit 33f14e
		    end_idx = (pstr->bufs_len > pstr->len)
Packit 33f14e
			      ? pstr->len : pstr->bufs_len;
Packit 33f14e
		    byte_idx += mbcdlen;
Packit 33f14e
		    src_idx += mbclen;
Packit 33f14e
		    continue;
Packit 33f14e
		  }
Packit 33f14e
		else
Packit 33f14e
		  /* The upper-case character could not be encoded; keep
		     the bytes that were decoded.  */
		  memcpy (pstr->mbs + byte_idx, p, mbclen);
Packit 33f14e
	      }
Packit 33f14e
	    else
Packit 33f14e
	      memcpy (pstr->mbs + byte_idx, p, mbclen);
Packit 33f14e
Packit 33f14e
	    /* Same byte length: record a one-to-one mapping for these
	       bytes when the offset table is in use.  */
	    if (BE (pstr->offsets_needed != 0, 0))
Packit 33f14e
	      {
Packit 33f14e
		size_t i;
Packit 33f14e
		for (i = 0; i < mbclen; ++i)
Packit 33f14e
		  pstr->offsets[byte_idx + i] = src_idx + i;
Packit 33f14e
	      }
Packit 33f14e
	    src_idx += mbclen;
Packit 33f14e
Packit 33f14e
	    pstr->wcs[byte_idx++] = wcu;
Packit 33f14e
	    /* Write paddings.  */
Packit 33f14e
	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
Packit 33f14e
	      pstr->wcs[byte_idx++] = WEOF;
Packit 33f14e
	  }
Packit 33f14e
	else if (mbclen == (size_t) -1 || mbclen == 0
Packit 33f14e
		 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
Packit 33f14e
	  {
Packit 33f14e
	    /* It is an invalid character or '\0'.  Just use the byte.  */
Packit 33f14e
	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
Packit 33f14e
Packit 33f14e
	    if (BE (pstr->trans != NULL, 0))
Packit 33f14e
	      ch = pstr->trans [ch];
Packit 33f14e
	    pstr->mbs[byte_idx] = ch;
Packit 33f14e
Packit 33f14e
	    if (BE (pstr->offsets_needed != 0, 0))
Packit 33f14e
	      pstr->offsets[byte_idx] = src_idx;
Packit 33f14e
	    ++src_idx;
Packit 33f14e
Packit 33f14e
	    /* And also cast it to wide char.  */
Packit 33f14e
	    pstr->wcs[byte_idx++] = (wchar_t) ch;
Packit 33f14e
	    if (BE (mbclen == (size_t) -1, 0))
Packit 33f14e
	      pstr->cur_state = prev_st;
Packit 33f14e
	  }
Packit 33f14e
	else
Packit 33f14e
	  {
Packit 33f14e
	    /* The buffer doesn't have enough space, finish to build.  */
Packit 33f14e
	    pstr->cur_state = prev_st;
Packit 33f14e
	    break;
Packit 33f14e
	  }
Packit 33f14e
      }
Packit 33f14e
  /* Buffer and raw positions may differ on the general path.  */
  pstr->valid_len = byte_idx;
Packit 33f14e
  pstr->valid_raw_len = src_idx;
Packit 33f14e
  return REG_NOERROR;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Skip characters until the index becomes greater than NEW_RAW_IDX.
Packit 33f14e
   Return the index.  */
Packit 33f14e
Packit 33f14e
static Idx
Packit 33f14e
internal_function
Packit 33f14e
re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
Packit 33f14e
{
Packit 33f14e
  mbstate_t prev_st;
Packit 33f14e
  Idx rawbuf_idx;
Packit 33f14e
  size_t mbclen;
Packit 33f14e
  wint_t wc = WEOF;
Packit 33f14e
Packit 33f14e
  /* Skip the characters which are not necessary to check.  */
Packit 33f14e
  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
Packit 33f14e
       rawbuf_idx < new_raw_idx;)
Packit 33f14e
    {
Packit 33f14e
      wchar_t wc2;
Packit 33f14e
      Idx remain_len = pstr->raw_len - rawbuf_idx;
Packit 33f14e
      prev_st = pstr->cur_state;
Packit 33f14e
      mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
Packit 33f14e
			  remain_len, &pstr->cur_state);
Packit 33f14e
      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
Packit 33f14e
	{
Packit 33f14e
	  /* We treat these cases as a single byte character.  */
Packit 33f14e
	  if (mbclen == 0 || remain_len == 0)
Packit 33f14e
	    wc = L'\0';
Packit 33f14e
	  else
Packit 33f14e
	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
Packit 33f14e
	  mbclen = 1;
Packit 33f14e
	  pstr->cur_state = prev_st;
Packit 33f14e
	}
Packit 33f14e
      else
Packit 33f14e
	wc = wc2;
Packit 33f14e
      /* Then proceed the next character.  */
Packit 33f14e
      rawbuf_idx += mbclen;
Packit 33f14e
    }
Packit 33f14e
  *last_wc = wc;
Packit 33f14e
  return rawbuf_idx;
Packit 33f14e
}
Packit 33f14e
#endif /* RE_ENABLE_I18N  */
Packit 33f14e
Packit 33f14e
/* Build the buffer PSTR->MBS, and apply the translation if we need.
Packit 33f14e
   This function is used in case of REG_ICASE.  */
Packit 33f14e
Packit 33f14e
static void
Packit 33f14e
internal_function
Packit 33f14e
build_upper_buffer (re_string_t *pstr)
Packit 33f14e
{
Packit 33f14e
  Idx char_idx, end_idx;
Packit 33f14e
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit 33f14e
Packit 33f14e
  for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
Packit 33f14e
    {
Packit 33f14e
      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
Packit 33f14e
      if (BE (pstr->trans != NULL, 0))
Packit 33f14e
	ch = pstr->trans[ch];
Packit 33f14e
      pstr->mbs[char_idx] = toupper (ch);
Packit 33f14e
    }
Packit 33f14e
  pstr->valid_len = char_idx;
Packit 33f14e
  pstr->valid_raw_len = char_idx;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Apply TRANS to the buffer in PSTR.  */
Packit 33f14e
Packit 33f14e
static void
Packit 33f14e
internal_function
Packit 33f14e
re_string_translate_buffer (re_string_t *pstr)
Packit 33f14e
{
Packit 33f14e
  Idx buf_idx, end_idx;
Packit 33f14e
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit 33f14e
Packit 33f14e
  for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
Packit 33f14e
    {
Packit 33f14e
      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
Packit 33f14e
      pstr->mbs[buf_idx] = pstr->trans[ch];
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  pstr->valid_len = buf_idx;
Packit 33f14e
  pstr->valid_raw_len = buf_idx;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* This function reconstructs the buffers.
Packit 33f14e
   Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
Packit 33f14e
   convert to upper case in case of REG_ICASE, apply translation.  */
Packit 33f14e
Packit 33f14e
static reg_errcode_t
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
Packit 33f14e
{
Packit 33f14e
  Idx offset;
Packit 33f14e
Packit 33f14e
  if (BE (pstr->raw_mbs_idx <= idx, 0))
Packit 33f14e
    offset = idx - pstr->raw_mbs_idx;
Packit 33f14e
  else
Packit 33f14e
    {
Packit 33f14e
      /* Reset buffer.  */
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
      if (pstr->mb_cur_max > 1)
Packit 33f14e
	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
Packit 33f14e
#endif /* RE_ENABLE_I18N */
Packit 33f14e
      pstr->len = pstr->raw_len;
Packit 33f14e
      pstr->stop = pstr->raw_stop;
Packit 33f14e
      pstr->valid_len = 0;
Packit 33f14e
      pstr->raw_mbs_idx = 0;
Packit 33f14e
      pstr->valid_raw_len = 0;
Packit 33f14e
      pstr->offsets_needed = 0;
Packit 33f14e
      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
Packit 33f14e
			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
Packit 33f14e
      if (!pstr->mbs_allocated)
Packit 33f14e
	pstr->mbs = (unsigned char *) pstr->raw_mbs;
Packit 33f14e
      offset = idx;
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  if (BE (offset != 0, 1))
Packit 33f14e
    {
Packit 33f14e
      /* Should the already checked characters be kept?  */
Packit 33f14e
      if (BE (offset < pstr->valid_raw_len, 1))
Packit 33f14e
	{
Packit 33f14e
	  /* Yes, move them to the front of the buffer.  */
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
	  if (BE (pstr->offsets_needed, 0))
Packit 33f14e
	    {
Packit 33f14e
	      Idx low = 0, high = pstr->valid_len, mid;
Packit 33f14e
	      do
Packit 33f14e
		{
Packit 33f14e
		  mid = (high + low) / 2;
Packit 33f14e
		  if (pstr->offsets[mid] > offset)
Packit 33f14e
		    high = mid;
Packit 33f14e
		  else if (pstr->offsets[mid] < offset)
Packit 33f14e
		    low = mid + 1;
Packit 33f14e
		  else
Packit 33f14e
		    break;
Packit 33f14e
		}
Packit 33f14e
	      while (low < high);
Packit 33f14e
	      if (pstr->offsets[mid] < offset)
Packit 33f14e
		++mid;
Packit 33f14e
	      pstr->tip_context = re_string_context_at (pstr, mid - 1,
Packit 33f14e
							eflags);
Packit 33f14e
	      /* This can be quite complicated, so handle specially
Packit 33f14e
		 only the common and easy case where the character with
Packit 33f14e
		 different length representation of lower and upper
Packit 33f14e
		 case is present at or after offset.  */
Packit 33f14e
	      if (pstr->valid_len > offset
Packit 33f14e
		  && mid == offset && pstr->offsets[mid] == offset)
Packit 33f14e
		{
Packit 33f14e
		  memmove (pstr->wcs, pstr->wcs + offset,
Packit 33f14e
			   (pstr->valid_len - offset) * sizeof (wint_t));
Packit 33f14e
		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
Packit 33f14e
		  pstr->valid_len -= offset;
Packit 33f14e
		  pstr->valid_raw_len -= offset;
Packit 33f14e
		  for (low = 0; low < pstr->valid_len; low++)
Packit 33f14e
		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;
Packit 33f14e
		}
Packit 33f14e
	      else
Packit 33f14e
		{
Packit 33f14e
		  /* Otherwise, just find out how long the partial multibyte
Packit 33f14e
		     character at offset is and fill it with WEOF/255.  */
Packit 33f14e
		  pstr->len = pstr->raw_len - idx + offset;
Packit 33f14e
		  pstr->stop = pstr->raw_stop - idx + offset;
Packit 33f14e
		  pstr->offsets_needed = 0;
Packit 33f14e
		  while (mid > 0 && pstr->offsets[mid - 1] == offset)
Packit 33f14e
		    --mid;
Packit 33f14e
		  while (mid < pstr->valid_len)
Packit 33f14e
		    if (pstr->wcs[mid] != WEOF)
Packit 33f14e
		      break;
Packit 33f14e
		    else
Packit 33f14e
		      ++mid;
Packit 33f14e
		  if (mid == pstr->valid_len)
Packit 33f14e
		    pstr->valid_len = 0;
Packit 33f14e
		  else
Packit 33f14e
		    {
Packit 33f14e
		      pstr->valid_len = pstr->offsets[mid] - offset;
Packit 33f14e
		      if (pstr->valid_len)
Packit 33f14e
			{
Packit 33f14e
			  for (low = 0; low < pstr->valid_len; ++low)
Packit 33f14e
			    pstr->wcs[low] = WEOF;
Packit 33f14e
			  memset (pstr->mbs, 255, pstr->valid_len);
Packit 33f14e
			}
Packit 33f14e
		    }
Packit 33f14e
		  pstr->valid_raw_len = pstr->valid_len;
Packit 33f14e
		}
Packit 33f14e
	    }
Packit 33f14e
	  else
Packit 33f14e
#endif
Packit 33f14e
	    {
Packit 33f14e
	      pstr->tip_context = re_string_context_at (pstr, offset - 1,
Packit 33f14e
							eflags);
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
	      if (pstr->mb_cur_max > 1)
Packit 33f14e
		memmove (pstr->wcs, pstr->wcs + offset,
Packit 33f14e
			 (pstr->valid_len - offset) * sizeof (wint_t));
Packit 33f14e
#endif /* RE_ENABLE_I18N */
Packit 33f14e
	      if (BE (pstr->mbs_allocated, 0))
Packit 33f14e
		memmove (pstr->mbs, pstr->mbs + offset,
Packit 33f14e
			 pstr->valid_len - offset);
Packit 33f14e
	      pstr->valid_len -= offset;
Packit 33f14e
	      pstr->valid_raw_len -= offset;
Packit 33f14e
#if defined DEBUG && DEBUG
Packit 33f14e
	      assert (pstr->valid_len > 0);
Packit 33f14e
#endif
Packit 33f14e
	    }
Packit 33f14e
	}
Packit 33f14e
      else
Packit 33f14e
	{
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
	  /* No, skip all characters until IDX.  */
Packit 33f14e
	  Idx prev_valid_len = pstr->valid_len;
Packit 33f14e
Packit 33f14e
	  if (BE (pstr->offsets_needed, 0))
Packit 33f14e
	    {
Packit 33f14e
	      pstr->len = pstr->raw_len - idx + offset;
Packit 33f14e
	      pstr->stop = pstr->raw_stop - idx + offset;
Packit 33f14e
	      pstr->offsets_needed = 0;
Packit 33f14e
	    }
Packit 33f14e
#endif
Packit 33f14e
	  pstr->valid_len = 0;
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
	  if (pstr->mb_cur_max > 1)
Packit 33f14e
	    {
Packit 33f14e
	      Idx wcs_idx;
Packit 33f14e
	      wint_t wc = WEOF;
Packit 33f14e
Packit 33f14e
	      if (pstr->is_utf8)
Packit 33f14e
		{
Packit 33f14e
		  const unsigned char *raw, *p, *end;
Packit 33f14e
Packit 33f14e
		  /* Special case UTF-8.  Multi-byte chars start with any
Packit 33f14e
		     byte other than 0x80 - 0xbf.  */
Packit 33f14e
		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
Packit 33f14e
		  end = raw + (offset - pstr->mb_cur_max);
Packit 33f14e
		  if (end < pstr->raw_mbs)
Packit 33f14e
		    end = pstr->raw_mbs;
Packit 33f14e
		  p = raw + offset - 1;
Packit 33f14e
#ifdef _LIBC
Packit 33f14e
		  /* We know the wchar_t encoding is UCS4, so for the simple
Packit 33f14e
		     case, ASCII characters, skip the conversion step.  */
Packit 33f14e
		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
Packit 33f14e
		    {
Packit 33f14e
		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
Packit 33f14e
		      /* pstr->valid_len = 0; */
Packit 33f14e
		      wc = (wchar_t) *p;
Packit 33f14e
		    }
Packit 33f14e
		  else
Packit 33f14e
#endif
Packit 33f14e
		    for (; p >= end; --p)
Packit 33f14e
		      if ((*p & 0xc0) != 0x80)
Packit 33f14e
			{
Packit 33f14e
			  mbstate_t cur_state;
Packit 33f14e
			  wchar_t wc2;
Packit 33f14e
			  Idx mlen = raw + pstr->len - p;
Packit 33f14e
			  unsigned char buf[6];
Packit 33f14e
			  size_t mbclen;
Packit 33f14e
Packit 33f14e
			  const unsigned char *pp = p;
Packit 33f14e
			  if (BE (pstr->trans != NULL, 0))
Packit 33f14e
			    {
Packit 33f14e
			      int i = mlen < 6 ? mlen : 6;
Packit 33f14e
			      while (--i >= 0)
Packit 33f14e
				buf[i] = pstr->trans[p[i]];
Packit 33f14e
			      pp = buf;
Packit 33f14e
			    }
Packit 33f14e
			  /* XXX Don't use mbrtowc, we know which conversion
Packit 33f14e
			     to use (UTF-8 -> UCS4).  */
Packit 33f14e
			  memset (&cur_state, 0, sizeof (cur_state));
Packit 33f14e
			  mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
Packit 33f14e
					      &cur_state);
Packit 33f14e
			  if (raw + offset - p <= mbclen
Packit 33f14e
			      && mbclen < (size_t) -2)
Packit 33f14e
			    {
Packit 33f14e
			      memset (&pstr->cur_state, '\0',
Packit 33f14e
				      sizeof (mbstate_t));
Packit 33f14e
			      pstr->valid_len = mbclen - (raw + offset - p);
Packit 33f14e
			      wc = wc2;
Packit 33f14e
			    }
Packit 33f14e
			  break;
Packit 33f14e
			}
Packit 33f14e
		}
Packit 33f14e
Packit 33f14e
	      if (wc == WEOF)
Packit 33f14e
		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
Packit 33f14e
	      if (wc == WEOF)
Packit 33f14e
		pstr->tip_context
Packit 33f14e
		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);
Packit 33f14e
	      else
Packit 33f14e
		pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
Packit 33f14e
				      && IS_WIDE_WORD_CHAR (wc))
Packit 33f14e
				     ? CONTEXT_WORD
Packit 33f14e
				     : ((IS_WIDE_NEWLINE (wc)
Packit 33f14e
					 && pstr->newline_anchor)
Packit 33f14e
					? CONTEXT_NEWLINE : 0));
Packit 33f14e
	      if (BE (pstr->valid_len, 0))
Packit 33f14e
		{
Packit 33f14e
		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
Packit 33f14e
		    pstr->wcs[wcs_idx] = WEOF;
Packit 33f14e
		  if (pstr->mbs_allocated)
Packit 33f14e
		    memset (pstr->mbs, 255, pstr->valid_len);
Packit 33f14e
		}
Packit 33f14e
	      pstr->valid_raw_len = pstr->valid_len;
Packit 33f14e
	    }
Packit 33f14e
	  else
Packit 33f14e
#endif /* RE_ENABLE_I18N */
Packit 33f14e
	    {
Packit 33f14e
	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
Packit 33f14e
	      pstr->valid_raw_len = 0;
Packit 33f14e
	      if (pstr->trans)
Packit 33f14e
		c = pstr->trans[c];
Packit 33f14e
	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
Packit 33f14e
				   ? CONTEXT_WORD
Packit 33f14e
				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
Packit 33f14e
				      ? CONTEXT_NEWLINE : 0));
Packit 33f14e
	    }
Packit 33f14e
	}
Packit 33f14e
      if (!BE (pstr->mbs_allocated, 0))
Packit 33f14e
	pstr->mbs += offset;
Packit 33f14e
    }
Packit 33f14e
  pstr->raw_mbs_idx = idx;
Packit 33f14e
  pstr->len -= offset;
Packit 33f14e
  pstr->stop -= offset;
Packit 33f14e
Packit 33f14e
  /* Then build the buffers.  */
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
  if (pstr->mb_cur_max > 1)
Packit 33f14e
    {
Packit 33f14e
      if (pstr->icase)
Packit 33f14e
	{
Packit 33f14e
	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
Packit 33f14e
	  if (BE (ret != REG_NOERROR, 0))
Packit 33f14e
	    return ret;
Packit 33f14e
	}
Packit 33f14e
      else
Packit 33f14e
	build_wcs_buffer (pstr);
Packit 33f14e
    }
Packit 33f14e
  else
Packit 33f14e
#endif /* RE_ENABLE_I18N */
Packit 33f14e
    if (BE (pstr->mbs_allocated, 0))
Packit 33f14e
      {
Packit 33f14e
	if (pstr->icase)
Packit 33f14e
	  build_upper_buffer (pstr);
Packit 33f14e
	else if (pstr->trans != NULL)
Packit 33f14e
	  re_string_translate_buffer (pstr);
Packit 33f14e
      }
Packit 33f14e
    else
Packit 33f14e
      pstr->valid_len = pstr->len;
Packit 33f14e
Packit 33f14e
  pstr->cur_idx = 0;
Packit 33f14e
  return REG_NOERROR;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
static unsigned char
Packit 33f14e
internal_function __attribute__ ((pure))
Packit 33f14e
re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
Packit 33f14e
{
Packit 33f14e
  int ch;
Packit 33f14e
  Idx off;
Packit 33f14e
Packit 33f14e
  /* Handle the common (easiest) cases first.  */
Packit 33f14e
  if (BE (!pstr->mbs_allocated, 1))
Packit 33f14e
    return re_string_peek_byte (pstr, idx);
Packit 33f14e
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
  if (pstr->mb_cur_max > 1
Packit 33f14e
      && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
Packit 33f14e
    return re_string_peek_byte (pstr, idx);
Packit 33f14e
#endif
Packit 33f14e
Packit 33f14e
  off = pstr->cur_idx + idx;
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
  if (pstr->offsets_needed)
Packit 33f14e
    off = pstr->offsets[off];
Packit 33f14e
#endif
Packit 33f14e
Packit 33f14e
  ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
Packit 33f14e
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
  /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
Packit 33f14e
     this function returns CAPITAL LETTER I instead of first byte of
Packit 33f14e
     DOTLESS SMALL LETTER I.  The latter would confuse the parser,
Packit 33f14e
     since peek_byte_case doesn't advance cur_idx in any way.  */
Packit 33f14e
  if (pstr->offsets_needed && !isascii (ch))
Packit 33f14e
    return re_string_peek_byte (pstr, idx);
Packit 33f14e
#endif
Packit 33f14e
Packit 33f14e
  return ch;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
static unsigned char
Packit 33f14e
internal_function
Packit 33f14e
re_string_fetch_byte_case (re_string_t *pstr)
Packit 33f14e
{
Packit 33f14e
  if (BE (!pstr->mbs_allocated, 1))
Packit 33f14e
    return re_string_fetch_byte (pstr);
Packit 33f14e
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
  if (pstr->offsets_needed)
Packit 33f14e
    {
Packit 33f14e
      Idx off;
Packit 33f14e
      int ch;
Packit 33f14e
Packit 33f14e
      /* For tr_TR.UTF-8 [[:islower:]] there is
Packit 33f14e
	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
Packit 33f14e
	 in that case the whole multi-byte character and return
Packit 33f14e
	 the original letter.  On the other side, with
Packit 33f14e
	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
Packit 33f14e
	 anything else would complicate things too much.  */
Packit 33f14e
Packit 33f14e
      if (!re_string_first_byte (pstr, pstr->cur_idx))
Packit 33f14e
	return re_string_fetch_byte (pstr);
Packit 33f14e
Packit 33f14e
      off = pstr->offsets[pstr->cur_idx];
Packit 33f14e
      ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
Packit 33f14e
Packit 33f14e
      if (! isascii (ch))
Packit 33f14e
	return re_string_fetch_byte (pstr);
Packit 33f14e
Packit 33f14e
      re_string_skip_bytes (pstr,
Packit 33f14e
			    re_string_char_size_at (pstr, pstr->cur_idx));
Packit 33f14e
      return ch;
Packit 33f14e
    }
Packit 33f14e
#endif
Packit 33f14e
Packit 33f14e
  return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
static void
Packit 33f14e
internal_function
Packit 33f14e
re_string_destruct (re_string_t *pstr)
Packit 33f14e
{
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
  re_free (pstr->wcs);
Packit 33f14e
  re_free (pstr->offsets);
Packit 33f14e
#endif /* RE_ENABLE_I18N  */
Packit 33f14e
  if (pstr->mbs_allocated)
Packit 33f14e
    re_free (pstr->mbs);
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Return the context at IDX in INPUT.  */
Packit 33f14e
Packit 33f14e
static unsigned int
Packit 33f14e
internal_function
Packit 33f14e
re_string_context_at (const re_string_t *input, Idx idx, int eflags)
Packit 33f14e
{
Packit 33f14e
  int c;
Packit 33f14e
  if (BE (idx < 0, 0))
Packit 33f14e
    /* In this case, we use the value stored in input->tip_context,
Packit 33f14e
       since we can't know the character in input->mbs[-1] here.  */
Packit 33f14e
    return input->tip_context;
Packit 33f14e
  if (BE (idx == input->len, 0))
Packit 33f14e
    return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
Packit 33f14e
	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
  if (input->mb_cur_max > 1)
Packit 33f14e
    {
Packit 33f14e
      wint_t wc;
Packit 33f14e
      Idx wc_idx = idx;
Packit 33f14e
      while(input->wcs[wc_idx] == WEOF)
Packit 33f14e
	{
Packit 33f14e
#if defined DEBUG && DEBUG
Packit 33f14e
	  /* It must not happen.  */
Packit 33f14e
	  assert (wc_idx >= 0);
Packit 33f14e
#endif
Packit 33f14e
	  --wc_idx;
Packit 33f14e
	  if (wc_idx < 0)
Packit 33f14e
	    return input->tip_context;
Packit 33f14e
	}
Packit 33f14e
      wc = input->wcs[wc_idx];
Packit 33f14e
      if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
Packit 33f14e
	return CONTEXT_WORD;
Packit 33f14e
      return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
Packit 33f14e
	      ? CONTEXT_NEWLINE : 0);
Packit 33f14e
    }
Packit 33f14e
  else
Packit 33f14e
#endif
Packit 33f14e
    {
Packit 33f14e
      c = re_string_byte_at (input, idx);
Packit 33f14e
      if (bitset_contain (input->word_char, c))
Packit 33f14e
	return CONTEXT_WORD;
Packit 33f14e
      return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
Packit 33f14e
    }
Packit 33f14e
}
Packit 33f14e

Packit 33f14e
/* Functions for set operation.  */
Packit 33f14e
Packit 33f14e
static reg_errcode_t
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
re_node_set_alloc (re_node_set *set, Idx size)
Packit 33f14e
{
Packit 33f14e
  set->alloc = size;
Packit 33f14e
  set->nelem = 0;
Packit 33f14e
  set->elems = re_malloc (Idx, size);
Packit 33f14e
  if (BE (set->elems == NULL, 0) && (MALLOC_0_IS_NONNULL || size != 0))
Packit 33f14e
    return REG_ESPACE;
Packit 33f14e
  return REG_NOERROR;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
static reg_errcode_t
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
re_node_set_init_1 (re_node_set *set, Idx elem)
Packit 33f14e
{
Packit 33f14e
  set->alloc = 1;
Packit 33f14e
  set->nelem = 1;
Packit 33f14e
  set->elems = re_malloc (Idx, 1);
Packit 33f14e
  if (BE (set->elems == NULL, 0))
Packit 33f14e
    {
Packit 33f14e
      set->alloc = set->nelem = 0;
Packit 33f14e
      return REG_ESPACE;
Packit 33f14e
    }
Packit 33f14e
  set->elems[0] = elem;
Packit 33f14e
  return REG_NOERROR;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
static reg_errcode_t
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
Packit 33f14e
{
Packit 33f14e
  set->alloc = 2;
Packit 33f14e
  set->elems = re_malloc (Idx, 2);
Packit 33f14e
  if (BE (set->elems == NULL, 0))
Packit 33f14e
    return REG_ESPACE;
Packit 33f14e
  if (elem1 == elem2)
Packit 33f14e
    {
Packit 33f14e
      set->nelem = 1;
Packit 33f14e
      set->elems[0] = elem1;
Packit 33f14e
    }
Packit 33f14e
  else
Packit 33f14e
    {
Packit 33f14e
      set->nelem = 2;
Packit 33f14e
      if (elem1 < elem2)
Packit 33f14e
	{
Packit 33f14e
	  set->elems[0] = elem1;
Packit 33f14e
	  set->elems[1] = elem2;
Packit 33f14e
	}
Packit 33f14e
      else
Packit 33f14e
	{
Packit 33f14e
	  set->elems[0] = elem2;
Packit 33f14e
	  set->elems[1] = elem1;
Packit 33f14e
	}
Packit 33f14e
    }
Packit 33f14e
  return REG_NOERROR;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
static reg_errcode_t
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
Packit 33f14e
{
Packit 33f14e
  dest->nelem = src->nelem;
Packit 33f14e
  if (src->nelem > 0)
Packit 33f14e
    {
Packit 33f14e
      dest->alloc = dest->nelem;
Packit 33f14e
      dest->elems = re_malloc (Idx, dest->alloc);
Packit 33f14e
      if (BE (dest->elems == NULL, 0))
Packit 33f14e
	{
Packit 33f14e
	  dest->alloc = dest->nelem = 0;
Packit 33f14e
	  return REG_ESPACE;
Packit 33f14e
	}
Packit 33f14e
      memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
Packit 33f14e
    }
Packit 33f14e
  else
Packit 33f14e
    re_node_set_init_empty (dest);
Packit 33f14e
  return REG_NOERROR;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
Packit 33f14e
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.
Packit 33f14e
   Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
Packit 33f14e
Packit 33f14e
static reg_errcode_t
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
Packit 33f14e
			   const re_node_set *src2)
Packit 33f14e
{
Packit 33f14e
  Idx i1, i2, is, id, delta, sbase;
Packit 33f14e
  if (src1->nelem == 0 || src2->nelem == 0)
Packit 33f14e
    return REG_NOERROR;
Packit 33f14e
Packit 33f14e
  /* We need dest->nelem + 2 * elems_in_intersection; this is a
Packit 33f14e
     conservative estimate.  */
Packit 33f14e
  if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
Packit 33f14e
    {
Packit 33f14e
      Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
Packit 33f14e
      Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
Packit 33f14e
      if (BE (new_elems == NULL, 0))
Packit 33f14e
	return REG_ESPACE;
Packit 33f14e
      dest->elems = new_elems;
Packit 33f14e
      dest->alloc = new_alloc;
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  /* Find the items in the intersection of SRC1 and SRC2, and copy
Packit 33f14e
     into the top of DEST those that are not already in DEST itself.  */
Packit 33f14e
  sbase = dest->nelem + src1->nelem + src2->nelem;
Packit 33f14e
  i1 = src1->nelem - 1;
Packit 33f14e
  i2 = src2->nelem - 1;
Packit 33f14e
  id = dest->nelem - 1;
Packit 33f14e
  for (;;)
Packit 33f14e
    {
Packit 33f14e
      if (src1->elems[i1] == src2->elems[i2])
Packit 33f14e
	{
Packit 33f14e
	  /* Try to find the item in DEST.  Maybe we could binary search?  */
Packit 33f14e
	  while (id >= 0 && dest->elems[id] > src1->elems[i1])
Packit 33f14e
	    --id;
Packit 33f14e
Packit 33f14e
	  if (id < 0 || dest->elems[id] != src1->elems[i1])
Packit 33f14e
            dest->elems[--sbase] = src1->elems[i1];
Packit 33f14e
Packit 33f14e
	  if (--i1 < 0 || --i2 < 0)
Packit 33f14e
	    break;
Packit 33f14e
	}
Packit 33f14e
Packit 33f14e
      /* Lower the highest of the two items.  */
Packit 33f14e
      else if (src1->elems[i1] < src2->elems[i2])
Packit 33f14e
	{
Packit 33f14e
	  if (--i2 < 0)
Packit 33f14e
	    break;
Packit 33f14e
	}
Packit 33f14e
      else
Packit 33f14e
	{
Packit 33f14e
	  if (--i1 < 0)
Packit 33f14e
	    break;
Packit 33f14e
	}
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  id = dest->nelem - 1;
Packit 33f14e
  is = dest->nelem + src1->nelem + src2->nelem - 1;
Packit 33f14e
  delta = is - sbase + 1;
Packit 33f14e
Packit 33f14e
  /* Now copy.  When DELTA becomes zero, the remaining
Packit 33f14e
     DEST elements are already in place; this is more or
Packit 33f14e
     less the same loop that is in re_node_set_merge.  */
Packit 33f14e
  dest->nelem += delta;
Packit 33f14e
  if (delta > 0 && id >= 0)
Packit 33f14e
    for (;;)
Packit 33f14e
      {
Packit 33f14e
	if (dest->elems[is] > dest->elems[id])
Packit 33f14e
	  {
Packit 33f14e
	    /* Copy from the top.  */
Packit 33f14e
	    dest->elems[id + delta--] = dest->elems[is--];
Packit 33f14e
	    if (delta == 0)
Packit 33f14e
	      break;
Packit 33f14e
	  }
Packit 33f14e
	else
Packit 33f14e
	  {
Packit 33f14e
	    /* Slide from the bottom.  */
Packit 33f14e
	    dest->elems[id + delta] = dest->elems[id];
Packit 33f14e
	    if (--id < 0)
Packit 33f14e
	      break;
Packit 33f14e
	  }
Packit 33f14e
      }
Packit 33f14e
Packit 33f14e
  /* Copy remaining SRC elements.  */
Packit 33f14e
  memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
Packit 33f14e
Packit 33f14e
  return REG_NOERROR;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Calculate the union set of the sets SRC1 and SRC2. And store it to
Packit 33f14e
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
Packit 33f14e
Packit 33f14e
static reg_errcode_t
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
Packit 33f14e
			const re_node_set *src2)
Packit 33f14e
{
Packit 33f14e
  Idx i1, i2, id;
Packit 33f14e
  if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
Packit 33f14e
    {
Packit 33f14e
      dest->alloc = src1->nelem + src2->nelem;
Packit 33f14e
      dest->elems = re_malloc (Idx, dest->alloc);
Packit 33f14e
      if (BE (dest->elems == NULL, 0))
Packit 33f14e
	return REG_ESPACE;
Packit 33f14e
    }
Packit 33f14e
  else
Packit 33f14e
    {
Packit 33f14e
      if (src1 != NULL && src1->nelem > 0)
Packit 33f14e
	return re_node_set_init_copy (dest, src1);
Packit 33f14e
      else if (src2 != NULL && src2->nelem > 0)
Packit 33f14e
	return re_node_set_init_copy (dest, src2);
Packit 33f14e
      else
Packit 33f14e
	re_node_set_init_empty (dest);
Packit 33f14e
      return REG_NOERROR;
Packit 33f14e
    }
Packit 33f14e
  for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
Packit 33f14e
    {
Packit 33f14e
      if (src1->elems[i1] > src2->elems[i2])
Packit 33f14e
	{
Packit 33f14e
	  dest->elems[id++] = src2->elems[i2++];
Packit 33f14e
	  continue;
Packit 33f14e
	}
Packit 33f14e
      if (src1->elems[i1] == src2->elems[i2])
Packit 33f14e
	++i2;
Packit 33f14e
      dest->elems[id++] = src1->elems[i1++];
Packit 33f14e
    }
Packit 33f14e
  if (i1 < src1->nelem)
Packit 33f14e
    {
Packit 33f14e
      memcpy (dest->elems + id, src1->elems + i1,
Packit 33f14e
	     (src1->nelem - i1) * sizeof (Idx));
Packit 33f14e
      id += src1->nelem - i1;
Packit 33f14e
    }
Packit 33f14e
  else if (i2 < src2->nelem)
Packit 33f14e
    {
Packit 33f14e
      memcpy (dest->elems + id, src2->elems + i2,
Packit 33f14e
	     (src2->nelem - i2) * sizeof (Idx));
Packit 33f14e
      id += src2->nelem - i2;
Packit 33f14e
    }
Packit 33f14e
  dest->nelem = id;
Packit 33f14e
  return REG_NOERROR;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Calculate the union set of the sets DEST and SRC. And store it to
Packit 33f14e
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
Packit 33f14e
Packit 33f14e
static reg_errcode_t
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
re_node_set_merge (re_node_set *dest, const re_node_set *src)
Packit 33f14e
{
Packit 33f14e
  Idx is, id, sbase, delta;
Packit 33f14e
  if (src == NULL || src->nelem == 0)
Packit 33f14e
    return REG_NOERROR;
Packit 33f14e
  if (dest->alloc < 2 * src->nelem + dest->nelem)
Packit 33f14e
    {
Packit 33f14e
      Idx new_alloc = 2 * (src->nelem + dest->alloc);
Packit 33f14e
      Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
Packit 33f14e
      if (BE (new_buffer == NULL, 0))
Packit 33f14e
	return REG_ESPACE;
Packit 33f14e
      dest->elems = new_buffer;
Packit 33f14e
      dest->alloc = new_alloc;
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  if (BE (dest->nelem == 0, 0))
Packit 33f14e
    {
Packit 33f14e
      dest->nelem = src->nelem;
Packit 33f14e
      memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
Packit 33f14e
      return REG_NOERROR;
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  /* Copy into the top of DEST the items of SRC that are not
Packit 33f14e
     found in DEST.  Maybe we could binary search in DEST?  */
Packit 33f14e
  for (sbase = dest->nelem + 2 * src->nelem,
Packit 33f14e
       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
Packit 33f14e
    {
Packit 33f14e
      if (dest->elems[id] == src->elems[is])
Packit 33f14e
	is--, id--;
Packit 33f14e
      else if (dest->elems[id] < src->elems[is])
Packit 33f14e
	dest->elems[--sbase] = src->elems[is--];
Packit 33f14e
      else /* if (dest->elems[id] > src->elems[is]) */
Packit 33f14e
	--id;
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  if (is >= 0)
Packit 33f14e
    {
Packit 33f14e
      /* If DEST is exhausted, the remaining items of SRC must be unique.  */
Packit 33f14e
      sbase -= is + 1;
Packit 33f14e
      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  id = dest->nelem - 1;
Packit 33f14e
  is = dest->nelem + 2 * src->nelem - 1;
Packit 33f14e
  delta = is - sbase + 1;
Packit 33f14e
  if (delta == 0)
Packit 33f14e
    return REG_NOERROR;
Packit 33f14e
Packit 33f14e
  /* Now copy.  When DELTA becomes zero, the remaining
Packit 33f14e
     DEST elements are already in place.  */
Packit 33f14e
  dest->nelem += delta;
Packit 33f14e
  for (;;)
Packit 33f14e
    {
Packit 33f14e
      if (dest->elems[is] > dest->elems[id])
Packit 33f14e
	{
Packit 33f14e
	  /* Copy from the top.  */
Packit 33f14e
	  dest->elems[id + delta--] = dest->elems[is--];
Packit 33f14e
	  if (delta == 0)
Packit 33f14e
	    break;
Packit 33f14e
	}
Packit 33f14e
      else
Packit 33f14e
	{
Packit 33f14e
	  /* Slide from the bottom.  */
Packit 33f14e
	  dest->elems[id + delta] = dest->elems[id];
Packit 33f14e
	  if (--id < 0)
Packit 33f14e
	    {
Packit 33f14e
	      /* Copy remaining SRC elements.  */
Packit 33f14e
	      memcpy (dest->elems, dest->elems + sbase,
Packit 33f14e
		      delta * sizeof (Idx));
Packit 33f14e
	      break;
Packit 33f14e
	    }
Packit 33f14e
	}
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  return REG_NOERROR;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Insert the new element ELEM to the re_node_set* SET.
Packit 33f14e
   SET should not already have ELEM.
Packit 33f14e
   Return true if successful.  */
Packit 33f14e
Packit 33f14e
static bool
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
re_node_set_insert (re_node_set *set, Idx elem)
Packit 33f14e
{
Packit 33f14e
  Idx idx;
Packit 33f14e
  /* In case the set is empty.  */
Packit 33f14e
  if (set->alloc == 0)
Packit 33f14e
    return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
Packit 33f14e
Packit 33f14e
  if (BE (set->nelem, 0) == 0)
Packit 33f14e
    {
Packit 33f14e
      /* We already guaranteed above that set->alloc != 0.  */
Packit 33f14e
      set->elems[0] = elem;
Packit 33f14e
      ++set->nelem;
Packit 33f14e
      return true;
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  /* Realloc if we need.  */
Packit 33f14e
  if (set->alloc == set->nelem)
Packit 33f14e
    {
Packit 33f14e
      Idx *new_elems;
Packit 33f14e
      set->alloc = set->alloc * 2;
Packit 33f14e
      new_elems = re_realloc (set->elems, Idx, set->alloc);
Packit 33f14e
      if (BE (new_elems == NULL, 0))
Packit 33f14e
	return false;
Packit 33f14e
      set->elems = new_elems;
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  /* Move the elements which follows the new element.  Test the
Packit 33f14e
     first element separately to skip a check in the inner loop.  */
Packit 33f14e
  if (elem < set->elems[0])
Packit 33f14e
    {
Packit 33f14e
      idx = 0;
Packit 33f14e
      for (idx = set->nelem; idx > 0; idx--)
Packit 33f14e
	set->elems[idx] = set->elems[idx - 1];
Packit 33f14e
    }
Packit 33f14e
  else
Packit 33f14e
    {
Packit 33f14e
      for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
Packit 33f14e
	set->elems[idx] = set->elems[idx - 1];
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  /* Insert the new element.  */
Packit 33f14e
  set->elems[idx] = elem;
Packit 33f14e
  ++set->nelem;
Packit 33f14e
  return true;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Insert the new element ELEM to the re_node_set* SET.
Packit 33f14e
   SET should not already have any element greater than or equal to ELEM.
Packit 33f14e
   Return true if successful.  */
Packit 33f14e
Packit 33f14e
static bool
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
re_node_set_insert_last (re_node_set *set, Idx elem)
Packit 33f14e
{
Packit 33f14e
  /* Realloc if we need.  */
Packit 33f14e
  if (set->alloc == set->nelem)
Packit 33f14e
    {
Packit 33f14e
      Idx *new_elems;
Packit 33f14e
      set->alloc = (set->alloc + 1) * 2;
Packit 33f14e
      new_elems = re_realloc (set->elems, Idx, set->alloc);
Packit 33f14e
      if (BE (new_elems == NULL, 0))
Packit 33f14e
	return false;
Packit 33f14e
      set->elems = new_elems;
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  /* Insert the new element.  */
Packit 33f14e
  set->elems[set->nelem++] = elem;
Packit 33f14e
  return true;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Compare two node sets SET1 and SET2.
Packit 33f14e
   Return true if SET1 and SET2 are equivalent.  */
Packit 33f14e
Packit 33f14e
static bool
Packit 33f14e
internal_function __attribute__ ((pure))
Packit 33f14e
re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
Packit 33f14e
{
Packit 33f14e
  Idx i;
Packit 33f14e
  if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
Packit 33f14e
    return false;
Packit 33f14e
  for (i = set1->nelem ; --i >= 0 ; )
Packit 33f14e
    if (set1->elems[i] != set2->elems[i])
Packit 33f14e
      return false;
Packit 33f14e
  return true;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
Packit 33f14e
Packit 33f14e
static Idx
Packit 33f14e
internal_function __attribute__ ((pure))
Packit 33f14e
re_node_set_contains (const re_node_set *set, Idx elem)
Packit 33f14e
{
Packit 33f14e
  __re_size_t idx, right, mid;
Packit 33f14e
  if (set->nelem <= 0)
Packit 33f14e
    return 0;
Packit 33f14e
Packit 33f14e
  /* Binary search the element.  */
Packit 33f14e
  idx = 0;
Packit 33f14e
  right = set->nelem - 1;
Packit 33f14e
  while (idx < right)
Packit 33f14e
    {
Packit 33f14e
      mid = (idx + right) / 2;
Packit 33f14e
      if (set->elems[mid] < elem)
Packit 33f14e
	idx = mid + 1;
Packit 33f14e
      else
Packit 33f14e
	right = mid;
Packit 33f14e
    }
Packit 33f14e
  return set->elems[idx] == elem ? idx + 1 : 0;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
static void
Packit 33f14e
internal_function
Packit 33f14e
re_node_set_remove_at (re_node_set *set, Idx idx)
Packit 33f14e
{
Packit 33f14e
  if (idx < 0 || idx >= set->nelem)
Packit 33f14e
    return;
Packit 33f14e
  --set->nelem;
Packit 33f14e
  for (; idx < set->nelem; idx++)
Packit 33f14e
    set->elems[idx] = set->elems[idx + 1];
Packit 33f14e
}
Packit 33f14e

Packit 33f14e
Packit 33f14e
/* Add the token TOKEN to dfa->nodes, and return the index of the token.
Packit 33f14e
   Or return -1 if an error occurred.  */
Packit 33f14e
Packit 33f14e
static Idx
Packit 33f14e
internal_function
Packit 33f14e
re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
Packit 33f14e
{
Packit 33f14e
  if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
Packit 33f14e
    {
Packit 33f14e
      size_t new_nodes_alloc = dfa->nodes_alloc * 2;
Packit 33f14e
      Idx *new_nexts, *new_indices;
Packit 33f14e
      re_node_set *new_edests, *new_eclosures;
Packit 33f14e
      re_token_t *new_nodes;
Packit 33f14e
Packit 33f14e
      /* Avoid overflows in realloc.  */
Packit 33f14e
      const size_t max_object_size = MAX (sizeof (re_token_t),
Packit 33f14e
					  MAX (sizeof (re_node_set),
Packit 33f14e
					       sizeof (Idx)));
Packit 33f14e
      if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_nodes_alloc, 0))
Packit 33f14e
	return -1;
Packit 33f14e
Packit 33f14e
      new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
Packit 33f14e
      if (BE (new_nodes == NULL, 0))
Packit 33f14e
	return -1;
Packit 33f14e
      dfa->nodes = new_nodes;
Packit 33f14e
      new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
Packit 33f14e
      new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
Packit 33f14e
      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
Packit 33f14e
      new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
Packit 33f14e
      if (BE (new_nexts == NULL || new_indices == NULL
Packit 33f14e
	      || new_edests == NULL || new_eclosures == NULL, 0))
Packit 33f14e
	{
Packit 33f14e
	   re_free (new_nexts);
Packit 33f14e
	   re_free (new_indices);
Packit 33f14e
	   re_free (new_edests);
Packit 33f14e
	   re_free (new_eclosures);
Packit 33f14e
	   return -1;
Packit 33f14e
	}
Packit 33f14e
      dfa->nexts = new_nexts;
Packit 33f14e
      dfa->org_indices = new_indices;
Packit 33f14e
      dfa->edests = new_edests;
Packit 33f14e
      dfa->eclosures = new_eclosures;
Packit 33f14e
      dfa->nodes_alloc = new_nodes_alloc;
Packit 33f14e
    }
Packit 33f14e
  dfa->nodes[dfa->nodes_len] = token;
Packit 33f14e
  dfa->nodes[dfa->nodes_len].constraint = 0;
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
  dfa->nodes[dfa->nodes_len].accept_mb =
Packit 33f14e
    ((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
Packit 33f14e
     || token.type == COMPLEX_BRACKET);
Packit 33f14e
#endif
Packit 33f14e
  dfa->nexts[dfa->nodes_len] = -1;
Packit 33f14e
  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
Packit 33f14e
  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
Packit 33f14e
  return dfa->nodes_len++;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
static re_hashval_t
Packit 33f14e
internal_function
Packit 33f14e
calc_state_hash (const re_node_set *nodes, unsigned int context)
Packit 33f14e
{
Packit 33f14e
  re_hashval_t hash = nodes->nelem + context;
Packit 33f14e
  Idx i;
Packit 33f14e
  for (i = 0 ; i < nodes->nelem ; i++)
Packit 33f14e
    hash += nodes->elems[i];
Packit 33f14e
  return hash;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Search for the state whose node_set is equivalent to NODES.
Packit 33f14e
   Return the pointer to the state, if we found it in the DFA.
Packit 33f14e
   Otherwise create the new one and return it.  In case of an error
Packit 33f14e
   return NULL and set the error code in ERR.
Packit 33f14e
   Note: - We assume NULL as the invalid state, then it is possible that
Packit 33f14e
	   return value is NULL and ERR is REG_NOERROR.
Packit 33f14e
	 - We never return non-NULL value in case of any errors, it is for
Packit 33f14e
	   optimization.  */
Packit 33f14e
Packit 33f14e
static re_dfastate_t *
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
Packit 33f14e
		  const re_node_set *nodes)
Packit 33f14e
{
Packit 33f14e
  re_hashval_t hash;
Packit 33f14e
  re_dfastate_t *new_state;
Packit 33f14e
  struct re_state_table_entry *spot;
Packit 33f14e
  Idx i;
Packit 33f14e
#if defined GCC_LINT || defined lint
Packit 33f14e
  /* Suppress bogus uninitialized-variable warnings.  */
Packit 33f14e
  *err = REG_NOERROR;
Packit 33f14e
#endif
Packit 33f14e
  if (BE (nodes->nelem == 0, 0))
Packit 33f14e
    {
Packit 33f14e
      *err = REG_NOERROR;
Packit 33f14e
      return NULL;
Packit 33f14e
    }
Packit 33f14e
  hash = calc_state_hash (nodes, 0);
Packit 33f14e
  spot = dfa->state_table + (hash & dfa->state_hash_mask);
Packit 33f14e
Packit 33f14e
  for (i = 0 ; i < spot->num ; i++)
Packit 33f14e
    {
Packit 33f14e
      re_dfastate_t *state = spot->array[i];
Packit 33f14e
      if (hash != state->hash)
Packit 33f14e
	continue;
Packit 33f14e
      if (re_node_set_compare (&state->nodes, nodes))
Packit 33f14e
	return state;
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  /* There are no appropriate state in the dfa, create the new one.  */
Packit 33f14e
  new_state = create_ci_newstate (dfa, nodes, hash);
Packit 33f14e
  if (BE (new_state == NULL, 0))
Packit 33f14e
    *err = REG_ESPACE;
Packit 33f14e
Packit 33f14e
  return new_state;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Search for the state whose node_set is equivalent to NODES and
Packit 33f14e
   whose context is equivalent to CONTEXT.
Packit 33f14e
   Return the pointer to the state, if we found it in the DFA.
Packit 33f14e
   Otherwise create the new one and return it.  In case of an error
Packit 33f14e
   return NULL and set the error code in ERR.
Packit 33f14e
   Note: - We assume NULL as the invalid state, then it is possible that
Packit 33f14e
	   return value is NULL and ERR is REG_NOERROR.
Packit 33f14e
	 - We never return non-NULL value in case of any errors, it is for
Packit 33f14e
	   optimization.  */
Packit 33f14e
Packit 33f14e
static re_dfastate_t *
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
Packit 33f14e
			  const re_node_set *nodes, unsigned int context)
Packit 33f14e
{
Packit 33f14e
  re_hashval_t hash;
Packit 33f14e
  re_dfastate_t *new_state;
Packit 33f14e
  struct re_state_table_entry *spot;
Packit 33f14e
  Idx i;
Packit 33f14e
#if defined GCC_LINT || defined lint
Packit 33f14e
  /* Suppress bogus uninitialized-variable warnings.  */
Packit 33f14e
  *err = REG_NOERROR;
Packit 33f14e
#endif
Packit 33f14e
  if (nodes->nelem == 0)
Packit 33f14e
    {
Packit 33f14e
      *err = REG_NOERROR;
Packit 33f14e
      return NULL;
Packit 33f14e
    }
Packit 33f14e
  hash = calc_state_hash (nodes, context);
Packit 33f14e
  spot = dfa->state_table + (hash & dfa->state_hash_mask);
Packit 33f14e
Packit 33f14e
  for (i = 0 ; i < spot->num ; i++)
Packit 33f14e
    {
Packit 33f14e
      re_dfastate_t *state = spot->array[i];
Packit 33f14e
      if (state->hash == hash
Packit 33f14e
	  && state->context == context
Packit 33f14e
	  && re_node_set_compare (state->entrance_nodes, nodes))
Packit 33f14e
	return state;
Packit 33f14e
    }
Packit 33f14e
  /* There are no appropriate state in 'dfa', create the new one.  */
Packit 33f14e
  new_state = create_cd_newstate (dfa, nodes, context, hash);
Packit 33f14e
  if (BE (new_state == NULL, 0))
Packit 33f14e
    *err = REG_ESPACE;
Packit 33f14e
Packit 33f14e
  return new_state;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Finish initialization of the new state NEWSTATE, and using its hash value
Packit 33f14e
   HASH put in the appropriate bucket of DFA's state table.  Return value
Packit 33f14e
   indicates the error code if failed.  */
Packit 33f14e
Packit 33f14e
static reg_errcode_t
Packit 33f14e
__attribute_warn_unused_result__
Packit 33f14e
register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
Packit 33f14e
		re_hashval_t hash)
Packit 33f14e
{
Packit 33f14e
  struct re_state_table_entry *spot;
Packit 33f14e
  reg_errcode_t err;
Packit 33f14e
  Idx i;
Packit 33f14e
Packit 33f14e
  newstate->hash = hash;
Packit 33f14e
  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
Packit 33f14e
  if (BE (err != REG_NOERROR, 0))
Packit 33f14e
    return REG_ESPACE;
Packit 33f14e
  for (i = 0; i < newstate->nodes.nelem; i++)
Packit 33f14e
    {
Packit 33f14e
      Idx elem = newstate->nodes.elems[i];
Packit 33f14e
      if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
Packit 33f14e
	if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
Packit 33f14e
	  return REG_ESPACE;
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  spot = dfa->state_table + (hash & dfa->state_hash_mask);
Packit 33f14e
  if (BE (spot->alloc <= spot->num, 0))
Packit 33f14e
    {
Packit 33f14e
      Idx new_alloc = 2 * spot->num + 2;
Packit 33f14e
      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
Packit 33f14e
					      new_alloc);
Packit 33f14e
      if (BE (new_array == NULL, 0))
Packit 33f14e
	return REG_ESPACE;
Packit 33f14e
      spot->array = new_array;
Packit 33f14e
      spot->alloc = new_alloc;
Packit 33f14e
    }
Packit 33f14e
  spot->array[spot->num++] = newstate;
Packit 33f14e
  return REG_NOERROR;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
static void
Packit 33f14e
free_state (re_dfastate_t *state)
Packit 33f14e
{
Packit 33f14e
  re_node_set_free (&state->non_eps_nodes);
Packit 33f14e
  re_node_set_free (&state->inveclosure);
Packit 33f14e
  if (state->entrance_nodes != &state->nodes)
Packit 33f14e
    {
Packit 33f14e
      re_node_set_free (state->entrance_nodes);
Packit 33f14e
      re_free (state->entrance_nodes);
Packit 33f14e
    }
Packit 33f14e
  re_node_set_free (&state->nodes);
Packit 33f14e
  re_free (state->word_trtable);
Packit 33f14e
  re_free (state->trtable);
Packit 33f14e
  re_free (state);
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Create the new state which is independent of contexts.
Packit 33f14e
   Return the new state if succeeded, otherwise return NULL.  */
Packit 33f14e
Packit 33f14e
static re_dfastate_t *
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
Packit 33f14e
		    re_hashval_t hash)
Packit 33f14e
{
Packit 33f14e
  Idx i;
Packit 33f14e
  reg_errcode_t err;
Packit 33f14e
  re_dfastate_t *newstate;
Packit 33f14e
Packit 33f14e
  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
Packit 33f14e
  if (BE (newstate == NULL, 0))
Packit 33f14e
    return NULL;
Packit 33f14e
  err = re_node_set_init_copy (&newstate->nodes, nodes);
Packit 33f14e
  if (BE (err != REG_NOERROR, 0))
Packit 33f14e
    {
Packit 33f14e
      re_free (newstate);
Packit 33f14e
      return NULL;
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  newstate->entrance_nodes = &newstate->nodes;
Packit 33f14e
  for (i = 0 ; i < nodes->nelem ; i++)
Packit 33f14e
    {
Packit 33f14e
      re_token_t *node = dfa->nodes + nodes->elems[i];
Packit 33f14e
      re_token_type_t type = node->type;
Packit 33f14e
      if (type == CHARACTER && !node->constraint)
Packit 33f14e
	continue;
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
      newstate->accept_mb |= node->accept_mb;
Packit 33f14e
#endif /* RE_ENABLE_I18N */
Packit 33f14e
Packit 33f14e
      /* If the state has the halt node, the state is a halt state.  */
Packit 33f14e
      if (type == END_OF_RE)
Packit 33f14e
	newstate->halt = 1;
Packit 33f14e
      else if (type == OP_BACK_REF)
Packit 33f14e
	newstate->has_backref = 1;
Packit 33f14e
      else if (type == ANCHOR || node->constraint)
Packit 33f14e
	newstate->has_constraint = 1;
Packit 33f14e
    }
Packit 33f14e
  err = register_state (dfa, newstate, hash);
Packit 33f14e
  if (BE (err != REG_NOERROR, 0))
Packit 33f14e
    {
Packit 33f14e
      free_state (newstate);
Packit 33f14e
      newstate = NULL;
Packit 33f14e
    }
Packit 33f14e
  return newstate;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Create the new state which is depend on the context CONTEXT.
Packit 33f14e
   Return the new state if succeeded, otherwise return NULL.  */
Packit 33f14e
Packit 33f14e
static re_dfastate_t *
Packit 33f14e
internal_function __attribute_warn_unused_result__
Packit 33f14e
create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
Packit 33f14e
		    unsigned int context, re_hashval_t hash)
Packit 33f14e
{
Packit 33f14e
  Idx i, nctx_nodes = 0;
Packit 33f14e
  reg_errcode_t err;
Packit 33f14e
  re_dfastate_t *newstate;
Packit 33f14e
Packit 33f14e
  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
Packit 33f14e
  if (BE (newstate == NULL, 0))
Packit 33f14e
    return NULL;
Packit 33f14e
  err = re_node_set_init_copy (&newstate->nodes, nodes);
Packit 33f14e
  if (BE (err != REG_NOERROR, 0))
Packit 33f14e
    {
Packit 33f14e
      re_free (newstate);
Packit 33f14e
      return NULL;
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  newstate->context = context;
Packit 33f14e
  newstate->entrance_nodes = &newstate->nodes;
Packit 33f14e
Packit 33f14e
  for (i = 0 ; i < nodes->nelem ; i++)
Packit 33f14e
    {
Packit 33f14e
      re_token_t *node = dfa->nodes + nodes->elems[i];
Packit 33f14e
      re_token_type_t type = node->type;
Packit 33f14e
      unsigned int constraint = node->constraint;
Packit 33f14e
Packit 33f14e
      if (type == CHARACTER && !constraint)
Packit 33f14e
	continue;
Packit 33f14e
#ifdef RE_ENABLE_I18N
Packit 33f14e
      newstate->accept_mb |= node->accept_mb;
Packit 33f14e
#endif /* RE_ENABLE_I18N */
Packit 33f14e
Packit 33f14e
      /* If the state has the halt node, the state is a halt state.  */
Packit 33f14e
      if (type == END_OF_RE)
Packit 33f14e
	newstate->halt = 1;
Packit 33f14e
      else if (type == OP_BACK_REF)
Packit 33f14e
	newstate->has_backref = 1;
Packit 33f14e
Packit 33f14e
      if (constraint)
Packit 33f14e
	{
Packit 33f14e
	  if (newstate->entrance_nodes == &newstate->nodes)
Packit 33f14e
	    {
Packit 33f14e
	      newstate->entrance_nodes = re_malloc (re_node_set, 1);
Packit 33f14e
	      if (BE (newstate->entrance_nodes == NULL, 0))
Packit 33f14e
		{
Packit 33f14e
		  free_state (newstate);
Packit 33f14e
		  return NULL;
Packit 33f14e
		}
Packit 33f14e
	      if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
Packit 33f14e
		  != REG_NOERROR)
Packit 33f14e
		return NULL;
Packit 33f14e
	      nctx_nodes = 0;
Packit 33f14e
	      newstate->has_constraint = 1;
Packit 33f14e
	    }
Packit 33f14e
Packit 33f14e
	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
Packit 33f14e
	    {
Packit 33f14e
	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
Packit 33f14e
	      ++nctx_nodes;
Packit 33f14e
	    }
Packit 33f14e
	}
Packit 33f14e
    }
Packit 33f14e
  err = register_state (dfa, newstate, hash);
Packit 33f14e
  if (BE (err != REG_NOERROR, 0))
Packit 33f14e
    {
Packit 33f14e
      free_state (newstate);
Packit 33f14e
      newstate = NULL;
Packit 33f14e
    }
Packit 33f14e
  return  newstate;
Packit 33f14e
}