Blame lib/regex_internal.c

/* Extended regular expression matching and search library.
   Copyright (C) 2002-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
Packit 709fb3
Packit 709fb3
static void re_string_construct_common (const char *str, Idx len,
Packit 709fb3
					re_string_t *pstr,
Packit 709fb3
					RE_TRANSLATE_TYPE trans, bool icase,
Packit 709fb3
					const re_dfa_t *dfa) internal_function;
Packit 709fb3
static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
Packit 709fb3
					  const re_node_set *nodes,
Packit 709fb3
					  re_hashval_t hash) internal_function;
Packit 709fb3
static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
Packit 709fb3
					  const re_node_set *nodes,
Packit 709fb3
					  unsigned int context,
Packit 709fb3
					  re_hashval_t hash) internal_function;
Packit 709fb3

Packit 709fb3
/* Functions for string operation.  */
Packit 709fb3
Packit 709fb3
/* This function allocate the buffers.  It is necessary to call
Packit 709fb3
   re_string_reconstruct before using the object.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
Packit 709fb3
		    RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
Packit 709fb3
{
Packit 709fb3
  reg_errcode_t ret;
Packit 709fb3
  Idx init_buf_len;
Packit 709fb3
Packit 709fb3
  /* Ensure at least one character fits into the buffers.  */
Packit 709fb3
  if (init_len < dfa->mb_cur_max)
Packit 709fb3
    init_len = dfa->mb_cur_max;
Packit 709fb3
  init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
Packit 709fb3
  re_string_construct_common (str, len, pstr, trans, icase, dfa);
Packit 709fb3
Packit 709fb3
  ret = re_string_realloc_buffers (pstr, init_buf_len);
Packit 709fb3
  if (BE (ret != REG_NOERROR, 0))
Packit 709fb3
    return ret;
Packit 709fb3
Packit 709fb3
  pstr->word_char = dfa->word_char;
Packit 709fb3
  pstr->word_ops_used = dfa->word_ops_used;
Packit 709fb3
  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
Packit 709fb3
  pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
Packit 709fb3
  pstr->valid_raw_len = pstr->valid_len;
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* This function allocate the buffers, and initialize them.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
re_string_construct (re_string_t *pstr, const char *str, Idx len,
Packit 709fb3
		     RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
Packit 709fb3
{
Packit 709fb3
  reg_errcode_t ret;
Packit 709fb3
  memset (pstr, '\0', sizeof (re_string_t));
Packit 709fb3
  re_string_construct_common (str, len, pstr, trans, icase, dfa);
Packit 709fb3
Packit 709fb3
  if (len > 0)
Packit 709fb3
    {
Packit 709fb3
      ret = re_string_realloc_buffers (pstr, len + 1);
Packit 709fb3
      if (BE (ret != REG_NOERROR, 0))
Packit 709fb3
	return ret;
Packit 709fb3
    }
Packit 709fb3
  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
Packit 709fb3
Packit 709fb3
  if (icase)
Packit 709fb3
    {
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
      if (dfa->mb_cur_max > 1)
Packit 709fb3
	{
Packit 709fb3
	  while (1)
Packit 709fb3
	    {
Packit 709fb3
	      ret = build_wcs_upper_buffer (pstr);
Packit 709fb3
	      if (BE (ret != REG_NOERROR, 0))
Packit 709fb3
		return ret;
Packit 709fb3
	      if (pstr->valid_raw_len >= len)
Packit 709fb3
		break;
Packit 709fb3
	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
Packit 709fb3
		break;
Packit 709fb3
	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
Packit 709fb3
	      if (BE (ret != REG_NOERROR, 0))
Packit 709fb3
		return ret;
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
#endif /* RE_ENABLE_I18N  */
Packit 709fb3
	build_upper_buffer (pstr);
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
      if (dfa->mb_cur_max > 1)
Packit 709fb3
	build_wcs_buffer (pstr);
Packit 709fb3
      else
Packit 709fb3
#endif /* RE_ENABLE_I18N  */
Packit 709fb3
	{
Packit 709fb3
	  if (trans != NULL)
Packit 709fb3
	    re_string_translate_buffer (pstr);
Packit 709fb3
	  else
Packit 709fb3
	    {
Packit 709fb3
	      pstr->valid_len = pstr->bufs_len;
Packit 709fb3
	      pstr->valid_raw_len = pstr->bufs_len;
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Helper functions for re_string_allocate, and re_string_construct.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
Packit 709fb3
{
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  if (pstr->mb_cur_max > 1)
Packit 709fb3
    {
Packit 709fb3
      wint_t *new_wcs;
Packit 709fb3
Packit 709fb3
      /* Avoid overflow in realloc.  */
Packit 709fb3
      const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
Packit 709fb3
      if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_buf_len, 0))
Packit 709fb3
	return REG_ESPACE;
Packit 709fb3
Packit 709fb3
      new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
Packit 709fb3
      if (BE (new_wcs == NULL, 0))
Packit 709fb3
	return REG_ESPACE;
Packit 709fb3
      pstr->wcs = new_wcs;
Packit 709fb3
      if (pstr->offsets != NULL)
Packit 709fb3
	{
Packit 709fb3
	  Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
Packit 709fb3
	  if (BE (new_offsets == NULL, 0))
Packit 709fb3
	    return REG_ESPACE;
Packit 709fb3
	  pstr->offsets = new_offsets;
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
#endif /* RE_ENABLE_I18N  */
Packit 709fb3
  if (pstr->mbs_allocated)
Packit 709fb3
    {
Packit 709fb3
      unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
Packit 709fb3
					   new_buf_len);
Packit 709fb3
      if (BE (new_mbs == NULL, 0))
Packit 709fb3
	return REG_ESPACE;
Packit 709fb3
      pstr->mbs = new_mbs;
Packit 709fb3
    }
Packit 709fb3
  pstr->bufs_len = new_buf_len;
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
Packit 709fb3
static void
Packit 709fb3
internal_function
Packit 709fb3
re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
Packit 709fb3
			    RE_TRANSLATE_TYPE trans, bool icase,
Packit 709fb3
			    const re_dfa_t *dfa)
Packit 709fb3
{
Packit 709fb3
  pstr->raw_mbs = (const unsigned char *) str;
Packit 709fb3
  pstr->len = len;
Packit 709fb3
  pstr->raw_len = len;
Packit 709fb3
  pstr->trans = trans;
Packit 709fb3
  pstr->icase = icase;
Packit 709fb3
  pstr->mbs_allocated = (trans != NULL || icase);
Packit 709fb3
  pstr->mb_cur_max = dfa->mb_cur_max;
Packit 709fb3
  pstr->is_utf8 = dfa->is_utf8;
Packit 709fb3
  pstr->map_notascii = dfa->map_notascii;
Packit 709fb3
  pstr->stop = pstr->len;
Packit 709fb3
  pstr->raw_stop = pstr->stop;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
Packit 709fb3
/* Build wide character buffer PSTR->WCS.
Packit 709fb3
   If the byte sequence of the string are:
Packit 709fb3
     <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
Packit 709fb3
   Then wide character buffer will be:
Packit 709fb3
     <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
Packit 709fb3
   We use WEOF for padding, they indicate that the position isn't
Packit 709fb3
   a first byte of a multibyte character.
Packit 709fb3
Packit 709fb3
   Note that this function assumes PSTR->VALID_LEN elements are already
Packit 709fb3
   built and starts from PSTR->VALID_LEN.  */
Packit 709fb3
Packit 709fb3
static void
Packit 709fb3
internal_function
Packit 709fb3
build_wcs_buffer (re_string_t *pstr)
Packit 709fb3
{
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
  unsigned char buf[MB_LEN_MAX];
Packit 709fb3
  assert (MB_LEN_MAX >= pstr->mb_cur_max);
Packit 709fb3
#else
Packit 709fb3
  unsigned char buf[64];
Packit 709fb3
#endif
Packit 709fb3
  mbstate_t prev_st;
Packit 709fb3
  Idx byte_idx, end_idx, remain_len;
Packit 709fb3
  size_t mbclen;
Packit 709fb3
Packit 709fb3
  /* Build the buffers from pstr->valid_len to either pstr->len or
Packit 709fb3
     pstr->bufs_len.  */
Packit 709fb3
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit 709fb3
  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
Packit 709fb3
    {
Packit 709fb3
      wchar_t wc;
Packit 709fb3
      const char *p;
Packit 709fb3
Packit 709fb3
      remain_len = end_idx - byte_idx;
Packit 709fb3
      prev_st = pstr->cur_state;
Packit 709fb3
      /* Apply the translation if we need.  */
Packit 709fb3
      if (BE (pstr->trans != NULL, 0))
Packit 709fb3
	{
Packit 709fb3
	  int i, ch;
Packit 709fb3
Packit 709fb3
	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
Packit 709fb3
	    {
Packit 709fb3
	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
Packit 709fb3
	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
Packit 709fb3
	    }
Packit 709fb3
	  p = (const char *) buf;
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
Packit 709fb3
      mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
Packit 709fb3
      if (BE (mbclen == (size_t) -1 || mbclen == 0
Packit 709fb3
	      || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
Packit 709fb3
	{
Packit 709fb3
	  /* We treat these cases as a singlebyte character.  */
Packit 709fb3
	  mbclen = 1;
Packit 709fb3
	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
Packit 709fb3
	  if (BE (pstr->trans != NULL, 0))
Packit 709fb3
	    wc = pstr->trans[wc];
Packit 709fb3
	  pstr->cur_state = prev_st;
Packit 709fb3
	}
Packit 709fb3
      else if (BE (mbclen == (size_t) -2, 0))
Packit 709fb3
	{
Packit 709fb3
	  /* The buffer doesn't have enough space, finish to build.  */
Packit 709fb3
	  pstr->cur_state = prev_st;
Packit 709fb3
	  break;
Packit 709fb3
	}
Packit 709fb3
Packit 709fb3
      /* Write wide character and padding.  */
Packit 709fb3
      pstr->wcs[byte_idx++] = wc;
Packit 709fb3
      /* Write paddings.  */
Packit 709fb3
      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
Packit 709fb3
	pstr->wcs[byte_idx++] = WEOF;
Packit 709fb3
    }
Packit 709fb3
  pstr->valid_len = byte_idx;
Packit 709fb3
  pstr->valid_raw_len = byte_idx;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
Packit 709fb3
   but for REG_ICASE.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
build_wcs_upper_buffer (re_string_t *pstr)
Packit 709fb3
{
Packit 709fb3
  mbstate_t prev_st;
Packit 709fb3
  Idx src_idx, byte_idx, end_idx, remain_len;
Packit 709fb3
  size_t mbclen;
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
  char buf[MB_LEN_MAX];
Packit 709fb3
  assert (MB_LEN_MAX >= pstr->mb_cur_max);
Packit 709fb3
#else
Packit 709fb3
  char buf[64];
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
  byte_idx = pstr->valid_len;
Packit 709fb3
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit 709fb3
Packit 709fb3
  /* The following optimization assumes that ASCII characters can be
Packit 709fb3
     mapped to wide characters with a simple cast.  */
Packit 709fb3
  if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
Packit 709fb3
    {
Packit 709fb3
      while (byte_idx < end_idx)
Packit 709fb3
	{
Packit 709fb3
	  wchar_t wc;
Packit 709fb3
Packit 709fb3
	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
Packit 709fb3
	      && mbsinit (&pstr->cur_state))
Packit 709fb3
	    {
Packit 709fb3
	      /* In case of a singlebyte character.  */
Packit 709fb3
	      pstr->mbs[byte_idx]
Packit 709fb3
		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
Packit 709fb3
	      /* The next step uses the assumption that wchar_t is encoded
Packit 709fb3
		 ASCII-safe: all ASCII values can be converted like this.  */
Packit 709fb3
	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
Packit 709fb3
	      ++byte_idx;
Packit 709fb3
	      continue;
Packit 709fb3
	    }
Packit 709fb3
Packit 709fb3
	  remain_len = end_idx - byte_idx;
Packit 709fb3
	  prev_st = pstr->cur_state;
Packit 709fb3
	  mbclen = __mbrtowc (&wc,
Packit 709fb3
			      ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
Packit 709fb3
			       + byte_idx), remain_len, &pstr->cur_state);
Packit 709fb3
	  if (BE (mbclen < (size_t) -2, 1))
Packit 709fb3
	    {
Packit 709fb3
	      wchar_t wcu = __towupper (wc);
Packit 709fb3
	      if (wcu != wc)
Packit 709fb3
		{
Packit 709fb3
		  size_t mbcdlen;
Packit 709fb3
Packit 709fb3
		  mbcdlen = __wcrtomb (buf, wcu, &prev_st);
Packit 709fb3
		  if (BE (mbclen == mbcdlen, 1))
Packit 709fb3
		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
Packit 709fb3
		  else
Packit 709fb3
		    {
Packit 709fb3
		      src_idx = byte_idx;
Packit 709fb3
		      goto offsets_needed;
Packit 709fb3
		    }
Packit 709fb3
		}
Packit 709fb3
	      else
Packit 709fb3
		memcpy (pstr->mbs + byte_idx,
Packit 709fb3
			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
Packit 709fb3
	      pstr->wcs[byte_idx++] = wcu;
Packit 709fb3
	      /* Write paddings.  */
Packit 709fb3
	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
Packit 709fb3
		pstr->wcs[byte_idx++] = WEOF;
Packit 709fb3
	    }
Packit 709fb3
	  else if (mbclen == (size_t) -1 || mbclen == 0
Packit 709fb3
		   || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
Packit 709fb3
	    {
Packit 709fb3
	      /* It is an invalid character, an incomplete character
Packit 709fb3
		 at the end of the string, or '\0'.  Just use the byte.  */
Packit 709fb3
	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
Packit 709fb3
	      pstr->mbs[byte_idx] = ch;
Packit 709fb3
	      /* And also cast it to wide char.  */
Packit 709fb3
	      pstr->wcs[byte_idx++] = (wchar_t) ch;
Packit 709fb3
	      if (BE (mbclen == (size_t) -1, 0))
Packit 709fb3
		pstr->cur_state = prev_st;
Packit 709fb3
	    }
Packit 709fb3
	  else
Packit 709fb3
	    {
Packit 709fb3
	      /* The buffer doesn't have enough space, finish to build.  */
Packit 709fb3
	      pstr->cur_state = prev_st;
Packit 709fb3
	      break;
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
      pstr->valid_len = byte_idx;
Packit 709fb3
      pstr->valid_raw_len = byte_idx;
Packit 709fb3
      return REG_NOERROR;
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
    for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
Packit 709fb3
      {
Packit 709fb3
	wchar_t wc;
Packit 709fb3
	const char *p;
Packit 709fb3
      offsets_needed:
Packit 709fb3
	remain_len = end_idx - byte_idx;
Packit 709fb3
	prev_st = pstr->cur_state;
Packit 709fb3
	if (BE (pstr->trans != NULL, 0))
Packit 709fb3
	  {
Packit 709fb3
	    int i, ch;
Packit 709fb3
Packit 709fb3
	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
Packit 709fb3
	      {
Packit 709fb3
		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
Packit 709fb3
		buf[i] = pstr->trans[ch];
Packit 709fb3
	      }
Packit 709fb3
	    p = (const char *) buf;
Packit 709fb3
	  }
Packit 709fb3
	else
Packit 709fb3
	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
Packit 709fb3
	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
Packit 709fb3
	if (BE (mbclen < (size_t) -2, 1))
Packit 709fb3
	  {
Packit 709fb3
	    wchar_t wcu = __towupper (wc);
Packit 709fb3
	    if (wcu != wc)
Packit 709fb3
	      {
Packit 709fb3
		size_t mbcdlen;
Packit 709fb3
Packit 709fb3
		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
Packit 709fb3
		if (BE (mbclen == mbcdlen, 1))
Packit 709fb3
		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
Packit 709fb3
		else if (mbcdlen != (size_t) -1)
Packit 709fb3
		  {
Packit 709fb3
		    size_t i;
Packit 709fb3
Packit 709fb3
		    if (byte_idx + mbcdlen > pstr->bufs_len)
Packit 709fb3
		      {
Packit 709fb3
			pstr->cur_state = prev_st;
Packit 709fb3
			break;
Packit 709fb3
		      }
Packit 709fb3
Packit 709fb3
		    if (pstr->offsets == NULL)
Packit 709fb3
		      {
Packit 709fb3
			pstr->offsets = re_malloc (Idx, pstr->bufs_len);
Packit 709fb3
Packit 709fb3
			if (pstr->offsets == NULL)
Packit 709fb3
			  return REG_ESPACE;
Packit 709fb3
		      }
Packit 709fb3
		    if (!pstr->offsets_needed)
Packit 709fb3
		      {
Packit 709fb3
			for (i = 0; i < (size_t) byte_idx; ++i)
Packit 709fb3
			  pstr->offsets[i] = i;
Packit 709fb3
			pstr->offsets_needed = 1;
Packit 709fb3
		      }
Packit 709fb3
Packit 709fb3
		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
Packit 709fb3
		    pstr->wcs[byte_idx] = wcu;
Packit 709fb3
		    pstr->offsets[byte_idx] = src_idx;
Packit 709fb3
		    for (i = 1; i < mbcdlen; ++i)
Packit 709fb3
		      {
Packit 709fb3
			pstr->offsets[byte_idx + i]
Packit 709fb3
			  = src_idx + (i < mbclen ? i : mbclen - 1);
Packit 709fb3
			pstr->wcs[byte_idx + i] = WEOF;
Packit 709fb3
		      }
Packit 709fb3
		    pstr->len += mbcdlen - mbclen;
Packit 709fb3
		    if (pstr->raw_stop > src_idx)
Packit 709fb3
		      pstr->stop += mbcdlen - mbclen;
Packit 709fb3
		    end_idx = (pstr->bufs_len > pstr->len)
Packit 709fb3
			      ? pstr->len : pstr->bufs_len;
Packit 709fb3
		    byte_idx += mbcdlen;
Packit 709fb3
		    src_idx += mbclen;
Packit 709fb3
		    continue;
Packit 709fb3
		  }
Packit 709fb3
		else
Packit 709fb3
		  memcpy (pstr->mbs + byte_idx, p, mbclen);
Packit 709fb3
	      }
Packit 709fb3
	    else
Packit 709fb3
	      memcpy (pstr->mbs + byte_idx, p, mbclen);
Packit 709fb3
Packit 709fb3
	    if (BE (pstr->offsets_needed != 0, 0))
Packit 709fb3
	      {
Packit 709fb3
		size_t i;
Packit 709fb3
		for (i = 0; i < mbclen; ++i)
Packit 709fb3
		  pstr->offsets[byte_idx + i] = src_idx + i;
Packit 709fb3
	      }
Packit 709fb3
	    src_idx += mbclen;
Packit 709fb3
Packit 709fb3
	    pstr->wcs[byte_idx++] = wcu;
Packit 709fb3
	    /* Write paddings.  */
Packit 709fb3
	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
Packit 709fb3
	      pstr->wcs[byte_idx++] = WEOF;
Packit 709fb3
	  }
Packit 709fb3
	else if (mbclen == (size_t) -1 || mbclen == 0
Packit 709fb3
		 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
Packit 709fb3
	  {
Packit 709fb3
	    /* It is an invalid character or '\0'.  Just use the byte.  */
Packit 709fb3
	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
Packit 709fb3
Packit 709fb3
	    if (BE (pstr->trans != NULL, 0))
Packit 709fb3
	      ch = pstr->trans [ch];
Packit 709fb3
	    pstr->mbs[byte_idx] = ch;
Packit 709fb3
Packit 709fb3
	    if (BE (pstr->offsets_needed != 0, 0))
Packit 709fb3
	      pstr->offsets[byte_idx] = src_idx;
Packit 709fb3
	    ++src_idx;
Packit 709fb3
Packit 709fb3
	    /* And also cast it to wide char.  */
Packit 709fb3
	    pstr->wcs[byte_idx++] = (wchar_t) ch;
Packit 709fb3
	    if (BE (mbclen == (size_t) -1, 0))
Packit 709fb3
	      pstr->cur_state = prev_st;
Packit 709fb3
	  }
Packit 709fb3
	else
Packit 709fb3
	  {
Packit 709fb3
	    /* The buffer doesn't have enough space, finish to build.  */
Packit 709fb3
	    pstr->cur_state = prev_st;
Packit 709fb3
	    break;
Packit 709fb3
	  }
Packit 709fb3
      }
Packit 709fb3
  pstr->valid_len = byte_idx;
Packit 709fb3
  pstr->valid_raw_len = src_idx;
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Skip characters until the index becomes greater than NEW_RAW_IDX.
Packit 709fb3
   Return the index.  */
Packit 709fb3
Packit 709fb3
static Idx
Packit 709fb3
internal_function
Packit 709fb3
re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
Packit 709fb3
{
Packit 709fb3
  mbstate_t prev_st;
Packit 709fb3
  Idx rawbuf_idx;
Packit 709fb3
  size_t mbclen;
Packit 709fb3
  wint_t wc = WEOF;
Packit 709fb3
Packit 709fb3
  /* Skip the characters which are not necessary to check.  */
Packit 709fb3
  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
Packit 709fb3
       rawbuf_idx < new_raw_idx;)
Packit 709fb3
    {
Packit 709fb3
      wchar_t wc2;
Packit 709fb3
      Idx remain_len = pstr->raw_len - rawbuf_idx;
Packit 709fb3
      prev_st = pstr->cur_state;
Packit 709fb3
      mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
Packit 709fb3
			  remain_len, &pstr->cur_state);
Packit 709fb3
      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
Packit 709fb3
	{
Packit 709fb3
	  /* We treat these cases as a single byte character.  */
Packit 709fb3
	  if (mbclen == 0 || remain_len == 0)
Packit 709fb3
	    wc = L'\0';
Packit 709fb3
	  else
Packit 709fb3
	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
Packit 709fb3
	  mbclen = 1;
Packit 709fb3
	  pstr->cur_state = prev_st;
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
	wc = wc2;
Packit 709fb3
      /* Then proceed the next character.  */
Packit 709fb3
      rawbuf_idx += mbclen;
Packit 709fb3
    }
Packit 709fb3
  *last_wc = wc;
Packit 709fb3
  return rawbuf_idx;
Packit 709fb3
}
Packit 709fb3
#endif /* RE_ENABLE_I18N  */
Packit 709fb3
Packit 709fb3
/* Build the buffer PSTR->MBS, and apply the translation if we need.
Packit 709fb3
   This function is used in case of REG_ICASE.  */
Packit 709fb3
Packit 709fb3
static void
Packit 709fb3
internal_function
Packit 709fb3
build_upper_buffer (re_string_t *pstr)
Packit 709fb3
{
Packit 709fb3
  Idx char_idx, end_idx;
Packit 709fb3
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit 709fb3
Packit 709fb3
  for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
Packit 709fb3
    {
Packit 709fb3
      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
Packit 709fb3
      if (BE (pstr->trans != NULL, 0))
Packit 709fb3
	ch = pstr->trans[ch];
Packit 709fb3
      pstr->mbs[char_idx] = toupper (ch);
Packit 709fb3
    }
Packit 709fb3
  pstr->valid_len = char_idx;
Packit 709fb3
  pstr->valid_raw_len = char_idx;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Apply TRANS to the buffer in PSTR.  */
Packit 709fb3
Packit 709fb3
static void
Packit 709fb3
internal_function
Packit 709fb3
re_string_translate_buffer (re_string_t *pstr)
Packit 709fb3
{
Packit 709fb3
  Idx buf_idx, end_idx;
Packit 709fb3
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit 709fb3
Packit 709fb3
  for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
Packit 709fb3
    {
Packit 709fb3
      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
Packit 709fb3
      pstr->mbs[buf_idx] = pstr->trans[ch];
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  pstr->valid_len = buf_idx;
Packit 709fb3
  pstr->valid_raw_len = buf_idx;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* This function reconstructs the buffers.
   Concretely, it converts to wide characters when pstr->mb_cur_max > 1,
   converts to upper case when REG_ICASE is in effect, and applies the
   translation.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
Packit 709fb3
{
Packit 709fb3
  Idx offset;
Packit 709fb3
Packit 709fb3
  if (BE (pstr->raw_mbs_idx <= idx, 0))
Packit 709fb3
    offset = idx - pstr->raw_mbs_idx;
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      /* Reset buffer.  */
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
      if (pstr->mb_cur_max > 1)
Packit 709fb3
	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
      pstr->len = pstr->raw_len;
Packit 709fb3
      pstr->stop = pstr->raw_stop;
Packit 709fb3
      pstr->valid_len = 0;
Packit 709fb3
      pstr->raw_mbs_idx = 0;
Packit 709fb3
      pstr->valid_raw_len = 0;
Packit 709fb3
      pstr->offsets_needed = 0;
Packit 709fb3
      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
Packit 709fb3
			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
Packit 709fb3
      if (!pstr->mbs_allocated)
Packit 709fb3
	pstr->mbs = (unsigned char *) pstr->raw_mbs;
Packit 709fb3
      offset = idx;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  if (BE (offset != 0, 1))
Packit 709fb3
    {
Packit 709fb3
      /* Should the already checked characters be kept?  */
Packit 709fb3
      if (BE (offset < pstr->valid_raw_len, 1))
Packit 709fb3
	{
Packit 709fb3
	  /* Yes, move them to the front of the buffer.  */
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
	  if (BE (pstr->offsets_needed, 0))
Packit 709fb3
	    {
Packit 709fb3
	      Idx low = 0, high = pstr->valid_len, mid;
Packit 709fb3
	      do
Packit 709fb3
		{
Packit 709fb3
		  mid = (high + low) / 2;
Packit 709fb3
		  if (pstr->offsets[mid] > offset)
Packit 709fb3
		    high = mid;
Packit 709fb3
		  else if (pstr->offsets[mid] < offset)
Packit 709fb3
		    low = mid + 1;
Packit 709fb3
		  else
Packit 709fb3
		    break;
Packit 709fb3
		}
Packit 709fb3
	      while (low < high);
Packit 709fb3
	      if (pstr->offsets[mid] < offset)
Packit 709fb3
		++mid;
Packit 709fb3
	      pstr->tip_context = re_string_context_at (pstr, mid - 1,
Packit 709fb3
							eflags);
Packit 709fb3
	      /* This can be quite complicated, so handle specially
Packit 709fb3
		 only the common and easy case where the character with
Packit 709fb3
		 different length representation of lower and upper
Packit 709fb3
		 case is present at or after offset.  */
Packit 709fb3
	      if (pstr->valid_len > offset
Packit 709fb3
		  && mid == offset && pstr->offsets[mid] == offset)
Packit 709fb3
		{
Packit 709fb3
		  memmove (pstr->wcs, pstr->wcs + offset,
Packit 709fb3
			   (pstr->valid_len - offset) * sizeof (wint_t));
Packit 709fb3
		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
Packit 709fb3
		  pstr->valid_len -= offset;
Packit 709fb3
		  pstr->valid_raw_len -= offset;
Packit 709fb3
		  for (low = 0; low < pstr->valid_len; low++)
Packit 709fb3
		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;
Packit 709fb3
		}
Packit 709fb3
	      else
Packit 709fb3
		{
Packit 709fb3
		  /* Otherwise, just find out how long the partial multibyte
Packit 709fb3
		     character at offset is and fill it with WEOF/255.  */
Packit 709fb3
		  pstr->len = pstr->raw_len - idx + offset;
Packit 709fb3
		  pstr->stop = pstr->raw_stop - idx + offset;
Packit 709fb3
		  pstr->offsets_needed = 0;
Packit 709fb3
		  while (mid > 0 && pstr->offsets[mid - 1] == offset)
Packit 709fb3
		    --mid;
Packit 709fb3
		  while (mid < pstr->valid_len)
Packit 709fb3
		    if (pstr->wcs[mid] != WEOF)
Packit 709fb3
		      break;
Packit 709fb3
		    else
Packit 709fb3
		      ++mid;
Packit 709fb3
		  if (mid == pstr->valid_len)
Packit 709fb3
		    pstr->valid_len = 0;
Packit 709fb3
		  else
Packit 709fb3
		    {
Packit 709fb3
		      pstr->valid_len = pstr->offsets[mid] - offset;
Packit 709fb3
		      if (pstr->valid_len)
Packit 709fb3
			{
Packit 709fb3
			  for (low = 0; low < pstr->valid_len; ++low)
Packit 709fb3
			    pstr->wcs[low] = WEOF;
Packit 709fb3
			  memset (pstr->mbs, 255, pstr->valid_len);
Packit 709fb3
			}
Packit 709fb3
		    }
Packit 709fb3
		  pstr->valid_raw_len = pstr->valid_len;
Packit 709fb3
		}
Packit 709fb3
	    }
Packit 709fb3
	  else
Packit 709fb3
#endif
Packit 709fb3
	    {
Packit 709fb3
	      pstr->tip_context = re_string_context_at (pstr, offset - 1,
Packit 709fb3
							eflags);
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
	      if (pstr->mb_cur_max > 1)
Packit 709fb3
		memmove (pstr->wcs, pstr->wcs + offset,
Packit 709fb3
			 (pstr->valid_len - offset) * sizeof (wint_t));
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
	      if (BE (pstr->mbs_allocated, 0))
Packit 709fb3
		memmove (pstr->mbs, pstr->mbs + offset,
Packit 709fb3
			 pstr->valid_len - offset);
Packit 709fb3
	      pstr->valid_len -= offset;
Packit 709fb3
	      pstr->valid_raw_len -= offset;
Packit 709fb3
#if defined DEBUG && DEBUG
Packit 709fb3
	      assert (pstr->valid_len > 0);
Packit 709fb3
#endif
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
	{
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
	  /* No, skip all characters until IDX.  */
Packit 709fb3
	  Idx prev_valid_len = pstr->valid_len;
Packit 709fb3
Packit 709fb3
	  if (BE (pstr->offsets_needed, 0))
Packit 709fb3
	    {
Packit 709fb3
	      pstr->len = pstr->raw_len - idx + offset;
Packit 709fb3
	      pstr->stop = pstr->raw_stop - idx + offset;
Packit 709fb3
	      pstr->offsets_needed = 0;
Packit 709fb3
	    }
Packit 709fb3
#endif
Packit 709fb3
	  pstr->valid_len = 0;
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
	  if (pstr->mb_cur_max > 1)
Packit 709fb3
	    {
Packit 709fb3
	      Idx wcs_idx;
Packit 709fb3
	      wint_t wc = WEOF;
Packit 709fb3
Packit 709fb3
	      if (pstr->is_utf8)
Packit 709fb3
		{
Packit 709fb3
		  const unsigned char *raw, *p, *end;
Packit 709fb3
Packit 709fb3
		  /* Special case UTF-8.  Multi-byte chars start with any
Packit 709fb3
		     byte other than 0x80 - 0xbf.  */
Packit 709fb3
		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
Packit 709fb3
		  end = raw + (offset - pstr->mb_cur_max);
Packit 709fb3
		  if (end < pstr->raw_mbs)
Packit 709fb3
		    end = pstr->raw_mbs;
Packit 709fb3
		  p = raw + offset - 1;
Packit 709fb3
#ifdef _LIBC
Packit 709fb3
		  /* We know the wchar_t encoding is UCS4, so for the simple
Packit 709fb3
		     case, ASCII characters, skip the conversion step.  */
Packit 709fb3
		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
Packit 709fb3
		    {
Packit 709fb3
		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
Packit 709fb3
		      /* pstr->valid_len = 0; */
Packit 709fb3
		      wc = (wchar_t) *p;
Packit 709fb3
		    }
Packit 709fb3
		  else
Packit 709fb3
#endif
Packit 709fb3
		    for (; p >= end; --p)
Packit 709fb3
		      if ((*p & 0xc0) != 0x80)
Packit 709fb3
			{
Packit 709fb3
			  mbstate_t cur_state;
Packit 709fb3
			  wchar_t wc2;
Packit 709fb3
			  Idx mlen = raw + pstr->len - p;
Packit 709fb3
			  unsigned char buf[6];
Packit 709fb3
			  size_t mbclen;
Packit 709fb3
Packit 709fb3
			  const unsigned char *pp = p;
Packit 709fb3
			  if (BE (pstr->trans != NULL, 0))
Packit 709fb3
			    {
Packit 709fb3
			      int i = mlen < 6 ? mlen : 6;
Packit 709fb3
			      while (--i >= 0)
Packit 709fb3
				buf[i] = pstr->trans[p[i]];
Packit 709fb3
			      pp = buf;
Packit 709fb3
			    }
Packit 709fb3
			  /* XXX Don't use mbrtowc, we know which conversion
Packit 709fb3
			     to use (UTF-8 -> UCS4).  */
Packit 709fb3
			  memset (&cur_state, 0, sizeof (cur_state));
Packit 709fb3
			  mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
Packit 709fb3
					      &cur_state);
Packit 709fb3
			  if (raw + offset - p <= mbclen
Packit 709fb3
			      && mbclen < (size_t) -2)
Packit 709fb3
			    {
Packit 709fb3
			      memset (&pstr->cur_state, '\0',
Packit 709fb3
				      sizeof (mbstate_t));
Packit 709fb3
			      pstr->valid_len = mbclen - (raw + offset - p);
Packit 709fb3
			      wc = wc2;
Packit 709fb3
			    }
Packit 709fb3
			  break;
Packit 709fb3
			}
Packit 709fb3
		}
Packit 709fb3
Packit 709fb3
	      if (wc == WEOF)
Packit 709fb3
		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
Packit 709fb3
	      if (wc == WEOF)
Packit 709fb3
		pstr->tip_context
Packit 709fb3
		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);
Packit 709fb3
	      else
Packit 709fb3
		pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
Packit 709fb3
				      && IS_WIDE_WORD_CHAR (wc))
Packit 709fb3
				     ? CONTEXT_WORD
Packit 709fb3
				     : ((IS_WIDE_NEWLINE (wc)
Packit 709fb3
					 && pstr->newline_anchor)
Packit 709fb3
					? CONTEXT_NEWLINE : 0));
Packit 709fb3
	      if (BE (pstr->valid_len, 0))
Packit 709fb3
		{
Packit 709fb3
		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
Packit 709fb3
		    pstr->wcs[wcs_idx] = WEOF;
Packit 709fb3
		  if (pstr->mbs_allocated)
Packit 709fb3
		    memset (pstr->mbs, 255, pstr->valid_len);
Packit 709fb3
		}
Packit 709fb3
	      pstr->valid_raw_len = pstr->valid_len;
Packit 709fb3
	    }
Packit 709fb3
	  else
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
	    {
Packit 709fb3
	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
Packit 709fb3
	      pstr->valid_raw_len = 0;
Packit 709fb3
	      if (pstr->trans)
Packit 709fb3
		c = pstr->trans[c];
Packit 709fb3
	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
Packit 709fb3
				   ? CONTEXT_WORD
Packit 709fb3
				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
Packit 709fb3
				      ? CONTEXT_NEWLINE : 0));
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
      if (!BE (pstr->mbs_allocated, 0))
Packit 709fb3
	pstr->mbs += offset;
Packit 709fb3
    }
Packit 709fb3
  pstr->raw_mbs_idx = idx;
Packit 709fb3
  pstr->len -= offset;
Packit 709fb3
  pstr->stop -= offset;
Packit 709fb3
Packit 709fb3
  /* Then build the buffers.  */
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  if (pstr->mb_cur_max > 1)
Packit 709fb3
    {
Packit 709fb3
      if (pstr->icase)
Packit 709fb3
	{
Packit 709fb3
	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
Packit 709fb3
	  if (BE (ret != REG_NOERROR, 0))
Packit 709fb3
	    return ret;
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
	build_wcs_buffer (pstr);
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
    if (BE (pstr->mbs_allocated, 0))
Packit 709fb3
      {
Packit 709fb3
	if (pstr->icase)
Packit 709fb3
	  build_upper_buffer (pstr);
Packit 709fb3
	else if (pstr->trans != NULL)
Packit 709fb3
	  re_string_translate_buffer (pstr);
Packit 709fb3
      }
Packit 709fb3
    else
Packit 709fb3
      pstr->valid_len = pstr->len;
Packit 709fb3
Packit 709fb3
  pstr->cur_idx = 0;
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static unsigned char
Packit 709fb3
internal_function __attribute__ ((pure))
Packit 709fb3
re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
Packit 709fb3
{
Packit 709fb3
  int ch;
Packit 709fb3
  Idx off;
Packit 709fb3
Packit 709fb3
  /* Handle the common (easiest) cases first.  */
Packit 709fb3
  if (BE (!pstr->mbs_allocated, 1))
Packit 709fb3
    return re_string_peek_byte (pstr, idx);
Packit 709fb3
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  if (pstr->mb_cur_max > 1
Packit 709fb3
      && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
Packit 709fb3
    return re_string_peek_byte (pstr, idx);
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
  off = pstr->cur_idx + idx;
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  if (pstr->offsets_needed)
Packit 709fb3
    off = pstr->offsets[off];
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
  ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
Packit 709fb3
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
Packit 709fb3
     this function returns CAPITAL LETTER I instead of first byte of
Packit 709fb3
     DOTLESS SMALL LETTER I.  The latter would confuse the parser,
Packit 709fb3
     since peek_byte_case doesn't advance cur_idx in any way.  */
Packit 709fb3
  if (pstr->offsets_needed && !isascii (ch))
Packit 709fb3
    return re_string_peek_byte (pstr, idx);
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
  return ch;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static unsigned char
Packit 709fb3
internal_function
Packit 709fb3
re_string_fetch_byte_case (re_string_t *pstr)
Packit 709fb3
{
Packit 709fb3
  if (BE (!pstr->mbs_allocated, 1))
Packit 709fb3
    return re_string_fetch_byte (pstr);
Packit 709fb3
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  if (pstr->offsets_needed)
Packit 709fb3
    {
Packit 709fb3
      Idx off;
Packit 709fb3
      int ch;
Packit 709fb3
Packit 709fb3
      /* For tr_TR.UTF-8 [[:islower:]] there is
Packit 709fb3
	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
Packit 709fb3
	 in that case the whole multi-byte character and return
Packit 709fb3
	 the original letter.  On the other side, with
Packit 709fb3
	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
Packit 709fb3
	 anything else would complicate things too much.  */
Packit 709fb3
Packit 709fb3
      if (!re_string_first_byte (pstr, pstr->cur_idx))
Packit 709fb3
	return re_string_fetch_byte (pstr);
Packit 709fb3
Packit 709fb3
      off = pstr->offsets[pstr->cur_idx];
Packit 709fb3
      ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
Packit 709fb3
Packit 709fb3
      if (! isascii (ch))
Packit 709fb3
	return re_string_fetch_byte (pstr);
Packit 709fb3
Packit 709fb3
      re_string_skip_bytes (pstr,
Packit 709fb3
			    re_string_char_size_at (pstr, pstr->cur_idx));
Packit 709fb3
      return ch;
Packit 709fb3
    }
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
  return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static void
Packit 709fb3
internal_function
Packit 709fb3
re_string_destruct (re_string_t *pstr)
Packit 709fb3
{
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  re_free (pstr->wcs);
Packit 709fb3
  re_free (pstr->offsets);
Packit 709fb3
#endif /* RE_ENABLE_I18N  */
Packit 709fb3
  if (pstr->mbs_allocated)
Packit 709fb3
    re_free (pstr->mbs);
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Return the context at IDX in INPUT.  */
Packit 709fb3
Packit 709fb3
static unsigned int
Packit 709fb3
internal_function
Packit 709fb3
re_string_context_at (const re_string_t *input, Idx idx, int eflags)
Packit 709fb3
{
Packit 709fb3
  int c;
Packit 709fb3
  if (BE (idx < 0, 0))
Packit 709fb3
    /* In this case, we use the value stored in input->tip_context,
Packit 709fb3
       since we can't know the character in input->mbs[-1] here.  */
Packit 709fb3
    return input->tip_context;
Packit 709fb3
  if (BE (idx == input->len, 0))
Packit 709fb3
    return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
Packit 709fb3
	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  if (input->mb_cur_max > 1)
Packit 709fb3
    {
Packit 709fb3
      wint_t wc;
Packit 709fb3
      Idx wc_idx = idx;
Packit 709fb3
      while(input->wcs[wc_idx] == WEOF)
Packit 709fb3
	{
Packit 709fb3
#if defined DEBUG && DEBUG
Packit 709fb3
	  /* It must not happen.  */
Packit 709fb3
	  assert (wc_idx >= 0);
Packit 709fb3
#endif
Packit 709fb3
	  --wc_idx;
Packit 709fb3
	  if (wc_idx < 0)
Packit 709fb3
	    return input->tip_context;
Packit 709fb3
	}
Packit 709fb3
      wc = input->wcs[wc_idx];
Packit 709fb3
      if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
Packit 709fb3
	return CONTEXT_WORD;
Packit 709fb3
      return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
Packit 709fb3
	      ? CONTEXT_NEWLINE : 0);
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
#endif
Packit 709fb3
    {
Packit 709fb3
      c = re_string_byte_at (input, idx);
Packit 709fb3
      if (bitset_contain (input->word_char, c))
Packit 709fb3
	return CONTEXT_WORD;
Packit 709fb3
      return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
Packit 709fb3
    }
Packit 709fb3
}
Packit 709fb3

Packit 709fb3
/* Functions for node set operations.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
re_node_set_alloc (re_node_set *set, Idx size)
Packit 709fb3
{
Packit 709fb3
  set->alloc = size;
Packit 709fb3
  set->nelem = 0;
Packit 709fb3
  set->elems = re_malloc (Idx, size);
Packit 709fb3
  if (BE (set->elems == NULL, 0) && (MALLOC_0_IS_NONNULL || size != 0))
Packit 709fb3
    return REG_ESPACE;
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
re_node_set_init_1 (re_node_set *set, Idx elem)
Packit 709fb3
{
Packit 709fb3
  set->alloc = 1;
Packit 709fb3
  set->nelem = 1;
Packit 709fb3
  set->elems = re_malloc (Idx, 1);
Packit 709fb3
  if (BE (set->elems == NULL, 0))
Packit 709fb3
    {
Packit 709fb3
      set->alloc = set->nelem = 0;
Packit 709fb3
      return REG_ESPACE;
Packit 709fb3
    }
Packit 709fb3
  set->elems[0] = elem;
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
Packit 709fb3
{
Packit 709fb3
  set->alloc = 2;
Packit 709fb3
  set->elems = re_malloc (Idx, 2);
Packit 709fb3
  if (BE (set->elems == NULL, 0))
Packit 709fb3
    return REG_ESPACE;
Packit 709fb3
  if (elem1 == elem2)
Packit 709fb3
    {
Packit 709fb3
      set->nelem = 1;
Packit 709fb3
      set->elems[0] = elem1;
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      set->nelem = 2;
Packit 709fb3
      if (elem1 < elem2)
Packit 709fb3
	{
Packit 709fb3
	  set->elems[0] = elem1;
Packit 709fb3
	  set->elems[1] = elem2;
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
	{
Packit 709fb3
	  set->elems[0] = elem2;
Packit 709fb3
	  set->elems[1] = elem1;
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
Packit 709fb3
{
Packit 709fb3
  dest->nelem = src->nelem;
Packit 709fb3
  if (src->nelem > 0)
Packit 709fb3
    {
Packit 709fb3
      dest->alloc = dest->nelem;
Packit 709fb3
      dest->elems = re_malloc (Idx, dest->alloc);
Packit 709fb3
      if (BE (dest->elems == NULL, 0))
Packit 709fb3
	{
Packit 709fb3
	  dest->alloc = dest->nelem = 0;
Packit 709fb3
	  return REG_ESPACE;
Packit 709fb3
	}
Packit 709fb3
      memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
    re_node_set_init_empty (dest);
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
Packit 709fb3
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.
Packit 709fb3
   Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
Packit 709fb3
			   const re_node_set *src2)
Packit 709fb3
{
Packit 709fb3
  Idx i1, i2, is, id, delta, sbase;
Packit 709fb3
  if (src1->nelem == 0 || src2->nelem == 0)
Packit 709fb3
    return REG_NOERROR;
Packit 709fb3
Packit 709fb3
  /* We need dest->nelem + 2 * elems_in_intersection; this is a
Packit 709fb3
     conservative estimate.  */
Packit 709fb3
  if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
Packit 709fb3
    {
Packit 709fb3
      Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
Packit 709fb3
      Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
Packit 709fb3
      if (BE (new_elems == NULL, 0))
Packit 709fb3
	return REG_ESPACE;
Packit 709fb3
      dest->elems = new_elems;
Packit 709fb3
      dest->alloc = new_alloc;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Find the items in the intersection of SRC1 and SRC2, and copy
Packit 709fb3
     into the top of DEST those that are not already in DEST itself.  */
Packit 709fb3
  sbase = dest->nelem + src1->nelem + src2->nelem;
Packit 709fb3
  i1 = src1->nelem - 1;
Packit 709fb3
  i2 = src2->nelem - 1;
Packit 709fb3
  id = dest->nelem - 1;
Packit 709fb3
  for (;;)
Packit 709fb3
    {
Packit 709fb3
      if (src1->elems[i1] == src2->elems[i2])
Packit 709fb3
	{
Packit 709fb3
	  /* Try to find the item in DEST.  Maybe we could binary search?  */
Packit 709fb3
	  while (id >= 0 && dest->elems[id] > src1->elems[i1])
Packit 709fb3
	    --id;
Packit 709fb3
Packit 709fb3
	  if (id < 0 || dest->elems[id] != src1->elems[i1])
Packit 709fb3
            dest->elems[--sbase] = src1->elems[i1];
Packit 709fb3
Packit 709fb3
	  if (--i1 < 0 || --i2 < 0)
Packit 709fb3
	    break;
Packit 709fb3
	}
Packit 709fb3
Packit 709fb3
      /* Lower the highest of the two items.  */
Packit 709fb3
      else if (src1->elems[i1] < src2->elems[i2])
Packit 709fb3
	{
Packit 709fb3
	  if (--i2 < 0)
Packit 709fb3
	    break;
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
	{
Packit 709fb3
	  if (--i1 < 0)
Packit 709fb3
	    break;
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  id = dest->nelem - 1;
Packit 709fb3
  is = dest->nelem + src1->nelem + src2->nelem - 1;
Packit 709fb3
  delta = is - sbase + 1;
Packit 709fb3
Packit 709fb3
  /* Now copy.  When DELTA becomes zero, the remaining
Packit 709fb3
     DEST elements are already in place; this is more or
Packit 709fb3
     less the same loop that is in re_node_set_merge.  */
Packit 709fb3
  dest->nelem += delta;
Packit 709fb3
  if (delta > 0 && id >= 0)
Packit 709fb3
    for (;;)
Packit 709fb3
      {
Packit 709fb3
	if (dest->elems[is] > dest->elems[id])
Packit 709fb3
	  {
Packit 709fb3
	    /* Copy from the top.  */
Packit 709fb3
	    dest->elems[id + delta--] = dest->elems[is--];
Packit 709fb3
	    if (delta == 0)
Packit 709fb3
	      break;
Packit 709fb3
	  }
Packit 709fb3
	else
Packit 709fb3
	  {
Packit 709fb3
	    /* Slide from the bottom.  */
Packit 709fb3
	    dest->elems[id + delta] = dest->elems[id];
Packit 709fb3
	    if (--id < 0)
Packit 709fb3
	      break;
Packit 709fb3
	  }
Packit 709fb3
      }
Packit 709fb3
Packit 709fb3
  /* Copy remaining SRC elements.  */
Packit 709fb3
  memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
Packit 709fb3
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Calculate the union set of the sets SRC1 and SRC2. And store it to
Packit 709fb3
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
Packit 709fb3
			const re_node_set *src2)
Packit 709fb3
{
Packit 709fb3
  Idx i1, i2, id;
Packit 709fb3
  if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
Packit 709fb3
    {
Packit 709fb3
      dest->alloc = src1->nelem + src2->nelem;
Packit 709fb3
      dest->elems = re_malloc (Idx, dest->alloc);
Packit 709fb3
      if (BE (dest->elems == NULL, 0))
Packit 709fb3
	return REG_ESPACE;
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      if (src1 != NULL && src1->nelem > 0)
Packit 709fb3
	return re_node_set_init_copy (dest, src1);
Packit 709fb3
      else if (src2 != NULL && src2->nelem > 0)
Packit 709fb3
	return re_node_set_init_copy (dest, src2);
Packit 709fb3
      else
Packit 709fb3
	re_node_set_init_empty (dest);
Packit 709fb3
      return REG_NOERROR;
Packit 709fb3
    }
Packit 709fb3
  for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
Packit 709fb3
    {
Packit 709fb3
      if (src1->elems[i1] > src2->elems[i2])
Packit 709fb3
	{
Packit 709fb3
	  dest->elems[id++] = src2->elems[i2++];
Packit 709fb3
	  continue;
Packit 709fb3
	}
Packit 709fb3
      if (src1->elems[i1] == src2->elems[i2])
Packit 709fb3
	++i2;
Packit 709fb3
      dest->elems[id++] = src1->elems[i1++];
Packit 709fb3
    }
Packit 709fb3
  if (i1 < src1->nelem)
Packit 709fb3
    {
Packit 709fb3
      memcpy (dest->elems + id, src1->elems + i1,
Packit 709fb3
	     (src1->nelem - i1) * sizeof (Idx));
Packit 709fb3
      id += src1->nelem - i1;
Packit 709fb3
    }
Packit 709fb3
  else if (i2 < src2->nelem)
Packit 709fb3
    {
Packit 709fb3
      memcpy (dest->elems + id, src2->elems + i2,
Packit 709fb3
	     (src2->nelem - i2) * sizeof (Idx));
Packit 709fb3
      id += src2->nelem - i2;
Packit 709fb3
    }
Packit 709fb3
  dest->nelem = id;
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Calculate the union set of the sets DEST and SRC. And store it to
Packit 709fb3
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
re_node_set_merge (re_node_set *dest, const re_node_set *src)
Packit 709fb3
{
Packit 709fb3
  Idx is, id, sbase, delta;
Packit 709fb3
  if (src == NULL || src->nelem == 0)
Packit 709fb3
    return REG_NOERROR;
Packit 709fb3
  if (dest->alloc < 2 * src->nelem + dest->nelem)
Packit 709fb3
    {
Packit 709fb3
      Idx new_alloc = 2 * (src->nelem + dest->alloc);
Packit 709fb3
      Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
Packit 709fb3
      if (BE (new_buffer == NULL, 0))
Packit 709fb3
	return REG_ESPACE;
Packit 709fb3
      dest->elems = new_buffer;
Packit 709fb3
      dest->alloc = new_alloc;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  if (BE (dest->nelem == 0, 0))
Packit 709fb3
    {
Packit 709fb3
      dest->nelem = src->nelem;
Packit 709fb3
      memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
Packit 709fb3
      return REG_NOERROR;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Copy into the top of DEST the items of SRC that are not
Packit 709fb3
     found in DEST.  Maybe we could binary search in DEST?  */
Packit 709fb3
  for (sbase = dest->nelem + 2 * src->nelem,
Packit 709fb3
       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
Packit 709fb3
    {
Packit 709fb3
      if (dest->elems[id] == src->elems[is])
Packit 709fb3
	is--, id--;
Packit 709fb3
      else if (dest->elems[id] < src->elems[is])
Packit 709fb3
	dest->elems[--sbase] = src->elems[is--];
Packit 709fb3
      else /* if (dest->elems[id] > src->elems[is]) */
Packit 709fb3
	--id;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  if (is >= 0)
Packit 709fb3
    {
Packit 709fb3
      /* If DEST is exhausted, the remaining items of SRC must be unique.  */
Packit 709fb3
      sbase -= is + 1;
Packit 709fb3
      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  id = dest->nelem - 1;
Packit 709fb3
  is = dest->nelem + 2 * src->nelem - 1;
Packit 709fb3
  delta = is - sbase + 1;
Packit 709fb3
  if (delta == 0)
Packit 709fb3
    return REG_NOERROR;
Packit 709fb3
Packit 709fb3
  /* Now copy.  When DELTA becomes zero, the remaining
Packit 709fb3
     DEST elements are already in place.  */
Packit 709fb3
  dest->nelem += delta;
Packit 709fb3
  for (;;)
Packit 709fb3
    {
Packit 709fb3
      if (dest->elems[is] > dest->elems[id])
Packit 709fb3
	{
Packit 709fb3
	  /* Copy from the top.  */
Packit 709fb3
	  dest->elems[id + delta--] = dest->elems[is--];
Packit 709fb3
	  if (delta == 0)
Packit 709fb3
	    break;
Packit 709fb3
	}
Packit 709fb3
      else
Packit 709fb3
	{
Packit 709fb3
	  /* Slide from the bottom.  */
Packit 709fb3
	  dest->elems[id + delta] = dest->elems[id];
Packit 709fb3
	  if (--id < 0)
Packit 709fb3
	    {
Packit 709fb3
	      /* Copy remaining SRC elements.  */
Packit 709fb3
	      memcpy (dest->elems, dest->elems + sbase,
Packit 709fb3
		      delta * sizeof (Idx));
Packit 709fb3
	      break;
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Insert the new element ELEM to the re_node_set* SET.
Packit 709fb3
   SET should not already have ELEM.
Packit 709fb3
   Return true if successful.  */
Packit 709fb3
Packit 709fb3
static bool
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
re_node_set_insert (re_node_set *set, Idx elem)
Packit 709fb3
{
Packit 709fb3
  Idx idx;
Packit 709fb3
  /* In case the set is empty.  */
Packit 709fb3
  if (set->alloc == 0)
Packit 709fb3
    return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
Packit 709fb3
Packit 709fb3
  if (BE (set->nelem, 0) == 0)
Packit 709fb3
    {
Packit 709fb3
      /* We already guaranteed above that set->alloc != 0.  */
Packit 709fb3
      set->elems[0] = elem;
Packit 709fb3
      ++set->nelem;
Packit 709fb3
      return true;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Realloc if we need.  */
Packit 709fb3
  if (set->alloc == set->nelem)
Packit 709fb3
    {
Packit 709fb3
      Idx *new_elems;
Packit 709fb3
      set->alloc = set->alloc * 2;
Packit 709fb3
      new_elems = re_realloc (set->elems, Idx, set->alloc);
Packit 709fb3
      if (BE (new_elems == NULL, 0))
Packit 709fb3
	return false;
Packit 709fb3
      set->elems = new_elems;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Move the elements which follows the new element.  Test the
Packit 709fb3
     first element separately to skip a check in the inner loop.  */
Packit 709fb3
  if (elem < set->elems[0])
Packit 709fb3
    {
Packit 709fb3
      idx = 0;
Packit 709fb3
      for (idx = set->nelem; idx > 0; idx--)
Packit 709fb3
	set->elems[idx] = set->elems[idx - 1];
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
Packit 709fb3
	set->elems[idx] = set->elems[idx - 1];
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Insert the new element.  */
Packit 709fb3
  set->elems[idx] = elem;
Packit 709fb3
  ++set->nelem;
Packit 709fb3
  return true;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Insert the new element ELEM to the re_node_set* SET.
Packit 709fb3
   SET should not already have any element greater than or equal to ELEM.
Packit 709fb3
   Return true if successful.  */
Packit 709fb3
Packit 709fb3
static bool
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
re_node_set_insert_last (re_node_set *set, Idx elem)
Packit 709fb3
{
Packit 709fb3
  /* Realloc if we need.  */
Packit 709fb3
  if (set->alloc == set->nelem)
Packit 709fb3
    {
Packit 709fb3
      Idx *new_elems;
Packit 709fb3
      set->alloc = (set->alloc + 1) * 2;
Packit 709fb3
      new_elems = re_realloc (set->elems, Idx, set->alloc);
Packit 709fb3
      if (BE (new_elems == NULL, 0))
Packit 709fb3
	return false;
Packit 709fb3
      set->elems = new_elems;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Insert the new element.  */
Packit 709fb3
  set->elems[set->nelem++] = elem;
Packit 709fb3
  return true;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Compare two node sets SET1 and SET2.
Packit 709fb3
   Return true if SET1 and SET2 are equivalent.  */
Packit 709fb3
Packit 709fb3
static bool
Packit 709fb3
internal_function __attribute__ ((pure))
Packit 709fb3
re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
Packit 709fb3
{
Packit 709fb3
  Idx i;
Packit 709fb3
  if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
Packit 709fb3
    return false;
Packit 709fb3
  for (i = set1->nelem ; --i >= 0 ; )
Packit 709fb3
    if (set1->elems[i] != set2->elems[i])
Packit 709fb3
      return false;
Packit 709fb3
  return true;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
Packit 709fb3
Packit 709fb3
static Idx
Packit 709fb3
internal_function __attribute__ ((pure))
Packit 709fb3
re_node_set_contains (const re_node_set *set, Idx elem)
Packit 709fb3
{
Packit 709fb3
  __re_size_t idx, right, mid;
Packit 709fb3
  if (set->nelem <= 0)
Packit 709fb3
    return 0;
Packit 709fb3
Packit 709fb3
  /* Binary search the element.  */
Packit 709fb3
  idx = 0;
Packit 709fb3
  right = set->nelem - 1;
Packit 709fb3
  while (idx < right)
Packit 709fb3
    {
Packit 709fb3
      mid = (idx + right) / 2;
Packit 709fb3
      if (set->elems[mid] < elem)
Packit 709fb3
	idx = mid + 1;
Packit 709fb3
      else
Packit 709fb3
	right = mid;
Packit 709fb3
    }
Packit 709fb3
  return set->elems[idx] == elem ? idx + 1 : 0;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static void
Packit 709fb3
internal_function
Packit 709fb3
re_node_set_remove_at (re_node_set *set, Idx idx)
Packit 709fb3
{
Packit 709fb3
  if (idx < 0 || idx >= set->nelem)
Packit 709fb3
    return;
Packit 709fb3
  --set->nelem;
Packit 709fb3
  for (; idx < set->nelem; idx++)
Packit 709fb3
    set->elems[idx] = set->elems[idx + 1];
Packit 709fb3
}
Packit 709fb3

Packit 709fb3
Packit 709fb3
/* Add the token TOKEN to dfa->nodes, and return the index of the token.
Packit 709fb3
   Or return -1 if an error occurred.  */
Packit 709fb3
Packit 709fb3
static Idx
Packit 709fb3
internal_function
Packit 709fb3
re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
Packit 709fb3
{
Packit 709fb3
  if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
Packit 709fb3
    {
Packit 709fb3
      size_t new_nodes_alloc = dfa->nodes_alloc * 2;
Packit 709fb3
      Idx *new_nexts, *new_indices;
Packit 709fb3
      re_node_set *new_edests, *new_eclosures;
Packit 709fb3
      re_token_t *new_nodes;
Packit 709fb3
Packit 709fb3
      /* Avoid overflows in realloc.  */
Packit 709fb3
      const size_t max_object_size = MAX (sizeof (re_token_t),
Packit 709fb3
					  MAX (sizeof (re_node_set),
Packit 709fb3
					       sizeof (Idx)));
Packit 709fb3
      if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_nodes_alloc, 0))
Packit 709fb3
	return -1;
Packit 709fb3
Packit 709fb3
      new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
Packit 709fb3
      if (BE (new_nodes == NULL, 0))
Packit 709fb3
	return -1;
Packit 709fb3
      dfa->nodes = new_nodes;
Packit 709fb3
      new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
Packit 709fb3
      new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
Packit 709fb3
      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
Packit 709fb3
      new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
Packit 709fb3
      if (BE (new_nexts == NULL || new_indices == NULL
Packit 709fb3
	      || new_edests == NULL || new_eclosures == NULL, 0))
Packit 709fb3
	{
Packit 709fb3
	   re_free (new_nexts);
Packit 709fb3
	   re_free (new_indices);
Packit 709fb3
	   re_free (new_edests);
Packit 709fb3
	   re_free (new_eclosures);
Packit 709fb3
	   return -1;
Packit 709fb3
	}
Packit 709fb3
      dfa->nexts = new_nexts;
Packit 709fb3
      dfa->org_indices = new_indices;
Packit 709fb3
      dfa->edests = new_edests;
Packit 709fb3
      dfa->eclosures = new_eclosures;
Packit 709fb3
      dfa->nodes_alloc = new_nodes_alloc;
Packit 709fb3
    }
Packit 709fb3
  dfa->nodes[dfa->nodes_len] = token;
Packit 709fb3
  dfa->nodes[dfa->nodes_len].constraint = 0;
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
  dfa->nodes[dfa->nodes_len].accept_mb =
Packit 709fb3
    ((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
Packit 709fb3
     || token.type == COMPLEX_BRACKET);
Packit 709fb3
#endif
Packit 709fb3
  dfa->nexts[dfa->nodes_len] = -1;
Packit 709fb3
  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
Packit 709fb3
  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
Packit 709fb3
  return dfa->nodes_len++;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static re_hashval_t
Packit 709fb3
internal_function
Packit 709fb3
calc_state_hash (const re_node_set *nodes, unsigned int context)
Packit 709fb3
{
Packit 709fb3
  re_hashval_t hash = nodes->nelem + context;
Packit 709fb3
  Idx i;
Packit 709fb3
  for (i = 0 ; i < nodes->nelem ; i++)
Packit 709fb3
    hash += nodes->elems[i];
Packit 709fb3
  return hash;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Search for the state whose node_set is equivalent to NODES.
Packit 709fb3
   Return the pointer to the state, if we found it in the DFA.
Packit 709fb3
   Otherwise create the new one and return it.  In case of an error
Packit 709fb3
   return NULL and set the error code in ERR.
Packit 709fb3
   Note: - We assume NULL as the invalid state, then it is possible that
Packit 709fb3
	   return value is NULL and ERR is REG_NOERROR.
Packit 709fb3
	 - We never return non-NULL value in case of any errors, it is for
Packit 709fb3
	   optimization.  */
Packit 709fb3
Packit 709fb3
static re_dfastate_t *
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
Packit 709fb3
		  const re_node_set *nodes)
Packit 709fb3
{
Packit 709fb3
  re_hashval_t hash;
Packit 709fb3
  re_dfastate_t *new_state;
Packit 709fb3
  struct re_state_table_entry *spot;
Packit 709fb3
  Idx i;
Packit 709fb3
#if defined GCC_LINT || defined lint
Packit 709fb3
  /* Suppress bogus uninitialized-variable warnings.  */
Packit 709fb3
  *err = REG_NOERROR;
Packit 709fb3
#endif
Packit 709fb3
  if (BE (nodes->nelem == 0, 0))
Packit 709fb3
    {
Packit 709fb3
      *err = REG_NOERROR;
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
  hash = calc_state_hash (nodes, 0);
Packit 709fb3
  spot = dfa->state_table + (hash & dfa->state_hash_mask);
Packit 709fb3
Packit 709fb3
  for (i = 0 ; i < spot->num ; i++)
Packit 709fb3
    {
Packit 709fb3
      re_dfastate_t *state = spot->array[i];
Packit 709fb3
      if (hash != state->hash)
Packit 709fb3
	continue;
Packit 709fb3
      if (re_node_set_compare (&state->nodes, nodes))
Packit 709fb3
	return state;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* There are no appropriate state in the dfa, create the new one.  */
Packit 709fb3
  new_state = create_ci_newstate (dfa, nodes, hash);
Packit 709fb3
  if (BE (new_state == NULL, 0))
Packit 709fb3
    *err = REG_ESPACE;
Packit 709fb3
Packit 709fb3
  return new_state;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Search for the state whose node_set is equivalent to NODES and
Packit 709fb3
   whose context is equivalent to CONTEXT.
Packit 709fb3
   Return the pointer to the state, if we found it in the DFA.
Packit 709fb3
   Otherwise create the new one and return it.  In case of an error
Packit 709fb3
   return NULL and set the error code in ERR.
Packit 709fb3
   Note: - We assume NULL as the invalid state, then it is possible that
Packit 709fb3
	   return value is NULL and ERR is REG_NOERROR.
Packit 709fb3
	 - We never return non-NULL value in case of any errors, it is for
Packit 709fb3
	   optimization.  */
Packit 709fb3
Packit 709fb3
static re_dfastate_t *
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
Packit 709fb3
			  const re_node_set *nodes, unsigned int context)
Packit 709fb3
{
Packit 709fb3
  re_hashval_t hash;
Packit 709fb3
  re_dfastate_t *new_state;
Packit 709fb3
  struct re_state_table_entry *spot;
Packit 709fb3
  Idx i;
Packit 709fb3
#if defined GCC_LINT || defined lint
Packit 709fb3
  /* Suppress bogus uninitialized-variable warnings.  */
Packit 709fb3
  *err = REG_NOERROR;
Packit 709fb3
#endif
Packit 709fb3
  if (nodes->nelem == 0)
Packit 709fb3
    {
Packit 709fb3
      *err = REG_NOERROR;
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
  hash = calc_state_hash (nodes, context);
Packit 709fb3
  spot = dfa->state_table + (hash & dfa->state_hash_mask);
Packit 709fb3
Packit 709fb3
  for (i = 0 ; i < spot->num ; i++)
Packit 709fb3
    {
Packit 709fb3
      re_dfastate_t *state = spot->array[i];
Packit 709fb3
      if (state->hash == hash
Packit 709fb3
	  && state->context == context
Packit 709fb3
	  && re_node_set_compare (state->entrance_nodes, nodes))
Packit 709fb3
	return state;
Packit 709fb3
    }
Packit 709fb3
  /* There are no appropriate state in 'dfa', create the new one.  */
Packit 709fb3
  new_state = create_cd_newstate (dfa, nodes, context, hash);
Packit 709fb3
  if (BE (new_state == NULL, 0))
Packit 709fb3
    *err = REG_ESPACE;
Packit 709fb3
Packit 709fb3
  return new_state;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Finish initialization of the new state NEWSTATE, and using its hash value
Packit 709fb3
   HASH put in the appropriate bucket of DFA's state table.  Return value
Packit 709fb3
   indicates the error code if failed.  */
Packit 709fb3
Packit 709fb3
static reg_errcode_t
Packit 709fb3
__attribute_warn_unused_result__
Packit 709fb3
register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
Packit 709fb3
		re_hashval_t hash)
Packit 709fb3
{
Packit 709fb3
  struct re_state_table_entry *spot;
Packit 709fb3
  reg_errcode_t err;
Packit 709fb3
  Idx i;
Packit 709fb3
Packit 709fb3
  newstate->hash = hash;
Packit 709fb3
  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
Packit 709fb3
  if (BE (err != REG_NOERROR, 0))
Packit 709fb3
    return REG_ESPACE;
Packit 709fb3
  for (i = 0; i < newstate->nodes.nelem; i++)
Packit 709fb3
    {
Packit 709fb3
      Idx elem = newstate->nodes.elems[i];
Packit 709fb3
      if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
Packit 709fb3
	if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
Packit 709fb3
	  return REG_ESPACE;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  spot = dfa->state_table + (hash & dfa->state_hash_mask);
Packit 709fb3
  if (BE (spot->alloc <= spot->num, 0))
Packit 709fb3
    {
Packit 709fb3
      Idx new_alloc = 2 * spot->num + 2;
Packit 709fb3
      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
Packit 709fb3
					      new_alloc);
Packit 709fb3
      if (BE (new_array == NULL, 0))
Packit 709fb3
	return REG_ESPACE;
Packit 709fb3
      spot->array = new_array;
Packit 709fb3
      spot->alloc = new_alloc;
Packit 709fb3
    }
Packit 709fb3
  spot->array[spot->num++] = newstate;
Packit 709fb3
  return REG_NOERROR;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
static void
Packit 709fb3
free_state (re_dfastate_t *state)
Packit 709fb3
{
Packit 709fb3
  re_node_set_free (&state->non_eps_nodes);
Packit 709fb3
  re_node_set_free (&state->inveclosure);
Packit 709fb3
  if (state->entrance_nodes != &state->nodes)
Packit 709fb3
    {
Packit 709fb3
      re_node_set_free (state->entrance_nodes);
Packit 709fb3
      re_free (state->entrance_nodes);
Packit 709fb3
    }
Packit 709fb3
  re_node_set_free (&state->nodes);
Packit 709fb3
  re_free (state->word_trtable);
Packit 709fb3
  re_free (state->trtable);
Packit 709fb3
  re_free (state);
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Create the new state which is independent of contexts.
Packit 709fb3
   Return the new state if succeeded, otherwise return NULL.  */
Packit 709fb3
Packit 709fb3
static re_dfastate_t *
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
Packit 709fb3
		    re_hashval_t hash)
Packit 709fb3
{
Packit 709fb3
  Idx i;
Packit 709fb3
  reg_errcode_t err;
Packit 709fb3
  re_dfastate_t *newstate;
Packit 709fb3
Packit 709fb3
  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
Packit 709fb3
  if (BE (newstate == NULL, 0))
Packit 709fb3
    return NULL;
Packit 709fb3
  err = re_node_set_init_copy (&newstate->nodes, nodes);
Packit 709fb3
  if (BE (err != REG_NOERROR, 0))
Packit 709fb3
    {
Packit 709fb3
      re_free (newstate);
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  newstate->entrance_nodes = &newstate->nodes;
Packit 709fb3
  for (i = 0 ; i < nodes->nelem ; i++)
Packit 709fb3
    {
Packit 709fb3
      re_token_t *node = dfa->nodes + nodes->elems[i];
Packit 709fb3
      re_token_type_t type = node->type;
Packit 709fb3
      if (type == CHARACTER && !node->constraint)
Packit 709fb3
	continue;
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
      newstate->accept_mb |= node->accept_mb;
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
Packit 709fb3
      /* If the state has the halt node, the state is a halt state.  */
Packit 709fb3
      if (type == END_OF_RE)
Packit 709fb3
	newstate->halt = 1;
Packit 709fb3
      else if (type == OP_BACK_REF)
Packit 709fb3
	newstate->has_backref = 1;
Packit 709fb3
      else if (type == ANCHOR || node->constraint)
Packit 709fb3
	newstate->has_constraint = 1;
Packit 709fb3
    }
Packit 709fb3
  err = register_state (dfa, newstate, hash);
Packit 709fb3
  if (BE (err != REG_NOERROR, 0))
Packit 709fb3
    {
Packit 709fb3
      free_state (newstate);
Packit 709fb3
      newstate = NULL;
Packit 709fb3
    }
Packit 709fb3
  return newstate;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Create the new state which is depend on the context CONTEXT.
Packit 709fb3
   Return the new state if succeeded, otherwise return NULL.  */
Packit 709fb3
Packit 709fb3
static re_dfastate_t *
Packit 709fb3
internal_function __attribute_warn_unused_result__
Packit 709fb3
create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
Packit 709fb3
		    unsigned int context, re_hashval_t hash)
Packit 709fb3
{
Packit 709fb3
  Idx i, nctx_nodes = 0;
Packit 709fb3
  reg_errcode_t err;
Packit 709fb3
  re_dfastate_t *newstate;
Packit 709fb3
Packit 709fb3
  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
Packit 709fb3
  if (BE (newstate == NULL, 0))
Packit 709fb3
    return NULL;
Packit 709fb3
  err = re_node_set_init_copy (&newstate->nodes, nodes);
Packit 709fb3
  if (BE (err != REG_NOERROR, 0))
Packit 709fb3
    {
Packit 709fb3
      re_free (newstate);
Packit 709fb3
      return NULL;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  newstate->context = context;
Packit 709fb3
  newstate->entrance_nodes = &newstate->nodes;
Packit 709fb3
Packit 709fb3
  for (i = 0 ; i < nodes->nelem ; i++)
Packit 709fb3
    {
Packit 709fb3
      re_token_t *node = dfa->nodes + nodes->elems[i];
Packit 709fb3
      re_token_type_t type = node->type;
Packit 709fb3
      unsigned int constraint = node->constraint;
Packit 709fb3
Packit 709fb3
      if (type == CHARACTER && !constraint)
Packit 709fb3
	continue;
Packit 709fb3
#ifdef RE_ENABLE_I18N
Packit 709fb3
      newstate->accept_mb |= node->accept_mb;
Packit 709fb3
#endif /* RE_ENABLE_I18N */
Packit 709fb3
Packit 709fb3
      /* If the state has the halt node, the state is a halt state.  */
Packit 709fb3
      if (type == END_OF_RE)
Packit 709fb3
	newstate->halt = 1;
Packit 709fb3
      else if (type == OP_BACK_REF)
Packit 709fb3
	newstate->has_backref = 1;
Packit 709fb3
Packit 709fb3
      if (constraint)
Packit 709fb3
	{
Packit 709fb3
	  if (newstate->entrance_nodes == &newstate->nodes)
Packit 709fb3
	    {
Packit 709fb3
	      newstate->entrance_nodes = re_malloc (re_node_set, 1);
Packit 709fb3
	      if (BE (newstate->entrance_nodes == NULL, 0))
Packit 709fb3
		{
Packit 709fb3
		  free_state (newstate);
Packit 709fb3
		  return NULL;
Packit 709fb3
		}
Packit 709fb3
	      if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
Packit 709fb3
		  != REG_NOERROR)
Packit 709fb3
		return NULL;
Packit 709fb3
	      nctx_nodes = 0;
Packit 709fb3
	      newstate->has_constraint = 1;
Packit 709fb3
	    }
Packit 709fb3
Packit 709fb3
	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
Packit 709fb3
	    {
Packit 709fb3
	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
Packit 709fb3
	      ++nctx_nodes;
Packit 709fb3
	    }
Packit 709fb3
	}
Packit 709fb3
    }
Packit 709fb3
  err = register_state (dfa, newstate, hash);
Packit 709fb3
  if (BE (err != REG_NOERROR, 0))
Packit 709fb3
    {
Packit 709fb3
      free_state (newstate);
Packit 709fb3
      newstate = NULL;
Packit 709fb3
    }
Packit 709fb3
  return  newstate;
Packit 709fb3
}