Blame gl/regex_internal.c

Packit a4aae4
/* Extended regular expression matching and search library.
Packit a4aae4
   Copyright (C) 2002-2017 Free Software Foundation, Inc.
Packit a4aae4
   This file is part of the GNU C Library.
Packit a4aae4
   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
Packit a4aae4
Packit a4aae4
   The GNU C Library is free software; you can redistribute it and/or
Packit a4aae4
   modify it under the terms of the GNU Lesser General Public
Packit a4aae4
   License as published by the Free Software Foundation; either
Packit a4aae4
   version 2.1 of the License, or (at your option) any later version.
Packit a4aae4
Packit a4aae4
   The GNU C Library is distributed in the hope that it will be useful,
Packit a4aae4
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit a4aae4
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit a4aae4
   Lesser General Public License for more details.
Packit a4aae4
Packit a4aae4
   You should have received a copy of the GNU Lesser General Public
Packit a4aae4
   License along with the GNU C Library; if not, see
Packit a4aae4
   <http://www.gnu.org/licenses/>.  */
Packit a4aae4
Packit a4aae4
/* Forward declarations of file-local (static) helpers, so that they
   can be called ahead of their definitions.  */
static void re_string_construct_common (const char *str, Idx len,
Packit a4aae4
					re_string_t *pstr,
Packit a4aae4
					RE_TRANSLATE_TYPE trans, bool icase,
Packit a4aae4
					const re_dfa_t *dfa) internal_function;
Packit a4aae4
static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
Packit a4aae4
					  const re_node_set *nodes,
Packit a4aae4
					  re_hashval_t hash) internal_function;
Packit a4aae4
static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
Packit a4aae4
					  const re_node_set *nodes,
Packit a4aae4
					  unsigned int context,
Packit a4aae4
					  re_hashval_t hash) internal_function;
Packit a4aae4

Packit a4aae4
/* Functions for string operations.  */
Packit a4aae4
Packit a4aae4
/* This function allocate the buffers.  It is necessary to call
Packit a4aae4
   re_string_reconstruct before using the object.  */
Packit a4aae4
Packit a4aae4
static reg_errcode_t
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
Packit a4aae4
		    RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
Packit a4aae4
{
Packit a4aae4
  reg_errcode_t ret;
Packit a4aae4
  Idx init_buf_len;
Packit a4aae4
Packit a4aae4
  /* Ensure at least one character fits into the buffers.  */
Packit a4aae4
  if (init_len < dfa->mb_cur_max)
Packit a4aae4
    init_len = dfa->mb_cur_max;
Packit a4aae4
  init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
Packit a4aae4
  re_string_construct_common (str, len, pstr, trans, icase, dfa);
Packit a4aae4
Packit a4aae4
  ret = re_string_realloc_buffers (pstr, init_buf_len);
Packit a4aae4
  if (BE (ret != REG_NOERROR, 0))
Packit a4aae4
    return ret;
Packit a4aae4
Packit a4aae4
  pstr->word_char = dfa->word_char;
Packit a4aae4
  pstr->word_ops_used = dfa->word_ops_used;
Packit a4aae4
  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
Packit a4aae4
  pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
Packit a4aae4
  pstr->valid_raw_len = pstr->valid_len;
Packit a4aae4
  return REG_NOERROR;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* This function allocate the buffers, and initialize them.  */
Packit a4aae4
Packit a4aae4
static reg_errcode_t
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
re_string_construct (re_string_t *pstr, const char *str, Idx len,
Packit a4aae4
		     RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
Packit a4aae4
{
Packit a4aae4
  reg_errcode_t ret;
Packit a4aae4
  memset (pstr, '\0', sizeof (re_string_t));
Packit a4aae4
  re_string_construct_common (str, len, pstr, trans, icase, dfa);
Packit a4aae4
Packit a4aae4
  if (len > 0)
Packit a4aae4
    {
Packit a4aae4
      ret = re_string_realloc_buffers (pstr, len + 1);
Packit a4aae4
      if (BE (ret != REG_NOERROR, 0))
Packit a4aae4
	return ret;
Packit a4aae4
    }
Packit a4aae4
  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
Packit a4aae4
Packit a4aae4
  if (icase)
Packit a4aae4
    {
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
      if (dfa->mb_cur_max > 1)
Packit a4aae4
	{
Packit a4aae4
	  while (1)
Packit a4aae4
	    {
Packit a4aae4
	      ret = build_wcs_upper_buffer (pstr);
Packit a4aae4
	      if (BE (ret != REG_NOERROR, 0))
Packit a4aae4
		return ret;
Packit a4aae4
	      if (pstr->valid_raw_len >= len)
Packit a4aae4
		break;
Packit a4aae4
	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
Packit a4aae4
		break;
Packit a4aae4
	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
Packit a4aae4
	      if (BE (ret != REG_NOERROR, 0))
Packit a4aae4
		return ret;
Packit a4aae4
	    }
Packit a4aae4
	}
Packit a4aae4
      else
Packit a4aae4
#endif /* RE_ENABLE_I18N  */
Packit a4aae4
	build_upper_buffer (pstr);
Packit a4aae4
    }
Packit a4aae4
  else
Packit a4aae4
    {
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
      if (dfa->mb_cur_max > 1)
Packit a4aae4
	build_wcs_buffer (pstr);
Packit a4aae4
      else
Packit a4aae4
#endif /* RE_ENABLE_I18N  */
Packit a4aae4
	{
Packit a4aae4
	  if (trans != NULL)
Packit a4aae4
	    re_string_translate_buffer (pstr);
Packit a4aae4
	  else
Packit a4aae4
	    {
Packit a4aae4
	      pstr->valid_len = pstr->bufs_len;
Packit a4aae4
	      pstr->valid_raw_len = pstr->bufs_len;
Packit a4aae4
	    }
Packit a4aae4
	}
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  return REG_NOERROR;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Helper functions for re_string_allocate, and re_string_construct.  */
Packit a4aae4
Packit a4aae4
static reg_errcode_t
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
Packit a4aae4
{
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
  if (pstr->mb_cur_max > 1)
Packit a4aae4
    {
Packit a4aae4
      wint_t *new_wcs;
Packit a4aae4
Packit a4aae4
      /* Avoid overflow in realloc.  */
Packit a4aae4
      const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
Packit a4aae4
      if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_buf_len, 0))
Packit a4aae4
	return REG_ESPACE;
Packit a4aae4
Packit a4aae4
      new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
Packit a4aae4
      if (BE (new_wcs == NULL, 0))
Packit a4aae4
	return REG_ESPACE;
Packit a4aae4
      pstr->wcs = new_wcs;
Packit a4aae4
      if (pstr->offsets != NULL)
Packit a4aae4
	{
Packit a4aae4
	  Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
Packit a4aae4
	  if (BE (new_offsets == NULL, 0))
Packit a4aae4
	    return REG_ESPACE;
Packit a4aae4
	  pstr->offsets = new_offsets;
Packit a4aae4
	}
Packit a4aae4
    }
Packit a4aae4
#endif /* RE_ENABLE_I18N  */
Packit a4aae4
  if (pstr->mbs_allocated)
Packit a4aae4
    {
Packit a4aae4
      unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
Packit a4aae4
					   new_buf_len);
Packit a4aae4
      if (BE (new_mbs == NULL, 0))
Packit a4aae4
	return REG_ESPACE;
Packit a4aae4
      pstr->mbs = new_mbs;
Packit a4aae4
    }
Packit a4aae4
  pstr->bufs_len = new_buf_len;
Packit a4aae4
  return REG_NOERROR;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
Packit a4aae4
static void
Packit a4aae4
internal_function
Packit a4aae4
re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
Packit a4aae4
			    RE_TRANSLATE_TYPE trans, bool icase,
Packit a4aae4
			    const re_dfa_t *dfa)
Packit a4aae4
{
Packit a4aae4
  pstr->raw_mbs = (const unsigned char *) str;
Packit a4aae4
  pstr->len = len;
Packit a4aae4
  pstr->raw_len = len;
Packit a4aae4
  pstr->trans = trans;
Packit a4aae4
  pstr->icase = icase;
Packit a4aae4
  pstr->mbs_allocated = (trans != NULL || icase);
Packit a4aae4
  pstr->mb_cur_max = dfa->mb_cur_max;
Packit a4aae4
  pstr->is_utf8 = dfa->is_utf8;
Packit a4aae4
  pstr->map_notascii = dfa->map_notascii;
Packit a4aae4
  pstr->stop = pstr->len;
Packit a4aae4
  pstr->raw_stop = pstr->stop;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
Packit a4aae4
/* Build wide character buffer PSTR->WCS.
Packit a4aae4
   If the byte sequence of the string are:
Packit a4aae4
     <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
Packit a4aae4
   Then wide character buffer will be:
Packit a4aae4
     <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
Packit a4aae4
   We use WEOF for padding, they indicate that the position isn't
Packit a4aae4
   a first byte of a multibyte character.
Packit a4aae4
Packit a4aae4
   Note that this function assumes PSTR->VALID_LEN elements are already
Packit a4aae4
   built and starts from PSTR->VALID_LEN.  */
Packit a4aae4
Packit a4aae4
static void
Packit a4aae4
internal_function
Packit a4aae4
build_wcs_buffer (re_string_t *pstr)
Packit a4aae4
{
Packit a4aae4
#ifdef _LIBC
Packit a4aae4
  unsigned char buf[MB_LEN_MAX];
Packit a4aae4
  assert (MB_LEN_MAX >= pstr->mb_cur_max);
Packit a4aae4
#else
Packit a4aae4
  unsigned char buf[64];
Packit a4aae4
#endif
Packit a4aae4
  mbstate_t prev_st;
Packit a4aae4
  Idx byte_idx, end_idx, remain_len;
Packit a4aae4
  size_t mbclen;
Packit a4aae4
Packit a4aae4
  /* Build the buffers from pstr->valid_len to either pstr->len or
Packit a4aae4
     pstr->bufs_len.  */
Packit a4aae4
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit a4aae4
  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
Packit a4aae4
    {
Packit a4aae4
      wchar_t wc;
Packit a4aae4
      const char *p;
Packit a4aae4
Packit a4aae4
      remain_len = end_idx - byte_idx;
Packit a4aae4
      prev_st = pstr->cur_state;
Packit a4aae4
      /* Apply the translation if we need.  */
Packit a4aae4
      if (BE (pstr->trans != NULL, 0))
Packit a4aae4
	{
Packit a4aae4
	  int i, ch;
Packit a4aae4
Packit a4aae4
	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
Packit a4aae4
	    {
Packit a4aae4
	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
Packit a4aae4
	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
Packit a4aae4
	    }
Packit a4aae4
	  p = (const char *) buf;
Packit a4aae4
	}
Packit a4aae4
      else
Packit a4aae4
	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
Packit a4aae4
      mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
Packit a4aae4
      if (BE (mbclen == (size_t) -1 || mbclen == 0
Packit a4aae4
	      || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
Packit a4aae4
	{
Packit a4aae4
	  /* We treat these cases as a singlebyte character.  */
Packit a4aae4
	  mbclen = 1;
Packit a4aae4
	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
Packit a4aae4
	  if (BE (pstr->trans != NULL, 0))
Packit a4aae4
	    wc = pstr->trans[wc];
Packit a4aae4
	  pstr->cur_state = prev_st;
Packit a4aae4
	}
Packit a4aae4
      else if (BE (mbclen == (size_t) -2, 0))
Packit a4aae4
	{
Packit a4aae4
	  /* The buffer doesn't have enough space, finish to build.  */
Packit a4aae4
	  pstr->cur_state = prev_st;
Packit a4aae4
	  break;
Packit a4aae4
	}
Packit a4aae4
Packit a4aae4
      /* Write wide character and padding.  */
Packit a4aae4
      pstr->wcs[byte_idx++] = wc;
Packit a4aae4
      /* Write paddings.  */
Packit a4aae4
      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
Packit a4aae4
	pstr->wcs[byte_idx++] = WEOF;
Packit a4aae4
    }
Packit a4aae4
  pstr->valid_len = byte_idx;
Packit a4aae4
  pstr->valid_raw_len = byte_idx;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
Packit a4aae4
   but for REG_ICASE.  */
Packit a4aae4
Packit a4aae4
static reg_errcode_t
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
build_wcs_upper_buffer (re_string_t *pstr)
Packit a4aae4
{
Packit a4aae4
  mbstate_t prev_st;
Packit a4aae4
  Idx src_idx, byte_idx, end_idx, remain_len;
Packit a4aae4
  size_t mbclen;
Packit a4aae4
#ifdef _LIBC
Packit a4aae4
  char buf[MB_LEN_MAX];
Packit a4aae4
  assert (MB_LEN_MAX >= pstr->mb_cur_max);
Packit a4aae4
#else
Packit a4aae4
  char buf[64];
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
  byte_idx = pstr->valid_len;
Packit a4aae4
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit a4aae4
Packit a4aae4
  /* The following optimization assumes that ASCII characters can be
Packit a4aae4
     mapped to wide characters with a simple cast.  */
Packit a4aae4
  if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
Packit a4aae4
    {
Packit a4aae4
      while (byte_idx < end_idx)
Packit a4aae4
	{
Packit a4aae4
	  wchar_t wc;
Packit a4aae4
Packit a4aae4
	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
Packit a4aae4
	      && mbsinit (&pstr->cur_state))
Packit a4aae4
	    {
Packit a4aae4
	      /* In case of a singlebyte character.  */
Packit a4aae4
	      pstr->mbs[byte_idx]
Packit a4aae4
		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
Packit a4aae4
	      /* The next step uses the assumption that wchar_t is encoded
Packit a4aae4
		 ASCII-safe: all ASCII values can be converted like this.  */
Packit a4aae4
	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
Packit a4aae4
	      ++byte_idx;
Packit a4aae4
	      continue;
Packit a4aae4
	    }
Packit a4aae4
Packit a4aae4
	  remain_len = end_idx - byte_idx;
Packit a4aae4
	  prev_st = pstr->cur_state;
Packit a4aae4
	  mbclen = __mbrtowc (&wc,
Packit a4aae4
			      ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
Packit a4aae4
			       + byte_idx), remain_len, &pstr->cur_state);
Packit a4aae4
	  if (BE (mbclen < (size_t) -2, 1))
Packit a4aae4
	    {
Packit a4aae4
	      wchar_t wcu = __towupper (wc);
Packit a4aae4
	      if (wcu != wc)
Packit a4aae4
		{
Packit a4aae4
		  size_t mbcdlen;
Packit a4aae4
Packit a4aae4
		  mbcdlen = __wcrtomb (buf, wcu, &prev_st);
Packit a4aae4
		  if (BE (mbclen == mbcdlen, 1))
Packit a4aae4
		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
Packit a4aae4
		  else
Packit a4aae4
		    {
Packit a4aae4
		      src_idx = byte_idx;
Packit a4aae4
		      goto offsets_needed;
Packit a4aae4
		    }
Packit a4aae4
		}
Packit a4aae4
	      else
Packit a4aae4
		memcpy (pstr->mbs + byte_idx,
Packit a4aae4
			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
Packit a4aae4
	      pstr->wcs[byte_idx++] = wcu;
Packit a4aae4
	      /* Write paddings.  */
Packit a4aae4
	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
Packit a4aae4
		pstr->wcs[byte_idx++] = WEOF;
Packit a4aae4
	    }
Packit a4aae4
	  else if (mbclen == (size_t) -1 || mbclen == 0
Packit a4aae4
		   || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
Packit a4aae4
	    {
Packit a4aae4
	      /* It is an invalid character, an incomplete character
Packit a4aae4
		 at the end of the string, or '\0'.  Just use the byte.  */
Packit a4aae4
	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
Packit a4aae4
	      pstr->mbs[byte_idx] = ch;
Packit a4aae4
	      /* And also cast it to wide char.  */
Packit a4aae4
	      pstr->wcs[byte_idx++] = (wchar_t) ch;
Packit a4aae4
	      if (BE (mbclen == (size_t) -1, 0))
Packit a4aae4
		pstr->cur_state = prev_st;
Packit a4aae4
	    }
Packit a4aae4
	  else
Packit a4aae4
	    {
Packit a4aae4
	      /* The buffer doesn't have enough space, finish to build.  */
Packit a4aae4
	      pstr->cur_state = prev_st;
Packit a4aae4
	      break;
Packit a4aae4
	    }
Packit a4aae4
	}
Packit a4aae4
      pstr->valid_len = byte_idx;
Packit a4aae4
      pstr->valid_raw_len = byte_idx;
Packit a4aae4
      return REG_NOERROR;
Packit a4aae4
    }
Packit a4aae4
  else
Packit a4aae4
    for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
Packit a4aae4
      {
Packit a4aae4
	wchar_t wc;
Packit a4aae4
	const char *p;
Packit a4aae4
      offsets_needed:
Packit a4aae4
	remain_len = end_idx - byte_idx;
Packit a4aae4
	prev_st = pstr->cur_state;
Packit a4aae4
	if (BE (pstr->trans != NULL, 0))
Packit a4aae4
	  {
Packit a4aae4
	    int i, ch;
Packit a4aae4
Packit a4aae4
	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
Packit a4aae4
	      {
Packit a4aae4
		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
Packit a4aae4
		buf[i] = pstr->trans[ch];
Packit a4aae4
	      }
Packit a4aae4
	    p = (const char *) buf;
Packit a4aae4
	  }
Packit a4aae4
	else
Packit a4aae4
	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
Packit a4aae4
	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
Packit a4aae4
	if (BE (mbclen < (size_t) -2, 1))
Packit a4aae4
	  {
Packit a4aae4
	    wchar_t wcu = __towupper (wc);
Packit a4aae4
	    if (wcu != wc)
Packit a4aae4
	      {
Packit a4aae4
		size_t mbcdlen;
Packit a4aae4
Packit a4aae4
		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
Packit a4aae4
		if (BE (mbclen == mbcdlen, 1))
Packit a4aae4
		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
Packit a4aae4
		else if (mbcdlen != (size_t) -1)
Packit a4aae4
		  {
Packit a4aae4
		    size_t i;
Packit a4aae4
Packit a4aae4
		    if (byte_idx + mbcdlen > pstr->bufs_len)
Packit a4aae4
		      {
Packit a4aae4
			pstr->cur_state = prev_st;
Packit a4aae4
			break;
Packit a4aae4
		      }
Packit a4aae4
Packit a4aae4
		    if (pstr->offsets == NULL)
Packit a4aae4
		      {
Packit a4aae4
			pstr->offsets = re_malloc (Idx, pstr->bufs_len);
Packit a4aae4
Packit a4aae4
			if (pstr->offsets == NULL)
Packit a4aae4
			  return REG_ESPACE;
Packit a4aae4
		      }
Packit a4aae4
		    if (!pstr->offsets_needed)
Packit a4aae4
		      {
Packit a4aae4
			for (i = 0; i < (size_t) byte_idx; ++i)
Packit a4aae4
			  pstr->offsets[i] = i;
Packit a4aae4
			pstr->offsets_needed = 1;
Packit a4aae4
		      }
Packit a4aae4
Packit a4aae4
		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
Packit a4aae4
		    pstr->wcs[byte_idx] = wcu;
Packit a4aae4
		    pstr->offsets[byte_idx] = src_idx;
Packit a4aae4
		    for (i = 1; i < mbcdlen; ++i)
Packit a4aae4
		      {
Packit a4aae4
			pstr->offsets[byte_idx + i]
Packit a4aae4
			  = src_idx + (i < mbclen ? i : mbclen - 1);
Packit a4aae4
			pstr->wcs[byte_idx + i] = WEOF;
Packit a4aae4
		      }
Packit a4aae4
		    pstr->len += mbcdlen - mbclen;
Packit a4aae4
		    if (pstr->raw_stop > src_idx)
Packit a4aae4
		      pstr->stop += mbcdlen - mbclen;
Packit a4aae4
		    end_idx = (pstr->bufs_len > pstr->len)
Packit a4aae4
			      ? pstr->len : pstr->bufs_len;
Packit a4aae4
		    byte_idx += mbcdlen;
Packit a4aae4
		    src_idx += mbclen;
Packit a4aae4
		    continue;
Packit a4aae4
		  }
Packit a4aae4
		else
Packit a4aae4
		  memcpy (pstr->mbs + byte_idx, p, mbclen);
Packit a4aae4
	      }
Packit a4aae4
	    else
Packit a4aae4
	      memcpy (pstr->mbs + byte_idx, p, mbclen);
Packit a4aae4
Packit a4aae4
	    if (BE (pstr->offsets_needed != 0, 0))
Packit a4aae4
	      {
Packit a4aae4
		size_t i;
Packit a4aae4
		for (i = 0; i < mbclen; ++i)
Packit a4aae4
		  pstr->offsets[byte_idx + i] = src_idx + i;
Packit a4aae4
	      }
Packit a4aae4
	    src_idx += mbclen;
Packit a4aae4
Packit a4aae4
	    pstr->wcs[byte_idx++] = wcu;
Packit a4aae4
	    /* Write paddings.  */
Packit a4aae4
	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
Packit a4aae4
	      pstr->wcs[byte_idx++] = WEOF;
Packit a4aae4
	  }
Packit a4aae4
	else if (mbclen == (size_t) -1 || mbclen == 0
Packit a4aae4
		 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
Packit a4aae4
	  {
Packit a4aae4
	    /* It is an invalid character or '\0'.  Just use the byte.  */
Packit a4aae4
	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
Packit a4aae4
Packit a4aae4
	    if (BE (pstr->trans != NULL, 0))
Packit a4aae4
	      ch = pstr->trans [ch];
Packit a4aae4
	    pstr->mbs[byte_idx] = ch;
Packit a4aae4
Packit a4aae4
	    if (BE (pstr->offsets_needed != 0, 0))
Packit a4aae4
	      pstr->offsets[byte_idx] = src_idx;
Packit a4aae4
	    ++src_idx;
Packit a4aae4
Packit a4aae4
	    /* And also cast it to wide char.  */
Packit a4aae4
	    pstr->wcs[byte_idx++] = (wchar_t) ch;
Packit a4aae4
	    if (BE (mbclen == (size_t) -1, 0))
Packit a4aae4
	      pstr->cur_state = prev_st;
Packit a4aae4
	  }
Packit a4aae4
	else
Packit a4aae4
	  {
Packit a4aae4
	    /* The buffer doesn't have enough space, finish to build.  */
Packit a4aae4
	    pstr->cur_state = prev_st;
Packit a4aae4
	    break;
Packit a4aae4
	  }
Packit a4aae4
      }
Packit a4aae4
  pstr->valid_len = byte_idx;
Packit a4aae4
  pstr->valid_raw_len = src_idx;
Packit a4aae4
  return REG_NOERROR;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Skip characters until the index becomes greater than NEW_RAW_IDX.
Packit a4aae4
   Return the index.  */
Packit a4aae4
Packit a4aae4
static Idx
Packit a4aae4
internal_function
Packit a4aae4
re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
Packit a4aae4
{
Packit a4aae4
  mbstate_t prev_st;
Packit a4aae4
  Idx rawbuf_idx;
Packit a4aae4
  size_t mbclen;
Packit a4aae4
  wint_t wc = WEOF;
Packit a4aae4
Packit a4aae4
  /* Skip the characters which are not necessary to check.  */
Packit a4aae4
  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
Packit a4aae4
       rawbuf_idx < new_raw_idx;)
Packit a4aae4
    {
Packit a4aae4
      wchar_t wc2;
Packit a4aae4
      Idx remain_len = pstr->raw_len - rawbuf_idx;
Packit a4aae4
      prev_st = pstr->cur_state;
Packit a4aae4
      mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
Packit a4aae4
			  remain_len, &pstr->cur_state);
Packit a4aae4
      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
Packit a4aae4
	{
Packit a4aae4
	  /* We treat these cases as a single byte character.  */
Packit a4aae4
	  if (mbclen == 0 || remain_len == 0)
Packit a4aae4
	    wc = L'\0';
Packit a4aae4
	  else
Packit a4aae4
	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
Packit a4aae4
	  mbclen = 1;
Packit a4aae4
	  pstr->cur_state = prev_st;
Packit a4aae4
	}
Packit a4aae4
      else
Packit a4aae4
	wc = wc2;
Packit a4aae4
      /* Then proceed the next character.  */
Packit a4aae4
      rawbuf_idx += mbclen;
Packit a4aae4
    }
Packit a4aae4
  *last_wc = wc;
Packit a4aae4
  return rawbuf_idx;
Packit a4aae4
}
Packit a4aae4
#endif /* RE_ENABLE_I18N  */
Packit a4aae4
Packit a4aae4
/* Build the buffer PSTR->MBS, and apply the translation if we need.
Packit a4aae4
   This function is used in case of REG_ICASE.  */
Packit a4aae4
Packit a4aae4
static void
Packit a4aae4
internal_function
Packit a4aae4
build_upper_buffer (re_string_t *pstr)
Packit a4aae4
{
Packit a4aae4
  Idx char_idx, end_idx;
Packit a4aae4
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit a4aae4
Packit a4aae4
  for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
Packit a4aae4
    {
Packit a4aae4
      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
Packit a4aae4
      if (BE (pstr->trans != NULL, 0))
Packit a4aae4
	ch = pstr->trans[ch];
Packit a4aae4
      pstr->mbs[char_idx] = toupper (ch);
Packit a4aae4
    }
Packit a4aae4
  pstr->valid_len = char_idx;
Packit a4aae4
  pstr->valid_raw_len = char_idx;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Apply TRANS to the buffer in PSTR.  */
Packit a4aae4
Packit a4aae4
static void
Packit a4aae4
internal_function
Packit a4aae4
re_string_translate_buffer (re_string_t *pstr)
Packit a4aae4
{
Packit a4aae4
  Idx buf_idx, end_idx;
Packit a4aae4
  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
Packit a4aae4
Packit a4aae4
  for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
Packit a4aae4
    {
Packit a4aae4
      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
Packit a4aae4
      pstr->mbs[buf_idx] = pstr->trans[ch];
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  pstr->valid_len = buf_idx;
Packit a4aae4
  pstr->valid_raw_len = buf_idx;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* This function re-construct the buffers.
Packit a4aae4
   Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
Packit a4aae4
   convert to upper case in case of REG_ICASE, apply translation.  */
Packit a4aae4
Packit a4aae4
static reg_errcode_t
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
Packit a4aae4
{
Packit a4aae4
  Idx offset;
Packit a4aae4
Packit a4aae4
  if (BE (pstr->raw_mbs_idx <= idx, 0))
Packit a4aae4
    offset = idx - pstr->raw_mbs_idx;
Packit a4aae4
  else
Packit a4aae4
    {
Packit a4aae4
      /* Reset buffer.  */
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
      if (pstr->mb_cur_max > 1)
Packit a4aae4
	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
Packit a4aae4
#endif /* RE_ENABLE_I18N */
Packit a4aae4
      pstr->len = pstr->raw_len;
Packit a4aae4
      pstr->stop = pstr->raw_stop;
Packit a4aae4
      pstr->valid_len = 0;
Packit a4aae4
      pstr->raw_mbs_idx = 0;
Packit a4aae4
      pstr->valid_raw_len = 0;
Packit a4aae4
      pstr->offsets_needed = 0;
Packit a4aae4
      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
Packit a4aae4
			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
Packit a4aae4
      if (!pstr->mbs_allocated)
Packit a4aae4
	pstr->mbs = (unsigned char *) pstr->raw_mbs;
Packit a4aae4
      offset = idx;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  if (BE (offset != 0, 1))
Packit a4aae4
    {
Packit a4aae4
      /* Should the already checked characters be kept?  */
Packit a4aae4
      if (BE (offset < pstr->valid_raw_len, 1))
Packit a4aae4
	{
Packit a4aae4
	  /* Yes, move them to the front of the buffer.  */
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
	  if (BE (pstr->offsets_needed, 0))
Packit a4aae4
	    {
Packit a4aae4
	      Idx low = 0, high = pstr->valid_len, mid;
Packit a4aae4
	      do
Packit a4aae4
		{
Packit a4aae4
		  mid = (high + low) / 2;
Packit a4aae4
		  if (pstr->offsets[mid] > offset)
Packit a4aae4
		    high = mid;
Packit a4aae4
		  else if (pstr->offsets[mid] < offset)
Packit a4aae4
		    low = mid + 1;
Packit a4aae4
		  else
Packit a4aae4
		    break;
Packit a4aae4
		}
Packit a4aae4
	      while (low < high);
Packit a4aae4
	      if (pstr->offsets[mid] < offset)
Packit a4aae4
		++mid;
Packit a4aae4
	      pstr->tip_context = re_string_context_at (pstr, mid - 1,
Packit a4aae4
							eflags);
Packit a4aae4
	      /* This can be quite complicated, so handle specially
Packit a4aae4
		 only the common and easy case where the character with
Packit a4aae4
		 different length representation of lower and upper
Packit a4aae4
		 case is present at or after offset.  */
Packit a4aae4
	      if (pstr->valid_len > offset
Packit a4aae4
		  && mid == offset && pstr->offsets[mid] == offset)
Packit a4aae4
		{
Packit a4aae4
		  memmove (pstr->wcs, pstr->wcs + offset,
Packit a4aae4
			   (pstr->valid_len - offset) * sizeof (wint_t));
Packit a4aae4
		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
Packit a4aae4
		  pstr->valid_len -= offset;
Packit a4aae4
		  pstr->valid_raw_len -= offset;
Packit a4aae4
		  for (low = 0; low < pstr->valid_len; low++)
Packit a4aae4
		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;
Packit a4aae4
		}
Packit a4aae4
	      else
Packit a4aae4
		{
Packit a4aae4
		  /* Otherwise, just find out how long the partial multibyte
Packit a4aae4
		     character at offset is and fill it with WEOF/255.  */
Packit a4aae4
		  pstr->len = pstr->raw_len - idx + offset;
Packit a4aae4
		  pstr->stop = pstr->raw_stop - idx + offset;
Packit a4aae4
		  pstr->offsets_needed = 0;
Packit a4aae4
		  while (mid > 0 && pstr->offsets[mid - 1] == offset)
Packit a4aae4
		    --mid;
Packit a4aae4
		  while (mid < pstr->valid_len)
Packit a4aae4
		    if (pstr->wcs[mid] != WEOF)
Packit a4aae4
		      break;
Packit a4aae4
		    else
Packit a4aae4
		      ++mid;
Packit a4aae4
		  if (mid == pstr->valid_len)
Packit a4aae4
		    pstr->valid_len = 0;
Packit a4aae4
		  else
Packit a4aae4
		    {
Packit a4aae4
		      pstr->valid_len = pstr->offsets[mid] - offset;
Packit a4aae4
		      if (pstr->valid_len)
Packit a4aae4
			{
Packit a4aae4
			  for (low = 0; low < pstr->valid_len; ++low)
Packit a4aae4
			    pstr->wcs[low] = WEOF;
Packit a4aae4
			  memset (pstr->mbs, 255, pstr->valid_len);
Packit a4aae4
			}
Packit a4aae4
		    }
Packit a4aae4
		  pstr->valid_raw_len = pstr->valid_len;
Packit a4aae4
		}
Packit a4aae4
	    }
Packit a4aae4
	  else
Packit a4aae4
#endif
Packit a4aae4
	    {
Packit a4aae4
	      pstr->tip_context = re_string_context_at (pstr, offset - 1,
Packit a4aae4
							eflags);
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
	      if (pstr->mb_cur_max > 1)
Packit a4aae4
		memmove (pstr->wcs, pstr->wcs + offset,
Packit a4aae4
			 (pstr->valid_len - offset) * sizeof (wint_t));
Packit a4aae4
#endif /* RE_ENABLE_I18N */
Packit a4aae4
	      if (BE (pstr->mbs_allocated, 0))
Packit a4aae4
		memmove (pstr->mbs, pstr->mbs + offset,
Packit a4aae4
			 pstr->valid_len - offset);
Packit a4aae4
	      pstr->valid_len -= offset;
Packit a4aae4
	      pstr->valid_raw_len -= offset;
Packit a4aae4
#if defined DEBUG && DEBUG
Packit a4aae4
	      assert (pstr->valid_len > 0);
Packit a4aae4
#endif
Packit a4aae4
	    }
Packit a4aae4
	}
Packit a4aae4
      else
Packit a4aae4
	{
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
	  /* No, skip all characters until IDX.  */
Packit a4aae4
	  Idx prev_valid_len = pstr->valid_len;
Packit a4aae4
Packit a4aae4
	  if (BE (pstr->offsets_needed, 0))
Packit a4aae4
	    {
Packit a4aae4
	      pstr->len = pstr->raw_len - idx + offset;
Packit a4aae4
	      pstr->stop = pstr->raw_stop - idx + offset;
Packit a4aae4
	      pstr->offsets_needed = 0;
Packit a4aae4
	    }
Packit a4aae4
#endif
Packit a4aae4
	  pstr->valid_len = 0;
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
	  if (pstr->mb_cur_max > 1)
Packit a4aae4
	    {
Packit a4aae4
	      Idx wcs_idx;
Packit a4aae4
	      wint_t wc = WEOF;
Packit a4aae4
Packit a4aae4
	      if (pstr->is_utf8)
Packit a4aae4
		{
Packit a4aae4
		  const unsigned char *raw, *p, *end;
Packit a4aae4
Packit a4aae4
		  /* Special case UTF-8.  Multi-byte chars start with any
Packit a4aae4
		     byte other than 0x80 - 0xbf.  */
Packit a4aae4
		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
Packit a4aae4
		  end = raw + (offset - pstr->mb_cur_max);
Packit a4aae4
		  if (end < pstr->raw_mbs)
Packit a4aae4
		    end = pstr->raw_mbs;
Packit a4aae4
		  p = raw + offset - 1;
Packit a4aae4
#ifdef _LIBC
Packit a4aae4
		  /* We know the wchar_t encoding is UCS4, so for the simple
Packit a4aae4
		     case, ASCII characters, skip the conversion step.  */
Packit a4aae4
		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
Packit a4aae4
		    {
Packit a4aae4
		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
Packit a4aae4
		      /* pstr->valid_len = 0; */
Packit a4aae4
		      wc = (wchar_t) *p;
Packit a4aae4
		    }
Packit a4aae4
		  else
Packit a4aae4
#endif
Packit a4aae4
		    for (; p >= end; --p)
Packit a4aae4
		      if ((*p & 0xc0) != 0x80)
Packit a4aae4
			{
Packit a4aae4
			  mbstate_t cur_state;
Packit a4aae4
			  wchar_t wc2;
Packit a4aae4
			  Idx mlen = raw + pstr->len - p;
Packit a4aae4
			  unsigned char buf[6];
Packit a4aae4
			  size_t mbclen;
Packit a4aae4
Packit a4aae4
			  const unsigned char *pp = p;
Packit a4aae4
			  if (BE (pstr->trans != NULL, 0))
Packit a4aae4
			    {
Packit a4aae4
			      int i = mlen < 6 ? mlen : 6;
Packit a4aae4
			      while (--i >= 0)
Packit a4aae4
				buf[i] = pstr->trans[p[i]];
Packit a4aae4
			      pp = buf;
Packit a4aae4
			    }
Packit a4aae4
			  /* XXX Don't use mbrtowc, we know which conversion
Packit a4aae4
			     to use (UTF-8 -> UCS4).  */
Packit a4aae4
			  memset (&cur_state, 0, sizeof (cur_state));
Packit a4aae4
			  mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
Packit a4aae4
					      &cur_state);
Packit a4aae4
			  if (raw + offset - p <= mbclen
Packit a4aae4
			      && mbclen < (size_t) -2)
Packit a4aae4
			    {
Packit a4aae4
			      memset (&pstr->cur_state, '\0',
Packit a4aae4
				      sizeof (mbstate_t));
Packit a4aae4
			      pstr->valid_len = mbclen - (raw + offset - p);
Packit a4aae4
			      wc = wc2;
Packit a4aae4
			    }
Packit a4aae4
			  break;
Packit a4aae4
			}
Packit a4aae4
		}
Packit a4aae4
Packit a4aae4
	      if (wc == WEOF)
Packit a4aae4
		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
Packit a4aae4
	      if (wc == WEOF)
Packit a4aae4
		pstr->tip_context
Packit a4aae4
		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);
Packit a4aae4
	      else
Packit a4aae4
		pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
Packit a4aae4
				      && IS_WIDE_WORD_CHAR (wc))
Packit a4aae4
				     ? CONTEXT_WORD
Packit a4aae4
				     : ((IS_WIDE_NEWLINE (wc)
Packit a4aae4
					 && pstr->newline_anchor)
Packit a4aae4
					? CONTEXT_NEWLINE : 0));
Packit a4aae4
	      if (BE (pstr->valid_len, 0))
Packit a4aae4
		{
Packit a4aae4
		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
Packit a4aae4
		    pstr->wcs[wcs_idx] = WEOF;
Packit a4aae4
		  if (pstr->mbs_allocated)
Packit a4aae4
		    memset (pstr->mbs, 255, pstr->valid_len);
Packit a4aae4
		}
Packit a4aae4
	      pstr->valid_raw_len = pstr->valid_len;
Packit a4aae4
	    }
Packit a4aae4
	  else
Packit a4aae4
#endif /* RE_ENABLE_I18N */
Packit a4aae4
	    {
Packit a4aae4
	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
Packit a4aae4
	      pstr->valid_raw_len = 0;
Packit a4aae4
	      if (pstr->trans)
Packit a4aae4
		c = pstr->trans[c];
Packit a4aae4
	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
Packit a4aae4
				   ? CONTEXT_WORD
Packit a4aae4
				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
Packit a4aae4
				      ? CONTEXT_NEWLINE : 0));
Packit a4aae4
	    }
Packit a4aae4
	}
Packit a4aae4
      if (!BE (pstr->mbs_allocated, 0))
Packit a4aae4
	pstr->mbs += offset;
Packit a4aae4
    }
Packit a4aae4
  pstr->raw_mbs_idx = idx;
Packit a4aae4
  pstr->len -= offset;
Packit a4aae4
  pstr->stop -= offset;
Packit a4aae4
Packit a4aae4
  /* Then build the buffers.  */
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
  if (pstr->mb_cur_max > 1)
Packit a4aae4
    {
Packit a4aae4
      if (pstr->icase)
Packit a4aae4
	{
Packit a4aae4
	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
Packit a4aae4
	  if (BE (ret != REG_NOERROR, 0))
Packit a4aae4
	    return ret;
Packit a4aae4
	}
Packit a4aae4
      else
Packit a4aae4
	build_wcs_buffer (pstr);
Packit a4aae4
    }
Packit a4aae4
  else
Packit a4aae4
#endif /* RE_ENABLE_I18N */
Packit a4aae4
    if (BE (pstr->mbs_allocated, 0))
Packit a4aae4
      {
Packit a4aae4
	if (pstr->icase)
Packit a4aae4
	  build_upper_buffer (pstr);
Packit a4aae4
	else if (pstr->trans != NULL)
Packit a4aae4
	  re_string_translate_buffer (pstr);
Packit a4aae4
      }
Packit a4aae4
    else
Packit a4aae4
      pstr->valid_len = pstr->len;
Packit a4aae4
Packit a4aae4
  pstr->cur_idx = 0;
Packit a4aae4
  return REG_NOERROR;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
static unsigned char
Packit a4aae4
internal_function __attribute__ ((pure))
Packit a4aae4
re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
Packit a4aae4
{
Packit a4aae4
  int ch;
Packit a4aae4
  Idx off;
Packit a4aae4
Packit a4aae4
  /* Handle the common (easiest) cases first.  */
Packit a4aae4
  if (BE (!pstr->mbs_allocated, 1))
Packit a4aae4
    return re_string_peek_byte (pstr, idx);
Packit a4aae4
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
  if (pstr->mb_cur_max > 1
Packit a4aae4
      && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
Packit a4aae4
    return re_string_peek_byte (pstr, idx);
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
  off = pstr->cur_idx + idx;
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
  if (pstr->offsets_needed)
Packit a4aae4
    off = pstr->offsets[off];
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
  ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
Packit a4aae4
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
  /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
Packit a4aae4
     this function returns CAPITAL LETTER I instead of first byte of
Packit a4aae4
     DOTLESS SMALL LETTER I.  The latter would confuse the parser,
Packit a4aae4
     since peek_byte_case doesn't advance cur_idx in any way.  */
Packit a4aae4
  if (pstr->offsets_needed && !isascii (ch))
Packit a4aae4
    return re_string_peek_byte (pstr, idx);
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
  return ch;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
static unsigned char
Packit a4aae4
internal_function
Packit a4aae4
re_string_fetch_byte_case (re_string_t *pstr)
Packit a4aae4
{
Packit a4aae4
  if (BE (!pstr->mbs_allocated, 1))
Packit a4aae4
    return re_string_fetch_byte (pstr);
Packit a4aae4
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
  if (pstr->offsets_needed)
Packit a4aae4
    {
Packit a4aae4
      Idx off;
Packit a4aae4
      int ch;
Packit a4aae4
Packit a4aae4
      /* For tr_TR.UTF-8 [[:islower:]] there is
Packit a4aae4
	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
Packit a4aae4
	 in that case the whole multi-byte character and return
Packit a4aae4
	 the original letter.  On the other side, with
Packit a4aae4
	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
Packit a4aae4
	 anything else would complicate things too much.  */
Packit a4aae4
Packit a4aae4
      if (!re_string_first_byte (pstr, pstr->cur_idx))
Packit a4aae4
	return re_string_fetch_byte (pstr);
Packit a4aae4
Packit a4aae4
      off = pstr->offsets[pstr->cur_idx];
Packit a4aae4
      ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
Packit a4aae4
Packit a4aae4
      if (! isascii (ch))
Packit a4aae4
	return re_string_fetch_byte (pstr);
Packit a4aae4
Packit a4aae4
      re_string_skip_bytes (pstr,
Packit a4aae4
			    re_string_char_size_at (pstr, pstr->cur_idx));
Packit a4aae4
      return ch;
Packit a4aae4
    }
Packit a4aae4
#endif
Packit a4aae4
Packit a4aae4
  return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
static void
Packit a4aae4
internal_function
Packit a4aae4
re_string_destruct (re_string_t *pstr)
Packit a4aae4
{
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
  re_free (pstr->wcs);
Packit a4aae4
  re_free (pstr->offsets);
Packit a4aae4
#endif /* RE_ENABLE_I18N  */
Packit a4aae4
  if (pstr->mbs_allocated)
Packit a4aae4
    re_free (pstr->mbs);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Return the context at IDX in INPUT.  */
Packit a4aae4
Packit a4aae4
static unsigned int
Packit a4aae4
internal_function
Packit a4aae4
re_string_context_at (const re_string_t *input, Idx idx, int eflags)
Packit a4aae4
{
Packit a4aae4
  int c;
Packit a4aae4
  if (BE (idx < 0, 0))
Packit a4aae4
    /* In this case, we use the value stored in input->tip_context,
Packit a4aae4
       since we can't know the character in input->mbs[-1] here.  */
Packit a4aae4
    return input->tip_context;
Packit a4aae4
  if (BE (idx == input->len, 0))
Packit a4aae4
    return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
Packit a4aae4
	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
  if (input->mb_cur_max > 1)
Packit a4aae4
    {
Packit a4aae4
      wint_t wc;
Packit a4aae4
      Idx wc_idx = idx;
Packit a4aae4
      while(input->wcs[wc_idx] == WEOF)
Packit a4aae4
	{
Packit a4aae4
#if defined DEBUG && DEBUG
Packit a4aae4
	  /* It must not happen.  */
Packit a4aae4
	  assert (wc_idx >= 0);
Packit a4aae4
#endif
Packit a4aae4
	  --wc_idx;
Packit a4aae4
	  if (wc_idx < 0)
Packit a4aae4
	    return input->tip_context;
Packit a4aae4
	}
Packit a4aae4
      wc = input->wcs[wc_idx];
Packit a4aae4
      if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
Packit a4aae4
	return CONTEXT_WORD;
Packit a4aae4
      return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
Packit a4aae4
	      ? CONTEXT_NEWLINE : 0);
Packit a4aae4
    }
Packit a4aae4
  else
Packit a4aae4
#endif
Packit a4aae4
    {
Packit a4aae4
      c = re_string_byte_at (input, idx);
Packit a4aae4
      if (bitset_contain (input->word_char, c))
Packit a4aae4
	return CONTEXT_WORD;
Packit a4aae4
      return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
Packit a4aae4
    }
Packit a4aae4
}
Packit a4aae4

Packit a4aae4
/* Functions for set operation.  */
Packit a4aae4
Packit a4aae4
static reg_errcode_t
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
re_node_set_alloc (re_node_set *set, Idx size)
Packit a4aae4
{
Packit a4aae4
  set->alloc = size;
Packit a4aae4
  set->nelem = 0;
Packit a4aae4
  set->elems = re_malloc (Idx, size);
Packit a4aae4
  if (BE (set->elems == NULL, 0) && (MALLOC_0_IS_NONNULL || size != 0))
Packit a4aae4
    return REG_ESPACE;
Packit a4aae4
  return REG_NOERROR;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
static reg_errcode_t
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
re_node_set_init_1 (re_node_set *set, Idx elem)
Packit a4aae4
{
Packit a4aae4
  set->alloc = 1;
Packit a4aae4
  set->nelem = 1;
Packit a4aae4
  set->elems = re_malloc (Idx, 1);
Packit a4aae4
  if (BE (set->elems == NULL, 0))
Packit a4aae4
    {
Packit a4aae4
      set->alloc = set->nelem = 0;
Packit a4aae4
      return REG_ESPACE;
Packit a4aae4
    }
Packit a4aae4
  set->elems[0] = elem;
Packit a4aae4
  return REG_NOERROR;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
static reg_errcode_t
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
Packit a4aae4
{
Packit a4aae4
  set->alloc = 2;
Packit a4aae4
  set->elems = re_malloc (Idx, 2);
Packit a4aae4
  if (BE (set->elems == NULL, 0))
Packit a4aae4
    return REG_ESPACE;
Packit a4aae4
  if (elem1 == elem2)
Packit a4aae4
    {
Packit a4aae4
      set->nelem = 1;
Packit a4aae4
      set->elems[0] = elem1;
Packit a4aae4
    }
Packit a4aae4
  else
Packit a4aae4
    {
Packit a4aae4
      set->nelem = 2;
Packit a4aae4
      if (elem1 < elem2)
Packit a4aae4
	{
Packit a4aae4
	  set->elems[0] = elem1;
Packit a4aae4
	  set->elems[1] = elem2;
Packit a4aae4
	}
Packit a4aae4
      else
Packit a4aae4
	{
Packit a4aae4
	  set->elems[0] = elem2;
Packit a4aae4
	  set->elems[1] = elem1;
Packit a4aae4
	}
Packit a4aae4
    }
Packit a4aae4
  return REG_NOERROR;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
static reg_errcode_t
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
Packit a4aae4
{
Packit a4aae4
  dest->nelem = src->nelem;
Packit a4aae4
  if (src->nelem > 0)
Packit a4aae4
    {
Packit a4aae4
      dest->alloc = dest->nelem;
Packit a4aae4
      dest->elems = re_malloc (Idx, dest->alloc);
Packit a4aae4
      if (BE (dest->elems == NULL, 0))
Packit a4aae4
	{
Packit a4aae4
	  dest->alloc = dest->nelem = 0;
Packit a4aae4
	  return REG_ESPACE;
Packit a4aae4
	}
Packit a4aae4
      memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
Packit a4aae4
    }
Packit a4aae4
  else
Packit a4aae4
    re_node_set_init_empty (dest);
Packit a4aae4
  return REG_NOERROR;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
Packit a4aae4
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.
Packit a4aae4
   Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
Packit a4aae4
Packit a4aae4
static reg_errcode_t
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
Packit a4aae4
			   const re_node_set *src2)
Packit a4aae4
{
Packit a4aae4
  Idx i1, i2, is, id, delta, sbase;
Packit a4aae4
  if (src1->nelem == 0 || src2->nelem == 0)
Packit a4aae4
    return REG_NOERROR;
Packit a4aae4
Packit a4aae4
  /* We need dest->nelem + 2 * elems_in_intersection; this is a
Packit a4aae4
     conservative estimate.  */
Packit a4aae4
  if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
Packit a4aae4
    {
Packit a4aae4
      Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
Packit a4aae4
      Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
Packit a4aae4
      if (BE (new_elems == NULL, 0))
Packit a4aae4
	return REG_ESPACE;
Packit a4aae4
      dest->elems = new_elems;
Packit a4aae4
      dest->alloc = new_alloc;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  /* Find the items in the intersection of SRC1 and SRC2, and copy
Packit a4aae4
     into the top of DEST those that are not already in DEST itself.  */
Packit a4aae4
  sbase = dest->nelem + src1->nelem + src2->nelem;
Packit a4aae4
  i1 = src1->nelem - 1;
Packit a4aae4
  i2 = src2->nelem - 1;
Packit a4aae4
  id = dest->nelem - 1;
Packit a4aae4
  for (;;)
Packit a4aae4
    {
Packit a4aae4
      if (src1->elems[i1] == src2->elems[i2])
Packit a4aae4
	{
Packit a4aae4
	  /* Try to find the item in DEST.  Maybe we could binary search?  */
Packit a4aae4
	  while (id >= 0 && dest->elems[id] > src1->elems[i1])
Packit a4aae4
	    --id;
Packit a4aae4
Packit a4aae4
	  if (id < 0 || dest->elems[id] != src1->elems[i1])
Packit a4aae4
            dest->elems[--sbase] = src1->elems[i1];
Packit a4aae4
Packit a4aae4
	  if (--i1 < 0 || --i2 < 0)
Packit a4aae4
	    break;
Packit a4aae4
	}
Packit a4aae4
Packit a4aae4
      /* Lower the highest of the two items.  */
Packit a4aae4
      else if (src1->elems[i1] < src2->elems[i2])
Packit a4aae4
	{
Packit a4aae4
	  if (--i2 < 0)
Packit a4aae4
	    break;
Packit a4aae4
	}
Packit a4aae4
      else
Packit a4aae4
	{
Packit a4aae4
	  if (--i1 < 0)
Packit a4aae4
	    break;
Packit a4aae4
	}
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  id = dest->nelem - 1;
Packit a4aae4
  is = dest->nelem + src1->nelem + src2->nelem - 1;
Packit a4aae4
  delta = is - sbase + 1;
Packit a4aae4
Packit a4aae4
  /* Now copy.  When DELTA becomes zero, the remaining
Packit a4aae4
     DEST elements are already in place; this is more or
Packit a4aae4
     less the same loop that is in re_node_set_merge.  */
Packit a4aae4
  dest->nelem += delta;
Packit a4aae4
  if (delta > 0 && id >= 0)
Packit a4aae4
    for (;;)
Packit a4aae4
      {
Packit a4aae4
	if (dest->elems[is] > dest->elems[id])
Packit a4aae4
	  {
Packit a4aae4
	    /* Copy from the top.  */
Packit a4aae4
	    dest->elems[id + delta--] = dest->elems[is--];
Packit a4aae4
	    if (delta == 0)
Packit a4aae4
	      break;
Packit a4aae4
	  }
Packit a4aae4
	else
Packit a4aae4
	  {
Packit a4aae4
	    /* Slide from the bottom.  */
Packit a4aae4
	    dest->elems[id + delta] = dest->elems[id];
Packit a4aae4
	    if (--id < 0)
Packit a4aae4
	      break;
Packit a4aae4
	  }
Packit a4aae4
      }
Packit a4aae4
Packit a4aae4
  /* Copy remaining SRC elements.  */
Packit a4aae4
  memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
Packit a4aae4
Packit a4aae4
  return REG_NOERROR;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Calculate the union set of the sets SRC1 and SRC2. And store it to
Packit a4aae4
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
Packit a4aae4
Packit a4aae4
static reg_errcode_t
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
Packit a4aae4
			const re_node_set *src2)
Packit a4aae4
{
Packit a4aae4
  Idx i1, i2, id;
Packit a4aae4
  if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
Packit a4aae4
    {
Packit a4aae4
      dest->alloc = src1->nelem + src2->nelem;
Packit a4aae4
      dest->elems = re_malloc (Idx, dest->alloc);
Packit a4aae4
      if (BE (dest->elems == NULL, 0))
Packit a4aae4
	return REG_ESPACE;
Packit a4aae4
    }
Packit a4aae4
  else
Packit a4aae4
    {
Packit a4aae4
      if (src1 != NULL && src1->nelem > 0)
Packit a4aae4
	return re_node_set_init_copy (dest, src1);
Packit a4aae4
      else if (src2 != NULL && src2->nelem > 0)
Packit a4aae4
	return re_node_set_init_copy (dest, src2);
Packit a4aae4
      else
Packit a4aae4
	re_node_set_init_empty (dest);
Packit a4aae4
      return REG_NOERROR;
Packit a4aae4
    }
Packit a4aae4
  for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
Packit a4aae4
    {
Packit a4aae4
      if (src1->elems[i1] > src2->elems[i2])
Packit a4aae4
	{
Packit a4aae4
	  dest->elems[id++] = src2->elems[i2++];
Packit a4aae4
	  continue;
Packit a4aae4
	}
Packit a4aae4
      if (src1->elems[i1] == src2->elems[i2])
Packit a4aae4
	++i2;
Packit a4aae4
      dest->elems[id++] = src1->elems[i1++];
Packit a4aae4
    }
Packit a4aae4
  if (i1 < src1->nelem)
Packit a4aae4
    {
Packit a4aae4
      memcpy (dest->elems + id, src1->elems + i1,
Packit a4aae4
	     (src1->nelem - i1) * sizeof (Idx));
Packit a4aae4
      id += src1->nelem - i1;
Packit a4aae4
    }
Packit a4aae4
  else if (i2 < src2->nelem)
Packit a4aae4
    {
Packit a4aae4
      memcpy (dest->elems + id, src2->elems + i2,
Packit a4aae4
	     (src2->nelem - i2) * sizeof (Idx));
Packit a4aae4
      id += src2->nelem - i2;
Packit a4aae4
    }
Packit a4aae4
  dest->nelem = id;
Packit a4aae4
  return REG_NOERROR;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Calculate the union set of the sets DEST and SRC. And store it to
Packit a4aae4
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
Packit a4aae4
Packit a4aae4
static reg_errcode_t
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
re_node_set_merge (re_node_set *dest, const re_node_set *src)
Packit a4aae4
{
Packit a4aae4
  Idx is, id, sbase, delta;
Packit a4aae4
  if (src == NULL || src->nelem == 0)
Packit a4aae4
    return REG_NOERROR;
Packit a4aae4
  if (dest->alloc < 2 * src->nelem + dest->nelem)
Packit a4aae4
    {
Packit a4aae4
      Idx new_alloc = 2 * (src->nelem + dest->alloc);
Packit a4aae4
      Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
Packit a4aae4
      if (BE (new_buffer == NULL, 0))
Packit a4aae4
	return REG_ESPACE;
Packit a4aae4
      dest->elems = new_buffer;
Packit a4aae4
      dest->alloc = new_alloc;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  if (BE (dest->nelem == 0, 0))
Packit a4aae4
    {
Packit a4aae4
      dest->nelem = src->nelem;
Packit a4aae4
      memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
Packit a4aae4
      return REG_NOERROR;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  /* Copy into the top of DEST the items of SRC that are not
Packit a4aae4
     found in DEST.  Maybe we could binary search in DEST?  */
Packit a4aae4
  for (sbase = dest->nelem + 2 * src->nelem,
Packit a4aae4
       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
Packit a4aae4
    {
Packit a4aae4
      if (dest->elems[id] == src->elems[is])
Packit a4aae4
	is--, id--;
Packit a4aae4
      else if (dest->elems[id] < src->elems[is])
Packit a4aae4
	dest->elems[--sbase] = src->elems[is--];
Packit a4aae4
      else /* if (dest->elems[id] > src->elems[is]) */
Packit a4aae4
	--id;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  if (is >= 0)
Packit a4aae4
    {
Packit a4aae4
      /* If DEST is exhausted, the remaining items of SRC must be unique.  */
Packit a4aae4
      sbase -= is + 1;
Packit a4aae4
      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  id = dest->nelem - 1;
Packit a4aae4
  is = dest->nelem + 2 * src->nelem - 1;
Packit a4aae4
  delta = is - sbase + 1;
Packit a4aae4
  if (delta == 0)
Packit a4aae4
    return REG_NOERROR;
Packit a4aae4
Packit a4aae4
  /* Now copy.  When DELTA becomes zero, the remaining
Packit a4aae4
     DEST elements are already in place.  */
Packit a4aae4
  dest->nelem += delta;
Packit a4aae4
  for (;;)
Packit a4aae4
    {
Packit a4aae4
      if (dest->elems[is] > dest->elems[id])
Packit a4aae4
	{
Packit a4aae4
	  /* Copy from the top.  */
Packit a4aae4
	  dest->elems[id + delta--] = dest->elems[is--];
Packit a4aae4
	  if (delta == 0)
Packit a4aae4
	    break;
Packit a4aae4
	}
Packit a4aae4
      else
Packit a4aae4
	{
Packit a4aae4
	  /* Slide from the bottom.  */
Packit a4aae4
	  dest->elems[id + delta] = dest->elems[id];
Packit a4aae4
	  if (--id < 0)
Packit a4aae4
	    {
Packit a4aae4
	      /* Copy remaining SRC elements.  */
Packit a4aae4
	      memcpy (dest->elems, dest->elems + sbase,
Packit a4aae4
		      delta * sizeof (Idx));
Packit a4aae4
	      break;
Packit a4aae4
	    }
Packit a4aae4
	}
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  return REG_NOERROR;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Insert the new element ELEM to the re_node_set* SET.
Packit a4aae4
   SET should not already have ELEM.
Packit a4aae4
   Return true if successful.  */
Packit a4aae4
Packit a4aae4
static bool
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
re_node_set_insert (re_node_set *set, Idx elem)
Packit a4aae4
{
Packit a4aae4
  Idx idx;
Packit a4aae4
  /* In case the set is empty.  */
Packit a4aae4
  if (set->alloc == 0)
Packit a4aae4
    return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
Packit a4aae4
Packit a4aae4
  if (BE (set->nelem, 0) == 0)
Packit a4aae4
    {
Packit a4aae4
      /* We already guaranteed above that set->alloc != 0.  */
Packit a4aae4
      set->elems[0] = elem;
Packit a4aae4
      ++set->nelem;
Packit a4aae4
      return true;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  /* Realloc if we need.  */
Packit a4aae4
  if (set->alloc == set->nelem)
Packit a4aae4
    {
Packit a4aae4
      Idx *new_elems;
Packit a4aae4
      set->alloc = set->alloc * 2;
Packit a4aae4
      new_elems = re_realloc (set->elems, Idx, set->alloc);
Packit a4aae4
      if (BE (new_elems == NULL, 0))
Packit a4aae4
	return false;
Packit a4aae4
      set->elems = new_elems;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  /* Move the elements which follows the new element.  Test the
Packit a4aae4
     first element separately to skip a check in the inner loop.  */
Packit a4aae4
  if (elem < set->elems[0])
Packit a4aae4
    {
Packit a4aae4
      idx = 0;
Packit a4aae4
      for (idx = set->nelem; idx > 0; idx--)
Packit a4aae4
	set->elems[idx] = set->elems[idx - 1];
Packit a4aae4
    }
Packit a4aae4
  else
Packit a4aae4
    {
Packit a4aae4
      for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
Packit a4aae4
	set->elems[idx] = set->elems[idx - 1];
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  /* Insert the new element.  */
Packit a4aae4
  set->elems[idx] = elem;
Packit a4aae4
  ++set->nelem;
Packit a4aae4
  return true;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Insert the new element ELEM to the re_node_set* SET.
Packit a4aae4
   SET should not already have any element greater than or equal to ELEM.
Packit a4aae4
   Return true if successful.  */
Packit a4aae4
Packit a4aae4
static bool
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
re_node_set_insert_last (re_node_set *set, Idx elem)
Packit a4aae4
{
Packit a4aae4
  /* Realloc if we need.  */
Packit a4aae4
  if (set->alloc == set->nelem)
Packit a4aae4
    {
Packit a4aae4
      Idx *new_elems;
Packit a4aae4
      set->alloc = (set->alloc + 1) * 2;
Packit a4aae4
      new_elems = re_realloc (set->elems, Idx, set->alloc);
Packit a4aae4
      if (BE (new_elems == NULL, 0))
Packit a4aae4
	return false;
Packit a4aae4
      set->elems = new_elems;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  /* Insert the new element.  */
Packit a4aae4
  set->elems[set->nelem++] = elem;
Packit a4aae4
  return true;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Compare two node sets SET1 and SET2.
Packit a4aae4
   Return true if SET1 and SET2 are equivalent.  */
Packit a4aae4
Packit a4aae4
static bool
Packit a4aae4
internal_function __attribute__ ((pure))
Packit a4aae4
re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
Packit a4aae4
{
Packit a4aae4
  Idx i;
Packit a4aae4
  if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
Packit a4aae4
    return false;
Packit a4aae4
  for (i = set1->nelem ; --i >= 0 ; )
Packit a4aae4
    if (set1->elems[i] != set2->elems[i])
Packit a4aae4
      return false;
Packit a4aae4
  return true;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
Packit a4aae4
Packit a4aae4
static Idx
Packit a4aae4
internal_function __attribute__ ((pure))
Packit a4aae4
re_node_set_contains (const re_node_set *set, Idx elem)
Packit a4aae4
{
Packit a4aae4
  __re_size_t idx, right, mid;
Packit a4aae4
  if (set->nelem <= 0)
Packit a4aae4
    return 0;
Packit a4aae4
Packit a4aae4
  /* Binary search the element.  */
Packit a4aae4
  idx = 0;
Packit a4aae4
  right = set->nelem - 1;
Packit a4aae4
  while (idx < right)
Packit a4aae4
    {
Packit a4aae4
      mid = (idx + right) / 2;
Packit a4aae4
      if (set->elems[mid] < elem)
Packit a4aae4
	idx = mid + 1;
Packit a4aae4
      else
Packit a4aae4
	right = mid;
Packit a4aae4
    }
Packit a4aae4
  return set->elems[idx] == elem ? idx + 1 : 0;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
static void
Packit a4aae4
internal_function
Packit a4aae4
re_node_set_remove_at (re_node_set *set, Idx idx)
Packit a4aae4
{
Packit a4aae4
  if (idx < 0 || idx >= set->nelem)
Packit a4aae4
    return;
Packit a4aae4
  --set->nelem;
Packit a4aae4
  for (; idx < set->nelem; idx++)
Packit a4aae4
    set->elems[idx] = set->elems[idx + 1];
Packit a4aae4
}
Packit a4aae4

Packit a4aae4
Packit a4aae4
/* Add the token TOKEN to dfa->nodes, and return the index of the token.
Packit a4aae4
   Or return -1 if an error occurred.  */
Packit a4aae4
Packit a4aae4
static Idx
Packit a4aae4
internal_function
Packit a4aae4
re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
Packit a4aae4
{
Packit a4aae4
  if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
Packit a4aae4
    {
Packit a4aae4
      size_t new_nodes_alloc = dfa->nodes_alloc * 2;
Packit a4aae4
      Idx *new_nexts, *new_indices;
Packit a4aae4
      re_node_set *new_edests, *new_eclosures;
Packit a4aae4
      re_token_t *new_nodes;
Packit a4aae4
Packit a4aae4
      /* Avoid overflows in realloc.  */
Packit a4aae4
      const size_t max_object_size = MAX (sizeof (re_token_t),
Packit a4aae4
					  MAX (sizeof (re_node_set),
Packit a4aae4
					       sizeof (Idx)));
Packit a4aae4
      if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_nodes_alloc, 0))
Packit a4aae4
	return -1;
Packit a4aae4
Packit a4aae4
      new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
Packit a4aae4
      if (BE (new_nodes == NULL, 0))
Packit a4aae4
	return -1;
Packit a4aae4
      dfa->nodes = new_nodes;
Packit a4aae4
      new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
Packit a4aae4
      new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
Packit a4aae4
      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
Packit a4aae4
      new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
Packit a4aae4
      if (BE (new_nexts == NULL || new_indices == NULL
Packit a4aae4
	      || new_edests == NULL || new_eclosures == NULL, 0))
Packit a4aae4
	{
Packit a4aae4
	   re_free (new_nexts);
Packit a4aae4
	   re_free (new_indices);
Packit a4aae4
	   re_free (new_edests);
Packit a4aae4
	   re_free (new_eclosures);
Packit a4aae4
	   return -1;
Packit a4aae4
	}
Packit a4aae4
      dfa->nexts = new_nexts;
Packit a4aae4
      dfa->org_indices = new_indices;
Packit a4aae4
      dfa->edests = new_edests;
Packit a4aae4
      dfa->eclosures = new_eclosures;
Packit a4aae4
      dfa->nodes_alloc = new_nodes_alloc;
Packit a4aae4
    }
Packit a4aae4
  dfa->nodes[dfa->nodes_len] = token;
Packit a4aae4
  dfa->nodes[dfa->nodes_len].constraint = 0;
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
  dfa->nodes[dfa->nodes_len].accept_mb =
Packit a4aae4
    ((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
Packit a4aae4
     || token.type == COMPLEX_BRACKET);
Packit a4aae4
#endif
Packit a4aae4
  dfa->nexts[dfa->nodes_len] = -1;
Packit a4aae4
  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
Packit a4aae4
  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
Packit a4aae4
  return dfa->nodes_len++;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
static re_hashval_t
Packit a4aae4
internal_function
Packit a4aae4
calc_state_hash (const re_node_set *nodes, unsigned int context)
Packit a4aae4
{
Packit a4aae4
  re_hashval_t hash = nodes->nelem + context;
Packit a4aae4
  Idx i;
Packit a4aae4
  for (i = 0 ; i < nodes->nelem ; i++)
Packit a4aae4
    hash += nodes->elems[i];
Packit a4aae4
  return hash;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Search for the state whose node_set is equivalent to NODES.
Packit a4aae4
   Return the pointer to the state, if we found it in the DFA.
Packit a4aae4
   Otherwise create the new one and return it.  In case of an error
Packit a4aae4
   return NULL and set the error code in ERR.
Packit a4aae4
   Note: - We assume NULL as the invalid state, then it is possible that
Packit a4aae4
	   return value is NULL and ERR is REG_NOERROR.
Packit a4aae4
	 - We never return non-NULL value in case of any errors, it is for
Packit a4aae4
	   optimization.  */
Packit a4aae4
Packit a4aae4
static re_dfastate_t *
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
Packit a4aae4
		  const re_node_set *nodes)
Packit a4aae4
{
Packit a4aae4
  re_hashval_t hash;
Packit a4aae4
  re_dfastate_t *new_state;
Packit a4aae4
  struct re_state_table_entry *spot;
Packit a4aae4
  Idx i;
Packit a4aae4
#if defined GCC_LINT || defined lint
Packit a4aae4
  /* Suppress bogus uninitialized-variable warnings.  */
Packit a4aae4
  *err = REG_NOERROR;
Packit a4aae4
#endif
Packit a4aae4
  if (BE (nodes->nelem == 0, 0))
Packit a4aae4
    {
Packit a4aae4
      *err = REG_NOERROR;
Packit a4aae4
      return NULL;
Packit a4aae4
    }
Packit a4aae4
  hash = calc_state_hash (nodes, 0);
Packit a4aae4
  spot = dfa->state_table + (hash & dfa->state_hash_mask);
Packit a4aae4
Packit a4aae4
  for (i = 0 ; i < spot->num ; i++)
Packit a4aae4
    {
Packit a4aae4
      re_dfastate_t *state = spot->array[i];
Packit a4aae4
      if (hash != state->hash)
Packit a4aae4
	continue;
Packit a4aae4
      if (re_node_set_compare (&state->nodes, nodes))
Packit a4aae4
	return state;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  /* There are no appropriate state in the dfa, create the new one.  */
Packit a4aae4
  new_state = create_ci_newstate (dfa, nodes, hash);
Packit a4aae4
  if (BE (new_state == NULL, 0))
Packit a4aae4
    *err = REG_ESPACE;
Packit a4aae4
Packit a4aae4
  return new_state;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Search for the state whose node_set is equivalent to NODES and
Packit a4aae4
   whose context is equivalent to CONTEXT.
Packit a4aae4
   Return the pointer to the state, if we found it in the DFA.
Packit a4aae4
   Otherwise create the new one and return it.  In case of an error
Packit a4aae4
   return NULL and set the error code in ERR.
Packit a4aae4
   Note: - We assume NULL as the invalid state, then it is possible that
Packit a4aae4
	   return value is NULL and ERR is REG_NOERROR.
Packit a4aae4
	 - We never return non-NULL value in case of any errors, it is for
Packit a4aae4
	   optimization.  */
Packit a4aae4
Packit a4aae4
static re_dfastate_t *
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
Packit a4aae4
			  const re_node_set *nodes, unsigned int context)
Packit a4aae4
{
Packit a4aae4
  re_hashval_t hash;
Packit a4aae4
  re_dfastate_t *new_state;
Packit a4aae4
  struct re_state_table_entry *spot;
Packit a4aae4
  Idx i;
Packit a4aae4
#if defined GCC_LINT || defined lint
Packit a4aae4
  /* Suppress bogus uninitialized-variable warnings.  */
Packit a4aae4
  *err = REG_NOERROR;
Packit a4aae4
#endif
Packit a4aae4
  if (nodes->nelem == 0)
Packit a4aae4
    {
Packit a4aae4
      *err = REG_NOERROR;
Packit a4aae4
      return NULL;
Packit a4aae4
    }
Packit a4aae4
  hash = calc_state_hash (nodes, context);
Packit a4aae4
  spot = dfa->state_table + (hash & dfa->state_hash_mask);
Packit a4aae4
Packit a4aae4
  for (i = 0 ; i < spot->num ; i++)
Packit a4aae4
    {
Packit a4aae4
      re_dfastate_t *state = spot->array[i];
Packit a4aae4
      if (state->hash == hash
Packit a4aae4
	  && state->context == context
Packit a4aae4
	  && re_node_set_compare (state->entrance_nodes, nodes))
Packit a4aae4
	return state;
Packit a4aae4
    }
Packit a4aae4
  /* There are no appropriate state in 'dfa', create the new one.  */
Packit a4aae4
  new_state = create_cd_newstate (dfa, nodes, context, hash);
Packit a4aae4
  if (BE (new_state == NULL, 0))
Packit a4aae4
    *err = REG_ESPACE;
Packit a4aae4
Packit a4aae4
  return new_state;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Finish initialization of the new state NEWSTATE, and using its hash value
Packit a4aae4
   HASH put in the appropriate bucket of DFA's state table.  Return value
Packit a4aae4
   indicates the error code if failed.  */
Packit a4aae4
Packit a4aae4
static reg_errcode_t
Packit a4aae4
__attribute_warn_unused_result__
Packit a4aae4
register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
Packit a4aae4
		re_hashval_t hash)
Packit a4aae4
{
Packit a4aae4
  struct re_state_table_entry *spot;
Packit a4aae4
  reg_errcode_t err;
Packit a4aae4
  Idx i;
Packit a4aae4
Packit a4aae4
  newstate->hash = hash;
Packit a4aae4
  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
Packit a4aae4
  if (BE (err != REG_NOERROR, 0))
Packit a4aae4
    return REG_ESPACE;
Packit a4aae4
  for (i = 0; i < newstate->nodes.nelem; i++)
Packit a4aae4
    {
Packit a4aae4
      Idx elem = newstate->nodes.elems[i];
Packit a4aae4
      if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
Packit a4aae4
	if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
Packit a4aae4
	  return REG_ESPACE;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  spot = dfa->state_table + (hash & dfa->state_hash_mask);
Packit a4aae4
  if (BE (spot->alloc <= spot->num, 0))
Packit a4aae4
    {
Packit a4aae4
      Idx new_alloc = 2 * spot->num + 2;
Packit a4aae4
      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
Packit a4aae4
					      new_alloc);
Packit a4aae4
      if (BE (new_array == NULL, 0))
Packit a4aae4
	return REG_ESPACE;
Packit a4aae4
      spot->array = new_array;
Packit a4aae4
      spot->alloc = new_alloc;
Packit a4aae4
    }
Packit a4aae4
  spot->array[spot->num++] = newstate;
Packit a4aae4
  return REG_NOERROR;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
static void
Packit a4aae4
free_state (re_dfastate_t *state)
Packit a4aae4
{
Packit a4aae4
  re_node_set_free (&state->non_eps_nodes);
Packit a4aae4
  re_node_set_free (&state->inveclosure);
Packit a4aae4
  if (state->entrance_nodes != &state->nodes)
Packit a4aae4
    {
Packit a4aae4
      re_node_set_free (state->entrance_nodes);
Packit a4aae4
      re_free (state->entrance_nodes);
Packit a4aae4
    }
Packit a4aae4
  re_node_set_free (&state->nodes);
Packit a4aae4
  re_free (state->word_trtable);
Packit a4aae4
  re_free (state->trtable);
Packit a4aae4
  re_free (state);
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Create the new state which is independent of contexts.
Packit a4aae4
   Return the new state if succeeded, otherwise return NULL.  */
Packit a4aae4
Packit a4aae4
static re_dfastate_t *
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
Packit a4aae4
		    re_hashval_t hash)
Packit a4aae4
{
Packit a4aae4
  Idx i;
Packit a4aae4
  reg_errcode_t err;
Packit a4aae4
  re_dfastate_t *newstate;
Packit a4aae4
Packit a4aae4
  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
Packit a4aae4
  if (BE (newstate == NULL, 0))
Packit a4aae4
    return NULL;
Packit a4aae4
  err = re_node_set_init_copy (&newstate->nodes, nodes);
Packit a4aae4
  if (BE (err != REG_NOERROR, 0))
Packit a4aae4
    {
Packit a4aae4
      re_free (newstate);
Packit a4aae4
      return NULL;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  newstate->entrance_nodes = &newstate->nodes;
Packit a4aae4
  for (i = 0 ; i < nodes->nelem ; i++)
Packit a4aae4
    {
Packit a4aae4
      re_token_t *node = dfa->nodes + nodes->elems[i];
Packit a4aae4
      re_token_type_t type = node->type;
Packit a4aae4
      if (type == CHARACTER && !node->constraint)
Packit a4aae4
	continue;
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
      newstate->accept_mb |= node->accept_mb;
Packit a4aae4
#endif /* RE_ENABLE_I18N */
Packit a4aae4
Packit a4aae4
      /* If the state has the halt node, the state is a halt state.  */
Packit a4aae4
      if (type == END_OF_RE)
Packit a4aae4
	newstate->halt = 1;
Packit a4aae4
      else if (type == OP_BACK_REF)
Packit a4aae4
	newstate->has_backref = 1;
Packit a4aae4
      else if (type == ANCHOR || node->constraint)
Packit a4aae4
	newstate->has_constraint = 1;
Packit a4aae4
    }
Packit a4aae4
  err = register_state (dfa, newstate, hash);
Packit a4aae4
  if (BE (err != REG_NOERROR, 0))
Packit a4aae4
    {
Packit a4aae4
      free_state (newstate);
Packit a4aae4
      newstate = NULL;
Packit a4aae4
    }
Packit a4aae4
  return newstate;
Packit a4aae4
}
Packit a4aae4
Packit a4aae4
/* Create the new state which is depend on the context CONTEXT.
Packit a4aae4
   Return the new state if succeeded, otherwise return NULL.  */
Packit a4aae4
Packit a4aae4
static re_dfastate_t *
Packit a4aae4
internal_function __attribute_warn_unused_result__
Packit a4aae4
create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
Packit a4aae4
		    unsigned int context, re_hashval_t hash)
Packit a4aae4
{
Packit a4aae4
  Idx i, nctx_nodes = 0;
Packit a4aae4
  reg_errcode_t err;
Packit a4aae4
  re_dfastate_t *newstate;
Packit a4aae4
Packit a4aae4
  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
Packit a4aae4
  if (BE (newstate == NULL, 0))
Packit a4aae4
    return NULL;
Packit a4aae4
  err = re_node_set_init_copy (&newstate->nodes, nodes);
Packit a4aae4
  if (BE (err != REG_NOERROR, 0))
Packit a4aae4
    {
Packit a4aae4
      re_free (newstate);
Packit a4aae4
      return NULL;
Packit a4aae4
    }
Packit a4aae4
Packit a4aae4
  newstate->context = context;
Packit a4aae4
  newstate->entrance_nodes = &newstate->nodes;
Packit a4aae4
Packit a4aae4
  for (i = 0 ; i < nodes->nelem ; i++)
Packit a4aae4
    {
Packit a4aae4
      re_token_t *node = dfa->nodes + nodes->elems[i];
Packit a4aae4
      re_token_type_t type = node->type;
Packit a4aae4
      unsigned int constraint = node->constraint;
Packit a4aae4
Packit a4aae4
      if (type == CHARACTER && !constraint)
Packit a4aae4
	continue;
Packit a4aae4
#ifdef RE_ENABLE_I18N
Packit a4aae4
      newstate->accept_mb |= node->accept_mb;
Packit a4aae4
#endif /* RE_ENABLE_I18N */
Packit a4aae4
Packit a4aae4
      /* If the state has the halt node, the state is a halt state.  */
Packit a4aae4
      if (type == END_OF_RE)
Packit a4aae4
	newstate->halt = 1;
Packit a4aae4
      else if (type == OP_BACK_REF)
Packit a4aae4
	newstate->has_backref = 1;
Packit a4aae4
Packit a4aae4
      if (constraint)
Packit a4aae4
	{
Packit a4aae4
	  if (newstate->entrance_nodes == &newstate->nodes)
Packit a4aae4
	    {
Packit a4aae4
	      newstate->entrance_nodes = re_malloc (re_node_set, 1);
Packit a4aae4
	      if (BE (newstate->entrance_nodes == NULL, 0))
Packit a4aae4
		{
Packit a4aae4
		  free_state (newstate);
Packit a4aae4
		  return NULL;
Packit a4aae4
		}
Packit a4aae4
	      if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
Packit a4aae4
		  != REG_NOERROR)
Packit a4aae4
		return NULL;
Packit a4aae4
	      nctx_nodes = 0;
Packit a4aae4
	      newstate->has_constraint = 1;
Packit a4aae4
	    }
Packit a4aae4
Packit a4aae4
	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
Packit a4aae4
	    {
Packit a4aae4
	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
Packit a4aae4
	      ++nctx_nodes;
Packit a4aae4
	    }
Packit a4aae4
	}
Packit a4aae4
    }
Packit a4aae4
  err = register_state (dfa, newstate, hash);
Packit a4aae4
  if (BE (err != REG_NOERROR, 0))
Packit a4aae4
    {
Packit a4aae4
      free_state (newstate);
Packit a4aae4
      newstate = NULL;
Packit a4aae4
    }
Packit a4aae4
  return  newstate;
Packit a4aae4
}