Blob Blame History Raw
/* shmbutil.h -- utility functions for multibyte characters. */

/* Copyright (C) 2002-2004 Free Software Foundation, Inc.

   This file is part of GNU Bash, the Bourne Again SHell.

   Bash is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   Bash is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bash.  If not, see <http://www.gnu.org/licenses/>.
*/
                                 
#if !defined (_SH_MBUTIL_H_)
#define _SH_MBUTIL_H_

#include "stdc.h"

/* Include config.h for HANDLE_MULTIBYTE */
#include <config.h>

#if defined (HANDLE_MULTIBYTE)
#include "shmbchar.h"

extern size_t xmbsrtowcs __P((wchar_t *, const char **, size_t, mbstate_t *));
extern size_t xdupmbstowcs __P((wchar_t **, char ***, const char *));

extern size_t mbstrlen __P((const char *));

extern char *xstrchr __P((const char *, int));

extern int locale_mb_cur_max;	/* XXX */

#ifndef MB_INVALIDCH
#define MB_INVALIDCH(x)		((x) == (size_t)-1 || (x) == (size_t)-2)
#define MB_NULLWCH(x)		((x) == 0)
#endif

#define MBSLEN(s)	(((s) && (s)[0]) ? ((s)[1] ? mbstrlen (s) : 1) : 0)
#define MB_STRLEN(s)	((MB_CUR_MAX > 1) ? MBSLEN (s) : STRLEN (s))

#define MBLEN(s, n)	((MB_CUR_MAX > 1) ? mblen ((s), (n)) : 1)
#define MBRLEN(s, n, p)	((MB_CUR_MAX > 1) ? mbrlen ((s), (n), (p)) : 1)

#else /* !HANDLE_MULTIBYTE */

#undef MB_LEN_MAX
#undef MB_CUR_MAX

#define MB_LEN_MAX	1
#define MB_CUR_MAX	1

#undef xstrchr
#define xstrchr(s, c)	strchr(s, c)

#ifndef MB_INVALIDCH
#define MB_INVALIDCH(x)		(0)
#define MB_NULLWCH(x)		(0)
#endif

#define MB_STRLEN(s)		(STRLEN(s))

#define MBLEN(s, n)		1
#define MBRLEN(s, n, p)		1

#ifndef wchar_t
#  define wchar_t	int
#endif

#endif /* !HANDLE_MULTIBYTE */

/* Declare and initialize a multibyte state.  Call must be terminated
   with `;'. */
#if defined (HANDLE_MULTIBYTE)
#  define DECLARE_MBSTATE \
	mbstate_t state; \
	memset (&state, '\0', sizeof (mbstate_t))
#else
#  define DECLARE_MBSTATE
#endif  /* !HANDLE_MULTIBYTE */

/* Initialize or reinitialize a multibyte state named `state'.  Call must be
   terminated with `;'. */
#if defined (HANDLE_MULTIBYTE)
#  define INITIALIZE_MBSTATE memset (&state, '\0', sizeof (mbstate_t))
#else
#  define INITIALIZE_MBSTATE
#endif  /* !HANDLE_MULTIBYTE */

/* Advance one (possibly multi-byte) character in string _STR of length
   _STRSIZE, starting at index _I.  STATE must have already been declared. */
#if defined (HANDLE_MULTIBYTE)
#  define ADVANCE_CHAR(_str, _strsize, _i) \
    do \
      { \
	if (locale_mb_cur_max > 1) \
	  { \
	    mbstate_t state_bak; \
	    size_t mblength; \
	    int _f; \
\
	    _f = is_basic ((_str)[_i]); \
	    if (_f) \
	      mblength = 1; \
	    else \
	      { \
	        state_bak = state; \
	        mblength = mbrlen ((_str) + (_i), (_strsize) - (_i), &state); \
	      } \
\
	    if (mblength == (size_t)-2 || mblength == (size_t)-1) \
	      { \
		state = state_bak; \
		(_i)++; \
	      } \
	    else if (mblength == 0) \
	      (_i)++; \
	    else \
	      (_i) += mblength; \
	  } \
	else \
	  (_i)++; \
      } \
    while (0)
#else
#  define ADVANCE_CHAR(_str, _strsize, _i)	(_i)++
#endif  /* !HANDLE_MULTIBYTE */

/* Advance one (possibly multibyte) character in the string _STR of length
   _STRSIZE.
   SPECIAL:  assume that _STR will be incremented by 1 after this call. */
#if defined (HANDLE_MULTIBYTE)
#  define ADVANCE_CHAR_P(_str, _strsize) \
    do \
      { \
	if (locale_mb_cur_max > 1) \
	  { \
	    mbstate_t state_bak; \
	    size_t mblength; \
	    int _f; \
\
	    _f = is_basic (*(_str)); \
	    if (_f) \
	      mblength = 1; \
	    else \
	      { \
		state_bak = state; \
		mblength = mbrlen ((_str), (_strsize), &state); \
	      } \
\
	    if (mblength == (size_t)-2 || mblength == (size_t)-1) \
	      { \
		state = state_bak; \
		mblength = 1; \
	      } \
	    else \
	      (_str) += (mblength < 1) ? 0 : (mblength - 1); \
	  } \
      } \
    while (0)
#else
#  define ADVANCE_CHAR_P(_str, _strsize)
#endif  /* !HANDLE_MULTIBYTE */

/* Back up one (possibly multi-byte) character in string _STR of length
   _STRSIZE, starting at index _I.  STATE must have already been declared. */
#if defined (HANDLE_MULTIBYTE)
#  define BACKUP_CHAR(_str, _strsize, _i) \
    do \
      { \
	if (locale_mb_cur_max > 1) \
	  { \
	    mbstate_t state_bak; \
	    size_t mblength; \
	    int _x, _p; /* _x == temp index into string, _p == prev index */ \
\
	    _x = _p = 0; \
	    while (_x < (_i)) \
	      { \
	        state_bak = state; \
	        mblength = mbrlen ((_str) + (_x), (_strsize) - (_x), &state); \
\
		if (mblength == (size_t)-2 || mblength == (size_t)-1) \
		  { \
		    state = state_bak; \
		    _x++; \
		  } \
		else if (mblength == 0) \
		  _x++; \
		else \
		  { \
		    _p = _x; /* _p == start of prev mbchar */ \
		    _x += mblength; \
		  } \
	      } \
	    (_i) = _p; \
	  } \
	else \
	  (_i)--; \
      } \
    while (0)
#else
#  define BACKUP_CHAR(_str, _strsize, _i)	(_i)--
#endif  /* !HANDLE_MULTIBYTE */

/* Back up one (possibly multibyte) character in the string _BASE of length
   _STRSIZE starting at _STR (_BASE <= _STR <= (_BASE + _STRSIZE) ).
   SPECIAL: DO NOT assume that _STR will be decremented by 1 after this call. */
#if defined (HANDLE_MULTIBYTE)
#  define BACKUP_CHAR_P(_base, _strsize, _str) \
    do \
      { \
	if (locale_mb_cur_max > 1) \
	  { \
	    mbstate_t state_bak; \
	    size_t mblength; \
	    char *_x, _p; /* _x == temp pointer into string, _p == prev pointer */ \
\
	    _x = _p = _base; \
	    while (_x < (_str)) \
	      { \
	        state_bak = state; \
	        mblength = mbrlen (_x, (_strsize) - _x, &state); \
\
		if (mblength == (size_t)-2 || mblength == (size_t)-1) \
		  { \
		    state = state_bak; \
		    _x++; \
		  } \
		else if (mblength == 0) \
		  _x++; \
		else \
		  { \
		    _p = _x; /* _p == start of prev mbchar */ \
		    _x += mblength; \
		  } \
	      } \
	    (_str) = _p; \
	  } \
	else \
	  (_str)--; \
      } \
    while (0)
#else
#  define BACKUP_CHAR_P(_base, _strsize, _str) (_str)--
#endif  /* !HANDLE_MULTIBYTE */

/* Copy a single character from the string _SRC to the string _DST.
   _SRCEND is a pointer to the end of _SRC. */
#if defined (HANDLE_MULTIBYTE)
#  define COPY_CHAR_P(_dst, _src, _srcend) \
    do \
      { \
	if (locale_mb_cur_max > 1) \
	  { \
	    mbstate_t state_bak; \
	    size_t mblength; \
	    int _k; \
\
	    _k = is_basic (*(_src)); \
	    if (_k) \
	      mblength = 1; \
	    else \
	      { \
		state_bak = state; \
		mblength = mbrlen ((_src), (_srcend) - (_src), &state); \
	      } \
	    if (mblength == (size_t)-2 || mblength == (size_t)-1) \
	      { \
		state = state_bak; \
		mblength = 1; \
	      } \
	    else \
	      mblength = (mblength < 1) ? 1 : mblength; \
\
	    for (_k = 0; _k < mblength; _k++) \
	      *(_dst)++ = *(_src)++; \
	  } \
	else \
	  *(_dst)++ = *(_src)++; \
      } \
    while (0)
#else
#  define COPY_CHAR_P(_dst, _src, _srcend)	*(_dst)++ = *(_src)++
#endif  /* !HANDLE_MULTIBYTE */

/* Copy a single character from the string _SRC at index _SI to the string
   _DST at index _DI.  _SRCEND is a pointer to the end of _SRC. */
#if defined (HANDLE_MULTIBYTE)
#  define COPY_CHAR_I(_dst, _di, _src, _srcend, _si) \
    do \
      { \
	if (locale_mb_cur_max > 1) \
	  { \
	    mbstate_t state_bak; \
	    size_t mblength; \
	    int _k; \
\
	    _k = is_basic (*((_src) + (_si))); \
	    if (_k) \
	      mblength = 1; \
	    else \
	      {\
		state_bak = state; \
		mblength = mbrlen ((_src) + (_si), (_srcend) - ((_src)+(_si)), &state); \
	      } \
	    if (mblength == (size_t)-2 || mblength == (size_t)-1) \
	      { \
		state = state_bak; \
		mblength = 1; \
	      } \
	    else \
	      mblength = (mblength < 1) ? 1 : mblength; \
\
	    for (_k = 0; _k < mblength; _k++) \
	      _dst[_di++] = _src[_si++]; \
	  } \
	else \
	  _dst[_di++] = _src[_si++]; \
      } \
    while (0)
#else
#  define COPY_CHAR_I(_dst, _di, _src, _srcend, _si)	_dst[_di++] = _src[_si++]
#endif  /* !HANDLE_MULTIBYTE */

/****************************************************************
 *								*
 * The following are only guaranteed to work in subst.c		*
 *								*
 ****************************************************************/

#if defined (HANDLE_MULTIBYTE)
#  define SCOPY_CHAR_I(_dst, _escchar, _sc, _src, _si, _slen) \
    do \
      { \
	if (locale_mb_cur_max > 1) \
	  { \
	    mbstate_t state_bak; \
	    size_t mblength; \
	    int _i; \
\
	    _i = is_basic (*((_src) + (_si))); \
	    if (_i) \
	      mblength = 1; \
	    else \
	      { \
		state_bak = state; \
		mblength = mbrlen ((_src) + (_si), (_slen) - (_si), &state); \
	      } \
	    if (mblength == (size_t)-2 || mblength == (size_t)-1) \
	      { \
		state = state_bak; \
		mblength = 1; \
	      } \
	    else \
	      mblength = (mblength < 1) ? 1 : mblength; \
\
	    temp = xmalloc (mblength + 2); \
	    temp[0] = _escchar; \
	    for (_i = 0; _i < mblength; _i++) \
	      temp[_i + 1] = _src[_si++]; \
	    temp[mblength + 1] = '\0'; \
\
	    goto add_string; \
	  } \
	else \
	  { \
	    _dst[0] = _escchar; \
	    _dst[1] = _sc; \
	  } \
      } \
    while (0)
#else
#  define SCOPY_CHAR_I(_dst, _escchar, _sc, _src, _si, _slen) \
    _dst[0] = _escchar; \
    _dst[1] = _sc
#endif  /* !HANDLE_MULTIBYTE */

#if defined (HANDLE_MULTIBYTE)
#  define SCOPY_CHAR_M(_dst, _src, _srcend, _si) \
    do \
      { \
	if (locale_mb_cur_max > 1) \
	  { \
	    mbstate_t state_bak; \
	    size_t mblength; \
	    int _i; \
\
	    _i = is_basic (*((_src) + (_si))); \
	    if (_i) \
	      mblength = 1; \
	    else \
	      { \
		state_bak = state; \
		mblength = mbrlen ((_src) + (_si), (_srcend) - ((_src) + (_si)), &state); \
	      } \
	    if (mblength == (size_t)-2 || mblength == (size_t)-1) \
	      { \
		state = state_bak; \
		mblength = 1; \
	      } \
	    else \
	      mblength = (mblength < 1) ? 1 : mblength; \
\
	    FASTCOPY(((_src) + (_si)), (_dst), mblength); \
\
	    (_dst) += mblength; \
	    (_si) += mblength; \
	  } \
	else \
	  { \
	    *(_dst)++ = _src[(_si)]; \
	    (_si)++; \
	  } \
      } \
    while (0)
#else
#  define SCOPY_CHAR_M(_dst, _src, _srcend, _si) \
	*(_dst)++ = _src[(_si)]; \
	(_si)++
#endif  /* !HANDLE_MULTIBYTE */

#if HANDLE_MULTIBYTE
#  define SADD_MBCHAR(_dst, _src, _si, _srcsize) \
    do \
      { \
	if (locale_mb_cur_max > 1) \
	  { \
	    int i; \
	    mbstate_t state_bak; \
	    size_t mblength; \
\
	    i = is_basic (*((_src) + (_si))); \
	    if (i) \
	      mblength = 1; \
	    else \
	      { \
		state_bak = state; \
		mblength = mbrlen ((_src) + (_si), (_srcsize) - (_si), &state); \
	      } \
	    if (mblength == (size_t)-1 || mblength == (size_t)-2) \
	      { \
		state = state_bak; \
		mblength = 1; \
	      } \
	    if (mblength < 1) \
	      mblength = 1; \
\
	    _dst = (char *)xmalloc (mblength + 1); \
	    for (i = 0; i < mblength; i++) \
	      (_dst)[i] = (_src)[(_si)++]; \
	    (_dst)[mblength] = '\0'; \
\
	    goto add_string; \
	  } \
      } \
    while (0)

#else
#  define SADD_MBCHAR(_dst, _src, _si, _srcsize)
#endif

/* Watch out when using this -- it's just straight textual substitution */
#if defined (HANDLE_MULTIBYTE)
#  define SADD_MBQCHAR_BODY(_dst, _src, _si, _srcsize) \
\
	    int i; \
	    mbstate_t state_bak; \
	    size_t mblength; \
\
	    i = is_basic (*((_src) + (_si))); \
	    if (i) \
	      mblength = 1; \
	    else \
	      { \
		state_bak = state; \
		mblength = mbrlen ((_src) + (_si), (_srcsize) - (_si), &state); \
	      } \
	    if (mblength == (size_t)-1 || mblength == (size_t)-2) \
	      { \
		state = state_bak; \
		mblength = 1; \
	      } \
	    if (mblength < 1) \
	      mblength = 1; \
\
	    (_dst) = (char *)xmalloc (mblength + 2); \
	    (_dst)[0] = CTLESC; \
	    for (i = 0; i < mblength; i++) \
	      (_dst)[i+1] = (_src)[(_si)++]; \
	    (_dst)[mblength+1] = '\0'; \
\
	    goto add_string

#  define SADD_MBCHAR_BODY(_dst, _src, _si, _srcsize) \
\
	    int i; \
	    mbstate_t state_bak; \
	    size_t mblength; \
\
	    i = is_basic (*((_src) + (_si))); \
	    if (i) \
	      mblength = 1; \
	    else \
	      { \
		state_bak = state; \
		mblength = mbrlen ((_src) + (_si), (_srcsize) - (_si), &state); \
	      } \
	    if (mblength == (size_t)-1 || mblength == (size_t)-2) \
	      { \
		state = state_bak; \
		mblength = 1; \
	      } \
	    if (mblength < 1) \
	      mblength = 1; \
\
	    (_dst) = (char *)xmalloc (mblength + 1); \
	    for (i = 0; i < mblength; i++) \
	      (_dst)[i+1] = (_src)[(_si)++]; \
	    (_dst)[mblength+1] = '\0'; \
\
	    goto add_string

#endif /* HANDLE_MULTIBYTE */
#endif /* _SH_MBUTIL_H_ */