Blame lib/mbchar.h

Packit 709fb3
/* Multibyte character data type.
Packit 709fb3
   Copyright (C) 2001, 2005-2007, 2009-2017 Free Software Foundation, Inc.
Packit 709fb3
Packit 709fb3
   This program is free software: you can redistribute it and/or modify
Packit 709fb3
   it under the terms of the GNU General Public License as published by
Packit 709fb3
   the Free Software Foundation; either version 3 of the License, or
Packit 709fb3
   (at your option) any later version.
Packit 709fb3
Packit 709fb3
   This program is distributed in the hope that it will be useful,
Packit 709fb3
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 709fb3
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit 709fb3
   GNU General Public License for more details.
Packit 709fb3
Packit 709fb3
   You should have received a copy of the GNU General Public License
Packit 709fb3
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
Packit 709fb3
Packit 709fb3
/* Written by Bruno Haible <bruno@clisp.org>.  */
Packit 709fb3
Packit 709fb3
/* A multibyte character is a short subsequence of a char* string,
Packit 709fb3
   representing a single wide character.
Packit 709fb3
Packit 709fb3
   We use multibyte characters instead of wide characters because of
Packit 709fb3
   the following goals:
Packit 709fb3
   1) correct multibyte handling, i.e. operate according to the LC_CTYPE
Packit 709fb3
      locale,
Packit 709fb3
   2) ease of maintenance, i.e. the maintainer need not know all details
Packit 709fb3
      of the ISO C 99 standard,
Packit 709fb3
   3) don't fail grossly if the input is not in the encoding set by the
Packit 709fb3
      locale, because often different encodings are in use in the same
Packit 709fb3
      countries (ISO-8859-1/UTF-8, EUC-JP/Shift_JIS, ...),
Packit 709fb3
   4) fast in the case of ASCII characters,
Packit 709fb3
   5) portability, i.e. don't make unportable assumptions about wchar_t.
Packit 709fb3
Packit 709fb3
   Multibyte characters are only accessed through the mb* macros.
Packit 709fb3
Packit 709fb3
   mb_ptr (mbc)
Packit 709fb3
     returns a pointer to the beginning of the multibyte sequence.
Packit 709fb3
Packit 709fb3
   mb_len (mbc)
Packit 709fb3
     returns the number of bytes occupied by the multibyte sequence.
Packit 709fb3
     Always > 0.
Packit 709fb3
Packit 709fb3
   mb_iseq (mbc, sc)
Packit 709fb3
     returns true if mbc is the standard ASCII character sc.
Packit 709fb3
Packit 709fb3
   mb_isnul (mbc)
Packit 709fb3
     returns true if mbc is the nul character.
Packit 709fb3
Packit 709fb3
   mb_cmp (mbc1, mbc2)
Packit 709fb3
     returns a positive, zero, or negative value depending on whether mbc1
Packit 709fb3
     sorts after, same or before mbc2.
Packit 709fb3
Packit 709fb3
   mb_casecmp (mbc1, mbc2)
Packit 709fb3
     returns a positive, zero, or negative value depending on whether mbc1
Packit 709fb3
     sorts after, same or before mbc2, modulo upper/lowercase conversion.
Packit 709fb3
Packit 709fb3
   mb_equal (mbc1, mbc2)
Packit 709fb3
     returns true if mbc1 and mbc2 are equal.
Packit 709fb3
Packit 709fb3
   mb_caseequal (mbc1, mbc2)
Packit 709fb3
     returns true if mbc1 and mbc2 are equal modulo upper/lowercase conversion.
Packit 709fb3
Packit 709fb3
   mb_isalnum (mbc)
Packit 709fb3
     returns true if mbc is alphanumeric.
Packit 709fb3
Packit 709fb3
   mb_isalpha (mbc)
Packit 709fb3
     returns true if mbc is alphabetic.
Packit 709fb3
Packit 709fb3
   mb_isascii (mbc)
Packit 709fb3
     returns true if mbc is plain ASCII.
Packit 709fb3
Packit 709fb3
   mb_isblank (mbc)
Packit 709fb3
     returns true if mbc is a blank.
Packit 709fb3
Packit 709fb3
   mb_iscntrl (mbc)
Packit 709fb3
     returns true if mbc is a control character.
Packit 709fb3
Packit 709fb3
   mb_isdigit (mbc)
Packit 709fb3
     returns true if mbc is a decimal digit.
Packit 709fb3
Packit 709fb3
   mb_isgraph (mbc)
Packit 709fb3
     returns true if mbc is a graphic character.
Packit 709fb3
Packit 709fb3
   mb_islower (mbc)
Packit 709fb3
     returns true if mbc is lowercase.
Packit 709fb3
Packit 709fb3
   mb_isprint (mbc)
Packit 709fb3
     returns true if mbc is a printable character.
Packit 709fb3
Packit 709fb3
   mb_ispunct (mbc)
Packit 709fb3
     returns true if mbc is a punctuation character.
Packit 709fb3
Packit 709fb3
   mb_isspace (mbc)
Packit 709fb3
     returns true if mbc is a space character.
Packit 709fb3
Packit 709fb3
   mb_isupper (mbc)
Packit 709fb3
     returns true if mbc is uppercase.
Packit 709fb3
Packit 709fb3
   mb_isxdigit (mbc)
Packit 709fb3
     returns true if mbc is a hexadecimal digit.
Packit 709fb3
Packit 709fb3
   mb_width (mbc)
Packit 709fb3
     returns the number of columns on the output device occupied by mbc.
Packit 709fb3
     Always >= 0.
Packit 709fb3
Packit 709fb3
   mb_putc (mbc, stream)
Packit 709fb3
     outputs mbc on stream, a byte oriented FILE stream opened for output.
Packit 709fb3
Packit 709fb3
   mb_setascii (&mbc, sc)
Packit 709fb3
     assigns the standard ASCII character sc to mbc.
Packit 709fb3
Packit 709fb3
   mb_copy (&destmbc, &srcmbc)
Packit 709fb3
     copies srcmbc to destmbc.
Packit 709fb3
Packit 709fb3
   Here are the function prototypes of the macros.
Packit 709fb3
Packit 709fb3
   extern const char *  mb_ptr (const mbchar_t mbc);
Packit 709fb3
   extern size_t        mb_len (const mbchar_t mbc);
Packit 709fb3
   extern bool          mb_iseq (const mbchar_t mbc, char sc);
Packit 709fb3
   extern bool          mb_isnul (const mbchar_t mbc);
Packit 709fb3
   extern int           mb_cmp (const mbchar_t mbc1, const mbchar_t mbc2);
Packit 709fb3
   extern int           mb_casecmp (const mbchar_t mbc1, const mbchar_t mbc2);
Packit 709fb3
   extern bool          mb_equal (const mbchar_t mbc1, const mbchar_t mbc2);
Packit 709fb3
   extern bool          mb_caseequal (const mbchar_t mbc1, const mbchar_t mbc2);
Packit 709fb3
   extern bool          mb_isalnum (const mbchar_t mbc);
Packit 709fb3
   extern bool          mb_isalpha (const mbchar_t mbc);
Packit 709fb3
   extern bool          mb_isascii (const mbchar_t mbc);
Packit 709fb3
   extern bool          mb_isblank (const mbchar_t mbc);
Packit 709fb3
   extern bool          mb_iscntrl (const mbchar_t mbc);
Packit 709fb3
   extern bool          mb_isdigit (const mbchar_t mbc);
Packit 709fb3
   extern bool          mb_isgraph (const mbchar_t mbc);
Packit 709fb3
   extern bool          mb_islower (const mbchar_t mbc);
Packit 709fb3
   extern bool          mb_isprint (const mbchar_t mbc);
Packit 709fb3
   extern bool          mb_ispunct (const mbchar_t mbc);
Packit 709fb3
   extern bool          mb_isspace (const mbchar_t mbc);
Packit 709fb3
   extern bool          mb_isupper (const mbchar_t mbc);
Packit 709fb3
   extern bool          mb_isxdigit (const mbchar_t mbc);
Packit 709fb3
   extern int           mb_width (const mbchar_t mbc);
Packit 709fb3
   extern void          mb_putc (const mbchar_t mbc, FILE *stream);
Packit 709fb3
   extern void          mb_setascii (mbchar_t *new, char sc);
Packit 709fb3
   extern void          mb_copy (mbchar_t *new, const mbchar_t *old);
Packit 709fb3
 */
Packit 709fb3
Packit 709fb3
#ifndef _MBCHAR_H
Packit 709fb3
#define _MBCHAR_H 1
Packit 709fb3
Packit 709fb3
#include <stdbool.h>
Packit 709fb3
#include <string.h>
Packit 709fb3
Packit 709fb3
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
Packit 709fb3
   <wchar.h>.
Packit 709fb3
   BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
Packit 709fb3
   <wchar.h>.  */
Packit 709fb3
#include <stdio.h>
Packit 709fb3
#include <time.h>
Packit 709fb3
#include <wchar.h>
Packit 709fb3
#include <wctype.h>
Packit 709fb3
Packit 709fb3
#ifndef _GL_INLINE_HEADER_BEGIN
Packit 709fb3
 #error "Please include config.h first."
Packit 709fb3
#endif
Packit 709fb3
_GL_INLINE_HEADER_BEGIN
Packit 709fb3
/* MBCHAR_INLINE is the specifier placed in front of the inline functions
   defined in this header.  Unless the includer has already defined it,
   it expands to _GL_INLINE.  */
#ifndef MBCHAR_INLINE
# define MBCHAR_INLINE _GL_INLINE
#endif
Packit 709fb3
Packit 709fb3
/* Size, in bytes, of the buffer embedded in every struct mbchar.  */
#define MBCHAR_BUF_SIZE 24

/* A single multibyte character: the bytes it occupies and, if wc_valid
   is set, the wide character it represents.

   EOF (not a real character) is represented with bytes = 0 and
   wc_valid = false.  */
struct mbchar
{
  const char *ptr;      /* pointer to current character */
  size_t bytes;         /* number of bytes of current character; > 0 for
                           every real character, 0 only for EOF */
  bool wc_valid;        /* true if wc is a valid wide character */
  wchar_t wc;           /* if wc_valid: the current character */
  char buf[MBCHAR_BUF_SIZE]; /* room for the bytes, used for file input only */
};

typedef struct mbchar mbchar_t;
Packit 709fb3
Packit 709fb3
/* Access the current character.
   mb_ptr (mbc) yields the pointer to the first byte of mbc.
   mb_len (mbc) yields the number of bytes occupied by mbc.
   Both take the mbchar_t object itself, not a pointer to it.  */
#define mb_ptr(mbc) ((mbc).ptr)
#define mb_len(mbc) ((mbc).bytes)
Packit 709fb3
Packit 709fb3
/* Comparison of characters.  */

/* True if mbc has a valid wide character and it equals the character sc.
   mbc is evaluated twice.  */
#define mb_iseq(mbc, sc) ((mbc).wc_valid && (mbc).wc == (sc))
/* True if mbc has a valid wide character and it is the nul character.
   mbc is evaluated twice.  */
#define mb_isnul(mbc) ((mbc).wc_valid && (mbc).wc == 0)
Packit 709fb3
/* Three-way comparison of two multibyte characters.  The result is
   negative, zero or positive when mbc1 sorts before, the same as or
   after mbc2.
   - Two valid characters are ordered by their wide character value.  The
     result is built from two comparisons instead of subtracting the
     values converted to int, so it cannot overflow and keeps the right
     sign for every wchar_t value.
   - A valid character sorts before an invalid one.
   - Two invalid characters are ordered by their bytes; when one byte
     sequence is a prefix of the other, the shorter one sorts first.
   Both arguments are evaluated more than once.  */
#define mb_cmp(mbc1, mbc2) \
  ((mbc1).wc_valid                                                      \
   ? ((mbc2).wc_valid                                                   \
      ? ((mbc1).wc > (mbc2).wc) - ((mbc1).wc < (mbc2).wc)               \
      : -1)                                                             \
   : ((mbc2).wc_valid                                                   \
      ? 1                                                               \
      : (mbc1).bytes == (mbc2).bytes                                    \
        ? memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes)                 \
        : (mbc1).bytes < (mbc2).bytes                                   \
          ? (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) > 0 ? 1 : -1) \
          : (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc2).bytes) >= 0 ? 1 : -1)))
Packit 709fb3
/* Like mb_cmp, but two valid characters are compared after mapping each
   through towlower, i.e. modulo upper/lowercase conversion.  The result is
   negative, zero or positive when mbc1 sorts before, the same as or
   after mbc2.
   - Two valid characters: the lowercased values are ordered with two
     comparisons instead of subtracting them as int, so the result cannot
     overflow.
   - A valid character sorts before an invalid one.
   - Two invalid characters are ordered by their bytes (no case mapping);
     when one byte sequence is a prefix of the other, the shorter one
     sorts first.
   Both arguments are evaluated more than once.  */
#define mb_casecmp(mbc1, mbc2) \
  ((mbc1).wc_valid                                                      \
   ? ((mbc2).wc_valid                                                   \
      ? ((towlower ((mbc1).wc) > towlower ((mbc2).wc))                  \
         - (towlower ((mbc1).wc) < towlower ((mbc2).wc)))               \
      : -1)                                                             \
   : ((mbc2).wc_valid                                                   \
      ? 1                                                               \
      : (mbc1).bytes == (mbc2).bytes                                    \
        ? memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes)                 \
        : (mbc1).bytes < (mbc2).bytes                                   \
          ? (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) > 0 ? 1 : -1) \
          : (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc2).bytes) >= 0 ? 1 : -1)))
Packit 709fb3
/* True if mbc1 and mbc2 are equal.
   When both have a valid wide character, those values are compared;
   otherwise the two are equal only if they occupy the same number of
   bytes and those bytes are identical.
   Both arguments are evaluated more than once.  */
#define mb_equal(mbc1, mbc2) \
  ((mbc1).wc_valid && (mbc2).wc_valid                                   \
   ? (mbc1).wc == (mbc2).wc                                             \
   : (mbc1).bytes == (mbc2).bytes                                       \
     && memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) == 0)
Packit 709fb3
/* True if mbc1 and mbc2 are equal modulo upper/lowercase conversion.
   When both have a valid wide character, the towlower images of those
   values are compared; otherwise the two are equal only if they occupy
   the same number of bytes and those bytes are identical.
   Both arguments are evaluated more than once.  */
#define mb_caseequal(mbc1, mbc2) \
  ((mbc1).wc_valid && (mbc2).wc_valid                                   \
   ? towlower ((mbc1).wc) == towlower ((mbc2).wc)                       \
   : (mbc1).bytes == (mbc2).bytes                                       \
     && memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) == 0)
Packit 709fb3
Packit 709fb3
/* <ctype.h>, <wctype.h> classification.
   Each macro yields false for a character whose wc_valid is not set;
   otherwise mb_isascii checks that wc lies in 0..127 and every other
   macro reports whether the corresponding isw* function returns nonzero
   for wc.  The argument is evaluated more than once.  */
#define mb_isascii(mbc) \
  ((mbc).wc_valid && (mbc).wc >= 0 && (mbc).wc <= 127)
#define mb_isalnum(mbc) ((mbc).wc_valid && iswalnum ((mbc).wc))
#define mb_isalpha(mbc) ((mbc).wc_valid && iswalpha ((mbc).wc))
#define mb_isblank(mbc) ((mbc).wc_valid && iswblank ((mbc).wc))
#define mb_iscntrl(mbc) ((mbc).wc_valid && iswcntrl ((mbc).wc))
#define mb_isdigit(mbc) ((mbc).wc_valid && iswdigit ((mbc).wc))
#define mb_isgraph(mbc) ((mbc).wc_valid && iswgraph ((mbc).wc))
#define mb_islower(mbc) ((mbc).wc_valid && iswlower ((mbc).wc))
#define mb_isprint(mbc) ((mbc).wc_valid && iswprint ((mbc).wc))
#define mb_ispunct(mbc) ((mbc).wc_valid && iswpunct ((mbc).wc))
#define mb_isspace(mbc) ((mbc).wc_valid && iswspace ((mbc).wc))
#define mb_isupper(mbc) ((mbc).wc_valid && iswupper ((mbc).wc))
#define mb_isxdigit(mbc) ((mbc).wc_valid && iswxdigit ((mbc).wc))
Packit 709fb3
Packit 709fb3
/* Extra <wchar.h> function.  */
Packit 709fb3
Packit 709fb3
/* Unprintable characters appear as a small box of width 1.  */
Packit 709fb3
#define MB_UNPRINTABLE_WIDTH 1
Packit 709fb3
Packit 709fb3
MBCHAR_INLINE int
Packit 709fb3
mb_width_aux (wint_t wc)
Packit 709fb3
{
Packit 709fb3
  int w = wcwidth (wc);
Packit 709fb3
  /* For unprintable characters, arbitrarily return 0 for control characters
Packit 709fb3
     and MB_UNPRINTABLE_WIDTH otherwise.  */
Packit 709fb3
  return (w >= 0 ? w : iswcntrl (wc) ? 0 : MB_UNPRINTABLE_WIDTH);
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
/* Number of output columns occupied by mbc: mb_width_aux of its wide
   character when wc_valid is set, MB_UNPRINTABLE_WIDTH otherwise.
   mbc is evaluated twice.  */
#define mb_width(mbc) \
  ((mbc).wc_valid ? mb_width_aux ((mbc).wc) : MB_UNPRINTABLE_WIDTH)
Packit 709fb3
Packit 709fb3
/* Output.
   mb_putc (mbc, stream) writes the bytes of mbc to stream with a single
   fwrite call of mb_len (mbc) one-byte items.  The expansion is that
   fwrite call, so its value is the number of bytes fwrite reports as
   written.  mbc is evaluated twice.  */
#define mb_putc(mbc, stream)  fwrite ((mbc).ptr, 1, (mbc).bytes, (stream))
Packit 709fb3
Packit 709fb3
/* Assignment.
   mb_setascii (mbc, sc) turns *mbc into the one-byte character sc:
   sc is stored in mbc->buf[0], ptr is pointed at buf, bytes becomes 1,
   and the character is marked valid with wc set to the stored byte.
   mbc is a pointer to the mbchar_t and is evaluated several times.  */
#define mb_setascii(mbc, sc) \
  ((mbc)->ptr = (mbc)->buf, (mbc)->bytes = 1, (mbc)->wc_valid = true, \
   (mbc)->wc = (mbc)->buf[0] = (sc))
Packit 709fb3
Packit 709fb3
/* Copying a character.  */
Packit 709fb3
MBCHAR_INLINE void
Packit 709fb3
mb_copy (mbchar_t *new_mbc, const mbchar_t *old_mbc)
Packit 709fb3
{
Packit 709fb3
  if (old_mbc->ptr == &old_mbc->buf[0])
Packit 709fb3
    {
Packit 709fb3
      memcpy (&new_mbc->buf[0], &old_mbc->buf[0], old_mbc->bytes);
Packit 709fb3
      new_mbc->ptr = &new_mbc->buf[0];
Packit 709fb3
    }
Packit 709fb3
  else
Packit 709fb3
    new_mbc->ptr = old_mbc->ptr;
Packit 709fb3
  new_mbc->bytes = old_mbc->bytes;
Packit 709fb3
  if ((new_mbc->wc_valid = old_mbc->wc_valid))
Packit 709fb3
    new_mbc->wc = old_mbc->wc;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
Packit 709fb3
/* is_basic(c) tests whether the single-byte character c is in the
Packit 709fb3
   ISO C "basic character set".
Packit 709fb3
   This is a convenience function, and is in this file only to share code
Packit 709fb3
   between mbiter_multi.h and mbfile_multi.h.  */
Packit 709fb3
#if (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
Packit 709fb3
    && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
Packit 709fb3
    && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
Packit 709fb3
    && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
Packit 709fb3
    && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
Packit 709fb3
    && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
Packit 709fb3
    && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
Packit 709fb3
    && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
Packit 709fb3
    && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
Packit 709fb3
    && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
Packit 709fb3
    && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
Packit 709fb3
    && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
Packit 709fb3
    && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
Packit 709fb3
    && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
Packit 709fb3
    && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
Packit 709fb3
    && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
Packit 709fb3
    && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
Packit 709fb3
    && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
Packit 709fb3
    && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
Packit 709fb3
    && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
Packit 709fb3
    && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
Packit 709fb3
    && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
Packit 709fb3
    && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)
Packit 709fb3
/* The character set is ISO-646, not EBCDIC. */
Packit 709fb3
# define IS_BASIC_ASCII 1
Packit 709fb3
Packit 709fb3
extern const unsigned int is_basic_table[];
Packit 709fb3
Packit 709fb3
MBCHAR_INLINE bool
Packit 709fb3
is_basic (char c)
Packit 709fb3
{
Packit 709fb3
  return (is_basic_table [(unsigned char) c >> 5] >> ((unsigned char) c & 31))
Packit 709fb3
         & 1;
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
#else
Packit 709fb3
Packit 709fb3
MBCHAR_INLINE bool
Packit 709fb3
is_basic (char c)
Packit 709fb3
{
Packit 709fb3
  switch (c)
Packit 709fb3
    {
Packit 709fb3
    case '\t': case '\v': case '\f':
Packit 709fb3
    case ' ': case '!': case '"': case '#': case '%':
Packit 709fb3
    case '&': case '\'': case '(': case ')': case '*':
Packit 709fb3
    case '+': case ',': case '-': case '.': case '/':
Packit 709fb3
    case '0': case '1': case '2': case '3': case '4':
Packit 709fb3
    case '5': case '6': case '7': case '8': case '9':
Packit 709fb3
    case ':': case ';': case '<': case '=': case '>':
Packit 709fb3
    case '?':
Packit 709fb3
    case 'A': case 'B': case 'C': case 'D': case 'E':
Packit 709fb3
    case 'F': case 'G': case 'H': case 'I': case 'J':
Packit 709fb3
    case 'K': case 'L': case 'M': case 'N': case 'O':
Packit 709fb3
    case 'P': case 'Q': case 'R': case 'S': case 'T':
Packit 709fb3
    case 'U': case 'V': case 'W': case 'X': case 'Y':
Packit 709fb3
    case 'Z':
Packit 709fb3
    case '[': case '\\': case ']': case '^': case '_':
Packit 709fb3
    case 'a': case 'b': case 'c': case 'd': case 'e':
Packit 709fb3
    case 'f': case 'g': case 'h': case 'i': case 'j':
Packit 709fb3
    case 'k': case 'l': case 'm': case 'n': case 'o':
Packit 709fb3
    case 'p': case 'q': case 'r': case 's': case 't':
Packit 709fb3
    case 'u': case 'v': case 'w': case 'x': case 'y':
Packit 709fb3
    case 'z': case '{': case '|': case '}': case '~':
Packit 709fb3
      return 1;
Packit 709fb3
    default:
Packit 709fb3
      return 0;
Packit 709fb3
    }
Packit 709fb3
}
Packit 709fb3
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
_GL_INLINE_HEADER_END
Packit 709fb3
Packit 709fb3
#endif /* _MBCHAR_H */