Blame src/mbchar.c

Packit Service 8bf002
/*-
Packit Service 8bf002
 * Copyright (c) 1998, 2002-2008 Kiyoshi Matsui <kmatsui@t3.rim.or.jp>
Packit Service 8bf002
 * All rights reserved.
Packit Service 8bf002
 *
Packit Service 8bf002
 * Some parts of this code are derived from the public domain software
Packit Service 8bf002
 * DECUS cpp (1984,1985) written by Martin Minow.
Packit Service 8bf002
 *
Packit Service 8bf002
 * Redistribution and use in source and binary forms, with or without
Packit Service 8bf002
 * modification, are permitted provided that the following conditions
Packit Service 8bf002
 * are met:
Packit Service 8bf002
 * 1. Redistributions of source code must retain the above copyright
Packit Service 8bf002
 *    notice, this list of conditions and the following disclaimer.
Packit Service 8bf002
 * 2. Redistributions in binary form must reproduce the above copyright
Packit Service 8bf002
 *    notice, this list of conditions and the following disclaimer in the
Packit Service 8bf002
 *    documentation and/or other materials provided with the distribution.
Packit Service 8bf002
 *
Packit Service 8bf002
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
Packit Service 8bf002
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
Packit Service 8bf002
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Packit Service 8bf002
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
Packit Service 8bf002
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
Packit Service 8bf002
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
Packit Service 8bf002
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
Packit Service 8bf002
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
Packit Service 8bf002
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
Packit Service 8bf002
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
Packit Service 8bf002
 * SUCH DAMAGE.
Packit Service 8bf002
 */
Packit Service 8bf002
Packit Service 8bf002
/*
Packit Service 8bf002
 *                          M B C H A R . C
Packit Service 8bf002
 *      C h a r a c t e r    h a n d l i n g    R o u t i n e s
Packit Service 8bf002
 *
Packit Service 8bf002
 * Character handling and multi-byte character handling routines are
Packit Service 8bf002
 * placed here.
Packit Service 8bf002
 */
Packit Service 8bf002
Packit Service 8bf002
#if PREPROCESSED
Packit Service 8bf002
#include    "mcpp.H"
Packit Service 8bf002
#else
Packit Service 8bf002
#include    "system.H"
Packit Service 8bf002
#include    "internal.H"
Packit Service 8bf002
#endif
Packit Service 8bf002
Packit Service 8bf002
/*
Packit Service 8bf002
 * Tables of character types and multi-byte character types.
Packit Service 8bf002
 *
Packit Service 8bf002
 * Some of these character attributes will be overwritten by
Packit Service 8bf002
 *      execution time option '-@post' or '-@old'.
Packit Service 8bf002
 * Warning on erroneous sequence will be issued from the caller routines:
Packit Service 8bf002
 * scan_quote(), scan_id() or scan_number().
Packit Service 8bf002
 */
Packit Service 8bf002
Packit Service 8bf002
/* Non-ASCII characters are always checked by mb_read().    */
Packit Service 8bf002
#define NA      0x4000  /* Non-ASCII characters */
Packit Service 8bf002
Packit Service 8bf002
/* Horizontal spaces (' ', '\t' and TOK_SEP)    */
Packit Service 8bf002
#define HSPA    (SPA | HSP)
Packit Service 8bf002
Packit Service 8bf002
short *     char_type;  /* Pointer to one of the following type_*[].    */
Packit Service 8bf002
Packit Service 8bf002
#define EJ1     0x100   /* 1st byte of EUC_JP   */
Packit Service 8bf002
#define EJ2     0x200   /* 2nd byte of EUC_JP   */
Packit Service 8bf002
#define GB1     0x400   /* 1st byte of GB2312   */
Packit Service 8bf002
#define GB2     0x800   /* 2nd byte of GB2312   */
Packit Service 8bf002
#define KS1     0x1000  /* 1st byte of KSC5601  */
Packit Service 8bf002
#define KS2     0x2000  /* 2nd byte of KSC5601  */
Packit Service 8bf002
Packit Service 8bf002
#define EJ12    (EJ1 | EJ2)     /* 1st byte or 2nd byte of EUC_JP   */
Packit Service 8bf002
#define GB12    (GB1 | GB2)
Packit Service 8bf002
#define KS12    (KS1 | KS2)
Packit Service 8bf002
#define EJ1N    (NA | EJ1)
Packit Service 8bf002
#define EU12N   (NA | EJ12 | GB12 | KS12)
Packit Service 8bf002
    /* 1st or 2nd byte of EUC_JP, GB2312 or KSC5601, or any other non-ASCII */
Packit Service 8bf002
Packit Service 8bf002
static short    type_euc[ UCHARMAX + 1] = {
Packit Service 8bf002
/*
Packit Service 8bf002
 * For EUC_JP, GB2312, KSC5601 or other similar multi-byte char encodings.
Packit Service 8bf002
 */
Packit Service 8bf002
Packit Service 8bf002
/* Character type codes */
Packit Service 8bf002
/*   0,     1,     2,     3,     4,     5,     6,     7,                    */
Packit Service 8bf002
/*   8,     9,     A,     B,     C,     D,     E,     F,       Hex          */
Packit Service 8bf002
Packit Service 8bf002
   000,   000,   000,   000,   000,   000,   000,   000,    /* 00           */
Packit Service 8bf002
   000,   HSPA,  SPA,   SPA,   SPA,   SPA,   000,   000,    /* 08           */
Packit Service 8bf002
   000,   000,   000,   000,   000,   000,   000,   000,    /* 10           */
Packit Service 8bf002
    /* 0x17-0x1A and 0x1F will be cleared in some modes by chk_opts()       */
Packit Service 8bf002
   000,   LET,   LET,   000,   000,   000,   000,   HSPA,   /* 18           */
Packit Service 8bf002
   HSPA,  PUNC,  QUO,   PUNC,  000,   PUNC,  PUNC,  QUO,    /* 20  !"#$%&'  */
Packit Service 8bf002
   PUNC,  PUNC,  PUNC,  PUNC,  PUNC,  PUNC,  DOT,   PUNC,   /* 28 ()*+,-./  */
Packit Service 8bf002
   DIG,   DIG,   DIG,   DIG,   DIG,   DIG,   DIG,   DIG,    /* 30 01234567  */
Packit Service 8bf002
   DIG,   DIG,   PUNC,  PUNC,  PUNC,  PUNC,  PUNC,  PUNC,   /* 38 89:;<=>?  */
Packit Service 8bf002
Packit Service 8bf002
   000,   LET,   LET,   LET,   LET,   LET,   LET,   LET,    /* 40 @ABCDEFG  */
Packit Service 8bf002
   LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET,    /* 48 HIJKLMNO  */
Packit Service 8bf002
   LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET,    /* 50 PQRSTUVW  */
Packit Service 8bf002
   LET,   LET,   LET,   PUNC,  000,   PUNC,  PUNC,  LET,    /* 58 XYZ[\]^_  */
Packit Service 8bf002
   000,   LET,   LET,   LET,   LET,   LET,   LET,   LET,    /* 60 `abcdefg  */
Packit Service 8bf002
   LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET,    /* 68 hijklmno  */
Packit Service 8bf002
   LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET,    /* 70 pqrstuvw  */
Packit Service 8bf002
   LET,   LET,   LET,   PUNC,  PUNC,  PUNC,  PUNC,  000,    /* 78 xyz{|}~   */
Packit Service 8bf002
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   80 .. 87   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    EJ1N,  NA,     /*   88 .. 8F   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   90 .. 97   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   98 .. 9F   */
Packit Service 8bf002
   NA,    EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N,  /*   A0 .. A7   */
Packit Service 8bf002
   EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N,  /*   A8 .. AF   */
Packit Service 8bf002
   EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N,  /*   B0 .. B7   */
Packit Service 8bf002
   EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N,  /*   B8 .. BF   */
Packit Service 8bf002
   EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N,  /*   C0 .. C7   */
Packit Service 8bf002
   EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N,  /*   C8 .. CF   */
Packit Service 8bf002
   EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N,  /*   D0 .. D7   */
Packit Service 8bf002
   EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N,  /*   D8 .. DF   */
Packit Service 8bf002
   EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N,  /*   E0 .. E7   */
Packit Service 8bf002
   EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N,  /*   E8 .. EF   */
Packit Service 8bf002
   EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N,  /*   F0 .. F7   */
Packit Service 8bf002
   EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, NA,     /*   F8 .. FF   */
Packit Service 8bf002
};
Packit Service 8bf002
Packit Service 8bf002
static short    type_bsl[ UCHARMAX + 1] = {
Packit Service 8bf002
/*
Packit Service 8bf002
 * For SJIS, BIGFIVE or other similar encodings which may have '\\' value as
Packit Service 8bf002
 * the second byte of multi-byte character.
Packit Service 8bf002
 */
Packit Service 8bf002
Packit Service 8bf002
#define SJ1     0x100   /* 1st byte of SJIS     */
Packit Service 8bf002
#define SJ2     0x200   /* 2nd byte of SJIS     */
Packit Service 8bf002
#define BF1     0x400   /* 1st byte of BIGFIVE  */
Packit Service 8bf002
#define BF2     0x800   /* 2nd byte of BIGFIVE  */
Packit Service 8bf002
Packit Service 8bf002
#define SB2     (SJ2 | BF2)
Packit Service 8bf002
#define SJ2N    (NA | SJ2)
Packit Service 8bf002
#define SB2N    (NA | SJ2 | BF2)
Packit Service 8bf002
#define SJ12N   (NA | SJ1 | SJ2)
Packit Service 8bf002
#define BF12N   (NA | BF1 | BF2)
Packit Service 8bf002
#define SB12N   (NA | SJ1 | SJ2 | BF1 | BF2)
Packit Service 8bf002
#define S2B12N  (NA | SJ2 | BF1 | BF2)
Packit Service 8bf002
Packit Service 8bf002
#define LSB2    (LET | SB2)
Packit Service 8bf002
#define PSB2    (PUNC| SB2)
Packit Service 8bf002
Packit Service 8bf002
/* Character type codes */
Packit Service 8bf002
/*   0,     1,     2,     3,     4,     5,     6,     7,                    */
Packit Service 8bf002
/*   8,     9,     A,     B,     C,     D,     E,     F,       Hex          */
Packit Service 8bf002
Packit Service 8bf002
   000,   000,   000,   000,   000,   000,   000,   000,    /* 00           */
Packit Service 8bf002
   000,   HSPA,  SPA,   SPA,   SPA,   SPA,   000,   000,    /* 08           */
Packit Service 8bf002
   000,   000,   000,   000,   000,   000,   000,   000,    /* 10           */
Packit Service 8bf002
    /* 0x17-0x1A and 0x1F will be cleared in some modes by chk_opts()       */
Packit Service 8bf002
   000,   LET,   LET,   000,   000,   000,   000,   HSPA,   /* 18           */
Packit Service 8bf002
   HSPA,  PUNC,  QUO,   PUNC,  000,   PUNC,  PUNC,  QUO,    /* 20  !"#$%&'  */
Packit Service 8bf002
   PUNC,  PUNC,  PUNC,  PUNC,  PUNC,  PUNC,  DOT,   PUNC,   /* 28 ()*+,-./  */
Packit Service 8bf002
   DIG,   DIG,   DIG,   DIG,   DIG,   DIG,   DIG,   DIG,    /* 30 01234567  */
Packit Service 8bf002
   DIG,   DIG,   PUNC,  PUNC,  PUNC,  PUNC,  PUNC,  PUNC,   /* 38 89:;<=>?  */
Packit Service 8bf002
Packit Service 8bf002
   SB2,   LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,   /* 40 @ABCDEFG  */
Packit Service 8bf002
   LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,   /* 48 HIJKLMNO  */
Packit Service 8bf002
   LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,   /* 50 PQRSTUVW  */
Packit Service 8bf002
   LSB2,  LSB2,  LSB2,  PSB2,  SB2,   PSB2,  PSB2,  LSB2,   /* 58 XYZ[\]^_  */
Packit Service 8bf002
   SB2,   LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,   /* 60 `abcdefg  */
Packit Service 8bf002
   LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,   /* 68 hijklmno  */
Packit Service 8bf002
   LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,  LSB2,   /* 70 pqrstuvw  */
Packit Service 8bf002
   LSB2,  LSB2,  LSB2,  PSB2,  PSB2,  PSB2,  PSB2,  000,    /* 78 xyz{|}~   */
Packit Service 8bf002
Packit Service 8bf002
   SB2N,  SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N,  /*   80 .. 87   */
Packit Service 8bf002
   SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N,  /*   88 .. 8F   */
Packit Service 8bf002
   SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N,  /*   90 .. 97   */
Packit Service 8bf002
   SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N,  /*   98 .. 9F   */
Packit Service 8bf002
   SJ2N,  S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /*   A0 .. A7   */
Packit Service 8bf002
   S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /*   A8 .. AF   */
Packit Service 8bf002
   S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /*   B0 .. B7   */
Packit Service 8bf002
   S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /*   B8 .. BF   */
Packit Service 8bf002
   S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /*   C0 .. C7   */
Packit Service 8bf002
   S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /*   C8 .. CF   */
Packit Service 8bf002
   S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /*   D0 .. D7   */
Packit Service 8bf002
   S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /*   D8 .. DF   */
Packit Service 8bf002
   SB12N, SB12N, SB12N, SB12N, SB12N, SB12N, SB12N, SB12N,  /*   E0 .. E7   */
Packit Service 8bf002
   SB12N, SB12N, SB12N, SB12N, SB12N, SB12N, SB12N, SB12N,  /*   E8 .. EF   */
Packit Service 8bf002
   SB12N, SB12N, SB12N, SB12N, SB12N, SB12N, SB12N, SB12N,  /*   F0 .. F7   */
Packit Service 8bf002
   SB12N, SB12N, SB12N, SB12N, SB12N, BF12N, BF12N, NA,     /*   F8 .. FF   */
Packit Service 8bf002
};
Packit Service 8bf002
Packit Service 8bf002
/*
Packit Service 8bf002
 * For ISO2022_JP multi-byte character encoding.
Packit Service 8bf002
 */
Packit Service 8bf002
Packit Service 8bf002
#define IS1     0x100   /* 1st byte of shift-sequence   */
Packit Service 8bf002
#define IS2     0x200   /* 2nd byte of shift-sequence   */
Packit Service 8bf002
#define IS3     0x400   /* 3rd byte of shift-sequence   */
Packit Service 8bf002
#define IS4     0x800   /* 4th byte of shift-sequence   */
Packit Service 8bf002
#define IJP     0x1000  /* 1st or 2nd byte of ISO-2022-JP (ISO-2022-JP1)    */
Packit Service 8bf002
Packit Service 8bf002
#define PIJP    (PUNC | IJP)
Packit Service 8bf002
#define QIJP    (QUO | IJP)
Packit Service 8bf002
#define DTJP    (DOT | IJP)
Packit Service 8bf002
#define DGJP    (DIG | IJP)
Packit Service 8bf002
#define LIJP    (LET | IJP)
Packit Service 8bf002
Packit Service 8bf002
#define JPS2    (IJP | IS2)
Packit Service 8bf002
#define PJPS23  (PIJP | IS2 | IS3)
Packit Service 8bf002
#define LJPS3   (LIJP | IS3)
Packit Service 8bf002
#define LJPS4   (LIJP | IS4)
Packit Service 8bf002
Packit Service 8bf002
static short    type_iso2022_jp[ UCHARMAX + 1] = {
Packit Service 8bf002
Packit Service 8bf002
/* Character type codes */
Packit Service 8bf002
/*   0,     1,     2,     3,     4,     5,     6,     7,                    */
Packit Service 8bf002
/*   8,     9,     A,     B,     C,     D,     E,     F,       Hex          */
Packit Service 8bf002
Packit Service 8bf002
   000,   000,   000,   000,   000,   000,   000,   000,    /* 00           */
Packit Service 8bf002
   000,   HSPA,  SPA,   SPA,   SPA,   SPA,   000,   000,    /* 08           */
Packit Service 8bf002
   000,   000,   000,   000,   000,   000,   000,   000,    /* 10           */
Packit Service 8bf002
    /* 0x17-0x1A and 0x1F will be cleared in some modes by chk_opts()       */
Packit Service 8bf002
   000,   LET,   LET,   IS1,   000,   000,   000,   HSPA,   /* 18           */
Packit Service 8bf002
   HSPA,  PIJP,  QIJP,  PIJP,  JPS2,  PIJP,  PIJP,  QIJP,   /* 20  !"#$%&'  */
Packit Service 8bf002
   PJPS23,PIJP,  PIJP,  PIJP,  PIJP,  PIJP,  DTJP,  PIJP,   /* 28 ()*+,-./  */
Packit Service 8bf002
   DGJP,  DGJP,  DGJP,  DGJP,  DGJP,  DGJP,  DGJP,  DGJP,   /* 30 01234567  */
Packit Service 8bf002
   DGJP,  DGJP,  PIJP,  PIJP,  PIJP,  PIJP,  PIJP,  PIJP,   /* 38 89:;<=>?  */
Packit Service 8bf002
Packit Service 8bf002
   IJP,   LIJP,  LJPS3, LIJP,  LJPS4, LIJP,  LIJP,  LIJP,   /* 40 @ABCDEFG  */
Packit Service 8bf002
   LIJP,  LIJP,  LIJP,  LIJP,  LIJP,  LIJP,  LIJP,  LIJP,   /* 48 HIJKLMNO  */
Packit Service 8bf002
   LIJP,  LIJP,  LIJP,  LIJP,  LIJP,  LIJP,  LIJP,  LIJP,   /* 50 PQRSTUVW  */
Packit Service 8bf002
   LIJP,  LIJP,  LIJP,  PIJP,  IJP,   PIJP,  PIJP,  LIJP,   /* 58 XYZ[\]^_  */
Packit Service 8bf002
   IJP,   LIJP,  LIJP,  LIJP,  LIJP,  LIJP,  LIJP,  LIJP,   /* 60 `abcdefg  */
Packit Service 8bf002
   LIJP,  LIJP,  LIJP,  LIJP,  LIJP,  LIJP,  LIJP,  LIJP,   /* 68 hijklmno  */
Packit Service 8bf002
   LIJP,  LIJP,  LIJP,  LIJP,  LIJP,  LIJP,  LIJP,  LIJP,   /* 70 pqrstuvw  */
Packit Service 8bf002
   LIJP,  LIJP,  LIJP,  PIJP,  PIJP,  PIJP,  PIJP,  000,    /* 78 xyz{|}~   */
Packit Service 8bf002
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   80 .. 87   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   88 .. 8F   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   90 .. 97   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   98 .. 9F   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   A0 .. A7   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   A8 .. AF   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   B0 .. B7   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   B8 .. BF   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   C0 .. C7   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   C8 .. CF   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   D0 .. D7   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   D8 .. DF   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   E0 .. E7   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   E8 .. EF   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   F0 .. F7   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   F8 .. FF   */
Packit Service 8bf002
};
Packit Service 8bf002
Packit Service 8bf002
/*
Packit Service 8bf002
 * For UTF8 multi-byte character encoding.
Packit Service 8bf002
 */
Packit Service 8bf002
Packit Service 8bf002
#define U2_1    0x100       /* 1st byte of 2-byte encoding of UTF8  */
Packit Service 8bf002
#define U3_1    0x200       /* 1st byte of 3-byte encoding of UTF8  */
Packit Service 8bf002
#define U4_1    0x400       /* 1st byte of 4-byte encoding of UTF8  */
Packit Service 8bf002
#define UCONT   0x800   /* Continuation of a 2, 3, or 4 byte UTF8 sequence  */
Packit Service 8bf002
#define U2_1N   (NA | U2_1)
Packit Service 8bf002
#define U3_1N   (NA | U3_1)
Packit Service 8bf002
#define U4_1N   (NA | U4_1)
Packit Service 8bf002
#define UCONTN  (NA | UCONT)
Packit Service 8bf002
Packit Service 8bf002
static short    type_utf8[ UCHARMAX + 1] = {
Packit Service 8bf002
Packit Service 8bf002
/* Character type codes */
Packit Service 8bf002
/*   0,     1,     2,     3,     4,     5,     6,     7,                    */
Packit Service 8bf002
/*   8,     9,     A,     B,     C,     D,     E,     F,       Hex          */
Packit Service 8bf002
Packit Service 8bf002
   000,   000,   000,   000,   000,   000,   000,   000,    /* 00           */
Packit Service 8bf002
   000,   HSPA,  SPA,   SPA,   SPA,   SPA,   000,   000,    /* 08           */
Packit Service 8bf002
   000,   000,   000,   000,   000,   000,   000,   000,    /* 10           */
Packit Service 8bf002
    /* 0x17-0x1A and 0x1F will be cleared in some modes by chk_opts()       */
Packit Service 8bf002
   000,   LET,   LET,   000,   000,   000,   000,   HSPA,   /* 18           */
Packit Service 8bf002
   HSPA,  PUNC,  QUO,   PUNC,  000,   PUNC,  PUNC,  QUO,    /* 20  !"#$%&'  */
Packit Service 8bf002
   PUNC,  PUNC,  PUNC,  PUNC,  PUNC,  PUNC,  DOT,   PUNC,   /* 28 ()*+,-./  */
Packit Service 8bf002
   DIG,   DIG,   DIG,   DIG,   DIG,   DIG,   DIG,   DIG,    /* 30 01234567  */
Packit Service 8bf002
   DIG,   DIG,   PUNC,  PUNC,  PUNC,  PUNC,  PUNC,  PUNC,   /* 38 89:;<=>?  */
Packit Service 8bf002
Packit Service 8bf002
   000,   LET,   LET,   LET,   LET,   LET,   LET,   LET,    /* 40 @ABCDEFG  */
Packit Service 8bf002
   LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET,    /* 48 HIJKLMNO  */
Packit Service 8bf002
   LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET,    /* 50 PQRSTUVW  */
Packit Service 8bf002
   LET,   LET,   LET,   PUNC,  000,   PUNC,  PUNC,  LET,    /* 58 XYZ[\]^_  */
Packit Service 8bf002
   000,   LET,   LET,   LET,   LET,   LET,   LET,   LET,    /* 60 `abcdefg  */
Packit Service 8bf002
   LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET,    /* 68 hijklmno  */
Packit Service 8bf002
   LET,   LET,   LET,   LET,   LET,   LET,   LET,   LET,    /* 70 pqrstuvw  */
Packit Service 8bf002
   LET,   LET,   LET,   PUNC,  PUNC,  PUNC,  PUNC,  000,    /* 78 xyz{|}~   */
Packit Service 8bf002
Packit Service 8bf002
   UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /*   80 .. 87   */
Packit Service 8bf002
   UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /*   88 .. 8F   */
Packit Service 8bf002
   UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /*   90 .. 97   */
Packit Service 8bf002
   UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /*   98 .. 9F   */
Packit Service 8bf002
   UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /*   A0 .. A7   */
Packit Service 8bf002
   UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /*   A8 .. AF   */
Packit Service 8bf002
   UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /*   B0 .. B7   */
Packit Service 8bf002
   UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /*   B8 .. BF   */
Packit Service 8bf002
   NA,    NA,    U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N,  /*   C0 .. C7   */
Packit Service 8bf002
   U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N,  /*   C8 .. CF   */
Packit Service 8bf002
   U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N,  /*   D0 .. D7   */
Packit Service 8bf002
   U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N,  /*   D8 .. DF   */
Packit Service 8bf002
   U3_1N, U3_1N, U3_1N, U3_1N, U3_1N, U3_1N, U3_1N, U3_1N,  /*   E0 .. E7   */
Packit Service 8bf002
   U3_1N, U3_1N, U3_1N, U3_1N, U3_1N, U3_1N, U3_1N, U3_1N,  /*   E8 .. EF   */
Packit Service 8bf002
   U4_1N, U4_1N, U4_1N, U4_1N, U4_1N, NA,    NA,    NA,     /*   F0 .. F7   */
Packit Service 8bf002
   NA,    NA,    NA,    NA,    NA,    NA,    NA,    NA,     /*   F8 .. FF   */
Packit Service 8bf002
};
Packit Service 8bf002
Packit Service 8bf002
#define SETLOCALE       2       /* #pragma setlocale (not __setlocale)  */
Packit Service 8bf002
Packit Service 8bf002
#define NUM_ENCODING    8
Packit Service 8bf002
#define NUM_ALIAS       6
Packit Service 8bf002
Packit Service 8bf002
/* Names of encoding recognized.  Table for search_encoding().  */
Packit Service 8bf002
/*
 * One row per encoding, in the order expected by search_encoding().
 * Columns 0 and 1 hold the Visual C full and short names, columns 2 .. 5
 * hold four miscellaneous aliases.  "" fills the unused slots.
 */
static const char * const   encoding_name[ NUM_ENCODING][ NUM_ALIAS] = {
    /* 0: no multi-byte character   */
    { "english", "c", "c", "en", "latin", "iso8859"},
    /* 1: EUC_JP                    */
    { "", "", "eucjp", "euc", "ujis", ""},
    /* 2: GB2312                    */
    { "chinesesimplified", "chs", "gb2312", "cngb", "euccn", ""},
    /* 3: KSC5601                   */
    { "korean", "kor", "ksc5601", "ksx1001", "wansung", "euckr"},
    /* 4: SJIS                      */
    { "japanese", "jpn", "sjis", "shiftjis", "mskanji", ""},
    /* 5: BIGFIVE                   */
    { "chinesetraditional", "cht", "bigfive", "big5", "cnbig5", "euctw"},
    /* 6: ISO2022_JP                */
    { "", "", "iso2022jp", "iso2022jp1", "jis", ""},
    /* 7: UTF8                      */
    { "", "", "utf8", "utf", "", ""},
};
Packit Service 8bf002
Packit Service 8bf002
static int      mbstart;
Packit Service 8bf002
static int      mb2;
Packit Service 8bf002
Packit Service 8bf002
static size_t   mb_read_2byte( int c1, char ** in_pp, char ** out_pp);
Packit Service 8bf002
                /* For 2-byte encodings of mbchar   */
Packit Service 8bf002
static const char *     search_encoding( char * norm, int alias);
Packit Service 8bf002
                /* Search encoding_name[][] table   */
Packit Service 8bf002
static void     strip_bar( char * string);
Packit Service 8bf002
                /* Remove '_', '-' or '.' in the string */
Packit Service 8bf002
static void     conv_case( char * name, char * lim, int upper);
Packit Service 8bf002
                /* Convert to upper/lower case      */
Packit Service 8bf002
static size_t   mb_read_iso2022_jp( int c1, char ** in_pp, char ** out_pp);
Packit Service 8bf002
                /* For ISO2022_JP encoding          */
Packit Service 8bf002
static size_t   mb_read_utf8( int c1, char ** in_pp, char ** out_pp);
Packit Service 8bf002
                /* For UTF8 mbchar encoding         */
Packit Service 8bf002
Packit Service 8bf002
#define NAMLEN          20
Packit Service 8bf002
#define UPPER           1               /* To upper */
Packit Service 8bf002
#define LOWER           0               /* To lower */
Packit Service 8bf002
Packit Service 8bf002
Packit Service 8bf002
const char *    set_encoding(
Packit Service 8bf002
    char *  name,       /* Name of encoding specified   */
Packit Service 8bf002
    char *  env,        /* Name of environment variable */
Packit Service 8bf002
    int     pragma
Packit Service 8bf002
        /* 2: #pragma setlocale, 1: #pragma __setlocale, 0: not #pragma */
Packit Service 8bf002
)
Packit Service 8bf002
/*
Packit Service 8bf002
 * Search the encoding specified and re-initialize mbchar settings.
Packit Service 8bf002
 */
Packit Service 8bf002
{
Packit Service 8bf002
    const char *    unknown_encoding
Packit Service 8bf002
            = "Unknown encoding: %s%.0ld%.0s";          /* _W1_ */
Packit Service 8bf002
    const char *    too_long
Packit Service 8bf002
            = "Too long encoding name: %s%.0ld%.0s";    /* _E_  */
Packit Service 8bf002
    const char *    loc = "";
Packit Service 8bf002
    int     alias;
Packit Service 8bf002
    char    norm[ NAMLEN];
Packit Service 8bf002
            /*
Packit Service 8bf002
             * Normalized name (removed 'xxxxx.', stripped '_', '-', '.'
Packit Service 8bf002
             * and lowered.
Packit Service 8bf002
             */
Packit Service 8bf002
Packit Service 8bf002
    if (strlen( name) >= NAMLEN) {
Packit Service 8bf002
        if ((env || pragma) && (warn_level & 1)) {
Packit Service 8bf002
            cwarn( too_long, name, 0L, NULL);
Packit Service 8bf002
        } else {
Packit Service 8bf002
            mcpp_fprintf( ERR, too_long, name);
Packit Service 8bf002
            mcpp_fputc( '\n', ERR);
Packit Service 8bf002
        }
Packit Service 8bf002
    }
Packit Service 8bf002
    strcpy( norm, name);
Packit Service 8bf002
    if (norm[ 5] == '.')
Packit Service 8bf002
        memmove( norm, norm + 5, strlen( norm + 5) + 1);
Packit Service 8bf002
        /* Remove initial 'xxxxx.' as 'ja_JP.', 'en_US.' or any other   */
Packit Service 8bf002
    conv_case( norm, norm + strlen( norm), LOWER);
Packit Service 8bf002
    strip_bar( norm);
Packit Service 8bf002
Packit Service 8bf002
    if (strlen( name) == 0) {                       /* ""       */
Packit Service 8bf002
        mbchar = MBCHAR;    /* Restore to the default encoding  */
Packit Service 8bf002
    } else if (memcmp( norm, "iso8859", 7) == 0     /* iso8859* */
Packit Service 8bf002
            || memcmp( norm, "latin", 5) == 0       /* latin*   */
Packit Service 8bf002
            || memcmp( norm, "en", 2) == 0) {       /* en*      */
Packit Service 8bf002
        mbchar = 0;                 /* No multi-byte character  */
Packit Service 8bf002
    } else {
Packit Service 8bf002
        alias = 2;
Packit Service 8bf002
#if COMPILER == MSC
Packit Service 8bf002
        if (pragma == SETLOCALE)        /* #pragma setlocale    */
Packit Service 8bf002
            alias = 0;
Packit Service 8bf002
#endif
Packit Service 8bf002
        loc = search_encoding( norm, alias);        /* Search the name  */
Packit Service 8bf002
    }
Packit Service 8bf002
    if (loc == NULL) {
Packit Service 8bf002
        if ((env || pragma) && (warn_level & 1)) {
Packit Service 8bf002
            cwarn( unknown_encoding, name, 0L, NULL);
Packit Service 8bf002
        } else {                        /* -m option            */
Packit Service 8bf002
            mcpp_fprintf( ERR, unknown_encoding, name);
Packit Service 8bf002
            mcpp_fputc( '\n', ERR);
Packit Service 8bf002
        }
Packit Service 8bf002
    } else {
Packit Service 8bf002
        mb_init();                      /* Re-initialize        */
Packit Service 8bf002
    }
Packit Service 8bf002
    return  loc;
Packit Service 8bf002
}
Packit Service 8bf002
Packit Service 8bf002
static const char * search_encoding(
Packit Service 8bf002
    char *  norm,           /* The name of encoding specified   */
Packit Service 8bf002
    int     alias           /* The number of alias to start searching   */
Packit Service 8bf002
)
Packit Service 8bf002
{
Packit Service 8bf002
    const char *    loc;
Packit Service 8bf002
    int             lo, al;
Packit Service 8bf002
Packit Service 8bf002
    for (lo = 0; lo < NUM_ENCODING; lo++) {
Packit Service 8bf002
        for (al = alias ; al < NUM_ALIAS; al++) {
Packit Service 8bf002
            loc = encoding_name[ lo][ al];
Packit Service 8bf002
            if (str_eq( loc, norm)) {
Packit Service 8bf002
                switch (lo) {
Packit Service 8bf002
                case 0  :   mbchar = 0;             break;
Packit Service 8bf002
                case 1  :   mbchar = EUC_JP;        break;
Packit Service 8bf002
                case 2  :   mbchar = GB2312;        break;
Packit Service 8bf002
                case 3  :   mbchar = KSC5601;       break;
Packit Service 8bf002
                case 4  :   mbchar = SJIS;          break;
Packit Service 8bf002
                case 5  :   mbchar = BIGFIVE;       break;
Packit Service 8bf002
                case 6  :   mbchar = ISO2022_JP;    break;
Packit Service 8bf002
                case 7  :   mbchar = UTF8;          break;
Packit Service 8bf002
                }
Packit Service 8bf002
                return  loc;
Packit Service 8bf002
            }
Packit Service 8bf002
        }
Packit Service 8bf002
    }
Packit Service 8bf002
    return  NULL;
Packit Service 8bf002
}
Packit Service 8bf002
Packit Service 8bf002
static void strip_bar(
    char *  string
)
/*
 * Strip '_', '-' or '.' in the string.
 * The string is compacted in place in a single pass: every character to be
 * kept is copied down to the write position, so nothing is moved twice.
 */
{
    const char *    src = string;       /* Next character to examine    */
    char *          dst = string;       /* Next position to write       */
    char            c;

    while ((c = *src++) != '\0') {
        if (c != '_' && c != '-' && c != '.')
            *dst++ = c;
    }
    *dst = '\0';
}
Packit Service 8bf002
Packit Service 8bf002
static void     conv_case(
Packit Service 8bf002
    char *  name,                       /* (diretory) Name          */
Packit Service 8bf002
    char *  lim,                        /* End of (directory) name  */
Packit Service 8bf002
    int     upper                       /* TRUE if to upper         */
Packit Service 8bf002
)
Packit Service 8bf002
/* Convert a string to upper-case letters or lower-case letters in-place    */
Packit Service 8bf002
{
Packit Service 8bf002
    int     c;
Packit Service 8bf002
    char *  sp;
Packit Service 8bf002
Packit Service 8bf002
    for (sp = name; sp < lim; sp++) {
Packit Service 8bf002
        c = *sp & UCHARMAX;
Packit Service 8bf002
#if MBCHAR
Packit Service 8bf002
        if ((char_type[ c] & mbstart)) {
Packit Service 8bf002
            char    tmp[ PATHMAX+1];
Packit Service 8bf002
            char *  tp = tmp;
Packit Service 8bf002
            *tp++ = *sp++;
Packit Service 8bf002
            mb_read( c, &sp, &tp);
Packit Service 8bf002
        } else
Packit Service 8bf002
#endif
Packit Service 8bf002
        {
Packit Service 8bf002
            if (upper)
Packit Service 8bf002
                *sp = toupper( c);
Packit Service 8bf002
            else
Packit Service 8bf002
                *sp = tolower( c);
Packit Service 8bf002
        }
Packit Service 8bf002
    }
Packit Service 8bf002
}
Packit Service 8bf002
Packit Service 8bf002
void    mb_init( void)
Packit Service 8bf002
/*
Packit Service 8bf002
 * Initialize multi-byte character settings.
Packit Service 8bf002
 * First called prior to setting the 'mcpp_mode'.
Packit Service 8bf002
 * Will be called again each time the multibyte character encoding is changed.
Packit Service 8bf002
 */
Packit Service 8bf002
{
Packit Service 8bf002
    /*
Packit Service 8bf002
     * Select the character classification table, select the multi-byte
Packit Service 8bf002
     * character reading routine and decide whether multi-byte character
Packit Service 8bf002
     * may contain the byte of value 0x5c.
Packit Service 8bf002
     */
Packit Service 8bf002
    switch (mbchar) {
Packit Service 8bf002
    case 0      :
Packit Service 8bf002
    case EUC_JP     :
Packit Service 8bf002
    case GB2312     :
Packit Service 8bf002
    case KSC5601    :
Packit Service 8bf002
        char_type = type_euc;
Packit Service 8bf002
        bsl_in_mbchar = FALSE;
Packit Service 8bf002
        mb_read = mb_read_2byte;
Packit Service 8bf002
        break;
Packit Service 8bf002
    case SJIS   :
Packit Service 8bf002
    case BIGFIVE    :
Packit Service 8bf002
        char_type = type_bsl;
Packit Service 8bf002
        bsl_in_mbchar = TRUE;
Packit Service 8bf002
        mb_read = mb_read_2byte;
Packit Service 8bf002
        break;
Packit Service 8bf002
    case ISO2022_JP :
Packit Service 8bf002
        char_type = type_iso2022_jp;
Packit Service 8bf002
        bsl_in_mbchar = TRUE;
Packit Service 8bf002
        mb_read = mb_read_iso2022_jp;
Packit Service 8bf002
        break;
Packit Service 8bf002
    case UTF8   :
Packit Service 8bf002
        char_type = type_utf8;
Packit Service 8bf002
        bsl_in_mbchar = FALSE;
Packit Service 8bf002
        mb_read = mb_read_utf8;
Packit Service 8bf002
        break;
Packit Service 8bf002
    }
Packit Service 8bf002
Packit Service 8bf002
    /* Set the bit patterns for character classification.   */
Packit Service 8bf002
    switch (mbchar) {
Packit Service 8bf002
    case 0      :
Packit Service 8bf002
        mbstart = 0;
Packit Service 8bf002
        break;
Packit Service 8bf002
    case EUC_JP :
Packit Service 8bf002
        mbstart = EJ1;
Packit Service 8bf002
        mb2 = EJ2;
Packit Service 8bf002
        break;
Packit Service 8bf002
    case GB2312 :
Packit Service 8bf002
        mbstart = GB1;
Packit Service 8bf002
        mb2 = GB2;
Packit Service 8bf002
        break;
Packit Service 8bf002
    case KSC5601:
Packit Service 8bf002
        mbstart = KS1;
Packit Service 8bf002
        mb2 = KS2;
Packit Service 8bf002
        break;
Packit Service 8bf002
    case SJIS   :
Packit Service 8bf002
        mbstart = SJ1;
Packit Service 8bf002
        mb2 = SJ2;
Packit Service 8bf002
        break;
Packit Service 8bf002
    case BIGFIVE:
Packit Service 8bf002
        mbstart = BF1;
Packit Service 8bf002
        mb2 = BF2;
Packit Service 8bf002
        break;
Packit Service 8bf002
    case ISO2022_JP :
Packit Service 8bf002
        mbstart = IS1;
Packit Service 8bf002
        break;
Packit Service 8bf002
    case UTF8   :
Packit Service 8bf002
        mbstart = (U2_1 | U3_1 | U4_1);
Packit Service 8bf002
        break;
Packit Service 8bf002
    }
Packit Service 8bf002
    switch (mbchar) {
Packit Service 8bf002
    case 0      :
Packit Service 8bf002
        mbchk = 0;
Packit Service 8bf002
        break;
Packit Service 8bf002
    case EUC_JP :
Packit Service 8bf002
    case GB2312 :
Packit Service 8bf002
    case KSC5601:
Packit Service 8bf002
    case SJIS   :
Packit Service 8bf002
    case BIGFIVE:
Packit Service 8bf002
    case UTF8   :
Packit Service 8bf002
        mbchk = NA;
Packit Service 8bf002
        break;
Packit Service 8bf002
    case ISO2022_JP :
Packit Service 8bf002
        mbchk = (IS1 | NA);
Packit Service 8bf002
        break;
Packit Service 8bf002
    }
Packit Service 8bf002
Packit Service 8bf002
    /*
Packit Service 8bf002
     * Set special handling for some encodings to supplement some compiler's
Packit Service 8bf002
     * deficiency.
Packit Service 8bf002
     */
Packit Service 8bf002
    switch (mbchar) {
Packit Service 8bf002
    case SJIS   :
Packit Service 8bf002
#if ! SJIS_IS_ESCAPE_FREE
Packit Service 8bf002
        bsl_need_escape = TRUE;
Packit Service 8bf002
#endif
Packit Service 8bf002
        break;
Packit Service 8bf002
    case BIGFIVE:
Packit Service 8bf002
#if ! BIGFIVE_IS_ESCAPE_FREE
Packit Service 8bf002
        bsl_need_escape = TRUE;
Packit Service 8bf002
#endif
Packit Service 8bf002
        break;
Packit Service 8bf002
    case ISO2022_JP :
Packit Service 8bf002
#if ! ISO2022_JP_IS_ESCAPE_FREE
Packit Service 8bf002
        bsl_need_escape = TRUE;
Packit Service 8bf002
#endif
Packit Service 8bf002
        break;
Packit Service 8bf002
    default :
Packit Service 8bf002
        bsl_need_escape = FALSE;
Packit Service 8bf002
        break;
Packit Service 8bf002
    }
Packit Service 8bf002
Packit Service 8bf002
    /*
Packit Service 8bf002
     * Modify magic characters in character type table.
Packit Service 8bf002
     * char_type[] table should be rewritten in accordance with the 'mcpp_mode'
Packit Service 8bf002
     * whenever the encoding is changed.
Packit Service 8bf002
     */
Packit Service 8bf002
    if (mcpp_mode) {                /* If mcpp_mode is already set  */
Packit Service 8bf002
        char_type[ DEF_MAGIC] = standard ? LET : 0;
Packit Service 8bf002
        char_type[ IN_SRC] = (mcpp_mode == STD) ? LET : 0;
Packit Service 8bf002
        char_type[ TOK_SEP] = (mcpp_mode == STD || mcpp_mode == OLD_PREP)
Packit Service 8bf002
                ? HSPA: 0;          /* TOK_SEP equals to COM_SEP    */
Packit Service 8bf002
    }
Packit Service 8bf002
}
Packit Service 8bf002
Packit Service 8bf002
static size_t   mb_read_2byte(
Packit Service 8bf002
    int     c1,         /* The 1st byte of mbchar sequence (already read)   */
Packit Service 8bf002
    char ** in_pp,              /* Pointer to input     */
Packit Service 8bf002
    char ** out_pp              /* Pointer to output    */
Packit Service 8bf002
)
Packit Service 8bf002
/*
Packit Service 8bf002
 * Multi-byte character reading routine for 2-byte encodings.
Packit Service 8bf002
 */
Packit Service 8bf002
{
Packit Service 8bf002
    int     error = FALSE;
Packit Service 8bf002
    size_t  len = 0;    /* Number of multi-byte characters read.    */
Packit Service 8bf002
    char *  in_p = *in_pp;
Packit Service 8bf002
    char *  out_p = *out_pp;
Packit Service 8bf002
Packit Service 8bf002
    if (! (char_type[ c1 & UCHARMAX] & mbstart))
Packit Service 8bf002
        return  MB_ERROR;           /* Not a multi-byte character   */
Packit Service 8bf002
Packit Service 8bf002
    do {
Packit Service 8bf002
        if (! (char_type[ (*out_p++ = *in_p++) & UCHARMAX] & mb2)) {
Packit Service 8bf002
            error = TRUE;
Packit Service 8bf002
            break;
Packit Service 8bf002
        }
Packit Service 8bf002
        len++;
Packit Service 8bf002
    } while (char_type[ (*out_p++ = *in_p++) & UCHARMAX] & mbstart);
Packit Service 8bf002
    *in_pp = --in_p;
Packit Service 8bf002
    *(--out_p) = EOS;
Packit Service 8bf002
    *out_pp = out_p;
Packit Service 8bf002
    return  error ? (len | MB_ERROR) : len;
Packit Service 8bf002
}
Packit Service 8bf002
Packit Service 8bf002
static size_t   mb_read_iso2022_jp(
Packit Service 8bf002
    int     c1, /* The 1st byte of the sequence already read (always 0x1b). */
Packit Service 8bf002
    char ** in_pp,
Packit Service 8bf002
    char ** out_pp
Packit Service 8bf002
)
Packit Service 8bf002
/*
Packit Service 8bf002
 * Multi-byte character reading routine for ISO2022_JP.
Packit Service 8bf002
 */
Packit Service 8bf002
{
Packit Service 8bf002
    int     error = FALSE;
Packit Service 8bf002
    size_t  len = 0;
Packit Service 8bf002
    char *  in_p = *in_pp;
Packit Service 8bf002
    char *  out_p = *out_pp;
Packit Service 8bf002
    int     c2, c3, c4;
Packit Service 8bf002
Packit Service 8bf002
    if (! (char_type[ c1 & UCHARMAX] & mbstart))
Packit Service 8bf002
        return  MB_ERROR;
Packit Service 8bf002
Packit Service 8bf002
    do {
Packit Service 8bf002
Packit Service 8bf002
        *out_p++ = c2 = *in_p++;
Packit Service 8bf002
        if (! (char_type[ c2 & UCHARMAX] & IS2)) {
Packit Service 8bf002
            error = TRUE;
Packit Service 8bf002
            break;
Packit Service 8bf002
        }
Packit Service 8bf002
        *out_p++ = c3 = *in_p++;
Packit Service 8bf002
        if (! (char_type[ c3 & UCHARMAX] & IS3)) {
Packit Service 8bf002
            error = TRUE;
Packit Service 8bf002
            break;
Packit Service 8bf002
        }
Packit Service 8bf002
Packit Service 8bf002
        switch (c2) {
Packit Service 8bf002
        case 0x24   :
Packit Service 8bf002
            switch (c3) {
Packit Service 8bf002
            case 0x42   :   /* 0x1b 0x24 0x42:  JIS X 0208-1983 */
Packit Service 8bf002
                break;
Packit Service 8bf002
            case 0x28   :
Packit Service 8bf002
                *out_p++ = c4 = *in_p++;
Packit Service 8bf002
                if (! (char_type[ c4 & UCHARMAX] & IS4))
Packit Service 8bf002
                    error = TRUE;
Packit Service 8bf002
                /* else:    0x1b 0x24 0x28 0x44:    JIS X 0212  */
Packit Service 8bf002
                break;
Packit Service 8bf002
            default :
Packit Service 8bf002
                error = TRUE;
Packit Service 8bf002
            }
Packit Service 8bf002
            break;
Packit Service 8bf002
        case 0x28   :
Packit Service 8bf002
            switch (c3) {
Packit Service 8bf002
            case 0x42   :   /* 0x1b 0x28 0x42:  ASCII   */
Packit Service 8bf002
                c1 = *out_p++ = *in_p++ & UCHARMAX;
Packit Service 8bf002
                continue;
Packit Service 8bf002
            default :
Packit Service 8bf002
                error = TRUE;
Packit Service 8bf002
            }
Packit Service 8bf002
            break;
Packit Service 8bf002
        }
Packit Service 8bf002
        if (error)
Packit Service 8bf002
            break;
Packit Service 8bf002
Packit Service 8bf002
        while (char_type[ c1 = *out_p++ = (*in_p++ & UCHARMAX)] & IJP) {
Packit Service 8bf002
            if (! (char_type[ *out_p++ = (*in_p++ & UCHARMAX)] & IJP)) {
Packit Service 8bf002
                error = TRUE;
Packit Service 8bf002
                break;
Packit Service 8bf002
            }
Packit Service 8bf002
            len++;          /* String of multi-byte characters  */
Packit Service 8bf002
        }
Packit Service 8bf002
        if (error)
Packit Service 8bf002
            break;
Packit Service 8bf002
Packit Service 8bf002
    } while (char_type[ c1] & IS1);     /* 0x1b:    start of shift-sequence */
Packit Service 8bf002
Packit Service 8bf002
    *in_pp = --in_p;
Packit Service 8bf002
    *(--out_p) = EOS;
Packit Service 8bf002
    *out_pp = out_p;
Packit Service 8bf002
    return  error ? (len | MB_ERROR) : len;
Packit Service 8bf002
}
Packit Service 8bf002
Packit Service 8bf002
static size_t   mb_read_utf8(
Packit Service 8bf002
    int     c1,
Packit Service 8bf002
    char ** in_pp,
Packit Service 8bf002
    char ** out_pp
Packit Service 8bf002
)
Packit Service 8bf002
/*
Packit Service 8bf002
 * Multi-byte character reading routine for UTF8.
Packit Service 8bf002
 */
Packit Service 8bf002
{
Packit Service 8bf002
    int     error = FALSE;
Packit Service 8bf002
    size_t  len = 0;
Packit Service 8bf002
    char *  in_p = *in_pp;
Packit Service 8bf002
    char *  out_p = *out_pp;
Packit Service 8bf002
Packit Service 8bf002
    if (! (char_type[ c1 & UCHARMAX] & mbstart))
Packit Service 8bf002
        return  MB_ERROR;
Packit Service 8bf002
Packit Service 8bf002
    do {
Packit Service 8bf002
        unsigned int    codepoint;
Packit Service 8bf002
        int             i, bytes;
Packit Service 8bf002
Packit Service 8bf002
        if ((char_type[ c1 & UCHARMAX] & U4_1) == U4_1)
Packit Service 8bf002
            bytes = 4;                          /* 4-byte character */
Packit Service 8bf002
        else if ((char_type[ c1 & UCHARMAX] & U3_1) == U3_1)
Packit Service 8bf002
            bytes = 3;                          /* 3-byte character */
Packit Service 8bf002
        else if ((char_type[ c1 & UCHARMAX] & U2_1) == U2_1)
Packit Service 8bf002
            bytes = 2;                          /* 2-byte character */
Packit Service 8bf002
Packit Service 8bf002
        /* Must ensure that the sequence is not reserved as a surrogate */
Packit Service 8bf002
        codepoint = ((2 << (6-bytes)) - 1) & c1;    /* mask off top bits    */
Packit Service 8bf002
Packit Service 8bf002
        /* All bytes left in the sequence must be in 0x80 - 0xBF    */
Packit Service 8bf002
        for (i = bytes - 1; i && !error; i--) {
Packit Service 8bf002
            codepoint = (codepoint << 6) + ((*in_p) & 0x3fU);
Packit Service 8bf002
            if (! (char_type[ (*out_p++ = *in_p++) & UCHARMAX] & UCONT))
Packit Service 8bf002
                error = TRUE;
Packit Service 8bf002
        }
Packit Service 8bf002
Packit Service 8bf002
        /* Check for overlong/underlong sequences */
Packit Service 8bf002
        if ((bytes == 2 && (codepoint < 0x80 || codepoint > 0x7FF))
Packit Service 8bf002
            || (bytes == 3 && (codepoint < 0x800 || codepoint > 0xFFFF))
Packit Service 8bf002
            || (bytes == 4 && (codepoint < 0x10000 || codepoint > 0x10FFFF)))
Packit Service 8bf002
            error = TRUE;
Packit Service 8bf002
        if ((codepoint >= 0xD800 && codepoint <= 0xDFFF)
Packit Service 8bf002
            /* Check for reserved surrogate codepoints */
Packit Service 8bf002
                || (codepoint >= 0xFFFE && codepoint <= 0xFFFF))
Packit Service 8bf002
                /* Illegal  */
Packit Service 8bf002
            error = TRUE;
Packit Service 8bf002
#if 0
Packit Service 8bf002
        printf( "codepoint:0x%x\n", codepoint);
Packit Service 8bf002
#endif
Packit Service 8bf002
        if (error)
Packit Service 8bf002
            break;
Packit Service 8bf002
        len++;
Packit Service 8bf002
    } while (char_type[ (*out_p++ = c1 = *in_p++) & UCHARMAX] & mbstart);
Packit Service 8bf002
                        /* Start of the next multi-byte character   */
Packit Service 8bf002
    *in_pp = --in_p;
Packit Service 8bf002
    *(--out_p) = EOS;
Packit Service 8bf002
    *out_pp = out_p;
Packit Service 8bf002
    return  error ? (len | MB_ERROR) : len;
Packit Service 8bf002
}
Packit Service 8bf002
Packit Service 8bf002
uexpr_t     mb_eval(
Packit Service 8bf002
    char ** seq_pp
Packit Service 8bf002
)
Packit Service 8bf002
/*
Packit Service 8bf002
 * Evaluate the value of a multi-byte character.
Packit Service 8bf002
 * This routine does not check the legality of the sequence.
Packit Service 8bf002
 * This routine is called from eval_char().
Packit Service 8bf002
 * This routine is never called in POST_STD mode.
Packit Service 8bf002
 */
Packit Service 8bf002
{
Packit Service 8bf002
    char *      seq = *seq_pp;
Packit Service 8bf002
    uexpr_t     val = 0;
Packit Service 8bf002
    int         c, c1;
Packit Service 8bf002
Packit Service 8bf002
    if (! (char_type[ c = *seq++ & UCHARMAX] & mbstart)) {
Packit Service 8bf002
        *seq_pp = seq;
Packit Service 8bf002
        return  c;                  /* Not a multi-byte character   */
Packit Service 8bf002
    }
Packit Service 8bf002
Packit Service 8bf002
    switch (mbchar) {
Packit Service 8bf002
    case EUC_JP :
Packit Service 8bf002
    case GB2312 :
Packit Service 8bf002
    case KSC5601:
Packit Service 8bf002
    case SJIS   :
Packit Service 8bf002
    case BIGFIVE:
Packit Service 8bf002
        val = (c << 8) + (*seq++ & UCHARMAX);
Packit Service 8bf002
        /* Evaluate the 2-byte sequence */
Packit Service 8bf002
        break;
Packit Service 8bf002
    case ISO2022_JP :
Packit Service 8bf002
        if (char_type[ c & UCHARMAX] & IS1) {   /* Skip shift-sequence  */
Packit Service 8bf002
            if (char_type[ c = *seq++ & UCHARMAX] & IS2) {
Packit Service 8bf002
                if (char_type[ c1 = *seq++ & UCHARMAX] & IS3) {
Packit Service 8bf002
                    if (c1 == 0x28)
Packit Service 8bf002
                        seq++;
Packit Service 8bf002
                    if (c == 0x28 && c1 == 0x42) {  /* Shift-out sequence   */
Packit Service 8bf002
                        val = 0;
Packit Service 8bf002
                        break;
Packit Service 8bf002
                    }
Packit Service 8bf002
                    c = *seq++ & UCHARMAX;
Packit Service 8bf002
                }
Packit Service 8bf002
            }
Packit Service 8bf002
        }
Packit Service 8bf002
        val = (c << 8) + (*seq++ & UCHARMAX);       /* Evaluate the 2-bytes */
Packit Service 8bf002
        break;
Packit Service 8bf002
    case UTF8   :   /* Evaluate the sequence of 2, 3 or 4 bytes as it is    */
Packit Service 8bf002
        val = (c << 8) + (*seq++ & UCHARMAX);
Packit Service 8bf002
        if (char_type[ c & UCHARMAX] & U3_1) {
Packit Service 8bf002
            val = (val << 8) + (*seq++ & UCHARMAX);
Packit Service 8bf002
        } else if (char_type[ c & UCHARMAX] & U4_1) {
Packit Service 8bf002
            val = (val << 8) + (*seq++ & UCHARMAX);
Packit Service 8bf002
            val = (val << 8) + (*seq++ & UCHARMAX);
Packit Service 8bf002
        }
Packit Service 8bf002
        break;
Packit Service 8bf002
    }
Packit Service 8bf002
Packit Service 8bf002
    *seq_pp = seq;
Packit Service 8bf002
    return  val;
Packit Service 8bf002
}
Packit Service 8bf002
Packit Service 8bf002
int  last_is_mbchar(
Packit Service 8bf002
    const char *  in,               /* Input physical line          */
Packit Service 8bf002
    int     len                     /* Length of the line minus 2   */
Packit Service 8bf002
)
Packit Service 8bf002
/*
Packit Service 8bf002
 * Return 2, if the last char of the line is second byte of SJIS or BIGFIVE,
Packit Service 8bf002
 * else return 0.
Packit Service 8bf002
 */
Packit Service 8bf002
{
Packit Service 8bf002
    const char *    cp = in + len;
Packit Service 8bf002
    const char * const      endp = in + len;    /* -> the char befor '\n'   */
Packit Service 8bf002
Packit Service 8bf002
    if ((mbchar & (SJIS | BIGFIVE)) == 0)
Packit Service 8bf002
        return  0;
Packit Service 8bf002
    while (in <= --cp) {                    /* Search backwardly    */
Packit Service 8bf002
        if ((char_type[ *cp & UCHARMAX] & mbstart) == 0)
Packit Service 8bf002
            break;                  /* Not the first byte of MBCHAR */
Packit Service 8bf002
    }
Packit Service 8bf002
    if ((endp - cp) & 1)
Packit Service 8bf002
        return  0;
Packit Service 8bf002
    else
Packit Service 8bf002
        return  2;
Packit Service 8bf002
}
Packit Service 8bf002