|
Packit Service |
a721b1 |
/* charset.h -- header file for the charset module.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H15PRO112

   This file is part of the m17n library.

   The m17n library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public License
   as published by the Free Software Foundation; either version 2.1 of
   the License, or (at your option) any later version.

   The m17n library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the m17n library; if not, write to the Free
   Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301 USA.  */
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
#ifndef _M17N_CHARSET_H_
|
|
Packit Service |
a721b1 |
#define _M17N_CHARSET_H_
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** @file charset.h
    @brief Header for charset handlers.
*/
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Kinds of charset encoding/decoding method.

    NOTE(review): the enumerator names parallel the method symbols
    listed for the `method' member of MCharset (Moffset, Mmap, ...),
    but this header does not show where the enumerators are used --
    confirm against the implementation before relying on that
    correspondence.  */

enum mcharset_method
  {
    MCHARSET_METHOD_OFFSET,
    MCHARSET_METHOD_MAP,
    MCHARSET_METHOD_DEFERRED,
    MCHARSET_METHOD_SUBSET,
    MCHARSET_METHOD_SUPERSET,
    /** Last enumerator; its value equals the number of enumerators
	that precede it.  */
    MCHARSET_METHOD_MAX
  };
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Structure for charset. */
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
typedef struct MCharset MCharset;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
struct MCharset
|
|
Packit Service |
a721b1 |
{
|
|
Packit Service |
a721b1 |
/** The value is always 0 because all charsets are static. */
|
|
Packit Service |
a721b1 |
unsigned ref_count;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Symbol indicating the name of the charset. */
|
|
Packit Service |
a721b1 |
MSymbol name;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Number of dimensions of the charset. It must be 1, 2, 3, or
|
|
Packit Service |
a721b1 |
4. */
|
|
Packit Service |
a721b1 |
int dimension;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Byte code range of each dimension. <code_range>[4N] is a
|
|
Packit Service |
a721b1 |
minimum byte code of the (N+1)th dimension, <code_range>[4N+1]
|
|
Packit Service |
a721b1 |
is a maximum byte code of the (N+1)th dimension,
|
|
Packit Service |
a721b1 |
<code_range>[4N+2] is (<code_range>[4N+1] - <code_range>[4N] +
|
|
Packit Service |
a721b1 |
1), <code_range>[4N+3] is a number of characters contained in the
|
|
Packit Service |
a721b1 |
first to (N+1)th dimensions. We get "char-index" of a
|
|
Packit Service |
a721b1 |
"code-point" from this information. */
|
|
Packit Service |
a721b1 |
int code_range[16];
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** The minimum code-point calculated from <code_range>. It may be
|
|
Packit Service |
a721b1 |
smaller than <min_code>. */
|
|
Packit Service |
a721b1 |
int code_range_min_code;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Nonzero means there is no gap in code points of the charset. If
|
|
Packit Service |
a721b1 |
<dimension> is 1, <no_code_gap> is always 1. Otherwise,
|
|
Packit Service |
a721b1 |
<no_code_gap> is 1 iff <code_range>[4N] is zero and
|
|
Packit Service |
a721b1 |
<code_range>[4N+1] is 256 for N = 0..<dimension>-2. If
|
|
Packit Service |
a721b1 |
<no_code_gap> is nonzero, "char-index" is "code-point" -
|
|
Packit Service |
a721b1 |
<min_code>. */
|
|
Packit Service |
a721b1 |
int no_code_gap;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** If the byte code B is valid in the (N+1)th dimension,
|
|
Packit Service |
a721b1 |
(<code_range_mask>[B] & (1 << N)) is 1. Otherwise,
|
|
Packit Service |
a721b1 |
(<code_range_mask>[B] & (1 << N)) is 0. */
|
|
Packit Service |
a721b1 |
unsigned char code_range_mask[256];
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Minimum and maximum code-point of the charset. */
|
|
Packit Service |
a721b1 |
unsigned min_code, max_code;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Nonzero means the charset encodes ASCII characters as is. */
|
|
Packit Service |
a721b1 |
int ascii_compatible;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Minimum and maximum character of the charset. If
|
|
Packit Service |
a721b1 |
<ascii_compatible> is nonzero, <min_char> is actually the
|
|
Packit Service |
a721b1 |
minimum non-ASCII character of the charset. */
|
|
Packit Service |
a721b1 |
int min_char, max_char;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** ISO 2022 final byte of the charset. It must be in the range
|
|
Packit Service |
a721b1 |
48..127, or -1. The value -1 means that the charset is not
|
|
Packit Service |
a721b1 |
encodable by ISO 2022 based coding systems. */
|
|
Packit Service |
a721b1 |
int final_byte;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** ISO 2022 revision number of the charset, or -1. The value -1
|
|
Packit Service |
a721b1 |
means that the charset has no revision number. Used only when
|
|
Packit Service |
a721b1 |
<final_byte> is not -1. */
|
|
Packit Service |
a721b1 |
int revision;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Specify how to encode/decode code-point of the charset. It must
|
|
Packit Service |
a721b1 |
be Moffset, Mmap, Munify, Msubset, or Msuperset. */
|
|
Packit Service |
a721b1 |
MSymbol method;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Array of integers to decode a code-point of the charset. It is
|
|
Packit Service |
a721b1 |
indexed by a "char-index" of the code-point, and the
|
|
Packit Service |
a721b1 |
corresponding element is a character of the charset, or -1 if
|
|
Packit Service |
a721b1 |
the code point is not valid in the charset. Used only when
|
|
Packit Service |
a721b1 |
<method> is Mmap or Munify. */
|
|
Packit Service |
a721b1 |
int *decoder;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Char-table to encode a character of the charset. It is indexed
|
|
Packit Service |
a721b1 |
by a character code, and the corresponding element is a code
|
|
Packit Service |
a721b1 |
point of the character in the charset, or
|
|
Packit Service |
a721b1 |
MCHAR_INVALID_CODE if the character is not included in the
|
|
Packit Service |
a721b1 |
charset. Used only when <method> is Mmap or Munify. */
|
|
Packit Service |
a721b1 |
MCharTable *encoder;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
int unified_max;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Array of pointers to parent charsets. Used only when <method>
|
|
Packit Service |
a721b1 |
is Msubset or Msuperset. Atmost 8 parents are supported. */
|
|
Packit Service |
a721b1 |
MCharset *parents[8];
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/* Number of parent charsets. */
|
|
Packit Service |
a721b1 |
int nparents;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
unsigned subset_min_code, subset_max_code;
|
|
Packit Service |
a721b1 |
int subset_offset;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
int simple;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** If the charset is fully loaded (i.e. all the above member are
|
|
Packit Service |
a721b1 |
set to correct values), the value is 1. Otherwise, the value is
|
|
Packit Service |
a721b1 |
0. */
|
|
Packit Service |
a721b1 |
int fully_loaded;
|
|
Packit Service |
a721b1 |
};
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Plist used by the MCHARSET macro as a one-entry cache: the macro
    stores the most recently requested charset symbol in its key
    (MPLIST_KEY) and the MCharset pointer obtained for that symbol in
    its value (MPLIST_VAL).  */
extern MPlist *mcharset__cache;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Return a charset associated with the symbol CHARSET_SYM.

    The key and value of mcharset__cache act as a one-entry cache.  If
    the cached key already equals CHARSET_SYM, the cached value is
    returned as is.  Otherwise the key is set to CHARSET_SYM and the
    value to msymbol_get (CHARSET_SYM, Mcharset); if that value is
    null, the result of mcharset__find (CHARSET_SYM) is returned
    instead.

    CHARSET_SYM is evaluated more than once, so it must not have side
    effects.  */

#define MCHARSET(charset_sym) \
  (((charset_sym) == MPLIST_KEY (mcharset__cache) \
    || (MPLIST_KEY (mcharset__cache) = (charset_sym), \
        MPLIST_VAL (mcharset__cache) \
        = (MCharset *) msymbol_get ((charset_sym), Mcharset))) \
   ? MPLIST_VAL (mcharset__cache) \
   : mcharset__find (charset_sym))
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Return index ("char-index") of a character whose code-point in
    CHARSET is CODE.  If CODE is not valid, return -1.

    If CHARSET->no_code_gap is nonzero, no validity check is made and
    the result is simply CODE - CHARSET->min_code.  Otherwise each of
    the four bytes of CODE is checked against CHARSET->code_range_mask
    (bit 0x1 for the lowest byte up to bit 0x8 for the highest byte)
    and the index is computed from CHARSET->code_range.

    CODE is evaluated several times and its highest byte is taken as
    CODE >> 24 without masking, so CODE should be a side-effect-free
    unsigned 32-bit value.  Because CHARSET->min_code is unsigned, the
    whole expression has an unsigned type and the "invalid" result is
    (unsigned) -1; store the result in an int before testing it
    against -1 or 0.

    NOTE(review): the last term subtracts a difference of code-points
    (min_code - code_range_min_code) from a char-index; that looks
    exact only when both code-points lie in the same row -- confirm
    against the code that sets code_range_min_code.  */

#define CODE_POINT_TO_INDEX(charset, code) \
  ((charset)->no_code_gap \
   ? (code) - (charset)->min_code \
   : ((((charset)->code_range_mask[(code) >> 24] & 0x8) \
       && ((charset)->code_range_mask[((code) >> 16) & 0xFF] & 0x4) \
       && ((charset)->code_range_mask[((code) >> 8) & 0xFF] & 0x2) \
       && ((charset)->code_range_mask[(code) & 0xFF] & 0x1)) \
      ? (((((code) >> 24) - (charset)->code_range[12]) \
          * (charset)->code_range[11]) \
         + (((((code) >> 16) & 0xFF) - (charset)->code_range[8]) \
            * (charset)->code_range[7]) \
         + (((((code) >> 8) & 0xFF) - (charset)->code_range[4]) \
            * (charset)->code_range[3]) \
         + (((code) & 0xFF) - (charset)->code_range[0]) \
         - ((charset)->min_code - (charset)->code_range_min_code)) \
      : -1))
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Return code-point of a character whose index ("char-index") in
    CHARSET is IDX.  The validness of IDX is not checked.

    If CHARSET->no_code_gap is nonzero, the result is
    IDX + CHARSET->min_code and IDX is left untouched.  Otherwise IDX
    is first incremented by (min_code - code_range_min_code) and the
    code-point is then assembled byte by byte from
    CHARSET->code_range.  IDX therefore must be a modifiable lvalue,
    and its value may have changed after the macro is evaluated.  IDX
    is evaluated several times.  */

#define INDEX_TO_CODE_POINT(charset, idx) \
  ((charset)->no_code_gap \
   ? (idx) + (charset)->min_code \
   : ((idx) += (charset)->min_code - (charset)->code_range_min_code, \
      (((charset)->code_range[0] + (idx) % (charset)->code_range[2]) \
       | (((charset)->code_range[4] \
           + ((idx) / (charset)->code_range[3] % (charset)->code_range[6])) \
          << 8) \
       | (((charset)->code_range[8] \
           + ((idx) / (charset)->code_range[7] % (charset)->code_range[10])) \
          << 16) \
       | (((charset)->code_range[12] + ((idx) / (charset)->code_range[11])) \
          << 24))))
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Return a character whose code-point in CHARSET is CODE.  If CODE
    is invalid, return -1.

    The cases are tried in this order:
    - CODE < 128 and CHARSET->ascii_compatible: CODE itself;
    - CODE outside CHARSET->min_code..CHARSET->max_code: -1;
    - CHARSET->simple is zero: mcharset__decode_char (CHARSET, CODE);
    - CHARSET->method is Moffset:
      CODE - CHARSET->min_code + CHARSET->min_char;
    - otherwise: CHARSET->decoder[CODE - CHARSET->min_code].

    The Moffset result is cast to int so that every branch, and hence
    the whole expression, has type int; without the cast the unsigned
    arithmetic would make the expression unsigned and a test such as
    DECODE_CHAR (...) < 0 could never be true.

    CHARSET and CODE are evaluated several times.  */

#define DECODE_CHAR(charset, code) \
  (((code) < 128 && (charset)->ascii_compatible) \
   ? (int) (code) \
   : ((code) < (charset)->min_code || (code) > (charset)->max_code) \
   ? -1 \
   : ! (charset)->simple \
   ? mcharset__decode_char ((charset), (code)) \
   : (charset)->method == Moffset \
   ? (int) ((code) - (charset)->min_code + (charset)->min_char) \
   : (charset)->decoder[(code) - (charset)->min_code])
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Return a code-point in CHARSET for character C.  If CHARSET
    does not contain C, return MCHAR_INVALID_CODE.

    The cases are tried in this order:
    - CHARSET->simple is zero: mcharset__encode_char (CHARSET, C);
    - C outside CHARSET->min_char..CHARSET->max_char:
      MCHAR_INVALID_CODE;
    - CHARSET->method is Moffset:
      C - CHARSET->min_char + CHARSET->min_code;
    - otherwise: the value found for C in the char-table
      CHARSET->encoder, converted to unsigned.

    CHARSET and C are evaluated several times.  */

#define ENCODE_CHAR(charset, c) \
  (! (charset)->simple \
   ? mcharset__encode_char ((charset), (c)) \
   : ((c) < (charset)->min_char || (c) > (charset)->max_char) \
   ? MCHAR_INVALID_CODE \
   : (charset)->method == Moffset \
   ? (c) - (charset)->min_char + (charset)->min_code \
   : (unsigned) mchartable_lookup ((charset)->encoder, (c)))
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Pointers to predefined charsets; the objects are defined outside
    this header.
    NOTE(review): judging by the names these are presumably the
    ASCII, binary, m17n and Unicode charsets -- confirm against the
    defining source file.  */
extern MCharset *mcharset__ascii;
extern MCharset *mcharset__binary;
extern MCharset *mcharset__m17n;
extern MCharset *mcharset__unicode;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Sizes of the three axes of MCharsetISO2022Table::classified.  */

/** Size of the "dimension" axis; it is indexed by (dimension - 1).  */
#define ISO_MAX_DIMENSION 3

/** Size of the "chars" axis; it is indexed by (chars == 96), i.e. by
    0 or 1.  */
#define ISO_MAX_CHARS 2

/** Size of the "final byte" axis.  Valid indices are 0x00..0x7F; only
    0x30..0x7F are used.  */
#define ISO_MAX_FINAL 0x80
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
typedef struct
|
|
Packit Service |
a721b1 |
{
|
|
Packit Service |
a721b1 |
/* Table of ISO-2022 charsets. */
|
|
Packit Service |
a721b1 |
int size, inc, used;
|
|
Packit Service |
a721b1 |
MCharset **charsets;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** A 3-dimensional table indexed by "dimension", "chars", and
|
|
Packit Service |
a721b1 |
"final byte" of an ISO-2022 charset to get the correponding
|
|
Packit Service |
a721b1 |
charset. A charset that has a revision number is not stored in
|
|
Packit Service |
a721b1 |
this table. */
|
|
Packit Service |
a721b1 |
MCharset *classified[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
|
|
Packit Service |
a721b1 |
} MCharsetISO2022Table;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** The ISO-2022 charset table whose <classified> member is accessed
    by the MCHARSET_ISO_2022 macro.  Defined outside this header.  */
extern MCharsetISO2022Table mcharset__iso_2022_table;
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Element of mcharset__iso_2022_table.classified for an ISO-2022
    charset of dimension DIM, CHARS characters per dimension, and final
    byte FINAL.  The expansion is an lvalue, so it can be both read
    and assigned to.

    The first index is DIM - 1, the second is 1 when CHARS is 96 and 0
    otherwise, and the third is FINAL.  No range check is made: DIM
    must be in 1..ISO_MAX_DIMENSION and FINAL in
    0..ISO_MAX_FINAL - 1.  */

#define MCHARSET_ISO_2022(dim, chars, final) \
  (mcharset__iso_2022_table.classified[(dim) - 1][(chars) == 96][(final)])
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
/** Return the charset named NAME.  The MCHARSET macro falls back to
    this function when its cache lookup yields no charset.  */
extern MCharset *mcharset__find (MSymbol name);

/** Return the character whose code-point in CHARSET is CODE.
    DECODE_CHAR calls this function when CHARSET->simple is zero.  */
extern int mcharset__decode_char (MCharset *charset, unsigned code);

/** Return the code-point in CHARSET for character C.  ENCODE_CHAR
    calls this function when CHARSET->simple is zero.  */
extern unsigned mcharset__encode_char (MCharset *charset, int c);

/** NOTE(review): judging by the name, this presumably loads charset
    definitions from the m17n database -- confirm against the
    definition.  The empty parameter list is an old-style declarator
    that does not check arguments; if the definition takes no
    parameters, it should become `(void)'.  */
extern int mcharset__load_from_database ();
|
|
Packit Service |
a721b1 |
|
|
Packit Service |
a721b1 |
#endif /* _M17N_CHARSET_H_ */
|