/* charset.h -- header file for the charset module.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H15PRO112

   This file is part of the m17n library.

   The m17n library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public License
   as published by the Free Software Foundation; either version 2.1 of
   the License, or (at your option) any later version.

   The m17n library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the m17n library; if not, write to the Free
   Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301 USA.  */

/* Include guard; it is closed by the #endif on the last line of this
   file.  */
#ifndef _M17N_CHARSET_H_
#define _M17N_CHARSET_H_

/** @file charset.h
    @brief Header for charset handlers.
*/

/** Enumeration of charset methods.  The enumerators take the
    consecutive values 0..4 in declaration order, and
    MCHARSET_METHOD_MAX (5) follows the last one, so it equals the
    number of methods listed here.

    NOTE(review): struct MCharset in this header records its method
    as an MSymbol, not as a value of this enum; where the enum is
    consumed is not visible from this file.  */

enum mcharset_method
  {
    MCHARSET_METHOD_OFFSET,
    MCHARSET_METHOD_MAP,
    MCHARSET_METHOD_DEFERRED,
    MCHARSET_METHOD_SUBSET,
    MCHARSET_METHOD_SUPERSET,
    MCHARSET_METHOD_MAX
  };

/** Structure for charset.  */
Packit Service a721b1
Packit Service a721b1
typedef struct MCharset MCharset;
Packit Service a721b1
Packit Service a721b1
struct MCharset
Packit Service a721b1
{
Packit Service a721b1
  /** The value is always 0 because all charsets are static.  */
Packit Service a721b1
  unsigned ref_count;
Packit Service a721b1
Packit Service a721b1
  /** Symbol indicating the name of the charset.  */
Packit Service a721b1
  MSymbol name;
Packit Service a721b1
Packit Service a721b1
  /** Number of dimensions of the charset.  It must be 1, 2, 3, or
Packit Service a721b1
      4.  */
Packit Service a721b1
  int dimension;
Packit Service a721b1
Packit Service a721b1
  /** Byte code range of each dimension.  <code_range>[4N] is a
Packit Service a721b1
      minimum byte code of the (N+1)th dimension, <code_range>[4N+1]
Packit Service a721b1
      is a maximum byte code of the (N+1)th dimension,
Packit Service a721b1
      <code_range>[4N+2] is (<code_range>[4N+1] - <code_range>[4N] +
Packit Service a721b1
      1), <code_range>[4N+3] is a number of characters contained in the
Packit Service a721b1
      first to (N+1)th dimensions.  We get "char-index" of a
Packit Service a721b1
      "code-point" from this information.  */
Packit Service a721b1
  int code_range[16];
Packit Service a721b1
Packit Service a721b1
  /** The minimum code-point calculated from <code_range>.  It may be
Packit Service a721b1
      smaller than <min_code>.  */
Packit Service a721b1
  int code_range_min_code;
Packit Service a721b1
Packit Service a721b1
  /** Nonzero means there is no gap in code points of the charset.  If
Packit Service a721b1
      <dimension> is 1, <no_code_gap> is always 1.  Otherwise,
Packit Service a721b1
      <no_code_gap> is 1 iff <code_range>[4N] is zero and
Packit Service a721b1
      <code_range>[4N+1] is 256 for N = 0..<dimension>-2.  If
Packit Service a721b1
      <no_code_gap> is nonzero, "char-index" is "code-point" -
Packit Service a721b1
      <min_code>.  */
Packit Service a721b1
  int no_code_gap;
Packit Service a721b1
Packit Service a721b1
  /** If the byte code B is valid in the (N+1)th dimension,
Packit Service a721b1
      (<code_range_mask>[B] & (1 << N)) is 1.  Otherwise,
Packit Service a721b1
      (<code_range_mask>[B] & (1 << N)) is 0.  */
Packit Service a721b1
  unsigned char code_range_mask[256];
Packit Service a721b1
Packit Service a721b1
  /** Minimum and maximum code-point of the charset.  */
Packit Service a721b1
  unsigned min_code, max_code;
Packit Service a721b1
Packit Service a721b1
  /** Nonzero means the charset encodes ASCII characters as is.  */
Packit Service a721b1
  int ascii_compatible;
Packit Service a721b1
Packit Service a721b1
  /** Minimum and maximum character of the charset.  If
Packit Service a721b1
      <ascii_compatible> is nonzero, <min_char> is actually the
Packit Service a721b1
      minimum non-ASCII character of the charset.  */
Packit Service a721b1
  int min_char, max_char;
Packit Service a721b1
Packit Service a721b1
  /** ISO 2022 final byte of the charset.  It must be in the range
Packit Service a721b1
      48..127, or -1.  The value -1 means that the charset is not
Packit Service a721b1
      encodable by ISO 2022 based coding systems.  */
Packit Service a721b1
  int final_byte;
Packit Service a721b1
Packit Service a721b1
  /** ISO 2022 revision number of the charset, or -1.  The value -1
Packit Service a721b1
      means that the charset has no revision number.  Used only when
Packit Service a721b1
      <final_byte> is not -1.  */
Packit Service a721b1
  int revision;
Packit Service a721b1
Packit Service a721b1
  /** Specify how to encode/decode code-point of the charset.  It must
Packit Service a721b1
      be Moffset, Mmap, Munify, Msubset, or Msuperset.  */
Packit Service a721b1
  MSymbol method;
Packit Service a721b1
Packit Service a721b1
  /** Array of integers to decode a code-point of the charset.  It is
Packit Service a721b1
      indexed by a "char-index" of the code-point, and the
Packit Service a721b1
      corresponding element is a character of the charset, or -1 if
Packit Service a721b1
      the code point is not valid in the charset.  Used only when
Packit Service a721b1
      <method> is Mmap or Munify.  */
Packit Service a721b1
  int *decoder;
Packit Service a721b1
Packit Service a721b1
  /** Char-table to encode a character of the charset.  It is indexed
Packit Service a721b1
      by a character code, and the corresponding element is a code
Packit Service a721b1
      point of the character in the charset, or
Packit Service a721b1
      MCHAR_INVALID_CODE if the character is not included in the
Packit Service a721b1
      charset.  Used only when <method> is Mmap or Munify.  */
Packit Service a721b1
  MCharTable *encoder;
Packit Service a721b1
Packit Service a721b1
  int unified_max;
Packit Service a721b1
Packit Service a721b1
  /** Array of pointers to parent charsets.  Used only when <method>
Packit Service a721b1
      is Msubset or Msuperset.  Atmost 8 parents are supported.  */
Packit Service a721b1
  MCharset *parents[8];
Packit Service a721b1
Packit Service a721b1
  /* Number of parent charsets.  */
Packit Service a721b1
  int nparents;
Packit Service a721b1
Packit Service a721b1
  unsigned subset_min_code, subset_max_code;
Packit Service a721b1
  int subset_offset;
Packit Service a721b1
Packit Service a721b1
  int simple;
Packit Service a721b1
Packit Service a721b1
  /** If the charset is fully loaded (i.e. all the above member are
Packit Service a721b1
      set to correct values), the value is 1.  Otherwise, the value is
Packit Service a721b1
      0.  */
Packit Service a721b1
  int fully_loaded;
Packit Service a721b1
};
Packit Service a721b1
Packit Service a721b1
/** Plist whose key and value are used by the MCHARSET macro as a
    cache: the key holds the charset symbol looked up most recently
    and the value holds the charset stored for it.  Defined outside
    this header.  */
extern MPlist *mcharset__cache;

/** Return a charset associated with the symbol CHARSET_SYM.

    The key and value of mcharset__cache serve as a one-entry cache.
    If CHARSET_SYM equals the cached key, the cached value is returned
    without any lookup.  Otherwise the key is overwritten with
    CHARSET_SYM and the value with the Mcharset property of
    CHARSET_SYM; if that property is non-null it is returned, and if
    it is null the result of mcharset__find (CHARSET_SYM) is returned
    instead.

    CHARSET_SYM is evaluated more than once, so it must not have side
    effects.

    NOTE(review): on the fallback path the cache already holds
    CHARSET_SYM with a null value when mcharset__find is called;
    presumably mcharset__find repairs the cache -- confirm there.  */

#define MCHARSET(charset_sym)                                   \
  (((charset_sym) == MPLIST_KEY (mcharset__cache)               \
    || (MPLIST_KEY (mcharset__cache) = (charset_sym),           \
        MPLIST_VAL (mcharset__cache)                            \
        = (MCharset *) msymbol_get ((charset_sym), Mcharset)))  \
   ? MPLIST_VAL (mcharset__cache)                               \
   : mcharset__find (charset_sym))

/** Return index of a character whose code-point in CHARSET is CODE.

    If CHARSET has no code gap (<no_code_gap> is nonzero), the result
    is simply CODE minus <min_code>; CODE is not validated in that
    case.  Otherwise each of the four bytes of CODE is checked against
    <code_range_mask> (bit 0x1 for the lowest byte up to bit 0x8 for
    the highest), and -1 is returned if any byte is not valid for its
    dimension.

    Both arguments are evaluated more than once.  Because <min_code>
    is unsigned, the expression has an unsigned type; assign or cast
    the result to int before comparing it with -1.

    NOTE(review): (CODE >> 24) is used directly as an index into the
    256-element mask table, so CODE is presumably always an unsigned
    32-bit value -- confirm at the call sites.  */

#define CODE_POINT_TO_INDEX(charset, code)                              \
  ((charset)->no_code_gap                                               \
   ? (code) - (charset)->min_code                                       \
   : (((charset)->code_range_mask[(code) >> 24] & 0x8)                  \
      && ((charset)->code_range_mask[((code) >> 16) & 0xFF] & 0x4)      \
      && ((charset)->code_range_mask[((code) >> 8) & 0xFF] & 0x2)       \
      && ((charset)->code_range_mask[(code) & 0xFF] & 0x1))             \
   ? (((((code) >> 24) - (charset)->code_range[12])                     \
       * (charset)->code_range[11])                                     \
      + (((((code) >> 16) & 0xFF) - (charset)->code_range[8])           \
         * (charset)->code_range[7])                                    \
      + (((((code) >> 8) & 0xFF) - (charset)->code_range[4])            \
         * (charset)->code_range[3])                                    \
      + (((code) & 0xFF) - (charset)->code_range[0])                    \
      - ((charset)->min_code - (charset)->code_range_min_code))         \
   : -1)

/** Return code-point of a character whose index is IDX.

    The validity of IDX is not checked.  IDX must be a modifiable
    lvalue: when CHARSET has a code gap, IDX is first increased by
    (<min_code> - <code_range_min_code>) and then split into one byte
    per dimension using <code_range>.  When CHARSET has no code gap
    the result is IDX + <min_code> and IDX is left unchanged.

    Both arguments are evaluated more than once.  The byte of the
    fourth dimension is converted to unsigned before being shifted by
    24 so that a byte value of 0x80 or more does not overflow a signed
    int.  */

#define INDEX_TO_CODE_POINT(charset, idx)                                    \
  ((charset)->no_code_gap                                                    \
   ? (idx) + (charset)->min_code                                             \
   : ((idx) += (charset)->min_code - (charset)->code_range_min_code,         \
      (((charset)->code_range[0] + (idx) % (charset)->code_range[2])         \
       | (((charset)->code_range[4]                                          \
           + ((idx) / (charset)->code_range[3] % (charset)->code_range[6]))  \
          << 8)                                                              \
       | (((charset)->code_range[8]                                          \
           + ((idx) / (charset)->code_range[7] % (charset)->code_range[10])) \
          << 16)                                                             \
       | ((unsigned) ((charset)->code_range[12]                              \
                      + ((idx) / (charset)->code_range[11]))                 \
          << 24))))

/** Return a character whose code-point in CHARSET is CODE.  If CODE
    is invalid, return -1.

    The cases are tried in this order:
    - CODE is below 128 and CHARSET is ASCII compatible: CODE itself;
    - CODE is outside <min_code>..<max_code>: -1;
    - CHARSET is not <simple>: the result of mcharset__decode_char;
    - <method> is Moffset: CODE - <min_code> + <min_char>;
    - otherwise: <decoder>[CODE - <min_code>].

    Both arguments are evaluated more than once.  Because <min_code>
    is unsigned, the expression has an unsigned type; assign or cast
    the result to int before comparing it with -1.  */

#define DECODE_CHAR(charset, code)                                      \
  (((code) < 128 && (charset)->ascii_compatible)                        \
   ? (int) (code)                                                       \
   : ((code) < (charset)->min_code || (code) > (charset)->max_code)     \
   ? -1                                                                 \
   : ! (charset)->simple                                                \
   ? mcharset__decode_char ((charset), (code))                          \
   : (charset)->method == Moffset                                       \
   ? (code) - (charset)->min_code + (charset)->min_char                 \
   : (charset)->decoder[(code) - (charset)->min_code])

/** Return a code-point in CHARSET for character C.  If CHARSET
    does not contain C, return MCHAR_INVALID_CODE.

    The cases are tried in this order:
    - CHARSET is not <simple>: the result of mcharset__encode_char
      (no range check is done by the macro in this case);
    - C is outside <min_char>..<max_char>: MCHAR_INVALID_CODE;
    - <method> is Moffset: C - <min_char> + <min_code>;
    - otherwise: the <encoder> char-table entry for C, converted to
      unsigned.

    Both arguments are evaluated more than once.  */

#define ENCODE_CHAR(charset, c)                                 \
  (! (charset)->simple                                          \
   ? mcharset__encode_char ((charset), (c))                     \
   : ((c) < (charset)->min_char || (c) > (charset)->max_char)   \
   ? MCHAR_INVALID_CODE                                         \
   : (charset)->method == Moffset                               \
   ? (c) - (charset)->min_char + (charset)->min_code            \
   : (unsigned) mchartable_lookup ((charset)->encoder, (c)))

/** Pointers to charset objects that are defined outside this header.
    NOTE(review): judging by the names these are the predefined ASCII,
    binary, m17n and Unicode charsets -- confirm where they are
    initialized.  */
extern MCharset *mcharset__ascii;
extern MCharset *mcharset__binary;
extern MCharset *mcharset__m17n;
extern MCharset *mcharset__unicode;

/* Sizes of the three indices of the <classified> table in
   MCharsetISO2022Table.  MCHARSET_ISO_2022 indexes the table with
   (dimension - 1), with ((chars) == 96) -- i.e. 0 or 1 -- and with
   the final byte as is.  */
#define ISO_MAX_DIMENSION 3
#define ISO_MAX_CHARS 2
#define ISO_MAX_FINAL 0x80	/* only 0x30..0x7F are used */

typedef struct
Packit Service a721b1
{
Packit Service a721b1
  /* Table of ISO-2022 charsets.  */
Packit Service a721b1
  int size, inc, used;
Packit Service a721b1
  MCharset **charsets;
Packit Service a721b1
Packit Service a721b1
  /** A 3-dimensional table indexed by "dimension", "chars", and
Packit Service a721b1
      "final byte" of an ISO-2022 charset to get the correponding
Packit Service a721b1
      charset.  A charset that has a revision number is not stored in
Packit Service a721b1
      this table.  */
Packit Service a721b1
  MCharset *classified[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
Packit Service a721b1
} MCharsetISO2022Table;
Packit Service a721b1
Packit Service a721b1
/** The ISO-2022 charset table; defined outside this header.  */
extern MCharsetISO2022Table mcharset__iso_2022_table;

/** Expand to the <classified> slot of mcharset__iso_2022_table for an
    ISO-2022 charset.  DIM is the 1-based dimension (the first index
    is DIM - 1), CHARS selects the second index (1 if CHARS is 96,
    otherwise 0), and FINAL is used as the third index as is.  The
    expansion is an lvalue; none of the indices is range-checked.  */
#define MCHARSET_ISO_2022(dim, chars, final) \
  mcharset__iso_2022_table.classified[(dim) - 1][(chars) == 96][(final)]

/** Find the charset named NAME.  The MCHARSET macro calls this when
    the Mcharset property of the symbol is null.  */
extern MCharset *mcharset__find (MSymbol name);

/** Decode CODE in CHARSET into a character.  DECODE_CHAR calls this
    for a charset whose <simple> member is zero.  */
extern int mcharset__decode_char (MCharset *charset, unsigned code);

/** Encode character C into a code-point of CHARSET.  ENCODE_CHAR
    calls this for a charset whose <simple> member is zero.  */
extern unsigned mcharset__encode_char (MCharset *charset, int c);

/** NOTE(review): behavior and meaning of the return value are not
    visible from this header.  Declared with an explicit (void)
    parameter list so that the declaration is a real prototype.  */
extern int mcharset__load_from_database (void);

#endif /* _M17N_CHARSET_H_ */