/*
* Copyright 1992, 1993 by TOSHIBA Corp.
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose and without fee is hereby granted, provided
* that the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of TOSHIBA not be used in advertising
* or publicity pertaining to distribution of the software without specific,
* written prior permission. TOSHIBA make no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* TOSHIBA DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
* TOSHIBA BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
* ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Author: Katsuhisa Yano TOSHIBA Corp.
* mopi@osa.ilab.toshiba.co.jp
*/
/*
* Copyright 1995 by FUJITSU LIMITED
* This is source code modified by FUJITSU LIMITED under the Joint
* Development Agreement for the CDE/Motif PST.
*
* Modifier: Takanori Tateno FUJITSU LIMITED
*
*/
/*
* 2000
* Modifier: Ivan Pascal The XFree86 Project
* Modifier: Bruno Haible The XFree86 Project
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "Xlibint.h"
#include "XlcPubI.h"
#include <X11/Xos.h>
#include <stdio.h>
/* ====================== Built-in Character Sets ====================== */
/*
* Static representation of a character set that can be used in Compound Text.
*/
typedef struct _CTDataRec {
/* X11 charset name including its side suffix (e.g. "ISO8859-1:GL").
   The longest built-in names have 18 characters, hence 19 bytes with the
   terminating NUL. */
const char name[19];
const char ct_sequence[5]; /* Compound Text encoding, ESC sequence */
} CTDataRec, *CTData;
/*
* Built-in table of charsets and their Compound Text designators.
* _XlcInitCTInfo passes every entry, in table order, to _XlcAddCT.
* Sequences of the form ESC % / <digit> denote extended segments; the digit
* is interpreted by _XlcParseCharSet as the number of bytes per character.
* Some names (ISO8859-14:GR, ISO8859-15:GR) appear twice because _XlcAddCT
* allows several sequences to be registered for one charset.
*/
static const CTDataRec default_ct_data[] =
{
/* */
/* X11 registry name MIME name ISO-IR ESC sequence */
/* */
/* Registered character sets with one byte per character */
{ "ISO8859-1:GL", /* US-ASCII 6 */ "\033(B" },
{ "ISO8859-1:GR", /* ISO-8859-1 100 */ "\033-A" },
{ "ISO8859-2:GR", /* ISO-8859-2 101 */ "\033-B" },
{ "ISO8859-3:GR", /* ISO-8859-3 109 */ "\033-C" },
{ "ISO8859-4:GR", /* ISO-8859-4 110 */ "\033-D" },
{ "ISO8859-5:GR", /* ISO-8859-5 144 */ "\033-L" },
{ "ISO8859-6:GR", /* ISO-8859-6 127 */ "\033-G" },
{ "ISO8859-7:GR", /* ISO-8859-7 126 */ "\033-F" },
{ "ISO8859-8:GR", /* ISO-8859-8 138 */ "\033-H" },
{ "ISO8859-9:GR", /* ISO-8859-9 148 */ "\033-M" },
{ "ISO8859-10:GR", /* ISO-8859-10 157 */ "\033-V" },
{ "ISO8859-11:GR", /* ISO-8859-11 166 */ "\033-T" },
{ "ISO8859-13:GR", /* ISO-8859-13 179 */ "\033-Y" },
{ "ISO8859-14:GR", /* ISO-8859-14 199 */ "\033-_" },
{ "ISO8859-15:GR", /* ISO-8859-15 203 */ "\033-b" },
{ "ISO8859-16:GR", /* ISO-8859-16 226 */ "\033-f" },
{ "JISX0201.1976-0:GL", /* ISO-646-JP 14 */ "\033(J" },
{ "JISX0201.1976-0:GR", "\033)I" },
#if 0
{ "TIS620-0:GR", /* TIS-620 166 */ "\033-T" },
#endif
/* Registered character sets with two bytes per character */
{ "GB2312.1980-0:GL", /* GB_2312-80 58 */ "\033$(A" },
{ "GB2312.1980-0:GR", /* GB_2312-80 58 */ "\033$)A" },
{ "JISX0208.1983-0:GL", /* JIS_X0208-1983 87 */ "\033$(B" },
{ "JISX0208.1983-0:GR", /* JIS_X0208-1983 87 */ "\033$)B" },
{ "JISX0208.1990-0:GL", /* JIS_X0208-1990 168 */ "\033$(B" },
{ "JISX0208.1990-0:GR", /* JIS_X0208-1990 168 */ "\033$)B" },
{ "JISX0212.1990-0:GL", /* JIS_X0212-1990 159 */ "\033$(D" },
{ "JISX0212.1990-0:GR", /* JIS_X0212-1990 159 */ "\033$)D" },
{ "KSC5601.1987-0:GL", /* KS_C_5601-1987 149 */ "\033$(C" },
{ "KSC5601.1987-0:GR", /* KS_C_5601-1987 149 */ "\033$)C" },
{ "CNS11643.1986-1:GL", /* CNS 11643-1992 pl.1 171 */ "\033$(G" },
{ "CNS11643.1986-1:GR", /* CNS 11643-1992 pl.1 171 */ "\033$)G" },
{ "CNS11643.1986-2:GL", /* CNS 11643-1992 pl.2 172 */ "\033$(H" },
{ "CNS11643.1986-2:GR", /* CNS 11643-1992 pl.2 172 */ "\033$)H" },
{ "CNS11643.1992-3:GL", /* CNS 11643-1992 pl.3 183 */ "\033$(I" },
{ "CNS11643.1992-3:GR", /* CNS 11643-1992 pl.3 183 */ "\033$)I" },
{ "CNS11643.1992-4:GL", /* CNS 11643-1992 pl.4 184 */ "\033$(J" },
{ "CNS11643.1992-4:GR", /* CNS 11643-1992 pl.4 184 */ "\033$)J" },
{ "CNS11643.1992-5:GL", /* CNS 11643-1992 pl.5 185 */ "\033$(K" },
{ "CNS11643.1992-5:GR", /* CNS 11643-1992 pl.5 185 */ "\033$)K" },
{ "CNS11643.1992-6:GL", /* CNS 11643-1992 pl.6 186 */ "\033$(L" },
{ "CNS11643.1992-6:GR", /* CNS 11643-1992 pl.6 186 */ "\033$)L" },
{ "CNS11643.1992-7:GL", /* CNS 11643-1992 pl.7 187 */ "\033$(M" },
{ "CNS11643.1992-7:GR", /* CNS 11643-1992 pl.7 187 */ "\033$)M" },
/* Registered encodings with a varying number of bytes per character */
{ "ISO10646-1", /* UTF-8 196 */ "\033%G" },
/* Encodings without ISO-IR assigned escape sequence must be
defined in XLC_LOCALE files, using "\033%/1" or "\033%/2". */
/* Backward compatibility with XFree86 3.x */
#if 1
{ "ISO8859-14:GR", "\033%/1" },
{ "ISO8859-15:GR", "\033%/1" },
#endif
/* For use by utf8 -> ctext */
{ "BIG5-0:GLGR", "\033%/2"},
{ "BIG5HKSCS-0:GLGR", "\033%/2"},
{ "GBK-0:GLGR", "\033%/2"},
/* used by Emacs, but not backed by ISO-IR */
{ "BIG5-E0:GL", "\033$(0" },
{ "BIG5-E0:GR", "\033$)0" },
{ "BIG5-E1:GL", "\033$(1" },
{ "BIG5-E1:GR", "\033$)1" },
};
/* We represent UTF-8 as an XlcGLGR charset, not in extended segments.
   While this is 0, the "#if UTF8_IN_EXTSEQ" code in cttocs and cstoct is
   compiled out. */
#define UTF8_IN_EXTSEQ 0
/* ======================= Parsing ESC Sequences ======================= */
/* Individual byte values. */
#define XctC0 0x0000
#define XctHT 0x0009
#define XctNL 0x000a
#define XctESC 0x001b /* first byte of a charset designator, see _XlcParseCT */
#define XctGL 0x0020
#define XctC1 0x0080
#define XctCSI 0x009b /* first byte of a direction sequence, see _XlcParseCT */
#define XctGR 0x00a0
#define XctSTX 0x0002 /* terminates the name of an extended segment */
/* Bytes examined by _XlcParseCT after XctESC. Several of these values are
   also used directly as return codes of _XlcParseCT. */
#define XctCntrlFunc 0x0023 /* '#' */
#define XctMB 0x0024 /* '$' */
#define XctOtherCoding 0x0025 /* '%' */
#define XctGL94 0x0028 /* '(' */
#define XctGR94 0x0029 /* ')' */
#define XctGR96 0x002d /* '-' */
#define XctNonStandard 0x002f /* '/' */
#define XctIgnoreExt 0x0030 /* '0' */
#define XctNotIgnoreExt 0x0031 /* '1' */
/* Bytes examined by _XlcParseCT after XctCSI. XctLeftToRight shares its
   value with XctNotIgnoreExt, and XctDirection with XctDirectionEnd. */
#define XctLeftToRight 0x0031 /* '1' */
#define XctRightToLeft 0x0032 /* '2' */
#define XctDirection 0x005d /* ']' */
#define XctDirectionEnd 0x005d /* ']' */
/* Return codes of _XlcParseCT whose value packs the two bytes that
   follow XctESC. */
#define XctGL94MB 0x2428 /* '$' '(' */
#define XctGR94MB 0x2429 /* '$' ')' */
#define XctExtSeg 0x252f /* '%' '/' */
#define XctReturn 0x2540 /* '%' '@' */
/*
 * Recognizes the control sequence at the start of a Compound Text string.
 * On entry *text points at the string and *length holds its size in bytes.
 *
 * Returns 0 when no complete, known sequence starts there; *text and
 * *length are then left untouched.  Otherwise returns one of
 *     XctGL94, XctGR94, XctGR96, XctGL94MB, XctGR94MB,
 *     XctExtSeg, XctOtherCoding, XctReturn, XctIgnoreExt, XctNotIgnoreExt,
 *     XctLeftToRight, XctRightToLeft, XctDirectionEnd
 * and advances *text / reduces *length past the sequence.
 *
 * *final_byte is cleared on entry.  For the ESC sequences it receives the
 * byte that identifies the charset (for XctExtSeg the byte after '/', for
 * XctIgnoreExt / XctNotIgnoreExt the byte after '#').  The direction
 * sequences leave it at 0.
 */
static unsigned int
_XlcParseCT(
    const char **text,
    int *length,
    unsigned char *final_byte)
{
    const unsigned char *cur = (const unsigned char *) *text;
    const int avail = *length;
    unsigned int kind = 0;
    unsigned char lead, intro, c;

    *final_byte = 0;

    if (avail < 1)
        return 0;

    lead = *cur++;

    if (lead == XctESC) {
        if (avail < 2)
            return 0;
        intro = *cur++;

        if (intro == XctOtherCoding) {                  /* % */
            if (avail < 3)
                return 0;
            c = *cur++;
            if (c == XctNonStandard) {                  /* / */
                if (avail < 4)
                    return 0;
                kind = XctExtSeg;
                c = *cur++;
            } else {
                kind = (c == '@') ? XctReturn : XctOtherCoding;
            }
            *final_byte = c;
        } else if (intro == XctCntrlFunc) {             /* # */
            if (avail < 4)
                return 0;
            *final_byte = *cur++;
            c = *cur++;
            if (c == XctIgnoreExt)                      /* 0 */
                kind = XctIgnoreExt;
            else if (c == XctNotIgnoreExt)              /* 1 */
                kind = XctNotIgnoreExt;
        } else if (intro == XctMB) {                    /* $ */
            if (avail < 4)
                return 0;
            c = *cur++;
            if (c == XctGL94)                           /* ( */
                kind = XctGL94MB;
            else if (c == XctGR94)                      /* ) */
                kind = XctGR94MB;
            *final_byte = *cur++;
        } else if (intro == XctGL94                     /* ( */
                   || intro == XctGR94                  /* ) */
                   || intro == XctGR96) {               /* - */
            if (avail < 3)
                return 0;
            /* For these three the return code equals the intermediate byte. */
            kind = intro;
            *final_byte = *cur++;
        }
    } else if (lead == XctCSI) {
        /* Direction sequences. */
        if (avail < 2)
            return 0;
        c = *cur++;
        if (c == XctLeftToRight || c == XctRightToLeft) {
            if (avail < 3)
                return 0;
            /* The return code equals the byte just read. */
            if (*cur++ == XctDirection)
                kind = c;
        } else if (c == XctDirectionEnd) {
            kind = XctDirectionEnd;
        }
    }

    /* Only a recognized sequence consumes input. */
    if (kind != 0) {
        *length -= (const char *) cur - *text;
        *text = (const char *) cur;
    }
    return kind;
}
/*
 * Derives side, char_size and set_size of a newly created XlcCharSet from
 * its Compound Text ESC sequence (charset->ct_sequence).
 *
 * Returns True when the sequence designates a charset usable in Compound
 * Text; in that case the three fields have been stored.  Returns False for
 * an empty sequence, for a sequence _XlcParseCT does not classify as a
 * charset designator, and for an extended segment whose byte count digit is
 * outside '0'..'4'; the charset is not modified then.
 *
 * A char_size of 0 stands for a varying number of bytes per character.
 *
 * Used by _XlcCreateDefaultCharSet.
 */
Bool
_XlcParseCharSet(
    XlcCharSet charset)
{
    const char *seq = charset->ct_sequence;
    unsigned char final_byte;
    unsigned int kind;
    int remaining;
    int bytes_per_char;

    if (seq[0] == '\0')
        return False;

    remaining = strlen(seq);
    kind = _XlcParseCT(&seq, &remaining, &final_byte);

    /* Step 1: validate the designator and work out bytes per character. */
    if (kind == XctGL94 || kind == XctGR94 || kind == XctGR96) {
        bytes_per_char = 1;
    } else if (kind == XctGL94MB || kind == XctGR94MB) {
        if (final_byte < 0x60)
            bytes_per_char = 2;
        else if (final_byte < 0x70)
            bytes_per_char = 3;
        else
            bytes_per_char = 4;
    } else if (kind == XctExtSeg) {
        bytes_per_char = final_byte - '0';
        if (bytes_per_char < 0 || bytes_per_char > 4)
            return False;
    } else if (kind == XctOtherCoding) {
        bytes_per_char = 0;
    } else {
        return False;
    }
    charset->char_size = bytes_per_char;

    /* Step 2: side and set size follow from the designator type alone. */
    if (kind == XctGL94 || kind == XctGL94MB) {
        charset->side = XlcGL;
        charset->set_size = 94;
    } else if (kind == XctGR94 || kind == XctGR94MB) {
        charset->side = XlcGR;
        charset->set_size = 94;
    } else if (kind == XctGR96) {
        charset->side = XlcGR;
        charset->set_size = 96;
    } else {
        /* XctExtSeg or XctOtherCoding. */
        charset->side = XlcGLGR;
        charset->set_size = 0;
    }

    return True;
}
/* =============== Management of the List of Character Sets =============== */
/*
* Representation of a character set that can be used for Compound Text,
* at run time.
* Note: This information is not contained in the XlcCharSet, because
* multiple ESC sequences may be used for the same XlcCharSet.
*/
typedef struct _CTInfoRec {
XlcCharSet charset; /* charset designated by this entry */
/* _XlcAddCT stores a private copy of the sequence directly behind the
   record, in the same allocation. */
const char *ct_sequence; /* Compound Text ESC sequence */
unsigned int type; /* result of _XlcParseCT on ct_sequence */
unsigned char final_byte; /* final byte reported by _XlcParseCT */
/* If type == XctExtSeg: */
/* (separately allocated by _XlcAddCT; NULL / 0 for all other types) */
const char *ext_segment; /* extended segment name, then '\002' */
int ext_segment_len; /* length of above, including final '\002' */
struct _CTInfoRec *next; /* next entry of ct_list, NULL for the last one */
} CTInfoRec, *CTInfo;
/*
* List of character sets that can be used for Compound Text.
* It includes all that are listed in default_ct_data, but more can be added
* at runtime through _XlcAddCT.
*/
/* Head of the singly linked list; NULL while the list is empty. */
static CTInfo ct_list = NULL;
/* Last entry of the list; lets _XlcAddCT append without walking the list. */
static CTInfo ct_list_end = NULL;
/*
 * Looks up the Compound Text info record that matches an already parsed
 * ESC sequence, described by 'type' and 'final_byte'.
 *
 * For type == XctExtSeg the record must additionally match the extended
 * segment name: 'text' points at the bytes following the sequence and
 * 'text_len' says how many of them may be inspected.  For every other type
 * 'text' and 'text_len' are ignored.
 *
 * Returns the first matching entry of ct_list, or NULL if there is none.
 */
static CTInfo
_XlcGetCTInfo(
    unsigned int type,
    unsigned char final_byte,
    const char *text,
    int text_len)
{
    CTInfo entry = ct_list;

    while (entry != NULL) {
        if (entry->type == type && entry->final_byte == final_byte) {
            if (type != XctExtSeg)
                return entry;
            /* Extended segments are told apart by their name prefix. */
            if (text_len >= entry->ext_segment_len
                && memcmp(text, entry->ext_segment,
                          entry->ext_segment_len) == 0)
                return entry;
        }
        entry = entry->next;
    }

    return NULL;
}
/* Finds the first entry of ct_list that belongs to 'charset'.
   Yields NULL when the charset has no Compound Text info. */
static CTInfo
_XlcGetCTInfoFromCharSet(
    XlcCharSet charset)
{
    CTInfo entry = ct_list;

    while (entry != NULL && entry->charset != charset)
        entry = entry->next;

    return entry;
}
/*
 * Creates a new XlcCharSet, given its name (including side suffix) and
 * Compound Text ESC sequence (normally at most 4 bytes), and makes it
 * eligible for Compound Text processing.  If a charset of that name already
 * exists, the sequence is registered as an additional one for it.
 *
 * Returns the charset, or NULL if the charset could not be created or if
 * 'ct_sequence' is not a charset designator.  When only the bookkeeping for
 * the Compound Text list fails (out of memory, over-long extended segment
 * name), the charset is still returned but no list entry is added.
 *
 * If another charset already owns an equivalent sequence, a diagnostic is
 * written to stderr and, when the new charset's own ct_sequence equals
 * 'ct_sequence', that field is reset to "".
 */
XlcCharSet
_XlcAddCT(
    const char *name,
    const char *ct_sequence)
{
    CTInfo ct_info, existing_info;
    XlcCharSet charset;
    const char *ct_ptr;
    int length;
    unsigned int type;
    unsigned char final_byte;

    charset = _XlcGetCharSet(name);
    if (charset != NULL) {
        /* Even if the charset already exists, it is OK to register a second
           Compound Text sequence for it. */
    } else {
        /* Attempt to create the charset. */
        charset = _XlcCreateDefaultCharSet(name, ct_sequence);
        if (charset == NULL)
            return (XlcCharSet) NULL;
        _XlcAddCharSet(charset);
    }

    /* Allocate a CTinfo record; the copy of the sequence lives right
       behind it in the same allocation. */
    length = strlen(ct_sequence);
    ct_info = Xmalloc(sizeof(CTInfoRec) + length+1);
    if (ct_info == NULL)
        return charset;

    ct_info->charset = charset;
    ct_info->ct_sequence = strcpy((char *) (ct_info + 1), ct_sequence);

    /* Parse the Compound Text sequence. */
    ct_ptr = ct_sequence;
    type = _XlcParseCT(&ct_ptr, &length, &final_byte);

    ct_info->type = type;
    ct_info->final_byte = final_byte;

    switch (type) {
    case XctGL94:
    case XctGR94:
    case XctGR96:
    case XctGL94MB:
    case XctGR94MB:
    case XctOtherCoding:
        ct_info->ext_segment = NULL;
        ct_info->ext_segment_len = 0;
        break;
    case XctExtSeg: {
        /* By convention, the extended segment name is the encoding_name
           in lowercase. */
        const char *q = charset->encoding_name;
        int n = strlen(q);
        char *p;

        /* Ensure ct_info->ext_segment_len <= 0x3fff - 6. */
        if (n > 0x3fff - 6 - 1) {
            Xfree(ct_info);
            return charset;
        }
        p = Xmalloc(n+1);
        if (p == NULL) {
            Xfree(ct_info);
            return charset;
        }
        ct_info->ext_segment = p;
        ct_info->ext_segment_len = n+1;
        for ( ; n > 0; p++, q++, n--)
            *p = (*q >= 'A' && *q <= 'Z' ? *q - 'A' + 'a' : *q);
        /* The name is terminated by STX instead of NUL. */
        *p = XctSTX;
        break;
    }
    default:
        Xfree(ct_info);
        return (XlcCharSet) NULL;
    }

    /* Insert it into the list, if not already present. */
    existing_info =
        _XlcGetCTInfo(type, ct_info->final_byte,
                      ct_info->ext_segment, ct_info->ext_segment_len);
    if (existing_info == NULL) {
        /* Insert it at the end. If there are duplicates CTinfo entries
           for the same XlcCharSet, we want the first (standard) one to
           override the second (user defined) one. */
        ct_info->next = NULL;
        if (ct_list_end)
            ct_list_end->next = ct_info;
        else
            ct_list = ct_info;
        ct_list_end = ct_info;
    } else {
        if (existing_info->charset != charset
            /* We have a conflict, with one exception: JISX0208.1983-0 and
               JISX0208.1990-0 are the same for all practical purposes. */
            && !(strncmp(existing_info->charset->name, "JISX0208", 8) == 0
                 && strncmp(charset->name, "JISX0208", 8) == 0)) {
            fprintf(stderr,
                    "Xlib: charsets %s and %s have the same CT sequence\n",
                    charset->name, existing_info->charset->name);
            if (strcmp(charset->ct_sequence, ct_sequence) == 0)
                charset->ct_sequence = "";
        }
        /* The record is discarded.  The extended segment name is a separate
           allocation and has to be released as well, otherwise it leaks on
           every duplicate registration.  The cast only drops the const
           qualifier of the field; the memory came from Xmalloc above. */
        if (ct_info->ext_segment != NULL)
            Xfree((char *) ct_info->ext_segment);
        Xfree(ct_info);
    }

    return charset;
}
/* ========== Converters String <--> CharSet <--> Compound Text ========== */
/*
* Structure representing the parse state of a Compound Text string.
*/
/* Lives directly behind the XlcConvRec (see create_conv) and is reset by
   init_state. */
typedef struct _StateRec {
XlcCharSet charset; /* The charset of the current segment */
XlcCharSet GL_charset; /* The charset responsible for 0x00..0x7F */
XlcCharSet GR_charset; /* The charset responsible for 0x80..0xFF */
XlcCharSet Other_charset; /* != NULL if currently in an other segment */
/* Number of bytes of the current extended segment that cttocs has not
   copied yet. */
int ext_seg_left; /* > 0 if currently in an extended segment */
} StateRec, *State;
/* Subroutine for parsing an ESC sequence. */
/* Result codes of _XlcCheckCTSequence. */
typedef enum {
resOK, /* Charset saved in 'state', sequence skipped */
resNotInList, /* Charset not found, sequence skipped */
resNotCTSeq /* EscSeq not recognized, pointers not changed */
} CheckResult;
/*
 * Parses the ESC sequence at *ctext (*ctext_len bytes available) and
 * applies it to the parse state.
 *
 * Returns
 *   resOK         the sequence was skipped and 'state' updated: the GL/GR
 *                 charset for ordinary designators, Other_charset for
 *                 XctOtherCoding (cleared again by XctReturn), charset and
 *                 ext_seg_left for an extended segment;
 *   resNotInList  the sequence is well-formed but no charset is registered
 *                 for it; it was skipped, an extended segment together with
 *                 its whole contents;
 *   resNotCTSeq   not a usable sequence; *ctext and *ctext_len unchanged.
 */
static CheckResult
_XlcCheckCTSequence(
    State state,
    const char **ctext,
    int *ctext_len)
{
    XlcCharSet charset;
    CTInfo ct_info;
    const char *tmp_ctext = *ctext;
    int tmp_ctext_len = *ctext_len;
    unsigned int type;
    unsigned char final_byte;
    int ext_seg_left = 0;

    /* Check for validity. */
    type = _XlcParseCT(&tmp_ctext, &tmp_ctext_len, &final_byte);

    switch (type) {
    case XctGL94:
    case XctGR94:
    case XctGR96:
    case XctGL94MB:
    case XctGR94MB:
    case XctOtherCoding:
        *ctext = tmp_ctext;
        *ctext_len = tmp_ctext_len;
        break;
    case XctReturn:
        *ctext = tmp_ctext;
        *ctext_len = tmp_ctext_len;
        state->Other_charset = NULL;
        return resOK;
    case XctExtSeg:
        /* The designator is followed by two length bytes, each of which
           must have its high bit set; the low 7 bits of each form the
           14-bit segment length.  Both bytes are validated here. */
        if (tmp_ctext_len > 2
            && (tmp_ctext[0] & 0x80) && (tmp_ctext[1] & 0x80)) {
            unsigned int msb = tmp_ctext[0] & 0x7f;
            unsigned int lsb = tmp_ctext[1] & 0x7f;
            ext_seg_left = (msb << 7) + lsb;
            /* The announced segment must fit into the remaining input. */
            if (ext_seg_left <= tmp_ctext_len - 2) {
                *ctext = tmp_ctext + 2;
                *ctext_len = tmp_ctext_len - 2;
                break;
            }
        }
        return resNotCTSeq;
    default:
        return resNotCTSeq;
    }

    ct_info = _XlcGetCTInfo(type, final_byte, *ctext, ext_seg_left);

    if (ct_info) {
        charset = ct_info->charset;
        state->ext_seg_left = ext_seg_left;
        if (type == XctExtSeg) {
            state->charset = charset;
            /* Skip past the extended segment name and the separator. */
            *ctext += ct_info->ext_segment_len;
            *ctext_len -= ct_info->ext_segment_len;
            state->ext_seg_left -= ct_info->ext_segment_len;
        } else if (type == XctOtherCoding) {
            state->Other_charset = charset;
        } else {
            if (charset->side == XlcGL) {
                state->GL_charset = charset;
            } else if (charset->side == XlcGR) {
                state->GR_charset = charset;
            } else {
                state->GL_charset = charset;
                state->GR_charset = charset;
            }
        }
        return resOK;
    } else {
        state->ext_seg_left = 0;
        if (type == XctExtSeg) {
            /* Skip the entire extended segment. */
            *ctext += ext_seg_left;
            *ctext_len -= ext_seg_left;
        }
        return resNotInList;
    }
}
/*
 * Puts the converter's parse state (conv->state) back into its initial
 * condition: ISO8859-1 on both the GL and the GR side, no "other coding"
 * segment and no extended segment in progress.
 * The two default charsets are looked up once and cached in statics.
 */
static void
init_state(
    XlcConv conv)
{
    static XlcCharSet latin1_gl = NULL;
    static XlcCharSet latin1_gr = NULL;
    State st = (State) conv->state;

    /* Both lookups are tied to the GL cache being empty. */
    if (!latin1_gl) {
        latin1_gl = _XlcGetCharSet("ISO8859-1:GL");
        latin1_gr = _XlcGetCharSet("ISO8859-1:GR");
    }

    st->charset = latin1_gl;
    st->GL_charset = latin1_gl;
    st->GR_charset = latin1_gr;
    st->Other_charset = NULL;
    st->ext_seg_left = 0;
}
/*
 * from XlcNCompoundText to XlcNCharSet
 *
 * Copies one run of characters that all belong to the same charset from
 * the Compound Text input (*from, *from_left bytes) to the output buffer
 * (*to, *to_left bytes).  Charset designators and direction sequences in
 * the input are consumed but not copied.  The run ends when the input or
 * the buffer is exhausted, when the charset changes, or when the buffer
 * cannot hold another whole character.
 *
 * On return the four in/out arguments have been advanced past what was
 * consumed / produced.  If num_args > 0, *(XlcCharSet *) args[0] receives
 * the charset of the run (NULL if no charset was determined).
 *
 * Returns the number of sequences that were skipped without effect
 * (designators of unknown charsets and direction sequences).
 */
static int
cttocs(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    State state = (State) conv->state;
    XlcCharSet charset = NULL;
    const char *ctptr;
    char *bufptr;
    int ctext_len, buf_len;
    int unconv_num = 0;

    ctptr = (const char *) *from;
    bufptr = (char *) *to;
    ctext_len = *from_left;
    buf_len = *to_left;

    while (ctext_len > 0 && buf_len > 0) {
        if (state->ext_seg_left == 0) {
            /* Not in the middle of an extended segment; look at next byte. */
            unsigned char ch = *ctptr;
            XlcCharSet ch_charset;

            if (ch == XctESC) {
                CheckResult ret =
                    _XlcCheckCTSequence(state, &ctptr, &ctext_len);
                if (ret == resOK)
                    /* state has been modified. */
                    continue;
                if (ret == resNotInList) {
                    /* XXX Just continue with previous charset. */
                    unconv_num++;
                    continue;
                }
                /* resNotCTSeq: fall through, the byte is treated as data. */
            } else if (ch == XctCSI) {
                /* XXX Simply ignore the XctLeftToRight, XctRightToLeft,
                   XctDirectionEnd sequences for the moment. */
                unsigned char dummy;
                if (_XlcParseCT(&ctptr, &ctext_len, &dummy)) {
                    unconv_num++;
                    continue;
                }
            }

            /* Find the charset which is responsible for this byte. */
            ch_charset = (state->Other_charset != NULL ? state->Other_charset :
                          (ch & 0x80 ? state->GR_charset : state->GL_charset));

            /* Set the charset of this run, or continue the current run,
               or stop the current run. */
            if (charset) {
                if (charset != ch_charset)
                    break;
            } else {
                state->charset = charset = ch_charset;
            }

            /* We don't want to split a character into multiple pieces. */
            if (buf_len < 6) {
                if (charset->char_size > 0) {
                    if (buf_len < charset->char_size)
                        break;
                } else {
                    /* char_size == 0 is tricky. The code here is good only
                       for valid UTF-8 input. */
                    if (charset->ct_sequence[0] == XctESC
                        && charset->ct_sequence[1] == XctOtherCoding
                        && charset->ct_sequence[2] == 'G') {
                        int char_size = (ch < 0xc0 ? 1 :
                                         ch < 0xe0 ? 2 :
                                         ch < 0xf0 ? 3 :
                                         ch < 0xf8 ? 4 :
                                         ch < 0xfc ? 5 :
                                         6);
                        if (buf_len < char_size)
                            break;
                    }
                }
            }

            *bufptr++ = *ctptr++;
            ctext_len--;
            buf_len--;
        } else {
            /* Copy as much as possible from the current extended segment
               to the buffer. */
            int char_size;

            /* Set the charset of this run, or continue the current run,
               or stop the current run. */
            if (charset) {
                if (charset != state->charset)
                    break;
            } else {
                charset = state->charset;
            }

            char_size = charset->char_size;

            if (state->ext_seg_left <= buf_len || char_size > 0) {
                int n = (state->ext_seg_left <= buf_len
                         ? state->ext_seg_left
                         : (buf_len / char_size) * char_size);
                /* n is 0 when the buffer is too small for even one whole
                   character.  Nothing would change in that case and the
                   loop would spin forever, so stop the run instead. */
                if (n == 0)
                    break;
                memcpy(bufptr, ctptr, n);
                ctptr += n; ctext_len -= n;
                bufptr += n; buf_len -= n;
                state->ext_seg_left -= n;
            } else {
#if UTF8_IN_EXTSEQ
                /* char_size == 0 is tricky. The code here is good only
                   for valid UTF-8 input. */
                if (strcmp(charset->name, "ISO10646-1") == 0) {
                    unsigned char ch = *ctptr;
                    int char_size = (ch < 0xc0 ? 1 :
                                     ch < 0xe0 ? 2 :
                                     ch < 0xf0 ? 3 :
                                     ch < 0xf8 ? 4 :
                                     ch < 0xfc ? 5 :
                                     6);
                    int i;
                    if (buf_len < char_size)
                        break;
                    /* A small loop is faster than calling memcpy. */
                    for (i = char_size; i > 0; i--)
                        *bufptr++ = *ctptr++;
                    ctext_len -= char_size;
                    buf_len -= char_size;
                    state->ext_seg_left -= char_size;
                } else
#endif
                {
                    /* Here ctext_len >= state->ext_seg_left > buf_len.
                       We may be splitting a character into multiple pieces.
                       Oh well. */
                    int n = buf_len;
                    memcpy(bufptr, ctptr, n);
                    ctptr += n; ctext_len -= n;
                    bufptr += n; buf_len -= n;
                    state->ext_seg_left -= n;
                }
            }
        }
    }

    /* 'charset' is the charset for the current run. In some cases,
       'state->charset' contains the charset for the next run. Therefore,
       return 'charset'.
       'charset' may still be NULL only if no output was produced. */
    if (num_args > 0)
        *((XlcCharSet *) args[0]) = charset;

    *from_left -= ctptr - *((const char **) from);
    *from = (XPointer) ctptr;

    *to_left -= bufptr - *((char **) to);
    *to = (XPointer) bufptr;

    return unconv_num;
}
/*
 * from XlcNCharSet to XlcNCompoundText
 *
 * Encodes the characters at *from (*from_left bytes), which all belong to
 * the charset passed as args[0], as Compound Text at *to (*to_left bytes).
 * The charset's designator is emitted first unless the charset is already
 * active on the relevant side(s) according to the converter state.  Text
 * in an "other coding" charset is always bracketed by its designator and
 * the XctReturn sequence; text in an extended segment charset always gets
 * the extended segment header, whose length bytes are patched in at the end.
 *
 * On return the four in/out arguments have been advanced past what was
 * consumed / produced.
 *
 * Returns -1 if no charset argument was given, if the charset has no
 * Compound Text info, or if the output buffer cannot hold the designator.
 * Otherwise returns the number of input characters that were dropped
 * because Compound Text does not allow them (control characters in
 * single-byte charsets), in line with what cttocs and cstostr return.
 */
static int
cstoct(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    State state = (State) conv->state;
    XlcSide side;
    unsigned char min_ch = 0, max_ch = 0;
    int length, unconv_num;
    CTInfo ct_info;
    XlcCharSet charset;
    const char *csptr;
    char *ctptr;
    int csstr_len, ct_len;
    char *ext_segment_start;
    int char_size;

    /* One argument is required, of type XlcCharSet. */
    if (num_args < 1)
        return -1;

    csptr = *((const char **) from);
    ctptr = *((char **) to);
    csstr_len = *from_left;
    ct_len = *to_left;

    charset = (XlcCharSet) args[0];

    ct_info = _XlcGetCTInfoFromCharSet(charset);
    if (ct_info == NULL)
        return -1;

    side = charset->side;
    length = strlen(ct_info->ct_sequence);

    ext_segment_start = NULL;

    if (ct_info->type == XctOtherCoding) {
        /* Output the Escape sequence for switching to the charset, and
           reserve room now for the XctReturn sequence at the end. */
        if (ct_len < length + 3)
            return -1;

        memcpy(ctptr, ct_info->ct_sequence, length);
        ctptr += length;
        ct_len -= length + 3;
    } else
    /* Test whether the charset is already active. */
    if (((side == XlcGR || side == XlcGLGR)
         && charset != state->GR_charset)
        || ((side == XlcGL || side == XlcGLGR)
            && charset != state->GL_charset)) {

        /* Output the Escape sequence for switching to the charset. */
        if (ct_info->type == XctExtSeg) {
            if (ct_len < length + 2 + ct_info->ext_segment_len)
                return -1;

            memcpy(ctptr, ct_info->ct_sequence, length);
            ctptr += length;
            ct_len -= length;

            /* Leave room for the two length bytes; they are filled in
               once the size of the segment is known. */
            ctptr += 2;
            ct_len -= 2;
            ext_segment_start = ctptr;

            /* The size of an extended segment must fit in 14 bits. */
            if (ct_len > 0x3fff)
                ct_len = 0x3fff;

            memcpy(ctptr, ct_info->ext_segment, ct_info->ext_segment_len);
            ctptr += ct_info->ext_segment_len;
            ct_len -= ct_info->ext_segment_len;
        } else {
            if (ct_len < length)
                return -1;

            memcpy(ctptr, ct_info->ct_sequence, length);
            ctptr += length;
            ct_len -= length;
        }
    }

    /* If the charset has side GL or GR, prepare remapping the characters
       to the correct side. */
    if (charset->set_size) {
        min_ch = 0x20;
        max_ch = 0x7f;
        if (charset->set_size == 94) {
            max_ch--;
            if (charset->char_size > 1 || side == XlcGR)
                min_ch++;
        }
    }

    /* Actually copy the contents. */
    unconv_num = 0;
    char_size = charset->char_size;
    if (char_size == 1) {
        while (csstr_len > 0 && ct_len > 0) {
            if (charset->set_size) {
                /* The CompoundText specification says that the only
                   control characters allowed are 0x09, 0x0a, 0x1b, 0x9b.
                   Therefore here we eliminate other control characters. */
                unsigned char ch = *((const unsigned char *) csptr) & 0x7f;
                if (!((ch >= min_ch && ch <= max_ch)
                      || (side == XlcGL
                          && (ch == 0x00 || ch == 0x09 || ch == 0x0a))
                      || ((side == XlcGL || side == XlcGR)
                          && (ch == 0x1b)))) {
                    csptr++;
                    csstr_len--;
                    unconv_num++;
                    continue;
                }
            }

            if (side == XlcGL)
                *ctptr++ = *csptr++ & 0x7f;
            else if (side == XlcGR)
                *ctptr++ = *csptr++ | 0x80;
            else
                *ctptr++ = *csptr++;
            csstr_len--;
            ct_len--;
        }
    } else if (char_size > 1) {
        /* Only whole characters are copied. */
        while (csstr_len >= char_size && ct_len >= char_size) {
            if (side == XlcGL) {
                int i;
                for (i = char_size; i > 0; i--)
                    *ctptr++ = *csptr++ & 0x7f;
            } else if (side == XlcGR) {
                int i;
                for (i = char_size; i > 0; i--)
                    *ctptr++ = *csptr++ | 0x80;
            } else {
                int i;
                for (i = char_size; i > 0; i--)
                    *ctptr++ = *csptr++;
            }
            csstr_len -= char_size;
            ct_len -= char_size;
        }
    } else {
        /* char_size = 0. The code here is good only for valid UTF-8 input. */
        if ((charset->ct_sequence[0] == XctESC
             && charset->ct_sequence[1] == XctOtherCoding
             && charset->ct_sequence[2] == 'G')
#if UTF8_IN_EXTSEQ
            || strcmp(charset->name, "ISO10646-1") == 0
#endif
           ) {
            while (csstr_len > 0 && ct_len > 0) {
                unsigned char ch = * (const unsigned char *) csptr;
                int ch_size = (ch < 0xc0 ? 1 :
                               ch < 0xe0 ? 2 :
                               ch < 0xf0 ? 3 :
                               ch < 0xf8 ? 4 :
                               ch < 0xfc ? 5 :
                               6);
                int i;
                if (!(csstr_len >= ch_size && ct_len >= ch_size))
                    break;
                for (i = ch_size; i > 0; i--)
                    *ctptr++ = *csptr++;
                csstr_len -= ch_size;
                ct_len -= ch_size;
            }
        } else {
            while (csstr_len > 0 && ct_len > 0) {
                *ctptr++ = *csptr++;
                csstr_len--;
                ct_len--;
            }
        }
    }

    if (ct_info->type == XctOtherCoding) {
        /* Terminate with an XctReturn sequence. */
        ctptr[0] = XctESC;
        ctptr[1] = XctOtherCoding;
        ctptr[2] = '@';
        ctptr += 3;
    } else if (ext_segment_start != NULL) {
        /* Backpatch the extended segment's length. */
        int ext_segment_length = ctptr - ext_segment_start;
        *(ext_segment_start - 2) = (ext_segment_length >> 7) | 0x80;
        *(ext_segment_start - 1) = (ext_segment_length & 0x7f) | 0x80;
    } else {
        /* Remember the designation so that the next call can omit it. */
        if (side == XlcGR || side == XlcGLGR)
            state->GR_charset = charset;
        if (side == XlcGL || side == XlcGLGR)
            state->GL_charset = charset;
    }

    *from_left -= csptr - *((const char **) from);
    *from = (XPointer) csptr;

    *to_left -= ctptr - *((char **) to);
    *to = (XPointer) ctptr;

    /* Report the dropped characters instead of discarding the count. */
    return unconv_num;
}
/*
 * from XlcNString to XlcNCharSet
 *
 * Copies the leading run of input bytes that share the same high bit as
 * the first byte, limited by the smaller of *from_left and *to_left.
 * On return the four in/out arguments have been advanced past what was
 * copied.  If num_args > 0, *(XlcCharSet *) args[0] receives the state's
 * GR charset when the run has the high bit set, else the GL charset (also
 * for empty input).
 *
 * Always returns 0.
 */
static int
strtocs(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    State state = (State) conv->state;
    const char *src;
    char *dst;
    unsigned char side;
    int length;

    src = (const char *) *from;
    dst = (char *) *to;

    length = min(*from_left, *to_left);

    /* Look at the first byte only if there is one; empty input counts as
       GL. */
    side = (*from_left > 0) ? (*((const unsigned char *) src) & 0x80) : 0;

    /* The remaining length is tested before the byte is read, so the loop
       never looks at the byte behind the last one it may copy. */
    while (length > 0 && side == (*((const unsigned char *) src) & 0x80)) {
        *dst++ = *src++;
        length--;
    }

    *from_left -= src - (const char *) *from;
    *from = (XPointer) src;

    *to_left -= dst - (char *) *to;
    *to = (XPointer) dst;

    if (num_args > 0)
        *((XlcCharSet *)args[0]) = (side ? state->GR_charset : state->GL_charset);

    return 0;
}
/*
 * from XlcNCharSet to XlcNString
 *
 * Copies bytes from *from to *to, leaving out the control characters that
 * a STRING may not contain.  args[0] must be the charset currently held as
 * GL or GR charset in the converter state; otherwise, or if no argument is
 * given, -1 is returned and nothing is touched.
 *
 * On return the four in/out arguments have been advanced past what was
 * consumed / produced.  The result is the number of bytes left out.
 */
static int
cstostr(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    State state = (State) conv->state;
    const unsigned char *in;
    unsigned char *out;
    int in_left, out_left;
    int dropped = 0;
    XlcCharSet cs;

    /* This converter can only convert from ISO8859-1:GL and ISO8859-1:GR. */
    if (num_args < 1)
        return -1;
    cs = (XlcCharSet) args[0];
    if (cs != state->GL_charset && cs != state->GR_charset)
        return -1;

    in = (const unsigned char *) *from;
    out = (unsigned char *) *to;
    in_left = *from_left;
    out_left = *to_left;

    /* Every iteration consumes exactly one input byte. */
    for (; in_left > 0 && out_left > 0; in_left--) {
        unsigned char c = *in++;
        /* Citing ICCCM: "STRING as a type specifies the ISO Latin-1 character
           set plus the control characters TAB and NEWLINE." */
        int low_control = (c < 0x20 && c != 0x00 && c != 0x09 && c != 0x0a);
        int high_control = (c >= 0x7f && c < 0xa0);

        if (low_control || high_control) {
            dropped++;
            continue;
        }
        *out++ = c;
        out_left--;
    }

    *from_left -= in - (const unsigned char *) *from;
    *from = (XPointer) in;

    *to_left -= out - (unsigned char *) *to;
    *to = (XPointer) out;

    return dropped;
}
/*
 * Allocates a converter together with its parse state in one block: the
 * StateRec sits directly behind the XlcConvRec.  The converter gets the
 * given method table and an initialized state.
 * Returns NULL if the allocation fails.
 */
static XlcConv
create_conv(
    XlcConvMethods methods)
{
    XlcConv conv = Xmalloc(sizeof(XlcConvRec) + sizeof(StateRec));

    if (conv != NULL) {
        conv->state = (XPointer) (conv + 1);
        conv->methods = methods;
        init_state(conv);
    }
    return conv;
}
/* Releases a converter obtained from create_conv.  First entry of all four
   method tables in this file. */
static void
close_converter(
XlcConv conv)
{
/* conv->state is allocated together with conv, free both at once. */
Xfree(conv);
}
/* Method table of the Compound Text -> CharSet converter, passed to
   create_conv by open_cttocs.  Entries are positional: the routine that
   frees the converter, the conversion routine, and the routine that resets
   the state (field names are declared outside this file). */
static XlcConvMethodsRec cttocs_methods = {
close_converter,
cttocs,
init_state
};
/* Creates a Compound Text -> CharSet converter.  Registered with
   _XlcSetConverter by _XlcInitCTInfo.  None of the four arguments is used.
   Returns NULL if create_conv fails. */
static XlcConv
open_cttocs(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_conv(&cttocs_methods);
}
/* Method table of the CharSet -> Compound Text converter, passed to
   create_conv by open_cstoct.  Entries are positional: the routine that
   frees the converter, the conversion routine, and the routine that resets
   the state. */
static XlcConvMethodsRec cstoct_methods = {
close_converter,
cstoct,
init_state
};
/* Creates a CharSet -> Compound Text converter.  Registered with
   _XlcSetConverter by _XlcInitCTInfo.  None of the four arguments is used.
   Returns NULL if create_conv fails. */
static XlcConv
open_cstoct(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_conv(&cstoct_methods);
}
/* Method table of the String -> CharSet converter, passed to create_conv
   by open_strtocs.  Entries are positional: the routine that frees the
   converter, the conversion routine, and the routine that resets the
   state. */
static XlcConvMethodsRec strtocs_methods = {
close_converter,
strtocs,
init_state
};
/* Creates a String -> CharSet converter.  Registered with
   _XlcSetConverter by _XlcInitCTInfo.  None of the four arguments is used.
   Returns NULL if create_conv fails. */
static XlcConv
open_strtocs(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_conv(&strtocs_methods);
}
/* Method table of the CharSet -> String converter, passed to create_conv
   by open_cstostr.  Entries are positional: the routine that frees the
   converter, the conversion routine, and the routine that resets the
   state. */
static XlcConvMethodsRec cstostr_methods = {
close_converter,
cstostr,
init_state
};
/* Creates a CharSet -> String converter.  Registered with
   _XlcSetConverter by _XlcInitCTInfo.  None of the four arguments is used.
   Returns NULL if create_conv fails. */
static XlcConv
open_cstostr(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_conv(&cstostr_methods);
}
/* =========================== Initialization =========================== */
Bool
_XlcInitCTInfo(void)
{
if (ct_list == NULL) {
const CTDataRec *ct_data;
int num;
XlcCharSet charset;
/* Initialize ct_list. */
num = sizeof(default_ct_data) / sizeof(CTDataRec);
for (ct_data = default_ct_data; num > 0; ct_data++, num--) {
charset = _XlcAddCT(ct_data->name, ct_data->ct_sequence);
if (charset == NULL)
continue;
if (strncmp(charset->ct_sequence, "\x1b\x25\x2f", 3) != 0)
charset->source = CSsrcStd;
else
charset->source = CSsrcXLC;
}
/* Register CompoundText and CharSet converters. */
_XlcSetConverter((XLCd) NULL, XlcNCompoundText,
(XLCd) NULL, XlcNCharSet,
open_cttocs);
_XlcSetConverter((XLCd) NULL, XlcNString,
(XLCd) NULL, XlcNCharSet,
open_strtocs);
_XlcSetConverter((XLCd) NULL, XlcNCharSet,
(XLCd) NULL, XlcNCompoundText,
open_cstoct);
_XlcSetConverter((XLCd) NULL, XlcNCharSet,
(XLCd) NULL, XlcNString,
open_cstostr);
}
return True;
}