|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* $LynxId: UCAux.c,v 1.51 2016/04/17 22:18:15 tom Exp $
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
#include <HTUtils.h>
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#include <HTCJK.h>
|
|
Packit |
f574b8 |
#include <UCMap.h>
|
|
Packit |
f574b8 |
#include <UCDefs.h>
|
|
Packit |
f574b8 |
#include <HTStream.h>
|
|
Packit |
f574b8 |
#include <UCAux.h>
|
|
Packit |
f574b8 |
#include <LYCharSets.h>
|
|
Packit |
f574b8 |
#include <LYCurses.h>
|
|
Packit |
f574b8 |
#include <LYStrings.h>
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
BOOL UCCanUniTranslateFrom(int from)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
if (from < 0)
|
|
Packit |
f574b8 |
return NO;
|
|
Packit |
f574b8 |
#ifndef EXP_JAPANESEUTF8_SUPPORT
|
|
Packit |
f574b8 |
if (LYCharSet_UC[from].enc == UCT_ENC_CJK)
|
|
Packit |
f574b8 |
return NO;
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
if (!strcmp(LYCharSet_UC[from].MIMEname, "x-transparent"))
|
|
Packit |
f574b8 |
return NO;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/* others YES */
|
|
Packit |
f574b8 |
return YES;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
BOOL UCCanTranslateUniTo(int to)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
if (to < 0)
|
|
Packit |
f574b8 |
return NO;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
return YES; /* well at least some characters... */
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
BOOL UCCanTranslateFromTo(int from,
|
|
Packit |
f574b8 |
int to)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
if (from == to)
|
|
Packit |
f574b8 |
return YES;
|
|
Packit |
f574b8 |
if (from < 0 || to < 0)
|
|
Packit |
f574b8 |
return NO;
|
|
Packit |
f574b8 |
if (from == LATIN1)
|
|
Packit |
f574b8 |
return UCCanTranslateUniTo(to);
|
|
Packit |
f574b8 |
if (to == LATIN1 || LYCharSet_UC[to].enc == UCT_ENC_UTF8)
|
|
Packit |
f574b8 |
return UCCanUniTranslateFrom(from);
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
const char *fromname = LYCharSet_UC[from].MIMEname;
|
|
Packit |
f574b8 |
const char *toname = LYCharSet_UC[to].MIMEname;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (!strcmp(fromname, "x-transparent") ||
|
|
Packit |
f574b8 |
!strcmp(toname, "x-transparent")) {
|
|
Packit |
f574b8 |
return YES; /* ??? */
|
|
Packit |
f574b8 |
} else if (!strcmp(fromname, "us-ascii")) {
|
|
Packit |
f574b8 |
return YES;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (LYCharSet_UC[from].enc == UCT_ENC_CJK) {
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* CJK mode may be off (i.e., !IS_CJK_TTY) because the current
|
|
Packit |
f574b8 |
* document is not CJK, but the check may be for capability in
|
|
Packit |
f574b8 |
* relation to another document, for which CJK mode might be turned
|
|
Packit |
f574b8 |
* on when retrieved. Thus, when the from charset is CJK, check if
|
|
Packit |
f574b8 |
* the to charset is CJK, and return NO or YES in relation to that.
|
|
Packit |
f574b8 |
* - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
if (LYCharSet_UC[to].enc != UCT_ENC_CJK)
|
|
Packit |
f574b8 |
return NO;
|
|
Packit |
f574b8 |
if ((!strcmp(toname, "euc-jp") ||
|
|
Packit |
f574b8 |
!strcmp(toname, "shift_jis")) &&
|
|
Packit |
f574b8 |
(!strcmp(fromname, "euc-jp") ||
|
|
Packit |
f574b8 |
!strcmp(fromname, "shift_jis")))
|
|
Packit |
f574b8 |
return YES;
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* The euc-cn and euc-kr charsets were handled by the (from == to)
|
|
Packit |
f574b8 |
* above, so we need not check those. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
return NO;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
return YES; /* others YES */
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Returns YES if no translation necessary (because
|
|
Packit |
f574b8 |
* charsets are equal, are equivalent, etc.).
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
BOOL UCNeedNotTranslate(int from,
|
|
Packit |
f574b8 |
int to)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
const char *fromname;
|
|
Packit |
f574b8 |
const char *toname;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (from == to)
|
|
Packit |
f574b8 |
return YES;
|
|
Packit |
f574b8 |
if (from < 0)
|
|
Packit |
f574b8 |
return NO; /* ??? */
|
|
Packit |
f574b8 |
if (LYCharSet_UC[from].enc == UCT_ENC_7BIT) {
|
|
Packit |
f574b8 |
return YES; /* Only 7bit chars. */
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
fromname = LYCharSet_UC[from].MIMEname;
|
|
Packit |
f574b8 |
if (!strcmp(fromname, "x-transparent") ||
|
|
Packit |
f574b8 |
!strcmp(fromname, "us-ascii")) {
|
|
Packit |
f574b8 |
return YES;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (to < 0)
|
|
Packit |
f574b8 |
return NO; /* ??? */
|
|
Packit |
f574b8 |
if (to == LATIN1) {
|
|
Packit |
f574b8 |
if (LYCharSet_UC[from].codepoints & (UCT_CP_SUBSETOF_LAT1))
|
|
Packit |
f574b8 |
return YES;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
toname = LYCharSet_UC[to].MIMEname;
|
|
Packit |
f574b8 |
if (!strcmp(toname, "x-transparent")) {
|
|
Packit |
f574b8 |
return YES;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (LYCharSet_UC[to].enc == UCT_ENC_UTF8) {
|
|
Packit |
f574b8 |
return NO;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (from == LATIN1) {
|
|
Packit |
f574b8 |
if (LYCharSet_UC[from].codepoints & (UCT_CP_SUPERSETOF_LAT1))
|
|
Packit |
f574b8 |
return YES;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (LYCharSet_UC[from].enc == UCT_ENC_CJK) {
|
|
Packit |
f574b8 |
if (!IS_CJK_TTY) /* Use that global flag, for now. */
|
|
Packit |
f574b8 |
return NO;
|
|
Packit |
f574b8 |
if (HTCJK == JAPANESE &&
|
|
Packit |
f574b8 |
(!strcmp(fromname, "euc-jp") ||
|
|
Packit |
f574b8 |
!strcmp(fromname, "shift_jis")))
|
|
Packit |
f574b8 |
return YES; /* translate internally by lynx, no unicode */
|
|
Packit |
f574b8 |
return NO; /* If not handled by (from == to) above. */
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
return NO;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* The idea here is that any stage of the stream pipe which is interested
|
|
Packit |
f574b8 |
* in some charset dependent processing will call this function.
|
|
Packit |
f574b8 |
* Given input and output charsets, this function will set various flags
|
|
Packit |
f574b8 |
* in a UCTransParams structure that _suggest_ to the caller what to do.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* Should be called once when a stage starts processing text (and the
|
|
Packit |
f574b8 |
* input and output charsets are known), or whenever one of input or
|
|
Packit |
f574b8 |
* output charsets has changed (e.g., by SGML.c stage after HTML.c stage
|
|
Packit |
f574b8 |
* has processed a META tag).
|
|
Packit |
f574b8 |
* The global flags (LYRawMode, HTPassEightBitRaw etc.) are currently
|
|
Packit |
f574b8 |
* not taken into account here (except for HTCJK, somewhat), it's still
|
|
Packit |
f574b8 |
* up to the caller to do something about them. - KW
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
void UCSetTransParams(UCTransParams * pT, int cs_in,
|
|
Packit |
f574b8 |
const LYUCcharset *p_in,
|
|
Packit |
f574b8 |
int cs_out,
|
|
Packit |
f574b8 |
const LYUCcharset *p_out)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
CTRACE((tfp, "UCSetTransParams: from %s(%d) to %s(%d)\n",
|
|
Packit |
f574b8 |
p_in->MIMEname, UCGetLYhndl_byMIME(p_in->MIMEname),
|
|
Packit |
f574b8 |
p_out->MIMEname, UCGetLYhndl_byMIME(p_out->MIMEname)));
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Initialize this element to FALSE, and set it TRUE below if we're dealing
|
|
Packit |
f574b8 |
* with VISCII. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
pT->trans_C0_to_uni = FALSE;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* The "transparent" display character set is a "super raw mode". - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
pT->transp = (BOOL) (!strcmp(p_in->MIMEname, "x-transparent") ||
|
|
Packit |
f574b8 |
!strcmp(p_out->MIMEname, "x-transparent"));
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* UCS-2 is handled as a special case in SGML_write().
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
pT->ucs_mode = 0;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (pT->transp) {
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Set up the structure for "transparent". - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
pT->do_cjk = FALSE;
|
|
Packit |
f574b8 |
pT->decode_utf8 = FALSE;
|
|
Packit |
f574b8 |
pT->output_utf8 = FALSE; /* We may, but won't know about it. - KW */
|
|
Packit |
f574b8 |
pT->do_8bitraw = TRUE;
|
|
Packit |
f574b8 |
pT->use_raw_char_in = TRUE;
|
|
Packit |
f574b8 |
pT->strip_raw_char_in = FALSE;
|
|
Packit |
f574b8 |
pT->pass_160_173_raw = TRUE;
|
|
Packit |
f574b8 |
pT->repl_translated_C0 = (BOOL) (p_out->enc == UCT_ENC_8BIT_C0);
|
|
Packit |
f574b8 |
pT->trans_C0_to_uni = (BOOL) (p_in->enc == UCT_ENC_8BIT_C0 ||
|
|
Packit |
f574b8 |
p_out->enc == UCT_ENC_8BIT_C0);
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Initialize local flags. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
BOOL intm_ucs = FALSE;
|
|
Packit |
f574b8 |
BOOL use_ucs = FALSE;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Set this element if we want to treat the input as CJK. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
pT->do_cjk = (BOOL) ((p_in->enc == UCT_ENC_CJK) && IS_CJK_TTY);
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Set these elements based on whether we are dealing with UTF-8. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
pT->decode_utf8 = (BOOL) (p_in->enc == UCT_ENC_UTF8);
|
|
Packit |
f574b8 |
pT->output_utf8 = (BOOL) (p_out->enc == UCT_ENC_UTF8);
|
|
Packit |
f574b8 |
if (pT->do_cjk) {
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Set up the structure for a CJK input with
|
|
Packit |
f574b8 |
* a CJK output (IS_CJK_TTY). - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
pT->trans_to_uni = FALSE;
|
|
Packit |
f574b8 |
pT->do_8bitraw = FALSE;
|
|
Packit |
f574b8 |
pT->pass_160_173_raw = TRUE;
|
|
Packit |
f574b8 |
pT->use_raw_char_in = FALSE; /* Not used for CJK. - KW */
|
|
Packit |
f574b8 |
pT->repl_translated_C0 = FALSE;
|
|
Packit |
f574b8 |
pT->trans_from_uni = FALSE; /* Not used for CJK. - KW */
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Set up for all other charset combinations. The intm_ucs flag is
|
|
Packit |
f574b8 |
* set TRUE if the input charset is iso-8859-1 or UTF-8, or largely
|
|
Packit |
f574b8 |
* equivalent to them, i.e., if we have UCS without having to do a
|
|
Packit |
f574b8 |
* table translation.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
intm_ucs = (BOOL) (cs_in == LATIN1 || pT->decode_utf8 ||
|
|
Packit |
f574b8 |
(p_in->codepoints &
|
|
Packit |
f574b8 |
(UCT_CP_SUBSETOF_LAT1 | UCT_CP_SUBSETOF_UCS2)));
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* pT->trans_to_uni is set TRUE if we do not have that as input
|
|
Packit |
f574b8 |
* already, and we can translate to Unicode. Note that UTF-8
|
|
Packit |
f574b8 |
* always is converted to Unicode in functions that use the
|
|
Packit |
f574b8 |
* transformation structure, so it is treated as already Unicode
|
|
Packit |
f574b8 |
* here.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
pT->trans_to_uni = (BOOL) (!intm_ucs &&
|
|
Packit |
f574b8 |
UCCanUniTranslateFrom(cs_in));
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* We set this if we are translating to Unicode and what normally
|
|
Packit |
f574b8 |
* are low value control characters in fact are encoding octets for
|
|
Packit |
f574b8 |
* the input charset (presently, this applies to VISCII). - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
pT->trans_C0_to_uni = (BOOL) (pT->trans_to_uni &&
|
|
Packit |
f574b8 |
p_in->enc == UCT_ENC_8BIT_C0);
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* We set this, presently, for VISCII. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
pT->repl_translated_C0 = (BOOL) (p_out->enc == UCT_ENC_8BIT_C0);
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Currently unused for any charset combination.
|
|
Packit |
f574b8 |
* Should always be FALSE
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
pT->strip_raw_char_in = FALSE;
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* use_ucs should be set TRUE if we have or will create Unicode
|
|
Packit |
f574b8 |
* values for input octets or UTF multibytes. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
use_ucs = (BOOL) (intm_ucs || pT->trans_to_uni);
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* This is set TRUE if use_ucs was set FALSE. It is complementary
|
|
Packit |
f574b8 |
* to the HTPassEightBitRaw flag, which is set TRUE or FALSE
|
|
Packit |
f574b8 |
* elsewhere based on the raw mode setting in relation to the
|
|
Packit |
f574b8 |
* current Display Character Set. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
pT->do_8bitraw = (BOOL) (!use_ucs);
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* This is set TRUE when 160 and 173 should not be treated as nbsp
|
|
Packit |
f574b8 |
* and shy, respectively. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
pT->pass_160_173_raw = (BOOL) (!use_ucs &&
|
|
Packit |
f574b8 |
!(p_in->like8859 & UCT_R_8859SPECL));
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* This is set when the input and output charsets match, and they
|
|
Packit |
f574b8 |
* are not ones which should go through a Unicode translation
|
|
Packit |
f574b8 |
* process anyway. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
pT->use_raw_char_in = (BOOL) (!pT->output_utf8 &&
|
|
Packit |
f574b8 |
cs_in == cs_out &&
|
|
Packit |
f574b8 |
!pT->trans_C0_to_uni);
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* This should be set TRUE when we expect to have done translation
|
|
Packit |
f574b8 |
* to Unicode or had the equivalent as input, can translate it to
|
|
Packit |
f574b8 |
* our output charset, and normally want to do so. The latter
|
|
Packit |
f574b8 |
* depends on the pT->do_8bitraw and pT->use_raw_char_in values set
|
|
Packit |
f574b8 |
* above, but also on HTPassEightBitRaw in any functions which use
|
|
Packit |
f574b8 |
* the transformation structure.. - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
pT->trans_from_uni = (BOOL) (use_ucs && !pT->do_8bitraw &&
|
|
Packit |
f574b8 |
!pT->use_raw_char_in &&
|
|
Packit |
f574b8 |
UCCanTranslateUniTo(cs_out));
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* This function initializes the transformation
|
|
Packit |
f574b8 |
* structure by setting all its elements to
|
|
Packit |
f574b8 |
* FALSE. - KW
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
void UCTransParams_clear(UCTransParams * pT)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
pT->transp = FALSE;
|
|
Packit |
f574b8 |
pT->do_cjk = FALSE;
|
|
Packit |
f574b8 |
pT->decode_utf8 = FALSE;
|
|
Packit |
f574b8 |
pT->output_utf8 = FALSE;
|
|
Packit |
f574b8 |
pT->do_8bitraw = FALSE;
|
|
Packit |
f574b8 |
pT->use_raw_char_in = FALSE;
|
|
Packit |
f574b8 |
pT->strip_raw_char_in = FALSE;
|
|
Packit |
f574b8 |
pT->pass_160_173_raw = FALSE;
|
|
Packit |
f574b8 |
pT->trans_to_uni = FALSE;
|
|
Packit |
f574b8 |
pT->trans_C0_to_uni = FALSE;
|
|
Packit |
f574b8 |
pT->repl_translated_C0 = FALSE;
|
|
Packit |
f574b8 |
pT->trans_from_uni = FALSE;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* If terminal is in UTF-8 mode, it probably cannot understand box drawing
|
|
Packit |
f574b8 |
* chars as the 8-bit (n)curses handles them. (This may also be true for other
|
|
Packit |
f574b8 |
* display character sets, but isn't currently checked.) In that case set the
|
|
Packit |
f574b8 |
* chars for horizontal and vertical drawing chars to displayable ASCII chars
|
|
Packit |
f574b8 |
* if '0' was requested. They'll stay as they are otherwise. -KW, TD
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* If we're able to obtain a character set based on the locale settings,
|
|
Packit |
f574b8 |
* assume that the user has setup $TERM and the fonts already so line-drawing
|
|
Packit |
f574b8 |
* works.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
void UCSetBoxChars(int cset,
|
|
Packit |
f574b8 |
int *pvert_out,
|
|
Packit |
f574b8 |
int *phori_out,
|
|
Packit |
f574b8 |
int vert_in,
|
|
Packit |
f574b8 |
int hori_in)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
BOOL fix_lines = FALSE;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (cset >= 0) {
|
|
Packit |
f574b8 |
#ifndef WIDEC_CURSES
|
|
Packit |
f574b8 |
if (LYCharSet_UC[cset].enc == UCT_ENC_UTF8) {
|
|
Packit |
f574b8 |
fix_lines = TRUE;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* If we've identified a charset that works, require it.
|
|
Packit |
f574b8 |
* This is important if we have loaded a font, which would
|
|
Packit |
f574b8 |
* confuse curses.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
/* US-ASCII vs Latin-1 is safe (usually) */
|
|
Packit |
f574b8 |
if ((cset == US_ASCII
|
|
Packit |
f574b8 |
|| cset == LATIN1)
|
|
Packit |
f574b8 |
&& (linedrawing_char_set == US_ASCII
|
|
Packit |
f574b8 |
|| linedrawing_char_set == LATIN1)) {
|
|
Packit |
f574b8 |
#if (defined(FANCY_CURSES) && defined(A_ALTCHARSET)) || defined(USE_SLANG)
|
|
Packit |
f574b8 |
vert_in = 0;
|
|
Packit |
f574b8 |
hori_in = 0;
|
|
Packit |
f574b8 |
#else
|
|
Packit |
f574b8 |
;
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
#ifdef EXP_CHARTRANS_AUTOSWITCH
|
|
Packit |
f574b8 |
#if defined(NCURSES_VERSION) || defined(HAVE_TIGETSTR)
|
|
Packit |
f574b8 |
else {
|
|
Packit |
f574b8 |
static BOOL first = TRUE;
|
|
Packit |
f574b8 |
static int last_cset = -99;
|
|
Packit |
f574b8 |
static BOOL last_result = TRUE;
|
|
Packit |
f574b8 |
/* *INDENT-OFF* */
|
|
Packit |
f574b8 |
static struct {
|
|
Packit |
f574b8 |
int mapping;
|
|
Packit |
f574b8 |
UCode_t internal;
|
|
Packit |
f574b8 |
int external;
|
|
Packit |
f574b8 |
} table[] = {
|
|
Packit |
f574b8 |
{ 'j', 0x2518, 0 }, /* BOX DRAWINGS LIGHT UP AND LEFT */
|
|
Packit |
f574b8 |
{ 'k', 0x2510, 0 }, /* BOX DRAWINGS LIGHT DOWN AND LEFT */
|
|
Packit |
f574b8 |
{ 'l', 0x250c, 0 }, /* BOX DRAWINGS LIGHT DOWN AND RIGHT */
|
|
Packit |
f574b8 |
{ 'm', 0x2514, 0 }, /* BOX DRAWINGS LIGHT UP AND RIGHT */
|
|
Packit |
f574b8 |
{ 'n', 0x253c, 0 }, /* BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL */
|
|
Packit |
f574b8 |
{ 'q', 0x2500, 0 }, /* BOX DRAWINGS LIGHT HORIZONTAL */
|
|
Packit |
f574b8 |
{ 't', 0x251c, 0 }, /* BOX DRAWINGS LIGHT VERTICAL AND RIGHT */
|
|
Packit |
f574b8 |
{ 'u', 0x2524, 0 }, /* BOX DRAWINGS LIGHT VERTICAL AND LEFT */
|
|
Packit |
f574b8 |
{ 'v', 0x2534, 0 }, /* BOX DRAWINGS LIGHT UP AND HORIZONTAL */
|
|
Packit |
f574b8 |
{ 'w', 0x252c, 0 }, /* BOX DRAWINGS LIGHT DOWN AND HORIZONTAL */
|
|
Packit |
f574b8 |
{ 'x', 0x2502, 0 }, /* BOX DRAWINGS LIGHT VERTICAL */
|
|
Packit |
f574b8 |
};
|
|
Packit |
f574b8 |
/* *INDENT-ON* */
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
unsigned n;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (first) {
|
|
Packit |
f574b8 |
static char acsc_name[] = "acsc";
|
|
Packit |
f574b8 |
char *map = tigetstr(acsc_name);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (map != 0) {
|
|
Packit |
f574b8 |
CTRACE((tfp, "build terminal line-drawing map\n"));
|
|
Packit |
f574b8 |
while (map[0] != 0 && map[1] != 0) {
|
|
Packit |
f574b8 |
for (n = 0; n < TABLESIZE(table); ++n) {
|
|
Packit |
f574b8 |
if (table[n].mapping == map[0]) {
|
|
Packit |
f574b8 |
table[n].external = UCH(map[1]);
|
|
Packit |
f574b8 |
CTRACE((tfp,
|
|
Packit |
f574b8 |
" map[%c] %#" PRI_UCode_t " -> %#x\n",
|
|
Packit |
f574b8 |
table[n].mapping,
|
|
Packit |
f574b8 |
table[n].internal,
|
|
Packit |
f574b8 |
table[n].external));
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
map += 2;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
first = FALSE;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (cset == last_cset) {
|
|
Packit |
f574b8 |
fix_lines = last_result;
|
|
Packit |
f574b8 |
} else if (cset == UTF8_handle) {
|
|
Packit |
f574b8 |
last_result = FALSE;
|
|
Packit |
f574b8 |
last_cset = cset;
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
CTRACE((tfp, "check terminal line-drawing map\n"));
|
|
Packit |
f574b8 |
for (n = 0; n < TABLESIZE(table); ++n) {
|
|
Packit |
f574b8 |
int test = UCTransUniChar(table[n].internal, cset);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (test != table[n].external) {
|
|
Packit |
f574b8 |
CTRACE((tfp,
|
|
Packit |
f574b8 |
"line-drawing map %c mismatch (have %#x, want %#x)\n",
|
|
Packit |
f574b8 |
table[n].mapping,
|
|
Packit |
f574b8 |
test, table[n].external));
|
|
Packit |
f574b8 |
fix_lines = TRUE;
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
last_result = fix_lines;
|
|
Packit |
f574b8 |
last_cset = cset;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
#else
|
|
Packit |
f574b8 |
else if (cset != linedrawing_char_set && linedrawing_char_set >= 0) {
|
|
Packit |
f574b8 |
fix_lines = TRUE;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (fix_lines) {
|
|
Packit |
f574b8 |
if (!vert_in)
|
|
Packit |
f574b8 |
vert_in = '|';
|
|
Packit |
f574b8 |
if (!hori_in)
|
|
Packit |
f574b8 |
hori_in = '-';
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
*pvert_out = vert_in;
|
|
Packit |
f574b8 |
*phori_out = hori_in;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Given an output target HTStream* (can also be a HTStructured* via
|
|
Packit |
f574b8 |
* typecast), the target stream's put_character method, and a Unicode
|
|
Packit |
f574b8 |
* character, CPutUtf8_charstring() will either output the UTF8
|
|
Packit |
f574b8 |
* encoding of the Unicode and return YES, or do nothing and return
|
|
Packit |
f574b8 |
* NO (if conversion would be unnecessary or the Unicode character is
|
|
Packit |
f574b8 |
* considered invalid).
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* [Could be used more generally, but is currently only used for &#nnnnn
|
|
Packit |
f574b8 |
* stuff - generation of UTF8 from 8-bit encoded charsets not yet done
|
|
Packit |
f574b8 |
* by SGML.c etc.]
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
#define PUTC(ch) ((*myPutc)(target, (char)(ch)))
|
|
Packit |
f574b8 |
#define PUTC2(ch) ((*myPutc)(target,(char)(0x80|(0x3f &(ch)))))
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
BOOL UCPutUtf8_charstring(HTStream *target, putc_func_t *myPutc, UCode_t code)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
if (code < 128)
|
|
Packit |
f574b8 |
return NO; /* indicate to caller we didn't handle it */
|
|
Packit |
f574b8 |
else if (code < 0x800L) {
|
|
Packit |
f574b8 |
PUTC(0xc0 | (code >> 6));
|
|
Packit |
f574b8 |
PUTC2(code);
|
|
Packit |
f574b8 |
} else if (code < 0x10000L) {
|
|
Packit |
f574b8 |
PUTC(0xe0 | (code >> 12));
|
|
Packit |
f574b8 |
PUTC2(code >> 6);
|
|
Packit |
f574b8 |
PUTC2(code);
|
|
Packit |
f574b8 |
} else if (code < 0x200000L) {
|
|
Packit |
f574b8 |
PUTC(0xf0 | (code >> 18));
|
|
Packit |
f574b8 |
PUTC2(code >> 12);
|
|
Packit |
f574b8 |
PUTC2(code >> 6);
|
|
Packit |
f574b8 |
PUTC2(code);
|
|
Packit |
f574b8 |
} else if (code < 0x4000000L) {
|
|
Packit |
f574b8 |
PUTC(0xf8 | (code >> 24));
|
|
Packit |
f574b8 |
PUTC2(code >> 18);
|
|
Packit |
f574b8 |
PUTC2(code >> 12);
|
|
Packit |
f574b8 |
PUTC2(code >> 6);
|
|
Packit |
f574b8 |
PUTC2(code);
|
|
Packit |
f574b8 |
} else if (code <= 0x7fffffffL) {
|
|
Packit |
f574b8 |
PUTC(0xfc | (code >> 30));
|
|
Packit |
f574b8 |
PUTC2(code >> 24);
|
|
Packit |
f574b8 |
PUTC2(code >> 18);
|
|
Packit |
f574b8 |
PUTC2(code >> 12);
|
|
Packit |
f574b8 |
PUTC2(code >> 6);
|
|
Packit |
f574b8 |
PUTC2(code);
|
|
Packit |
f574b8 |
} else
|
|
Packit |
f574b8 |
return NO;
|
|
Packit |
f574b8 |
return YES;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* This function converts a Unicode (UCode_t) value
|
|
Packit |
f574b8 |
* to a multibyte UTF-8 character, which is loaded
|
|
Packit |
f574b8 |
* into the buffer received as an argument. The
|
|
Packit |
f574b8 |
* buffer should be large enough to hold at least
|
|
Packit |
f574b8 |
* seven characters (but should be declared as 8
|
|
Packit |
f574b8 |
* to minimize byte alignment problems with some
|
|
Packit |
f574b8 |
* compilers). - FM
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
BOOL UCConvertUniToUtf8(UCode_t code, char *buffer)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
char *ch = buffer;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (!ch)
|
|
Packit |
f574b8 |
return NO;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (code <= 0 || code > 0x7fffffffL) {
|
|
Packit |
f574b8 |
*ch = '\0';
|
|
Packit |
f574b8 |
return NO;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (code < 0x800L) {
|
|
Packit |
f574b8 |
*ch++ = (char) (0xc0 | (code >> 6));
|
|
Packit |
f574b8 |
*ch++ = (char) (0x80 | (0x3f & (code)));
|
|
Packit |
f574b8 |
*ch = '\0';
|
|
Packit |
f574b8 |
} else if (code < 0x10000L) {
|
|
Packit |
f574b8 |
*ch++ = (char) (0xe0 | (code >> 12));
|
|
Packit |
f574b8 |
*ch++ = (char) (0x80 | (0x3f & (code >> 6)));
|
|
Packit |
f574b8 |
*ch++ = (char) (0x80 | (0x3f & (code)));
|
|
Packit |
f574b8 |
*ch = '\0';
|
|
Packit |
f574b8 |
} else if (code < 0x200000L) {
|
|
Packit |
f574b8 |
*ch++ = (char) (0xf0 | (code >> 18));
|
|
Packit |
f574b8 |
*ch++ = (char) (0x80 | (0x3f & (code >> 12)));
|
|
Packit |
f574b8 |
*ch++ = (char) (0x80 | (0x3f & (code >> 6)));
|
|
Packit |
f574b8 |
*ch++ = (char) (0x80 | (0x3f & (code)));
|
|
Packit |
f574b8 |
*ch = '\0';
|
|
Packit |
f574b8 |
} else if (code < 0x4000000L) {
|
|
Packit |
f574b8 |
*ch++ = (char) (0xf8 | (code >> 24));
|
|
Packit |
f574b8 |
*ch++ = (char) (0x80 | (0x3f & (code >> 18)));
|
|
Packit |
f574b8 |
*ch++ = (char) (0x80 | (0x3f & (code >> 12)));
|
|
Packit |
f574b8 |
*ch++ = (char) (0x80 | (0x3f & (code >> 6)));
|
|
Packit |
f574b8 |
*ch++ = (char) (0x80 | (0x3f & (code)));
|
|
Packit |
f574b8 |
*ch = '\0';
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
*ch++ = (char) (0xfc | (code >> 30));
|
|
Packit |
f574b8 |
*ch++ = (char) (0x80 | (0x3f & (code >> 24)));
|
|
Packit |
f574b8 |
*ch++ = (char) (0x80 | (0x3f & (code >> 18)));
|
|
Packit |
f574b8 |
*ch++ = (char) (0x80 | (0x3f & (code >> 12)));
|
|
Packit |
f574b8 |
*ch++ = (char) (0x80 | (0x3f & (code >> 6)));
|
|
Packit |
f574b8 |
*ch++ = (char) (0x80 | (0x3f & (code)));
|
|
Packit |
f574b8 |
*ch = '\0';
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
return YES;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Get UCS character code for one character from UTF-8 encoded string.
|
|
Packit |
f574b8 |
*
|
|
Packit |
f574b8 |
* On entry:
|
|
Packit |
f574b8 |
* *ppuni should point to beginning of UTF-8 encoding character
|
|
Packit |
f574b8 |
* On exit:
|
|
Packit |
f574b8 |
* *ppuni is advanced to point to the last byte of UTF-8 sequence,
|
|
Packit |
f574b8 |
* if there was a valid one; otherwise unchanged.
|
|
Packit |
f574b8 |
* returns the UCS value
|
|
Packit |
f574b8 |
* returns negative value on error (invalid UTF-8 sequence)
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
UCode_t UCGetUniFromUtf8String(const char **ppuni)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
UCode_t uc_out = 0;
|
|
Packit |
f574b8 |
const char *p = *ppuni;
|
|
Packit |
f574b8 |
int utf_count, i;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (!(**ppuni & 0x80))
|
|
Packit |
f574b8 |
return (UCode_t) **ppuni; /* ASCII range character */
|
|
Packit |
f574b8 |
else if (!(**ppuni & 0x40))
|
|
Packit |
f574b8 |
return (-1); /* not a valid UTF-8 start */
|
|
Packit |
f574b8 |
if ((*p & 0xe0) == 0xc0) {
|
|
Packit |
f574b8 |
utf_count = 1;
|
|
Packit |
f574b8 |
} else if ((*p & 0xf0) == 0xe0) {
|
|
Packit |
f574b8 |
utf_count = 2;
|
|
Packit |
f574b8 |
} else if ((*p & 0xf8) == 0xf0) {
|
|
Packit |
f574b8 |
utf_count = 3;
|
|
Packit |
f574b8 |
} else if ((*p & 0xfc) == 0xf8) {
|
|
Packit |
f574b8 |
utf_count = 4;
|
|
Packit |
f574b8 |
} else if ((*p & 0xfe) == 0xfc) {
|
|
Packit |
f574b8 |
utf_count = 5;
|
|
Packit |
f574b8 |
} else { /* garbage */
|
|
Packit |
f574b8 |
return (-1);
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
for (p = *ppuni, i = 0; i < utf_count; i++) {
|
|
Packit |
f574b8 |
if ((*(++p) & 0xc0) != 0x80)
|
|
Packit |
f574b8 |
return (-1);
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
p = *ppuni;
|
|
Packit |
f574b8 |
switch (utf_count) {
|
|
Packit |
f574b8 |
case 1:
|
|
Packit |
f574b8 |
uc_out = (((*p & 0x1f) << 6) |
|
|
Packit |
f574b8 |
(*(p + 1) & 0x3f));
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
case 2:
|
|
Packit |
f574b8 |
uc_out = (((((*p & 0x0f) << 6) |
|
|
Packit |
f574b8 |
(*(p + 1) & 0x3f)) << 6) |
|
|
Packit |
f574b8 |
(*(p + 2) & 0x3f));
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
case 3:
|
|
Packit |
f574b8 |
uc_out = (((((((*p & 0x07) << 6) |
|
|
Packit |
f574b8 |
(*(p + 1) & 0x3f)) << 6) |
|
|
Packit |
f574b8 |
(*(p + 2) & 0x3f)) << 6) |
|
|
Packit |
f574b8 |
(*(p + 3) & 0x3f));
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
case 4:
|
|
Packit |
f574b8 |
uc_out = (((((((((*p & 0x03) << 6) |
|
|
Packit |
f574b8 |
(*(p + 1) & 0x3f)) << 6) |
|
|
Packit |
f574b8 |
(*(p + 2) & 0x3f)) << 6) |
|
|
Packit |
f574b8 |
(*(p + 3) & 0x3f)) << 6) |
|
|
Packit |
f574b8 |
(*(p + 4) & 0x3f));
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
case 5:
|
|
Packit |
f574b8 |
uc_out = (((((((((((*p & 0x01) << 6) |
|
|
Packit |
f574b8 |
(*(p + 1) & 0x3f)) << 6) |
|
|
Packit |
f574b8 |
(*(p + 2) & 0x3f)) << 6) |
|
|
Packit |
f574b8 |
(*(p + 3) & 0x3f)) << 6) |
|
|
Packit |
f574b8 |
(*(p + 4) & 0x3f)) << 6) |
|
|
Packit |
f574b8 |
(*(p + 5) & 0x3f));
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
*ppuni = p + utf_count;
|
|
Packit |
f574b8 |
return uc_out;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Combine UTF-8 into Unicode. Incomplete characters are either ignored, or
|
|
Packit |
f574b8 |
* returned as the UCS replacement character.
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
dUTF8 HTDecodeUTF8(UTFDecodeState * me, int *c_in_out, UCode_t *result)
|
|
Packit |
f574b8 |
{
|
|
Packit |
f574b8 |
dUTF8 rc = dUTF8_ok;
|
|
Packit |
f574b8 |
int c = *c_in_out;
|
|
Packit |
f574b8 |
unsigned uc = UCH(c);
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (TOASCII(uc) > 127) {
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* continue a multibyte character...
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
if (me->utf_count > 0 && (TOASCII(c) & 0xc0) == 0x80) {
|
|
Packit |
f574b8 |
if (me->utf_count <= 0) {
|
|
Packit |
f574b8 |
me->utf_char = UCS_REPL;
|
|
Packit |
f574b8 |
} else if (me->utf_count == 1) {
|
|
Packit |
f574b8 |
int limit = (int) (me->utf_buf_p - me->utf_buf) + 1;
|
|
Packit |
f574b8 |
int maybe = 0;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* Check for overlong sequences (from comment in xterm):
|
|
Packit |
f574b8 |
* 1100000x 10xxxxxx
|
|
Packit |
f574b8 |
* 11100000 100xxxxx 10xxxxxx
|
|
Packit |
f574b8 |
* 11110000 1000xxxx 10xxxxxx 10xxxxxx
|
|
Packit |
f574b8 |
* 11111000 10000xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
Packit |
f574b8 |
* 11111100 100000xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
switch (limit) {
|
|
Packit |
f574b8 |
case 2:
|
|
Packit |
f574b8 |
maybe = (UCH(me->utf_buf[0]) & 0xfe) == 0xc0;
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
case 3:
|
|
Packit |
f574b8 |
maybe = ((UCH(me->utf_buf[0]) == 0xe0) &&
|
|
Packit |
f574b8 |
(UCH(me->utf_buf[1]) & 0xf0) == 0x80);
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
case 4:
|
|
Packit |
f574b8 |
maybe = ((UCH(me->utf_buf[0]) == 0xf0) &&
|
|
Packit |
f574b8 |
(UCH(me->utf_buf[1]) & 0xf8) == 0x80);
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
case 5:
|
|
Packit |
f574b8 |
maybe = ((UCH(me->utf_buf[0]) == 0xf8) &&
|
|
Packit |
f574b8 |
(UCH(me->utf_buf[1]) & 0xfd) == 0x80);
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (maybe) {
|
|
Packit |
f574b8 |
while (limit-- > 2) {
|
|
Packit |
f574b8 |
if ((UCH(me->utf_buf[limit]) & 0xc0) != 0x80) {
|
|
Packit |
f574b8 |
maybe = 0;
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (maybe) {
|
|
Packit |
f574b8 |
me->utf_char = UCS_REPL;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
if (me->utf_char == UCS_REPL) {
|
|
Packit |
f574b8 |
rc = dUTF8_err;
|
|
Packit |
f574b8 |
} else if (me->utf_char || ((uc & 0x7f) >> (7 - me->utf_count))) {
|
|
Packit |
f574b8 |
me->utf_char = (me->utf_char << 6) | (TOASCII(c) & 0x3f);
|
|
Packit |
f574b8 |
if ((me->utf_char >= 0xd800 &&
|
|
Packit |
f574b8 |
me->utf_char <= 0xdfff) ||
|
|
Packit |
f574b8 |
(me->utf_char == 0xfffe) ||
|
|
Packit |
f574b8 |
(me->utf_char == UCS_HIDE)) {
|
|
Packit |
f574b8 |
me->utf_char = UCS_REPL;
|
|
Packit |
f574b8 |
rc = dUTF8_err;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
me->utf_char = UCS_REPL;
|
|
Packit |
f574b8 |
rc = dUTF8_err;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
me->utf_count--;
|
|
Packit |
f574b8 |
*(me->utf_buf_p) = (char) c;
|
|
Packit |
f574b8 |
(me->utf_buf_p)++;
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
if (me->utf_count == 0) {
|
|
Packit |
f574b8 |
*(me->utf_buf_p) = '\0';
|
|
Packit |
f574b8 |
*result = me->utf_char;
|
|
Packit |
f574b8 |
if (*result < 256) {
|
|
Packit |
f574b8 |
*c_in_out = UCH(*result & 0xff);
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
switch (*result) {
|
|
Packit |
f574b8 |
case 0x200e: /* left-to-right mark */
|
|
Packit |
f574b8 |
case 0x200f: /* right-to-left mark */
|
|
Packit |
f574b8 |
/* lynx does not use these */
|
|
Packit |
f574b8 |
*result = '\0';
|
|
Packit |
f574b8 |
break;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
rc = dUTF8_more;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
/*
|
|
Packit |
f574b8 |
* begin a multibyte character
|
|
Packit |
f574b8 |
*/
|
|
Packit |
f574b8 |
rc = dUTF8_more;
|
|
Packit |
f574b8 |
me->utf_buf_p = me->utf_buf;
|
|
Packit |
f574b8 |
*(me->utf_buf_p) = (char) c;
|
|
Packit |
f574b8 |
(me->utf_buf_p)++;
|
|
Packit |
f574b8 |
if ((uc & 0xe0) == 0xc0) {
|
|
Packit |
f574b8 |
me->utf_count = 1;
|
|
Packit |
f574b8 |
me->utf_char = (uc & 0x1f);
|
|
Packit |
f574b8 |
} else if ((uc & 0xf0) == 0xe0) {
|
|
Packit |
f574b8 |
me->utf_count = 2;
|
|
Packit |
f574b8 |
me->utf_char = (uc & 0x0f);
|
|
Packit |
f574b8 |
} else if ((uc & 0xf8) == 0xf0) {
|
|
Packit |
f574b8 |
me->utf_count = 3;
|
|
Packit |
f574b8 |
me->utf_char = (uc & 0x07);
|
|
Packit |
f574b8 |
} else if ((uc & 0xfc) == 0xf8) {
|
|
Packit |
f574b8 |
me->utf_count = 4;
|
|
Packit |
f574b8 |
me->utf_char = (uc & 0x03);
|
|
Packit |
f574b8 |
} else if ((uc & 0xfe) == 0xfc) {
|
|
Packit |
f574b8 |
me->utf_count = 5;
|
|
Packit |
f574b8 |
me->utf_char = (uc & 0x01);
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
me->utf_count = 0;
|
|
Packit |
f574b8 |
me->utf_buf_p = me->utf_buf;
|
|
Packit |
f574b8 |
*(me->utf_buf_p) = '\0';
|
|
Packit |
f574b8 |
rc = dUTF8_err;
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
me->utf_count = 0;
|
|
Packit |
f574b8 |
me->utf_buf_p = me->utf_buf;
|
|
Packit |
f574b8 |
*(me->utf_buf_p) = '\0';
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
|
|
Packit |
f574b8 |
#if 0
|
|
Packit |
f574b8 |
if (rc != dUTF8_ok) {
|
|
Packit |
f574b8 |
CTRACE((tfp, "UTF8 %#x ->%#x %s\n",
|
|
Packit |
f574b8 |
uc, UCH(*c_in_out),
|
|
Packit |
f574b8 |
(rc == dUTF8_err) ? "err" : "more"));
|
|
Packit |
f574b8 |
} else {
|
|
Packit |
f574b8 |
if (*result > 127) {
|
|
Packit |
f574b8 |
CTRACE((tfp, "UTF8 %#x == %#x\n", uc, (int) *result));
|
|
Packit |
f574b8 |
} else if (c != UCS_REPL && !isspace(c)) {
|
|
Packit |
f574b8 |
CTRACE((tfp, "CHAR %#x == %c (%#x)\n", uc, uc, (int) *result));
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
}
|
|
Packit |
f574b8 |
#endif
|
|
Packit |
f574b8 |
return rc;
|
|
Packit |
f574b8 |
}
|