Blob Blame History Raw
/*****************************************************************************

NAME:
   charset.c -- provide charset support for bogofilter's lexer.

Note:

   Character translation is done to make life easier for the lexer.
   Text is changed only after the message has been saved for
   passthrough.  The end user (mail reader) never sees any changes -
   only the lexer.

AUTHOR:
   David Relson <relson@osagesoftware.com>

******************************************************************************/

#include "common.h"

#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#include "charset.h"
#include "convert_charset.h"
#ifndef	DISABLE_UNICODE
#include "convert_unicode.h"
#endif
#include "xmalloc.h"
#include "xstrdup.h"

#define	SP	' '

#ifndef	DEFAULT_CHARSET
#define	DEFAULT_CHARSET	"us-ascii"
#endif

byte charset_table[256];
byte casefold_table[256];

#define	DEBUG
#undef	DEBUG

#ifndef	DEBUG
#define	PRINT_CHARSET_TABLE
#else
#define	PRINT_CHARSET_TABLE	print_charset_table()
#undef	DEBUG_GENERAL
#define	DEBUG_GENERAL(level)	(verbose >= level)
#endif

#ifdef	DEBUG
static void print_charset_table(void)
{
    int c,r,i;
    char ch;
    if (!DEBUG_GENERAL(1))
	return;
    printf( "\n" );
    for (r=0; r<4; r+=1) {
	for (c=0; c<64; c+=1) {
	    i=r*64+c;
	    ch=charset_table[i];
	    if (ch != 0x08 && ch != 0x09 && ch != '\n' && ch != '\r')
		printf(" %02X.%2c.%02X", i, ch, ch);
	    else
		printf(" %02X.%02X.%02X", i, ch, ch);
	    if ((c & 15) == 15)
		printf( "\n" );
	}
    }
    printf( "\n" );
}
#endif

void init_charset_table(const char *charset_name)
{
#ifdef	DISABLE_UNICODE
    init_charset_table_orig(charset_name);
#else
#ifdef	ENABLE_UNICODE
    init_charset_table_iconv(charset_name, charset_unicode);
#else
    if (encoding != E_UNICODE)
	init_charset_table_orig(charset_name);
    else
	init_charset_table_iconv(charset_name, charset_unicode);
#endif
#endif
    return;
}

/* like set_charset() but charset is in form blabla="CharsetName" */
void got_charset(const char *charset)
{
    set_charset(strchr(charset, '=') + 1);
}

/* like got_charset() but charset is pure charset name */
void set_charset(const char *charset)
{
    bool q = (charset[0] == '"');
    char *s, *d;
    char *t = xstrdup(charset + q);

    for (s = d = t; *s != '\0'; s++)
    {
	char c = tolower((unsigned char)*s);	/* map upper case to lower */
	if (c == '_')		/* map underscore to dash */
	    c = '-';
	if (c == '-' &&		/* map "iso-" to "iso"     */
	    d - t == 3 &&	/* ensure 3 chars avail    */
	    memcmp(t, "iso", 3) == 0)
	    continue;
	if (q && c == '"')
	    break;
	*d++ = c;
    }
    *d = '\0';
    if (DEBUG_CONFIG(0))
       fprintf(dbgout, "got_charset( '%s' )\n", t);
    init_charset_table( t );
    xfree(t);
}