/* vim: sta et sw=4
 */

/*
 * $Id: utf8trans.c,v 1.12 2006/04/13 01:00:01 stevecheng Exp $
 *
 * (C) 2001 Steve Cheng <stevecheng@users.sourceforge.net>
 *
 * See ../COPYING for the copyright status of this software.
 *
 */
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#ifdef HAVE_CONFIG_H
|
|
Packit |
e4b6da |
#include "config.h"
|
|
Packit |
e4b6da |
#endif
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#define _GNU_SOURCE /* For getline */
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#include <stdio.h>
|
|
Packit |
e4b6da |
#include <stdlib.h>
|
|
Packit |
e4b6da |
#include <string.h>
|
|
Packit |
e4b6da |
#include <errno.h>
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#ifdef HAVE_UNISTD_H
|
|
Packit |
e4b6da |
#include <unistd.h>
|
|
Packit |
e4b6da |
#endif
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#include "mtable.h"
|
|
Packit |
e4b6da |
#include "strings_buffer.h"
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/* UCS-4 character */
|
|
Packit |
e4b6da |
typedef unsigned int CHAR;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#ifdef HAVE_GETOPT_H
|
|
Packit |
e4b6da |
#include <getopt.h>
|
|
Packit |
e4b6da |
#endif
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#ifdef HAVE_GETOPT_LONG
|
|
Packit |
e4b6da |
/* Long-option specification */
|
|
Packit |
e4b6da |
struct option long_options[] =
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
{ "version", 0, 0, 'v' },
|
|
Packit |
e4b6da |
{ "help", 0, 0, 'h' },
|
|
Packit |
e4b6da |
{ "modify", 0, 0, 'm' },
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
{ 0, 0, 0, 0 }
|
|
Packit |
e4b6da |
};
|
|
Packit |
e4b6da |
#endif
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
const char *prog_name;
|
|
Packit |
e4b6da |
const char *charmap_filename = NULL;
|
|
Packit |
e4b6da |
mtable_t charmap_table;
|
|
Packit |
e4b6da |
static int charmap_table_exponents[] = { 8, 8, 8, 8, 0 };
|
|
Packit |
e4b6da |
strings_buffer_t charmap_strings;
|
|
Packit |
e4b6da |
int modify_in_place = 0;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
int do_options(int argc, char *argv[]);
|
|
Packit |
e4b6da |
void print_version(void);
|
|
Packit |
e4b6da |
void print_usage();
|
|
Packit |
e4b6da |
void add_translation(CHAR codepoint, char *translation);
|
|
Packit |
e4b6da |
void parse_charmap(FILE *stream);
|
|
Packit |
e4b6da |
char *encode_utf8(CHAR codepoint);
|
|
Packit |
e4b6da |
void translate(FILE *in, FILE *out);
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#ifndef HAVE_GETLINE
|
|
Packit |
e4b6da |
ssize_t getline(char **lineptr, size_t *n, FILE *stream);
|
|
Packit |
e4b6da |
#endif
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
int
|
|
Packit |
e4b6da |
main(int argc, char *argv[])
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
FILE *charmap_f;
|
|
Packit |
e4b6da |
int optind;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
prog_name = argv[0];
|
|
Packit |
e4b6da |
optind = do_options(argc, argv);
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
charmap_table = mtable_new(charmap_table_exponents);
|
|
Packit |
e4b6da |
charmap_strings = strings_buffer_new(4096);
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/* Read translation spec */
|
|
Packit |
e4b6da |
charmap_filename = argv[optind];
|
|
Packit |
e4b6da |
charmap_f = fopen(charmap_filename, "r");
|
|
Packit |
e4b6da |
if(!charmap_f) {
|
|
Packit |
e4b6da |
fprintf(stderr, "%s:%s: %s\n",
|
|
Packit |
e4b6da |
prog_name,
|
|
Packit |
e4b6da |
charmap_filename,
|
|
Packit |
e4b6da |
strerror(errno));
|
|
Packit |
e4b6da |
exit(1);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
parse_charmap(charmap_f);
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
fclose(charmap_f);
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
optind++;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if(!argv[optind]) {
|
|
Packit |
e4b6da |
translate(stdin, stdout);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
else {
|
|
Packit |
e4b6da |
int i;
|
|
Packit |
e4b6da |
FILE *f, *out;
|
|
Packit |
e4b6da |
for(i = optind; argv[i]; i++) {
|
|
Packit |
e4b6da |
out = stdout;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
f = fopen(argv[i], "r");
|
|
Packit |
e4b6da |
if(!f) {
|
|
Packit |
e4b6da |
fprintf(stderr, "%s:%s: %s\n",
|
|
Packit |
e4b6da |
prog_name,
|
|
Packit |
e4b6da |
argv[i],
|
|
Packit |
e4b6da |
strerror(errno));
|
|
Packit |
e4b6da |
exit(1);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#ifdef HAVE_UNISTD_H
|
|
Packit |
e4b6da |
if(modify_in_place) {
|
|
Packit |
e4b6da |
if(unlink(argv[i]) < 0) {
|
|
Packit |
e4b6da |
fprintf(stderr, "%s:%s: %s\n",
|
|
Packit |
e4b6da |
prog_name,
|
|
Packit |
e4b6da |
argv[i],
|
|
Packit |
e4b6da |
strerror(errno));
|
|
Packit |
e4b6da |
fclose(f);
|
|
Packit |
e4b6da |
continue;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
out = fopen(argv[i], "w");
|
|
Packit |
e4b6da |
if(!out) {
|
|
Packit |
e4b6da |
fprintf(stderr, "%s:%s: %s\n",
|
|
Packit |
e4b6da |
prog_name,
|
|
Packit |
e4b6da |
argv[i],
|
|
Packit |
e4b6da |
strerror(errno));
|
|
Packit |
e4b6da |
fclose(f);
|
|
Packit |
e4b6da |
continue;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
#endif
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
translate(f, out);
|
|
Packit |
e4b6da |
fclose(f);
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if(modify_in_place)
|
|
Packit |
e4b6da |
fclose(out);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
mtable_delete(charmap_table);
|
|
Packit |
e4b6da |
strings_buffer_delete(charmap_strings);
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
return 0;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/*
 * Print the program name, package version (when config.h supplies
 * VERSION), CVS revision, project URL and copyright notice to stdout.
 */
void
print_version(void)
{
    /* The separating space lives inside the #ifdef so that the output is
     * "docbook2X <version>)" with a version and "docbook2X)" without. */
    puts("utf8trans (part of docbook2X"
#ifdef HAVE_CONFIG_H
         " " VERSION
#endif
         ")");

    puts("$Revision: 1.12 $ $Date: 2006/04/13 01:00:01 $");
    puts("<URL:http://docbook2x.sourceforge.net/>\n");

    puts("Copyright (C) 2000-2004 Steve Cheng\n"
         "This is free software; see the source for copying conditions.\n"
         "There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR\n"
         "A PARTICULAR PURPOSE.");
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
void
|
|
Packit |
e4b6da |
print_usage()
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
printf("Usage: %s [options] CHARMAP [FILES...]\n", prog_name);
|
|
Packit |
e4b6da |
puts("Transliterate UTF-8 characters according to a table.\n");
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#ifdef HAVE_UNISTD_H
|
|
Packit |
e4b6da |
#ifdef HAVE_GETOPT_LONG
|
|
Packit |
e4b6da |
puts(" -m, --modify modify given files in-place\n"
|
|
Packit |
e4b6da |
" -v, --version display version information and exit\n"
|
|
Packit |
e4b6da |
" -h, --help display this usage information\n");
|
|
Packit |
e4b6da |
#else
|
|
Packit |
e4b6da |
puts(" -m modify given files in-place\n"
|
|
Packit |
e4b6da |
" -v display version information and exit\n"
|
|
Packit |
e4b6da |
" -h display this usage information\n");
|
|
Packit |
e4b6da |
#endif
|
|
Packit |
e4b6da |
#endif
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
puts("See utf8trans(1) for details on this program.\n");
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
int
|
|
Packit |
e4b6da |
do_options(int argc, char *argv[])
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
#ifdef HAVE_UNISTD_H /* On a Unix, so have some version of getopt */
|
|
Packit |
e4b6da |
int optc;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#ifdef HAVE_GETOPT_LONG
|
|
Packit |
e4b6da |
while((optc = getopt_long(argc, argv, "vhm",
|
|
Packit |
e4b6da |
long_options, NULL)) != -1)
|
|
Packit |
e4b6da |
#else
|
|
Packit |
e4b6da |
while((optc = getopt(argc, argv, "vhm")) != -1)
|
|
Packit |
e4b6da |
#endif /* HAVE_GETOPT_LONG */
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
switch(optc) {
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/* --version */
|
|
Packit |
e4b6da |
case 'v':
|
|
Packit |
e4b6da |
print_version();
|
|
Packit |
e4b6da |
exit(0);
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/* --help */
|
|
Packit |
e4b6da |
case 'h':
|
|
Packit |
e4b6da |
print_usage();
|
|
Packit |
e4b6da |
exit(0);
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/* --modify */
|
|
Packit |
e4b6da |
case 'm':
|
|
Packit |
e4b6da |
modify_in_place = 1;
|
|
Packit |
e4b6da |
break;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
case '?':
|
|
Packit |
e4b6da |
default:
|
|
Packit |
e4b6da |
exit(1);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if(optind > argc-1) {
|
|
Packit |
e4b6da |
fprintf(stderr, "%s: must specify charmap\n", prog_name);
|
|
Packit |
e4b6da |
exit(1);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
return optind;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#else /* No getopt, so don't process any options.
|
|
Packit |
e4b6da |
They are all trivial, so that justifies ignoring them. */
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if(argc < 2) {
|
|
Packit |
e4b6da |
fprintf(stderr, "%s: must specify charmap\n", prog_name);
|
|
Packit |
e4b6da |
exit(1);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
return 1;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#endif
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
void
|
|
Packit |
e4b6da |
add_translation(CHAR codepoint, char *translation)
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
char *s = strings_buffer_add(&charmap_strings, translation);
|
|
Packit |
e4b6da |
mtable_set(charmap_table, codepoint, s);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
char *
|
|
Packit |
e4b6da |
get_translation(CHAR codepoint)
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
char *translation = mtable_get(charmap_table, codepoint);
|
|
Packit |
e4b6da |
if(translation != NULL)
|
|
Packit |
e4b6da |
return translation;
|
|
Packit |
e4b6da |
else
|
|
Packit |
e4b6da |
return encode_utf8(codepoint);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
char *
|
|
Packit |
e4b6da |
encode_utf8(CHAR c)
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
static char buf[7];
|
|
Packit |
e4b6da |
char *p = buf;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if(c < 0x80) {
|
|
Packit |
e4b6da |
*p++ = c;
|
|
Packit |
e4b6da |
*p++ = '\0';
|
|
Packit |
e4b6da |
} else if(c < 0x800) {
|
|
Packit |
e4b6da |
*p++ = 0xC0 | (c>>6);
|
|
Packit |
e4b6da |
*p++ = 0x80 | (c & 0x3F);
|
|
Packit |
e4b6da |
*p++ = '\0';
|
|
Packit |
e4b6da |
} else if(c < 0x10000) {
|
|
Packit |
e4b6da |
*p++ = 0xE0 | (c>>12);
|
|
Packit |
e4b6da |
*p++ = 0x80 | ((c>>6) & 0x3F);
|
|
Packit |
e4b6da |
*p++ = 0x80 | (c & 0x3F);
|
|
Packit |
e4b6da |
*p++ = '\0';
|
|
Packit |
e4b6da |
} else if (c < 0x200000) {
|
|
Packit |
e4b6da |
*p++ = 0xF0 | (c>>18);
|
|
Packit |
e4b6da |
*p++ = 0x80 | ((c>>12) & 0x3F);
|
|
Packit |
e4b6da |
*p++ = 0x80 | ((c>>6) & 0x3F);
|
|
Packit |
e4b6da |
*p++ = 0x80 | (c & 0x3F);
|
|
Packit |
e4b6da |
*p++ = '\0';
|
|
Packit |
e4b6da |
} else if (c < 0x4000000) {
|
|
Packit |
e4b6da |
*p++ = 0xF8 | (c>>24);
|
|
Packit |
e4b6da |
*p++ = 0x80 | ((c>>18) & 0x3F);
|
|
Packit |
e4b6da |
*p++ = 0x80 | ((c>>12) & 0x3F);
|
|
Packit |
e4b6da |
*p++ = 0x80 | ((c>>6) & 0x3F);
|
|
Packit |
e4b6da |
*p++ = 0x80 | (c & 0x3F);
|
|
Packit |
e4b6da |
*p++ = '\0';
|
|
Packit |
e4b6da |
} else if (c < 0x80000000) {
|
|
Packit |
e4b6da |
*p++ = 0xFC | (c>>30);
|
|
Packit |
e4b6da |
*p++ = 0x80 | ((c>>24) & 0x3F);
|
|
Packit |
e4b6da |
*p++ = 0x80 | ((c>>18) & 0x3F);
|
|
Packit |
e4b6da |
*p++ = 0x80 | ((c>>12) & 0x3F);
|
|
Packit |
e4b6da |
*p++ = 0x80 | ((c>>6) & 0x3F);
|
|
Packit |
e4b6da |
*p++ = 0x80 | (c & 0x3F);
|
|
Packit |
e4b6da |
*p++ = '\0';
|
|
Packit |
e4b6da |
} else {
|
|
Packit |
e4b6da |
/* Oops */
|
|
Packit |
e4b6da |
abort();
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
return buf;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/*
 * Character classes used by the charmap parser, written as ASCII code
 * values: 0x30-0x39 is 0 to 9, 0x61-0x66 is a to f, 0x41-0x46 is A to F.
 * Both macros evaluate their argument more than once.
 */
#define IS_HEXDIGIT(c) (((c) >= 0x30 && (c) <= 0x39) || \
                        ((c) >= 0x61 && (c) <= 0x66) || \
                        ((c) >= 0x41 && (c) <= 0x46))
/* Space or horizontal tab. */
#define IS_SPACE(c) ((c) == ' ' || (c) == '\t')
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
void parse_charmap(FILE *stream)
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
char *buf = NULL;
|
|
Packit |
e4b6da |
size_t bufsize = 0;
|
|
Packit |
e4b6da |
char *p, *c, *t;
|
|
Packit |
e4b6da |
int linecount = 0;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
CHAR codepoint;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
while(!feof(stream)) {
|
|
Packit |
e4b6da |
linecount++;
|
|
Packit |
e4b6da |
if(getline(&buf, &bufsize, stream) == -1) {
|
|
Packit |
e4b6da |
if(!feof(stream)) {
|
|
Packit |
e4b6da |
fprintf(stderr, "%s:%s: %s",
|
|
Packit |
e4b6da |
prog_name, charmap_filename,
|
|
Packit |
e4b6da |
strerror(errno));
|
|
Packit |
e4b6da |
exit(2);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
goto nextline;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/* Chomp newline */
|
|
Packit |
e4b6da |
p = buf + (strlen(buf)-1);
|
|
Packit |
e4b6da |
if(*p == '\n') *p = '\0';
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/* Skip to codepoint */
|
|
Packit |
e4b6da |
for(c = buf; *c && IS_SPACE(*c); c++);
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/* Skip empty lines and comment lines */
|
|
Packit |
e4b6da |
if(*c == '\0' || *c == '#')
|
|
Packit |
e4b6da |
goto nextline;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
t = NULL;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/* Parse the codepoint (a number in hex) */
|
|
Packit |
e4b6da |
for(p = c; *p; p++) {
|
|
Packit |
e4b6da |
if(!IS_HEXDIGIT(*p)) {
|
|
Packit |
e4b6da |
if(!IS_SPACE(*p)) {
|
|
Packit |
e4b6da |
fprintf(stderr, "%s:%s:%d: %s",
|
|
Packit |
e4b6da |
prog_name, charmap_filename, linecount,
|
|
Packit |
e4b6da |
"(parsing codepoint) invalid hex number\n");
|
|
Packit |
e4b6da |
goto nextline;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
*p = '\0';
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if(sscanf(c, "%x", &codepoint) != 1) {
|
|
Packit |
e4b6da |
fprintf(stderr, "%s:%s:%d: %s",
|
|
Packit |
e4b6da |
prog_name, charmap_filename, linecount,
|
|
Packit |
e4b6da |
"(parsing codepoint) invalid hex number\n");
|
|
Packit |
e4b6da |
goto nextline;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
t = ++p;
|
|
Packit |
e4b6da |
break;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if(t) {
|
|
Packit |
e4b6da |
add_translation(codepoint, t);
|
|
Packit |
e4b6da |
} else {
|
|
Packit |
e4b6da |
/* No translation text */
|
|
Packit |
e4b6da |
if(sscanf(c, "%x", &codepoint) != 1) {
|
|
Packit |
e4b6da |
fprintf(stderr, "%s:%s:%d: %s",
|
|
Packit |
e4b6da |
prog_name, charmap_filename, linecount,
|
|
Packit |
e4b6da |
"(parsing codepoint) invalid hex number\n");
|
|
Packit |
e4b6da |
goto nextline;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
add_translation(codepoint, "");
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
nextline: ;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if(buf)
|
|
Packit |
e4b6da |
free(buf);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
CHAR
|
|
Packit |
e4b6da |
read_utf8_char(FILE *stream)
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
CHAR character;
|
|
Packit |
e4b6da |
int b, n, i;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
b = fgetc(stream);
|
|
Packit |
e4b6da |
if(b == EOF)
|
|
Packit |
e4b6da |
return 0xFFFFFFFF;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/* UTF-8 sequence leading byte */
|
|
Packit |
e4b6da |
if((b & 0xC0) == 0xC0) {
|
|
Packit |
e4b6da |
/* Count bytes and eat lead bits */
|
|
Packit |
e4b6da |
for(n = 0; b & 0x80; b<<=1, n++);
|
|
Packit |
e4b6da |
b = (b & 0xFF) >> n;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if(n > 6 || n < 2) return 0xFFFD;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
switch(n) {
|
|
Packit |
e4b6da |
case 6: b <<= 6;
|
|
Packit |
e4b6da |
case 5: b <<= 6;
|
|
Packit |
e4b6da |
case 4: b <<= 6;
|
|
Packit |
e4b6da |
case 3: b <<= 6;
|
|
Packit |
e4b6da |
case 2: b <<= 6;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
character = b;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
for(i = n; i>1; i--) {
|
|
Packit |
e4b6da |
b = fgetc(stream);
|
|
Packit |
e4b6da |
if(b == EOF) return 0xFFFD;
|
|
Packit |
e4b6da |
if((b & 0xC0) != 0x80) return 0xFFFD;
|
|
Packit |
e4b6da |
b &= 0x3F;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
switch(i) {
|
|
Packit |
e4b6da |
case 6: b <<= 6;
|
|
Packit |
e4b6da |
case 5: b <<= 6;
|
|
Packit |
e4b6da |
case 4: b <<= 6;
|
|
Packit |
e4b6da |
case 3: b <<= 6;
|
|
Packit |
e4b6da |
case 2: ;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
character |= b;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/* Check for overlong sequences */
|
|
Packit |
e4b6da |
switch(n) {
|
|
Packit |
e4b6da |
case 6: if(character < 0x4000000) return 0xFFFD;
|
|
Packit |
e4b6da |
case 5: if(character < 0x200000) return 0xFFFD;
|
|
Packit |
e4b6da |
case 4: if(character < 0x10000) return 0xFFFD;
|
|
Packit |
e4b6da |
case 3: if(character < 0x800) return 0xFFFD;
|
|
Packit |
e4b6da |
case 2: if(character < 0x80) return 0xFFFD;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
return character;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/* UTF-8 sequence continuation byte */
|
|
Packit |
e4b6da |
else if((b & 0xC0) == 0x80) {
|
|
Packit |
e4b6da |
return 0xFFFD;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/* ASCII character */
|
|
Packit |
e4b6da |
else {
|
|
Packit |
e4b6da |
return (CHAR)b;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
void
|
|
Packit |
e4b6da |
translate(FILE *in, FILE *out)
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
CHAR character;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
while(!feof(in))
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
character = read_utf8_char(in);
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if(character == 0xFFFFFFFF)
|
|
Packit |
e4b6da |
break;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
/* Don't lose null characters in input */
|
|
Packit |
e4b6da |
if(character == 0 && !mtable_get(charmap_table, 0))
|
|
Packit |
e4b6da |
fputc(0, out);
|
|
Packit |
e4b6da |
else
|
|
Packit |
e4b6da |
fputs(get_translation(character), out);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#if !HAVE_GETLINE
/*
 * Replacement for POSIX getline() on systems that lack it.
 *
 * Reads one line from `stream`, including the trailing newline if there
 * is one, into the buffer *lineptr, whose capacity is *n.  The buffer is
 * allocated when *lineptr is NULL and grown as needed; *lineptr and *n
 * are kept up to date, and the caller frees the buffer.
 *
 * Returns the number of bytes stored (not counting the terminating NUL),
 * or -1 at end of file with nothing read, on a read error, or when
 * memory cannot be allocated.
 */
ssize_t getline(char **lineptr, size_t *n, FILE *stream)
{
    size_t k = 0;
    int c;

    /* Start with room for at least one character plus the terminator. */
    if(!*lineptr || *n < 2) {
        char *p = realloc(*lineptr, 256);
        if(!p)
            return -1;
        *lineptr = p;
        *n = 256;
    }

    for(;;) {
        c = fgetc(stream);
        if(c == EOF) {
            if(k == 0) {
                (*lineptr)[0] = 0;
                return -1;
            }

            break;
        }

        /* Keep room for this character and the terminating NUL. */
        if(k + 1 >= *n) {
            size_t newsize = *n * 2;
            char *p;

            if(newsize <= *n) {
                /* Size overflow: hand back what was read so far. */
                (*lineptr)[k] = 0;
                return -1;
            }

            p = realloc(*lineptr, newsize);
            if(!p) {
                (*lineptr)[k] = 0;
                return -1;
            }

            *lineptr = p;
            /* The capacity must follow the buffer, or the growth test
             * above would never fire a second time. */
            *n = newsize;
        }

        (*lineptr)[k++] = (char)c;

        if(c == '\n')
            break;
    }

    (*lineptr)[k] = 0;

    return (ssize_t)k;
}
#endif
|