/* genlex: Generate lexical tables for non-ASCII charsets. */
#include "config.h"
#include "std.h"
#include "tools.h"
#define CANON_ASCII_NONSGML 255 /* Canonical non-SGML character in ASCII. */
#define CANON_ASCII_DATACHAR 254 /* Canonical DATACHAR in ASCII. */
/* Character-number map, read below at indices 0..255.  Presumably maps an
   ASCII code to the corresponding code in the target character set --
   TODO confirm against the file that defines it. */
extern unsigned char charset[];
/* List of lexical tables, walked below until a null entry is found; each
   table is read at indices 0..255. */
extern UNCH *lextabs[];
/* Character translation table, read below at indices 0..255. */
extern UNCH lextran[];
/* Identifiers used for the generated tables: entry i is the name under
   which the table built from lextabs[i] is printed. */
static char *lextabnames[] = {
    "lexcnm",
    "lexcon",
    "lexgrp",
    "lexlms",
    "lexmark",
    "lexsd",
    "lextoke"
};
/* Write to standard output the C source of an array definition
   "UNCH <s>[] = { ... };" holding the first 256 entries of t, printed in
   decimal with sixteen entries per line and followed by a blank line. */
static VOID print_tab(s, t)
char *s;
UNCH *t;
{
    int pos = 0;

    printf("UNCH %s[] = {\n", s);
    while (pos < 256) {
        /* Entries are separated by a space, except that the sixteenth
           entry of each row ends the line. */
        char sep = ' ';

        if (pos % 16 == 15)
            sep = '\n';
        printf("%2d,%c", t[pos], sep);
        pos++;
    }
    fputs("};\n\n", stdout);
}
int main(argc, argv)
int argc;
char **argv;
{
int i;
UNCH tab[256];
char special[256];
/* Shunned character numbers in the reference concrete syntax. */
static UNCH refshun[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255
};
char shunned[256];
char *program_name;
program_name = strrchr(argv[0], '/');
if (program_name)
program_name++;
else
program_name = argv[0];
/* Check that the mapping is 1-1. */
for (i = 0; i < 256; i++)
tab[i] = 0;
for (i = 0; i < 256; i++)
tab[charset[i]] = 1;
for (i = 0; i < 256; i++)
if (!tab[i]) {
fprintf(stderr, "%s: bad mapping: no character mapped to %d\n",
program_name, i);
exit(EXIT_FAILURE);
}
/* Compute special. */
for (i = 0; i < 256; i++)
special[i] = 0;
for (i = 0; lextabs[i]; i++) {
int j;
for (j = 0; j < 256; j++)
if (lextabs[i][j] != lextabs[i][CANON_ASCII_NONSGML]
&& lextabs[i][j] != lextabs[i][CANON_ASCII_DATACHAR])
special[charset[j]] = 1;
}
/* Compute shunned. */
for (i = 0; i < 256; i++)
shunned[i] = 0;
for (i = 0; i < sizeof(refshun); i++)
shunned[refshun[i]] = 1;
printf("/* This file was automatically generated by %s. Do not edit. */\n\n",
program_name);
fputs("#include \"config.h\"\n#include \"entity.h\"\n#include \"sgmldecl.h\"\n\n",
stdout);
/* Generate each of the lexical tables. */
for (i = 0; lextabs[i]; i++) {
int j;
for (j = 0; j < 256; j++)
tab[charset[j]] = lextabs[i][j];
for (j = 0; j < 256; j++)
if (!special[j]) {
if (shunned[j])
tab[j] = lextabs[i][CANON_ASCII_NONSGML];
else
tab[j] = lextabs[i][CANON_ASCII_DATACHAR];
}
print_tab(lextabnames[i], tab);
}
/* Generate lextran. */
for (i = 0; i < 256; i++)
tab[charset[i]] = charset[lextran[i]];
print_tab("lextran", tab);
/* Generate asciicharset. */
fputs("int asciicharset[] = {\n", stdout);
for (i = 0; i < 128; i++)
printf("%3d,%c", charset[i], (i + 1) % 16 == 0 ? '\n' : ' ');
for (i = 128; i < 256; i++)
printf("UNUSED,%c", (i + 1) % 8 == 0 ? '\n' : ' ');
fputs("};\n", stdout);
exit(EXIT_SUCCESS);
}