/* genlex: Generate lexical tables for non-ASCII charsets. */

#include "config.h"
#include "std.h"
#include "tools.h"

/* Indices into each ASCII-indexed lexical table whose entries main() takes
   as the canonical lexical class for non-SGML characters and for ordinary
   data characters respectively. */
#define CANON_ASCII_NONSGML 255  /* Canonical non-SGML character in ASCII. */
#define CANON_ASCII_DATACHAR 254 /* Canonical DATACHAR in ASCII. */

/* Tables defined elsewhere.  As used by main():
   charset - 256 entries; charset[c] is used as the code in the target
             character set corresponding to ASCII code c.
   lextabs - array of pointers to 256-entry lexical tables indexed by ASCII
             code; main() stops at the first null pointer.
   lextran - 256 entries indexed by ASCII code whose values are themselves
             used as ASCII codes (they are passed through charset). */
extern unsigned char charset[];
extern UNCH *lextabs[];
extern UNCH lextran[];

/* Names given to the generated tables: main() emits the table built from
   lextabs[i] under the name lextabnames[i].
   NOTE(review): this presumably has to list the tables in the same order,
   and with at least as many entries, as lextabs -- confirm against the
   definition of lextabs. */
static char *lextabnames[] = {
     "lexcnm", "lexcon", "lexgrp", "lexlms", "lexmark", "lexsd", "lextoke"
};

/* Write the 256-entry table T to stdout as the C definition of an UNCH
   array named S.  Entries are comma-terminated and laid out sixteen to a
   line; the definition is followed by a blank line. */
static VOID print_tab(s, t)
     char *s;
     UNCH *t;
{
  int idx;

  printf("UNCH %s[] = {\n", s);
  for (idx = 0; idx < 256; idx++) {
    /* Entries are separated by a space, except that every sixteenth
       entry ends its output line. */
    int sep = ' ';
    if (idx % 16 == 15)
      sep = '\n';
    printf("%2d,%c", t[idx], sep);
  }
  fputs("};\n\n", stdout);
}

/* Generate, on stdout, C source defining the lexical tables for the
   character set described by charset.

   Output consists of a header comment and #include lines, one UNCH table
   per entry of lextabs (named by lextabnames), the lextran table, and the
   int array asciicharset.  The lexical tables and lextran are emitted
   indexed by target character code rather than by ASCII code.

   Exits with EXIT_FAILURE (after a message on stderr) if some code in
   0..255 is not the image of any character under charset, or if writing
   stdout fails; otherwise exits with EXIT_SUCCESS. */
int main(argc, argv)
     int argc;
     char **argv;
{
  int i;
  int nshun;
  UNCH tab[256];
  char special[256];
  /* Shunned character numbers in the reference concrete syntax. */
  static UNCH refshun[] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
    19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255
  };
  char shunned[256];
  char *program_name;

  /* Use the last path component of argv[0] in messages.  argv[0] may be
     a null pointer when argc is 0, so fall back to a fixed name. */
  program_name = "genlex";
  if (argc > 0 && argv[0]) {
    char *slash = strrchr(argv[0], '/');
    program_name = slash ? slash + 1 : argv[0];
  }

  /* Check that the mapping is 1-1.  charset has 256 entries, so it is
     1-1 exactly when every one of the 256 codes is hit at least once. */
  for (i = 0; i < 256; i++)
    tab[i] = 0;
  for (i = 0; i < 256; i++)
    tab[charset[i]] = 1;
  for (i = 0; i < 256; i++)
    if (!tab[i]) {
      fprintf(stderr, "%s: bad mapping: no character mapped to %d\n",
              program_name, i);
      exit(EXIT_FAILURE);
    }

  /* Compute special.  A target code is special if, in at least one
     lexical table, the ASCII character mapped to it has a class other
     than the canonical non-SGML and canonical DATACHAR classes. */
  for (i = 0; i < 256; i++)
    special[i] = 0;
  for (i = 0; lextabs[i]; i++) {
    int j;
    for (j = 0; j < 256; j++) {
      if (lextabs[i][j] != lextabs[i][CANON_ASCII_NONSGML]
          && lextabs[i][j] != lextabs[i][CANON_ASCII_DATACHAR])
        special[charset[j]] = 1;
    }
  }

  /* Compute shunned.  Count elements rather than bytes so the loop bound
     stays correct whatever the size of UNCH. */
  for (i = 0; i < 256; i++)
    shunned[i] = 0;
  nshun = (int)(sizeof(refshun) / sizeof(refshun[0]));
  for (i = 0; i < nshun; i++)
    shunned[refshun[i]] = 1;

  printf("/* This file was automatically generated by %s.  Do not edit. */\n\n",
         program_name);
  fputs("#include \"config.h\"\n#include \"entity.h\"\n#include \"sgmldecl.h\"\n\n",
        stdout);

  /* Generate each of the lexical tables. */
  for (i = 0; lextabs[i]; i++) {
    int j;
    /* Re-index the table by target character code. */
    for (j = 0; j < 256; j++)
      tab[charset[j]] = lextabs[i][j];

    /* Non-special codes get a class chosen by target code number:
       non-SGML if the number is shunned, DATACHAR otherwise. */
    for (j = 0; j < 256; j++)
      if (!special[j]) {
        if (shunned[j])
          tab[j] = lextabs[i][CANON_ASCII_NONSGML];
        else
          tab[j] = lextabs[i][CANON_ASCII_DATACHAR];
      }
    print_tab(lextabnames[i], tab);
  }

  /* Generate lextran. */
  for (i = 0; i < 256; i++)
    tab[charset[i]] = charset[lextran[i]];
  print_tab("lextran", tab);

  /* Generate asciicharset: the target codes of the 128 ASCII characters,
     followed by 128 UNUSED entries. */
  fputs("int asciicharset[] = {\n", stdout);
  for (i = 0; i < 128; i++)
    printf("%3d,%c", charset[i], (i + 1) % 16 == 0 ? '\n' : ' ');
  for (i = 128; i < 256; i++)
    printf("UNUSED,%c", (i + 1) % 8 == 0 ? '\n' : ' ');
  fputs("};\n", stdout);

  /* The output is normally redirected into a source file; do not report
     success if it could not be written completely. */
  if (fflush(stdout) != 0 || ferror(stdout)) {
    fprintf(stderr, "%s: error writing output\n", program_name);
    exit(EXIT_FAILURE);
  }

  exit(EXIT_SUCCESS);
}