Blob Blame History Raw
/* sgmldecl.c -
   SGML declaration parsing.

   Written by James Clark (jjc@jclark.com).
*/

#include "sgmlincl.h"

/* Symbolic names for the error numbers that are generated only by
this module.  The values are not contiguous; numbers missing from the
sequence below are simply not given a name in this file. */

#define E_STANDARD 163
#define E_SIGNIFICANT 164
#define E_BADLIT 165
#define E_SCOPE 166
#define E_XNUM 167
#define E_BADVERSION 168
#define E_NMUNSUP 169
#define E_XNMLIT 170
#define E_CHARDESC 171
#define E_CHARDUP 172
#define E_CHARRANGE 173
#define E_7BIT 174
#define E_CHARMISSING 175
#define E_SHUNNED 176
#define E_NONSGML 177
#define E_CAPSET 178
#define E_CAPMISSING 179
#define E_SYNTAX 180
#define E_CHARNUM 181
#define E_SWITCHES 182
#define E_INSTANCE 183
#define E_ZEROFEATURE 184
#define E_YESNO 185
#define E_CAPACITY 186
#define E_NOTSUPPORTED 187
#define E_FORMAL 189
#define E_BADCLASS 190
#define E_MUSTBENON 191
#define E_BADBASECHAR 199
#define E_SYNREFUNUSED 200
#define E_SYNREFUNDESC 201
#define E_SYNREFUNKNOWN 202
#define E_SYNREFUNKNOWNSET 203
#define E_FUNDUP 204
#define E_BADFUN 205
#define E_FUNCHAR 206
#define E_GENDELIM 207
#define E_SRDELIM 208
#define E_BADKEY 209
#define E_BADQUANTITY 210
#define E_BADNAME 211
#define E_REFNAME 212
#define E_DUPNAME 213
#define E_QUANTITY 214
#define E_QTOOBIG 215
#define E_NMSTRTCNT 219
#define E_NMCHARCNT 220
#define E_NMDUP 221
#define E_NMBAD 222
#define E_NMMINUS 223
#define E_UNKNOWNSET 227

#define CANON_NMC '.'		/* Canonical name character. */
#define CANON_NMS 'A'		/* Canonical name start character. */
#define CANON_MIN ':'		/* Canonical minimum data character. */

/* Status values returned by the sd*() section parsers in this file. */
#define SUCCESS 1
#define FAIL 0
/* Number of elements in the array v (v must be an array, not a pointer). */
#define SIZEOF(v) (sizeof(v)/sizeof(v[0]))
/* Non-zero if the text of token tok, starting at its second byte, is
equal to str.  The first byte of the token buffer is skipped. */
#define matches(tok, str) (ustrcmp((tok)+1, (str)) == 0)

/* The version literal that sdversion() expects at the start of the
SGML declaration. */
static UNCH standard[] = "ISO 8879:1986";

/* Public identifiers of the two concrete syntaxes recognized by
sdpubsyntax(). */
#define REFERENCE_SYNTAX "ISO 8879:1986//SYNTAX Reference//EN"
#define CORE_SYNTAX "ISO 8879:1986//SYNTAX Core//EN"

/* Replacement reserved names collected by sdnames(); allocated on first
use with one slot of REFNAMELEN+1 bytes per key. */
static UNCH (*newkey)[REFNAMELEN+1] = 0;

/* One entry of a table that maps a name to a value.  The tables of
this type in this file end with an entry whose name is 0. */
struct pmap {
     char *name;		/* name to be looked up */
     UNIV value;		/* value associated with the name */
};

/* The reference capacity set: one value for each name in captab. */
#define REFCAPSET \
{ 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, \
35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L }

/* Copied into sd.capacity by sdcapacity() before any explicitly
specified capacities are applied. */
long refcapset[NCAPACITY] = REFCAPSET;

/* A pmap of known capacity sets, keyed by public identifier and
searched by sdpubcapacity(). */

static struct pmap capset_map[] = {
     { "ISO 8879:1986//CAPACITY Reference//EN", (UNIV)refcapset },
     { 0 },
};

/* Table of capacity names.  Must match *CAP in sgmldecl.h.
sdcapacity() uses the position of a name in this table as the index
into sd.capacity. */

char *captab[] = {
     "TOTALCAP",
     "ENTCAP",
     "ENTCHCAP",
     "ELEMCAP",
     "GRPCAP",
     "EXGRPCAP",
     "EXNMCAP",
     "ATTCAP",
     "ATTCHCAP",
     "AVGRPCAP",
     "NOTCAP",
     "NOTCHCAP",
     "IDCAP",
     "IDREFCAP",
     "MAPCAP",
     "LKSETCAP",
     "LKNMCAP",
};

/* The default SGML declaration. */
/* MAXNUMBER is the initial value of sd.subdoc when SUPPORT_SUBDOC is
defined. */
#define MAXNUMBER 99999999L

/* Reference quantity set.  These are the initial values of sd.quantity;
REFLITLEN and REFNAMELEN are defined elsewhere. */

#define REFATTCNT 40
#define REFATTSPLEN 960
#define REFBSEQLEN 960
#define REFDTAGLEN 16
#define REFDTEMPLEN 16
#define REFENTLVL 16
#define REFGRPCNT 32
#define REFGRPGTCNT 96
#define REFGRPLVL 16
#define REFNORMSEP 2
#define REFPILEN 240
#define REFTAGLEN 960
#define REFTAGLVL 24

/* NOTE(review): presumably the largest single allocation, in bytes,
that the parser is prepared to make -- confirm against the allocator. */
#define ALLOC_MAX 65534

#define BIGINT 30000

/* Upper limits for each quantity, collected in max_quantity[] and
enforced by sdquantity().  The limits derived from ALLOC_MAX are the
number of structures of the given type that fit in ALLOC_MAX bytes,
less a small margin. */
#define MAXATTCNT ((ALLOC_MAX/sizeof(struct ad)) - 2)
#define MAXATTSPLEN BIGINT
#define MAXBSEQLEN BIGINT
#define MAXDTAGLEN 16
#define MAXDTEMPLEN 16
#define MAXENTLVL ((ALLOC_MAX/sizeof(struct source)) - 1)
#define MAXGRPCNT MAXGRPGTCNT
/* Must be between 96 and 253 */
#define MAXGRPGTCNT 253
#define MAXGRPLVL MAXGRPGTCNT
#define MAXLITLEN BIGINT
/* This guarantees that NAMELEN < LITLEN (ie there's always space for a name
in a buffer intended for a literal.) */
#define MAXNAMELEN (REFLITLEN - 1)
#define MAXNORMSEP 2
#define MAXPILEN BIGINT
#define MAXTAGLEN BIGINT
#define MAXTAGLVL ((ALLOC_MAX/sizeof(struct tag)) - 1)

/* Table of quantity names.  Must match Q* in sgmldecl.h.  The position
of a name here is the index used for sd.quantity and max_quantity. */

static char *quantity_names[] = {
    "ATTCNT",   
    "ATTSPLEN", 
    "BSEQLEN",  
    "DTAGLEN",  
    "DTEMPLEN", 
    "ENTLVL",   
    "GRPCNT",   
    "GRPGTCNT", 
    "GRPLVL",   
    "LITLEN",   
    "NAMELEN",  
    "NORMSEP",  
    "PILEN",    
    "TAGLEN",   
    "TAGLVL",    
};

/* Largest value sdquantity() accepts for each quantity, in the same
order as quantity_names. */
static int max_quantity[] = {
    MAXATTCNT,
    MAXATTSPLEN,
    MAXBSEQLEN,
    MAXDTAGLEN,
    MAXDTEMPLEN,
    MAXENTLVL,
    MAXGRPCNT,
    MAXGRPGTCNT,
    MAXGRPLVL,
    MAXLITLEN,
    MAXNAMELEN,
    MAXNORMSEP,
    MAXPILEN,
    MAXTAGLEN,
    MAXTAGLVL,
};

/* Allocated by sdquantity() the first time a quantity is changed;
element i is set to 1 when quantity i was given a new value. */
static char *quantity_changed;

/* Non-zero means the APPINFO parameter was not NONE. */
static int appinfosw = 0;

/* The SGML declaration in effect.  It starts out with the reference
capacity and quantity sets and is updated by the section parsers in
this file as the document's SGML declaration is read. */
struct sgmldecl sd = {
     REFCAPSET,			/* capacity */
#ifdef SUPPORT_SUBDOC
     MAXNUMBER,			/* subdoc */
#else /* not SUPPORT_SUBDOC */
     0,				/* subdoc */
#endif /* not SUPPORT_SUBDOC */
     1,				/* formal */
     1,				/* omittag */
     1,				/* shorttag */
     1,				/* shortref */
     { 1, 0 },			/* general/entity name case translation */
     {				/* reference quantity set */
	  REFATTCNT,
	  REFATTSPLEN,
	  REFBSEQLEN,
	  REFDTAGLEN,
	  REFDTEMPLEN,
	  REFENTLVL,
	  REFGRPCNT,
	  REFGRPGTCNT,
	  REFGRPLVL,
	  REFLITLEN,
	  REFNAMELEN,
	  REFNORMSEP,
	  REFPILEN,
	  REFTAGLEN,
	  REFTAGLVL,
     },
};

/* The system character set described as a base set: an identity table
in which entry i has the value i, for i from 0 to 255. */
static int systemcharset[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
};

/* A pmap of known base character sets.  sdcsdesc() looks up the
designating sequence part of a BASESET public identifier here; the
value is a table of 256 ints indexed by base set character number. */
static struct pmap charset_map[] = {
     { "ESC 2/5 4/0", (UNIV)asciicharset }, /* ISO 646 IRV */
     { "ESC 2/8 4/2", (UNIV)asciicharset }, /* ISO Registration Number 6, ASCII */
     { SYSTEM_CHARSET_DESIGNATING_SEQUENCE, (UNIV)systemcharset },
				/* system character set */
     { 0 }
};

/* Filled in by sdsynref(); read by sdtranschar(). */
static int synrefcharset[256];	/* the syntax reference character set */

/* Bits in char_flags[].  CHAR_NONSGML is set by sdcharset() for
characters that are undescribed or UNUSED in the document character
set; CHAR_SHUNNED is set by sdshunchar(); sdnaming() rejects characters
that have CHAR_SIGNIFICANT or CHAR_MAGIC set (other than '.' and '-'). */
#define CHAR_NONSGML 01
#define CHAR_SIGNIFICANT 02
#define CHAR_MAGIC 04
#define CHAR_SHUNNED 010

static UNCH char_flags[256];	/* flags for each character number */
static int done_nonsgml = 0;	/* set to 1 at the end of sdcharset() */
/* Replacement lexical tables built by sdnaming(); both stay 0 if the
NAMING section was found to be bad. */
static UNCH *nlextoke = 0;	/* new lextoke */
static UNCH *nlextran = 0;	/* new lextran */


/* Keywords of the SGML declaration, held as UNCH strings so that they
can be compared with parsed names using matches() and passed to
sderr(). */
static UNCH kcharset[] = "CHARSET";
static UNCH kbaseset[] = "BASESET";
static UNCH kdescset[] = "DESCSET";
static UNCH kunused[] = "UNUSED";
static UNCH kcapacity[] = "CAPACITY";
static UNCH kpublic[] = "PUBLIC";
static UNCH ksgmlref[] = "SGMLREF";
static UNCH kscope[] = "SCOPE";
static UNCH kdocument[] = "DOCUMENT";
static UNCH kinstance[] = "INSTANCE";
static UNCH ksyntax[] = "SYNTAX";
static UNCH kswitches[] = "SWITCHES";
static UNCH kfeatures[] = "FEATURES";
static UNCH kminimize[] = "MINIMIZE";
static UNCH kdatatag[] = "DATATAG";
static UNCH komittag[] = "OMITTAG";
static UNCH krank[] = "RANK";
static UNCH kshorttag[] = "SHORTTAG";
static UNCH klink[] = "LINK";
static UNCH ksimple[] = "SIMPLE";
static UNCH kimplicit[] = "IMPLICIT";
static UNCH kexplicit[] = "EXPLICIT";
static UNCH kother[] = "OTHER";
static UNCH kconcur[] = "CONCUR";
static UNCH ksubdoc[] = "SUBDOC";
static UNCH kformal[] = "FORMAL";
static UNCH kyes[] = "YES";
static UNCH kno[] = "NO";
static UNCH kappinfo[] = "APPINFO";
static UNCH knone[] = "NONE";
static UNCH kshunchar[] = "SHUNCHAR";
static UNCH kcontrols[] = "CONTROLS";
static UNCH kfunction[] = "FUNCTION";
static UNCH krs[] = "RS";
static UNCH kre[] = "RE";
static UNCH kspace[] = "SPACE";
static UNCH knaming[] = "NAMING";
static UNCH klcnmstrt[] = "LCNMSTRT";
static UNCH kucnmstrt[] = "UCNMSTRT";
static UNCH klcnmchar[] = "LCNMCHAR";
static UNCH kucnmchar[] = "UCNMCHAR";
static UNCH knamecase[] = "NAMECASE";
static UNCH kdelim[] = "DELIM";
static UNCH kgeneral[] = "GENERAL";
static UNCH kentity[] = "ENTITY";
static UNCH kshortref[] = "SHORTREF";
static UNCH knames[] = "NAMES";
static UNCH kquantity[] = "QUANTITY";

/* Errors in the SGML declaration are reported with mderr; sgmldecl()
sets up the globals that mderr needs before any section is parsed. */
#define sderr mderr

/* Helpers. */
static UNIV pmaplookup P((struct pmap *, char *));
static UNCH *ltous P((long));
static VOID sdfixstandard P((UNCH *));
/* Token-level parsing. */
static int sdparm P((UNCH *, struct parse *));
static int sdname P((UNCH *, UNCH *));
static int sdckname P((UNCH *, UNCH *));
/* Section parsers; each returns SUCCESS or FAIL. */
static int sdversion P((UNCH *));
static int sdcharset P((UNCH *));
static int sdcsdesc P((UNCH *, int *));
static int sdpubcapacity P((UNCH *));
static int sdcapacity P((UNCH *));
static int sdscope P((UNCH *));
static VOID setlexical P((void));
static VOID noemptytag P((void));
static int sdpubsyntax P((UNCH *));
static int sdsyntax P((UNCH *));
static int sdxsyntax P((UNCH *));
static int sdtranscharnum P((UNCH *));
static int sdtranschar P((int));
static int sdshunchar P((UNCH *));
static int sdsynref P((UNCH *));
static int sdfunction P((UNCH *));
static int sdnaming P((UNCH *));
static int sddelim P((UNCH *));
static int sdnames P((UNCH *));
static int sdquantity P((UNCH *));
static int sdfeatures P((UNCH *));
static int sdappinfo P((UNCH *));

static VOID bufsalloc P((void));
static VOID bufsrealloc P((void));

/* Parse the SGML declaration. Return non-zero if there was some appinfo. */

int sgmldecl()
{
     int i;
     int errsw = 0;
     UNCH endbuf[REFNAMELEN+2];	/* buffer for parsing terminating > */
     static int (*section[]) P((UNCH *)) = {
	  sdversion,
	  sdcharset,
	  sdcapacity,
	  sdscope,
	  sdsyntax,
	  sdfeatures,
	  sdappinfo,
     };
     /* These are needed if we use mderr. */
     parmno = 0;
     mdname = sgmlkey;
     subdcl = NULL;
     for (i = 0; i < SIZEOF(section); i++)
	  if ((*section[i])(tbuf) == FAIL) {
	       errsw = 1;
	       break;
	  }
     if (!errsw)
	  setlexical();
     bufsrealloc();
     /* Parse the >.  Don't overwrite the appinfo. */
     if (!errsw)
	  sdparm(endbuf, 0);
     /* We must exit if we hit end of document. */
     if (pcbsd.action == EOD_)
	  exiterr(161, &pcbsd);
     if (!errsw && pcbsd.action != ESGD)
	  sderr(126, (UNCH *)0, (UNCH *)0);
     return appinfosw;
}

/* Parse the literal at the beginning of a SGML declaration, which
should contain the version of the standard.  A missing literal is a
failure; a literal with the wrong contents is reported but accepted. */

static int sdversion(tbuf)
UNCH *tbuf;
{
     int tok = sdparm(tbuf, &pcblitv);

     if (tok == LIT1) {
          sdfixstandard(tbuf);
          if (ustrcmp(tbuf, standard) != 0)
               sderr(E_BADVERSION, tbuf, standard);
          return SUCCESS;
     }
     sderr(123, (UNCH *)0, (UNCH *)0);
     return FAIL;
}

/* Parse the CHARSET section and mark in char_flags every character
that is non-SGML in the document character set.  Use one token
lookahead. */

static int sdcharset(tbuf)
UNCH *tbuf;
{
     int status[256];		/* what each character number was described as */
     int eightbit = 0;		/* non-zero if any of 128..255 was described */
     int ch;

     if (sdname(tbuf, kcharset) == FAIL)
          return FAIL;
     (void)sdparm(tbuf, 0);

     if (sdcsdesc(tbuf, status) == FAIL)
          return FAIL;

     for (ch = 128; ch < 256 && !eightbit; ch++)
          if (status[ch] != UNDESC)
               eightbit = 1;
     if (!eightbit) {
          /* Only a 7-bit character set was described.  Fill it out to 8-bits. */
          for (ch = 128; ch < 256; ch++)
               status[ch] = UNUSED;
#if 0
          sderr(E_7BIT, (UNCH *)0, (UNCH *)0);
#endif
     }
     /* Characters that are declared UNUSED in the document character set
        are assigned to non-SGML; so are characters that were not
        described at all, after an error has been given for them. */
     for (ch = 0; ch < 256; ch++) {
          switch (status[ch]) {
          case UNDESC:
               sderr(E_CHARMISSING, ltous((long)ch), (UNCH *)0);
               /* fall through */
          case UNUSED:
               char_flags[ch] |= CHAR_NONSGML;
               break;
          default:
               break;
          }
     }
     done_nonsgml = 1;
     return SUCCESS;
}

/* Parse a character set description, ie one or more BASESET literals
each followed by DESCSET and a list of character descriptions, and
record in status[0..255] how each character number was described:
either a character number taken from the base set table, or one of
UNUSED, UNKNOWN, UNKNOWN_SET, or UNDESC if it was never described.
On entry the current token has already been parsed into tbuf; on
successful return the first token after the description has been
parsed (one token lookahead).  Returns SUCCESS or FAIL. */

static int sdcsdesc(tbuf, status)
UNCH *tbuf;
int *status;
{
     int i;
     int nsets = 0;		/* number of BASESETs seen so far */
     struct fpi fpi;

     /* Nothing has been described yet. */
     for (i = 0; i < 256; i++)
	  status[i] = UNDESC;

     for (;;) {
	  int nchars;		/* number of descriptions in this DESCSET */
	  int *baseset = 0;	/* table for this base set, 0 if not known */

	  /* No token is read here: the current token comes from the
	     caller the first time round, and afterwards from the
	     description loop below.  Anything other than BASESET ends
	     the description, but at least one BASESET is required. */
	  if (pcbsd.action != NAS1) {
	       if (nsets == 0) {
		    sderr(120, (UNCH *)0, (UNCH *)0);
		    return FAIL;
	       }
	       break;
	  }
	  if (!matches(tbuf, kbaseset)) {
	       if (nsets == 0) {
		    sderr(118, tbuf+1, kbaseset);
		    return FAIL;
	       }
	       break;
	  }
	  nsets++;
	  MEMZERO((UNIV)&fpi, FPISZ);
	  /* The public identifier of the base set. */
	  if (sdparm(tbuf, &pcblitv) != LIT1) {
	       sderr(123, (UNCH *)0, (UNCH *)0);
	       return FAIL;
	  }
	  fpi.fpipubis = tbuf;
	  /* Give a warning if it is not a CHARSET fpi. */
	  if (parsefpi(&fpi))
	       sderr(E_FORMAL, (UNCH *)0, (UNCH *)0);
	  else if (fpi.fpic != FPICHARS)
	       sderr(E_BADCLASS, kcharset, (UNCH *)0);
	  else {
	       /* Terminate the identifier after fpill bytes starting at
		  offset fpil, and look that part up in charset_map. */
	       fpi.fpipubis[fpi.fpil + fpi.fpill] = '\0';
	       baseset = (int *)pmaplookup(charset_map,
					   (char *)fpi.fpipubis + fpi.fpil);
	       if (!baseset)
		    sderr(E_UNKNOWNSET, fpi.fpipubis + fpi.fpil, (UNCH *)0);
	  }
	  if (sdname(tbuf, kdescset) == FAIL) return FAIL;
	  nchars = 0;
	  /* Each description is: start number, count, and then a base
	     set number, a literal or UNUSED.  The loop ends at the
	     first token that is not a number. */
	  for (;;) {
	       long start, count;
	       long basenum;
	       if (sdparm(tbuf, 0) != NUM1)
		    break;
	       start = atol((char *)tbuf);
	       if (sdparm(tbuf, 0) != NUM1) {
		    sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
		    return FAIL;
	       }
	       count = atol((char *)tbuf);
	       switch (sdparm(tbuf, &pcblitv)) {
	       case NUM1:
		    basenum = atol((char *)tbuf);
		    break;
	       case LIT1:
		    /* Described by a literal: the contents are not examined. */
		    basenum = UNKNOWN;
		    break;
	       case NAS1:
		    if (matches(tbuf, kunused)) {
			 basenum = UNUSED;
			 break;
		    }
		    /* fall through */
	       default:
		    sderr(E_CHARDESC, ltous(start), (UNCH *)0);
		    return FAIL;
	       }
	       /* A range that goes beyond character 255 is reported and
		  otherwise ignored. */
	       if (start + count > 256)
		    sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0);
	       else {
		    int i;
		    int lim = (int)start + count;
		    for (i = (int)start; i < lim; i++) {
			 /* The first description of a character wins. */
			 if (status[i] != UNDESC)
			      sderr(E_CHARDUP, ltous((long)i), (UNCH *)0);
			 else if (basenum == UNUSED || basenum == UNKNOWN)
			      status[i] = (int)basenum;
			 else if (baseset == 0)
			      status[i] = UNKNOWN_SET;
			 else {
			      /* Corresponding character in the base set. */
			      int n = basenum + (i - start);
			      if (n < 0 || n > 255)
				   sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0);
			      else if (baseset[n] == UNUSED)
				   sderr(E_BADBASECHAR, ltous((long)n), (UNCH *)0);
			      else
				   status[i] = baseset[n];
			 }
		    }
	       }
	       nchars++;
	  }
	  /* A DESCSET must contain at least one description. */
	  if (nchars == 0) {
	       sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
	       return FAIL;
	  }
     }
     return SUCCESS;
}

/* Parse the CAPACITY section.  It is either PUBLIC followed by a
public identifier (handled by sdpubcapacity()) or SGMLREF followed by
at least one capacity name and number.  The list ends at the first
name that is not a capacity name.  Uses one token lookahead. */

static int sdcapacity(tbuf)
UNCH *tbuf;
{
     int nseen = 0;		/* number of capacities specified */

     if (sdckname(tbuf, kcapacity) == FAIL)
          return FAIL;
     if (sdparm(tbuf, 0) != NAS1) {
          sderr(120, (UNCH *)0, (UNCH *)0);
          return FAIL;
     }
     if (matches(tbuf, kpublic))
          return sdpubcapacity(tbuf);
     if (!matches(tbuf, ksgmlref)) {
          sderr(E_CAPACITY, tbuf+1, (UNCH *)0);
          return FAIL;
     }

     /* Start from the reference capacity set. */
     memcpy((UNIV)sd.capacity, (UNIV)refcapset, sizeof(sd.capacity));

     while (sdparm(tbuf, 0) == NAS1) {
          int capno = 0;

          /* Find the name in captab. */
          while (capno < SIZEOF(captab) && !matches(tbuf, captab[capno]))
               capno++;
          if (capno >= SIZEOF(captab))
               break;
          if (sdparm(tbuf, 0) != NUM1) {
               sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
               return FAIL;
          }
          sd.capacity[capno] = atol((char *)tbuf);
          nseen++;
     }

     if (nseen == 0) {
          sderr(E_CAPMISSING, (UNCH *)0, (UNCH *)0);
          return FAIL;
     }
     return SUCCESS;
}

/* Parse a CAPACITY section that started with PUBLIC.  Must do one
token lookahead, since sdcapacity() also does. */

static int sdpubcapacity(tbuf)
UNCH *tbuf;
{
     UNIV ptr;
     if (sdparm(tbuf, &pcblitv) != LIT1) {
	  sderr(123, (UNCH *)0, (UNCH *)0);
	  return FAIL;
     }
     sdfixstandard(tbuf);
     ptr = pmaplookup(capset_map, (char *)tbuf);
     if (!ptr)
	  sderr(E_CAPSET, tbuf, (UNCH *)0);
     else
	  memcpy((UNIV)sd.capacity, (UNIV)ptr, sizeof(sd.capacity));
     (void)sdparm(tbuf, 0);
     return SUCCESS;
}

/* Parse the SCOPE section.  DOCUMENT is accepted silently, INSTANCE
is accepted with an error message, and anything else is a failure.
Uses no lookahead. */

static int sdscope(tbuf)
UNCH *tbuf;
{
     if (sdckname(tbuf, kscope) == FAIL)
          return FAIL;
     if (sdparm(tbuf, 0) != NAS1) {
          sderr(120, (UNCH *)0, (UNCH *)0);
          return FAIL;
     }
     if (matches(tbuf, kdocument))
          return SUCCESS;
     if (matches(tbuf, kinstance)) {
          sderr(E_INSTANCE, (UNCH *)0, (UNCH *)0);
          return SUCCESS;
     }
     sderr(E_SCOPE, tbuf+1, (UNCH *)0);
     return FAIL;
}

/* Parse the SYNTAX section, which is either a public concrete syntax
(introduced by PUBLIC) or an explicit one.  Uses one token lookahead. */

static int sdsyntax(tbuf)
UNCH *tbuf;
{
     if (sdname(tbuf, ksyntax) == FAIL)
          return FAIL;
     if (sdparm(tbuf, 0) == NAS1) {
          if (matches(tbuf, kpublic))
               return sdpubsyntax(tbuf);
          return sdxsyntax(tbuf);
     }
     sderr(120, (UNCH *)0, (UNCH *)0);
     return FAIL;
}

/* Parse the SYNTAX section which starts with PUBLIC.  The public
identifier must be that of the core syntax (which turns short
references off) or of the reference syntax (which turns them on); any
other identifier is reported and sd.shortref is left alone.  An
optional SWITCHES list of pairs of character numbers is checked for
syntax and range and then reported with E_SWITCHES; the pairs are not
otherwise used.  Uses one token lookahead.  Returns SUCCESS or FAIL. */

static int sdpubsyntax(tbuf)
UNCH *tbuf;
{
     int nswitches;

     if (sdparm(tbuf, &pcblitv) != LIT1) {
          /* Report the missing literal, as every other section parser
             does; otherwise the declaration would fail silently. */
          sderr(123, (UNCH *)0, (UNCH *)0);
          return FAIL;
     }
     sdfixstandard(tbuf);
     if (ustrcmp(tbuf, CORE_SYNTAX) == 0)
          sd.shortref = 0;
     else if (ustrcmp(tbuf, REFERENCE_SYNTAX) == 0)
          sd.shortref = 1;
     else
          sderr(E_SYNTAX, tbuf, (UNCH *)0);

     /* Anything other than SWITCHES is the lookahead token. */
     if (sdparm(tbuf, 0) != NAS1)
          return SUCCESS;
     if (!matches(tbuf, kswitches))
          return SUCCESS;

     nswitches = 0;
     for (;;) {
          int errsw = 0;	/* first number of the pair was out of range */

          if (sdparm(tbuf, 0) != NUM1)
               break;
          if (atol((char *)tbuf) > 255) {
               sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
               errsw = 1;
          }
          if (sdparm(tbuf, 0) != NUM1) {
               sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
               return FAIL;
          }
          /* Give at most one range error for each pair. */
          if (!errsw) {
               if (atol((char *)tbuf) > 255)
                    sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
          }
          nswitches++;
     }
     /* SWITCHES must be followed by at least one pair. */
     if (nswitches == 0) {
          sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
          return FAIL;
     }
     sderr(E_SWITCHES, (UNCH *)0, (UNCH *)0);
     return SUCCESS;
}

/* Parse an explicit concrete syntax by running the parser for each of
its sections in order; the first failure ends the parse.  Uses one
token lookahead. */

static
int sdxsyntax(tbuf)
UNCH *tbuf;
{
     static int (*section[]) P((UNCH *)) = {
          sdshunchar,
          sdsynref,
          sdfunction,
          sdnaming,
          sddelim,
          sdnames,
          sdquantity,
     };
     int n = 0;

     while (n < SIZEOF(section)) {
          if ((*section[n])(tbuf) == FAIL)
               return FAIL;
          n++;
     }
     return SUCCESS;
}

/* Parse the SHUNCHAR section, which is NONE, or CONTROLS optionally
followed by character numbers, or just character numbers.  The
CHAR_SHUNNED flag in char_flags is cleared for every character and
then set for those that are shunned.  Uses one token lookahead. */

static
int sdshunchar(tbuf)
UNCH *tbuf;
{
     int ch;

     for (ch = 0; ch < 256; ch++)
          char_flags[ch] &= ~CHAR_SHUNNED;

     if (sdckname(tbuf, kshunchar) == FAIL)
          return FAIL;

     if (sdparm(tbuf, 0) == NAS1) {
          if (matches(tbuf, knone)) {
               (void)sdparm(tbuf, 0);
               return SUCCESS;
          }
          if (matches(tbuf, kcontrols)) {
               for (ch = 0; ch < 256; ch++) {
                    if (!ISASCII(ch))
                         continue;
                    if (iscntrl(ch))
                         char_flags[ch] |= CHAR_SHUNNED;
               }
               /* CONTROLS need not be followed by any numbers. */
               if (sdparm(tbuf, 0) != NUM1)
                    return SUCCESS;
          }
     }
     /* From here on the current token must be a character number. */
     if (pcbsd.action != NUM1) {
          sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
          return FAIL;
     }
     for (;;) {
          long num = atol((char *)tbuf);

          if (num <= 255)
               char_flags[(int)num] |= CHAR_SHUNNED;
          else
               sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
          if (sdparm(tbuf, 0) != NUM1)
               break;
     }
     return SUCCESS;
}

/* Parse the description of the syntax reference character set,
leaving the result in synrefcharset.  Uses one token lookahead. */

static
int sdsynref(tbuf)
UNCH *tbuf;
{
     int *table = synrefcharset;

     return sdcsdesc(tbuf, table);
}

/* Translate the character number whose digits are in tbuf from the
syntax reference character set to the system character set.  If it
can't be done, give an error message and return -1. */

static
int sdtranscharnum(tbuf)
UNCH *tbuf;
{
     long num = atol((char *)tbuf);

     if (num <= 255)
          return sdtranschar((int)num);
     sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
     return -1;
}


/* Translate character number n of the syntax reference character set
using synrefcharset.  A non-negative entry is returned as it is; for
one of the negative markers the corresponding error is given and -1 is
returned.  Any other negative entry aborts the program. */
static
int sdtranschar(n)
int n;
{
     int mapped = synrefcharset[n];
     int errnum = 0;

     if (mapped >= 0)
          return mapped;

     if (mapped == UNUSED)
          errnum = E_SYNREFUNUSED;
     else if (mapped == UNDESC)
          errnum = E_SYNREFUNDESC;
     else if (mapped == UNKNOWN)
          errnum = E_SYNREFUNKNOWN;
     else if (mapped == UNKNOWN_SET)
          errnum = E_SYNREFUNKNOWNSET;
     else
          abort();
     sderr(errnum, ltous((long)n), (UNCH *)0);
     return -1;
}


/* Parse the function section.  RE, RS and SPACE must come first, in
that order, each followed by a character number.  After that come
added functions, each consisting of a name, a second name and a
character number.  The section ends when the name NAMING is followed
by LCNMSTRT, which is why two tokens of lookahead are used: "NAMING"
could be a function name.  Nothing is installed from this section: the
parse only detects whether it differs from what this parser supports
(RE, RS, SPACE and TAB with their usual values), in which case
E_FUNCHAR is given.  Returns SUCCESS or FAIL. */

static
int sdfunction(tbuf)
UNCH *tbuf;
{
     static UNCH *fun[] = { kre, krs, kspace };
     static int funval[] = { RECHAR, RSCHAR, ' ' };
     int i;
     int had_tab = 0;		/* TAB has been defined */
     int changed = 0;		/* attempted to change reference syntax */

     if (sdckname(tbuf, kfunction) == FAIL)
	  return FAIL;
     /* The three required functions. */
     for (i = 0; i < SIZEOF(fun); i++) {
	  int ch;
	  if (sdname(tbuf, fun[i]) == FAIL)
	       return FAIL;
	  if (sdparm(tbuf, 0) != NUM1) {
	       sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
	       return FAIL;
	  }
	  /* A number that cannot be translated (ch < 0) has already
	     been reported and is not counted as a change. */
	  ch = sdtranscharnum(tbuf);
	  if (ch >= 0 && ch != funval[i])
	       changed = 1;
     }
     /* The added functions. */
     for (;;) {
	  int tabsw = 0;	/* this function is TAB */
	  int namingsw = 0;	/* this name is NAMING */
	  /* First token: the function name. */
	  if (sdparm(tbuf, 0) != NAS1) {
	       sderr(120, (UNCH *)0, (UNCH *)0);
	       return FAIL;
	  }
	  if (matches(tbuf, (UNCH *)"TAB")) {
	       tabsw = 1;
	       if (had_tab)
		    sderr(E_FUNDUP, (UNCH *)0, (UNCH *)0);
	  }
	  else {
	       /* RE, RS and SPACE may not be defined again. */
	       for (i = 0; i < SIZEOF(fun); i++)
		    if (matches(tbuf, fun[i]))
			 sderr(E_BADFUN, fun[i], (UNCH *)0);
	       if (matches(tbuf, knaming))
		    namingsw = 1;
	       else
		    changed = 1;
	  }
	  /* Second token: either the rest of an added function, or
	     LCNMSTRT if the first token was the NAMING keyword. */
	  if (sdparm(tbuf, 0) != NAS1) {
	       sderr(120, (UNCH *)0, (UNCH *)0);
	       return FAIL;
	  }
	  if (namingsw) {
	       if (matches(tbuf, klcnmstrt))
		    break;
	       /* NAMING was being used as a function name. */
	       changed = 1;
	  }
	  /* Third token: the character number of the function. */
	  if (sdparm(tbuf, 0) != NUM1) {
	       sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
	       return FAIL;
	  }
	  /* Only the first definition of TAB is checked. */
	  if (tabsw && !had_tab) {
	       int ch = sdtranscharnum(tbuf);
	       if (ch >= 0 && ch != TABCHAR)
		    changed = 1;
	       had_tab = 1;
	  }

     }
     /* Leaving out TAB is also a change. */
     if (!had_tab)
	  changed = 1;
     if (changed)
	  sderr(E_FUNCHAR, (UNCH *)0, (UNCH *)0);
     return SUCCESS;
}

/* Parse the NAMING section: the LCNMSTRT, UCNMSTRT, LCNMCHAR and
UCNMCHAR literals followed by NAMECASE with YES or NO for GENERAL and
for ENTITY.  The characters in the literals are translated from the
syntax reference character set with sdtranschar().  If they are all
acceptable, new lexical tables are built in nlextoke and nlextran from
copies of lextoke and lextran; if anything is wrong, errors are given
and nlextoke and nlextran are left (or reset to) 0.  sd.namecase is set
from the NAMECASE parameters.  On entry the current token must be
LCNMSTRT.  Returns SUCCESS or FAIL.  Uses no lookahead. */

static
int sdnaming(tbuf)
UNCH *tbuf;
{
     int i;
     int bad = 0;		/* an error was found in the literals */
     static UNCH *classes[] = { klcnmstrt, kucnmstrt, klcnmchar, kucnmchar };
     static UNCH *types[] = { kgeneral, kentity };

#define NCLASSES SIZEOF(classes)

     int bufsize = 4;		/* allocated size of buf */
     UNCH *buf = (UNCH *)rmalloc(bufsize); /* holds characters
                                              in naming classes */
     int bufi = 0;		/* next index into buf */
     int start[NCLASSES];	/* index of first character for each class */
     int count[NCLASSES];	/* number of characters for each class */

     for (i = 0; i < NCLASSES; i++) {
          UNCH *s;

          if (sdckname(tbuf, classes[i]) == FAIL) {
               frem((UNIV)buf);
               return FAIL;
          }
          if (sdparm(tbuf, &pcblitp) != LIT1) {
               sderr(123, (UNCH *)0, (UNCH *)0);
               frem((UNIV)buf);
               return FAIL;
          }
          start[i] = bufi;

          for (s = tbuf; *s; s++) {
               int c = *s;
               /* DELNONCH marks a shifted character in the literal: the
                  character itself is the byte AFTER the delimiter, so
                  that is the one that has to be unshifted.  (The
                  delimiter byte used to be unshifted instead, and the
                  real character skipped.)  A delimiter at the very end
                  of the string is left alone so that the scan cannot
                  run past the terminator. */
               if (c == DELNONCH && s[1] != '\0')
                    c = UNSHIFTNON(*++s);
               c = sdtranschar(c);
               if (c < 0)
                    bad = 1;
               else if ((char_flags[c] & (CHAR_SIGNIFICANT | CHAR_MAGIC))
                        && c != '.' && c != '-') {
                    /* The character already has a role that prevents it
                       being a name character; the lexical class decides
                       which message is given. */
                    int class = lextoke[c];
                    if (class == SEP || class == SP || class == NMC
                        || class == NMS || class == NU)
                         sderr(E_NMBAD, ltous((long)c), (UNCH *)0);
                    else
                         sderr(E_NMUNSUP, ltous((long)c), (UNCH *)0);
                    bad = 1;
               }
               /* The character is stored even when it is bad; buf is
                  only used below if no error was found. */
               if (bufi >= bufsize)
                    buf = (UNCH *)rrealloc((UNIV)buf, bufsize *= 2);
               buf[bufi++] = c;
          }

          count[i] = bufi - start[i];
          /* Read the next class keyword (or NAMECASE after the last). */
          (void)sdparm(tbuf, 0);
     }
     /* Each lower-case literal must pair up with its upper-case one. */
     if (!bad && count[0] != count[1]) {
          sderr(E_NMSTRTCNT, (UNCH *)0, (UNCH *)0);
          bad = 1;
     }
     if (!bad && count[2] != count[3]) {
          sderr(E_NMCHARCNT, (UNCH *)0, (UNCH *)0);
          bad = 1;
     }
     if (!bad) {
          nlextoke = (UNCH *)rmalloc(256);
          memcpy((UNIV)nlextoke, lextoke, 256);
          /* '.' and '-' are name characters only if LCNMCHAR/UCNMCHAR
             say so. */
          nlextoke['.'] = nlextoke['-'] = INV;

          nlextran = (UNCH *)rmalloc(256);
          memcpy((UNIV)nlextran, lextran, 256);

          /* Additional name start characters. */
          for (i = 0; i < count[0]; i++) {
               UNCH lc = buf[start[0] + i];
               UNCH uc = buf[start[1] + i];
               nlextoke[lc] = NMS;
               nlextoke[uc] = NMS;
               nlextran[lc] = uc;
          }

          /* Additional name characters; none of them may already be a
             name start character. */
          for (i = 0; i < count[2]; i++) {
               UNCH lc = buf[start[2] + i];
               UNCH uc = buf[start[3] + i];
               if (nlextoke[lc] == NMS) {
                    sderr(E_NMDUP, ltous((long)lc), (UNCH *)0);
                    bad = 1;
               }
               else if (nlextoke[uc] == NMS) {
                    sderr(E_NMDUP, ltous((long)uc), (UNCH *)0);
                    bad = 1;
               }
               else {
                    nlextoke[lc] = NMC;
                    nlextoke[uc] = NMC;
                    nlextran[lc] = uc;
               }
          }
          /* '-' is required to be a name character. */
          if (nlextoke['-'] != NMC) {
               sderr(E_NMMINUS, (UNCH *)0, (UNCH *)0);
               bad = 1;
          }
          /* Throw the new tables away if they turned out to be bad. */
          if (bad) {
               if (nlextoke) {
                    frem((UNIV)nlextoke);
                    nlextoke = 0;
               }
               if (nlextran) {
                    frem((UNIV)nlextran);
                    nlextran = 0;
               }
          }
     }

     frem((UNIV)buf);

     if (sdckname(tbuf, knamecase) == FAIL)
          return FAIL;
     for (i = 0; i < SIZEOF(types); ++i) {
          if (sdname(tbuf, types[i]) == FAIL)
               return FAIL;
          if (sdparm(tbuf, 0) != NAS1) {
               sderr(120, (UNCH *)0, (UNCH *)0);
               return FAIL;
          }
          if (matches(tbuf, kyes))
               sd.namecase[i] = 1;
          else if (matches(tbuf, kno))
               sd.namecase[i] = 0;
          else {
               sderr(E_YESNO, tbuf+1, (UNCH *)0);
               return FAIL;
          }
     }
     return SUCCESS;
}

/* Parse the DELIM section.  Both the general delimiters and the short
reference delimiters must start from SGMLREF (or, for short references,
NONE).  Any further delimiter definitions are parsed, but they only
cause an error to be given: E_GENDELIM for general delimiters and
E_SRDELIM for short reference delimiters.  Uses one token lookahead. */

static
int sddelim(tbuf)
UNCH *tbuf;
{
     int general_changed = 0;	/* a general delimiter was redefined */
     int shortref_changed = 0;	/* a short reference delimiter was added */

     if (sdname(tbuf, kdelim) == FAIL)
          return FAIL;
     if (sdname(tbuf, kgeneral) == FAIL)
          return FAIL;
     if (sdname(tbuf, ksgmlref) == FAIL)
          return FAIL;

     /* Pairs of delimiter name and literal, up to SHORTREF. */
     for (;;) {
          if (sdparm(tbuf, 0) != NAS1) {
               sderr(120, (UNCH *)0, (UNCH *)0);
               return FAIL;
          }
          if (matches(tbuf, kshortref))
               break;
          if (sdparm(tbuf, &pcblitp) != LIT1) {
               sderr(123, (UNCH *)0, (UNCH *)0);
               return FAIL;
          }
          general_changed = 1;
     }
     if (general_changed)
          sderr(E_GENDELIM, (UNCH *)0,(UNCH *)0);

     if (sdparm(tbuf, 0) != NAS1) {
          sderr(120, (UNCH *)0, (UNCH *)0);
          return FAIL;
     }
     if (matches(tbuf, ksgmlref))
          sd.shortref = 1;
     else if (matches(tbuf, knone))
          sd.shortref = 0;
     else {
          sderr(118, tbuf+1, ksgmlref);	/* probably they forgot SGMLREF */
          return FAIL;
     }

     /* Literals for additional short reference delimiters. */
     while (sdparm(tbuf, &pcblitp) == LIT1)
          shortref_changed = 1;
     if (shortref_changed)
          sderr(E_SRDELIM, (UNCH *)0, (UNCH *)0);
     return SUCCESS;
}

/* Parse the NAMES section: SGMLREF followed by pairs consisting of a
reference reserved name and the name that is to replace it.  The list
ends at QUANTITY or at the first token that is not a name.  The
replacements are collected in newkey and then exchanged with the
corresponding entries of key, so that afterwards key holds the new
names and newkey the names that were replaced.  On entry the current
token must be NAMES.  Returns SUCCESS or FAIL.  Uses one token
lookahead. */

static
int sdnames(tbuf)
UNCH *tbuf;
{
     int i;
     if (sdckname(tbuf, knames) == FAIL)
	  return FAIL;
     if (sdname(tbuf, ksgmlref) == FAIL)
	  return FAIL;

     while (sdparm(tbuf, 0) == NAS1) {
	  int j;
	  if (matches(tbuf, kquantity))
	       break;
	  /* Find the reserved name that is to be replaced. */
	  for (i = 0; i < NKEYS; i++)
	       if (matches(tbuf, key[i]))
		    break;
	  if (i >= NKEYS) {
	       sderr(E_BADKEY, tbuf+1, (UNCH *)0);
	       return FAIL;
	  }
	  /* The replacement must be a name. */
	  if (sdparm(tbuf, &pcblitp) != NAS1) {
	       sderr(120, (UNCH *)0, (UNCH *)0);
	       return FAIL;
	  }
	  /* The table of new names is allocated on first use and
	     cleared, so that an empty string marks an unchanged key. */
	  if (!newkey) {
	       newkey = (UNCH (*)[REFNAMELEN+1])rmalloc((REFNAMELEN+1)*NKEYS);
	       MEMZERO((UNIV)newkey, (REFNAMELEN+1)*NKEYS);
	  }
	  /* The replacement may be neither a reference reserved name
	     nor a replacement that has already been used. */
	  for (j = 0; j < NKEYS; j++) {
	       if (matches(tbuf, key[j])) {
		    sderr(E_REFNAME, tbuf + 1, (UNCH *)0);
		    break;
	       }
	       if (matches(tbuf, newkey[j])) {
		    sderr(E_DUPNAME, tbuf + 1, (UNCH *)0);
		    break;
	       }
	  }
	  /* A rejected replacement is skipped; parsing carries on. */
	  if (j >= NKEYS)
	       ustrcpy(newkey[i], tbuf + 1);
     }
     /* Now install the new keys. */
     if (newkey) {
	  for (i = 0; i < NKEYS; i++)
	       if (newkey[i][0] != '\0') {
		    UNCH temp[REFNAMELEN + 1];
		    
		    /* Exchange key[i] and newkey[i]. */
		    ustrcpy(temp, key[i]);
		    ustrcpy(key[i], newkey[i]);
		    ustrcpy(newkey[i], temp);
	       }
     }
     return SUCCESS;
}

/* Parse the QUANTITY section. Uses one token lookahead. */

static int sdquantity(tbuf)
UNCH *tbuf;
{
     int quantity[NQUANTITY];
     int i;

     for (i = 0; i < NQUANTITY; i++)
	  quantity[i] = -1;
     if (sdckname(tbuf, kquantity) == FAIL)
	  return FAIL;
     if (sdname(tbuf, ksgmlref) == FAIL)
	  return FAIL;
     while (sdparm(tbuf, 0) == NAS1 && !matches(tbuf, kfeatures)) {
	  long n;
	  for (i = 0; i < SIZEOF(quantity_names); i++)
	       if (matches(tbuf, quantity_names[i]))
		    break;
	  if (i >= SIZEOF(quantity_names)) {
	       sderr(E_BADQUANTITY, tbuf + 1, (UNCH *)0);
	       return FAIL;
	  }
	  if (sdparm(tbuf, 0) != NUM1) {
	       sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
	       return FAIL;
	  }
	  n = atol((char *)tbuf);
	  if (n < sd.quantity[i])
	       sderr(E_QUANTITY, (UNCH *)quantity_names[i],
		     ltous((long)sd.quantity[i]));
	  else if (n > max_quantity[i]) {
	       sderr(E_QTOOBIG, (UNCH *)quantity_names[i],
		     ltous((long)max_quantity[i]));
	       quantity[i] = max_quantity[i];
	  }
	  else
	       quantity[i] = (int)n;
     }
     for (i = 0; i < NQUANTITY; i++)
	  if (quantity[i] > 0) {
	       sd.quantity[i] = quantity[i];
	       if (!quantity_changed)
		    quantity_changed = (char *)rmalloc(NQUANTITY);
	       quantity_changed[i] = 1;
	  }
     return SUCCESS;
}

/* Parse the FEATURES section.  The keywords in the features table must
appear in exactly that order.  A keyword with an argument is followed
by YES or NO, and for a numeric feature YES is followed by a number.
The value is stored through valp if the feature has one; a feature
with no valp is not supported, and giving it a non-zero value is
reported with E_NOTSUPPORTED.  If SHORTTAG is NO, noemptytag() is
called.  On entry the current token must be FEATURES.  Returns SUCCESS
or FAIL.  Uses no lookahead. */

static int sdfeatures(tbuf)
UNCH *tbuf;
{
     static struct  {
	  UNCH *name;
	  UNCH argtype;  /* 0 = no argument, 1 = boolean, 2 = numeric */
	  UNIV valp;     /* UNCH * if boolean, long * if numeric. */
     } features[] = {
	  { kminimize, 0, 0 },
	  { kdatatag, 1, 0 },
	  { komittag, 1, (UNIV)&sd.omittag },
	  { krank, 1, 0 },
	  { kshorttag, 1, (UNIV)&sd.shorttag },
	  { klink, 0, 0 },
	  { ksimple, 2, 0 },
	  { kimplicit, 1, 0 },
	  { kexplicit, 2, 0 },
	  { kother, 0, 0 },
	  { kconcur, 2, 0 },
	  { ksubdoc, 2, (UNIV)&sd.subdoc },
	  { kformal, 1, (UNIV)&sd.formal },
     };

     int i;

     if (sdckname(tbuf, kfeatures) == FAIL)
	  return FAIL;
     for (i = 0; i < SIZEOF(features); i++) {
	  if (sdname(tbuf, features[i].name) == FAIL) return FAIL;
	  /* MINIMIZE, LINK and OTHER are headings with no argument. */
	  if (features[i].argtype > 0) {
	       long n;		/* value of the feature; 0 for NO */
	       if (sdparm(tbuf, 0) != NAS1) {
		    sderr(120, (UNCH *)0, (UNCH *)0);
		    return FAIL;
	       }
	       if (matches(tbuf, kyes)) {
		    if (features[i].argtype > 1) {
			 /* YES for a numeric feature needs a number. */
			 if (sdparm(tbuf, 0) != NUM1) {
			      sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
			      return FAIL;
			 }
			 n = atol((char *)tbuf);
			 if (n == 0)
			      sderr(E_ZEROFEATURE, features[i].name, (UNCH *)0);
		    }
		    else
			 n = 1;
	       }
	       else if (matches(tbuf, kno))
		    n = 0;
	       else {
		    sderr(E_YESNO, tbuf+1, (UNCH *)0);
		    return FAIL;
	       }
	       if (features[i].valp == 0) {
		    /* Unsupported feature: only NO (or 0) is accepted
		       without complaint. */
		    if (n > 0)
			 sderr(E_NOTSUPPORTED, features[i].name,
			      (UNCH *)0);
	       }
	       else if (features[i].argtype > 1)
		    *(long *)features[i].valp = n;
	       else
		    *(UNCH *)features[i].valp = (UNCH)n;
	  }
     }
     if (!sd.shorttag)
	  noemptytag();
     return SUCCESS;
}

/* Parse the APPINFO section, whose parameter is either a literal or
NONE.  appinfosw is set if it was a literal.  Uses no lookahead. */

static int sdappinfo(tbuf)
UNCH *tbuf;
{
     int tok;

     if (sdname(tbuf, kappinfo) == FAIL)
          return FAIL;
     tok = sdparm(tbuf, &pcblitv);
     if (tok == LIT1) {
          appinfosw = 1;
          return SUCCESS;
     }
     if (tok != NAS1) {
          /* Neither a name nor a literal. */
          sderr(E_XNMLIT, knone, (UNCH *)0);
          return FAIL;
     }
     if (matches(tbuf, knone))
          return SUCCESS;
     sderr(118, tbuf+1, knone);
     return FAIL;
}

/* If tbuf starts with ISO 8879-1986, give an error and change the
hyphen to a colon, making the prefix ISO 8879:1986.  Amendment 1 to the
standard requires the latter. */

static VOID sdfixstandard(tbuf)
UNCH *tbuf;
{
     if (strncmp((char *)tbuf, "ISO 8879-1986", 13) != 0)
          return;
     sderr(E_STANDARD, (UNCH *)0, (UNCH *)0);
     /* Offset 8 is the position of the hyphen in the prefix. */
     tbuf[8] = ':';
}

/* Parse the next parameter into TBUF and check that it is the name KEY.
Error 120 is reported if the parameter is not a name, and error 118 if
it is a name other than KEY.  Returns SUCCESS or FAIL. */

static int sdname(tbuf, key)
UNCH *tbuf;
UNCH *key;
{
     int status = FAIL;

     if (sdparm(tbuf, 0) != NAS1)
	  sderr(120, (UNCH *)0, (UNCH *)0);
     else if (!matches(tbuf, key))
	  sderr(118, tbuf+1, key);
     else
	  status = SUCCESS;
     return status;
}

/* Like sdname, but no new parameter is parsed: check that the action
currently recorded in pcbsd is NAS1 and that the name already in TBUF is
KEY.  Error 120 is reported if the action is not NAS1, and error 118 if
the name differs from KEY.  Returns SUCCESS or FAIL. */

static int sdckname(tbuf, key)
UNCH *tbuf;
UNCH *key;
{
     int status = FAIL;

     if (pcbsd.action != NAS1)
	  sderr(120, (UNCH *)0, (UNCH *)0);
     else if (!matches(tbuf, key))
	  sderr(118, tbuf+1, key);
     else
	  status = SUCCESS;
     return status;
}

/* Parse an SGML declaration parameter into PT and return the resulting
action, which is also left in pcbsd.action.  If LPCB is NULL, PT must be
REFNAMELEN+2 characters long, otherwise at least LITLEN+2 characters
long.  LPCB should be NULL if a literal is not allowed; a literal found
in that case is reported with E_BADLIT and INV_ is returned.  A literal
introduced by either delimiter (LIT1 or LIT2) is returned as LIT1. */

static int sdparm(pt, lpcb)
UNCH *pt;			/* Token buffer. */
struct parse *lpcb;		/* PCB for literal parse. */
{
     /* Report every ISIG action and parse again until something else
	comes back. */
     parse(&pcbsd);
     while (pcbsd.action == ISIG) {
	  sderr(E_SIGNIFICANT, (UNCH *)0, (UNCH *)0);
	  parse(&pcbsd);
     }
     parmno++;

     switch (pcbsd.action) {
     case NAS1:
	  parsenm(pt, 1);
	  break;
     case NUM1:
	  parsetkn(pt, NU, REFNAMELEN);
	  break;
     case LIT1:
     case LIT2:
	  if (!lpcb) {
	       /* No literal is allowed in this position. */
	       sderr(E_BADLIT, (UNCH *)0, (UNCH *)0);
	       REPEATCC;
	       return pcbsd.action = INV_;
	  }
	  if (pcbsd.action == LIT2) {
	       parselit(pt, lpcb, REFLITLEN, lex.d.lita);
	       /* Callers see LIT1 whichever delimiter was used. */
	       return pcbsd.action = LIT1;
	  }
	  parselit(pt, lpcb, REFLITLEN, lex.d.lit);
	  break;
     }
     return pcbsd.action;
}

/* Initialize char_flags (magic, significant and shunned characters)
from the lexical tables in lextabs, then allocate the parser buffers
with bufsalloc. */

VOID sdinit()
{
     /* Shunned character numbers in the reference concrete syntax. */
     static UNCH refshun[] = {
	  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
	  19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255
	  };
     UNCH **tabp;
     int ch;
     int n;

     /* A character is magic if it is a non-SGML character used for
     some internal purpose in the parser. */
     char_flags[EOS] |= CHAR_MAGIC;
     char_flags[EOBCHAR] |= CHAR_MAGIC;
     char_flags[EOFCHAR] |= CHAR_MAGIC;
     char_flags[GENRECHAR] |= CHAR_MAGIC;
     char_flags[DELNONCH] |= CHAR_MAGIC;
     char_flags[DELCDATA] |= CHAR_MAGIC;
     char_flags[DELSDATA] |= CHAR_MAGIC;

     /* Figure out the significant SGML characters: a non-magic character
     is significant if some lexical table puts it in a class other than
     that table's data-character and non-SGML classes. */
     for (tabp = lextabs; *tabp; tabp++) {
	  UNCH *lextab = *tabp;
	  UNCH datclass = lextab[CANON_DATACHAR];
	  UNCH nonclass = lextab[CANON_NONSGML];

	  for (ch = 0; ch < 256; ch++) {
	       if (char_flags[ch] & CHAR_MAGIC)
		    continue;
	       if (lextab[ch] == datclass || lextab[ch] == nonclass)
		    continue;
	       char_flags[ch] |= CHAR_SIGNIFICANT;
	  }
     }

     /* Shunned characters: the reference list plus every ASCII control
     character. */
     for (n = 0; n < SIZEOF(refshun); n++)
	  char_flags[refshun[n]] |= CHAR_SHUNNED;
     for (ch = 0; ch < 256; ch++) {
	  if (ISASCII(ch) && iscntrl(ch))
	       char_flags[ch] |= CHAR_SHUNNED;
     }

     bufsalloc();
}


/* Allocate scbs, tbuf and entbuf with sizes taken from the reference
quantities (REFENTLVL, REFATTSPLEN, REFLITLEN, REFNAMELEN). */

static
VOID bufsalloc()
{
     /* One struct source for each of REFENTLVL+1 entries. */
     scbs = (struct source *)rmalloc(sizeof(struct source)*(REFENTLVL+1));

     /* Room for REFATTSPLEN+REFLITLEN characters plus one more. */
     tbuf = (UNCH *)rmalloc(REFATTSPLEN+REFLITLEN+1);

     /* entbuf is used for parsing numeric character references */
     entbuf = (UNCH *)rmalloc(REFNAMELEN + 2);
}

/* Resize the buffers allocated by bufsalloc to fit the quantities now
in effect.  A buffer is reallocated only if its required size differs
from the size implied by the reference quantities. */

static
VOID bufsrealloc()
{
     UNS need;

     if (ENTLVL != REFENTLVL)
	  scbs = (struct source *)rrealloc((UNIV)scbs,
					   (ENTLVL+1)*sizeof(struct source));

     /* tbuf must be able to hold the largest of LITLEN+ATTSPLEN, PILEN
	and BSEQLEN. */
     need = LITLEN + ATTSPLEN;
     if (need < PILEN)
	  need = PILEN;
     if (need < BSEQLEN)
	  need = BSEQLEN;
     if (need != REFATTSPLEN + REFLITLEN)
	  tbuf = (UNCH *)rrealloc((UNIV)tbuf, need + 1);

     if (NAMELEN != REFNAMELEN)
	  entbuf = (UNCH *)rrealloc((UNIV)entbuf, NAMELEN + 2);
}


/* Check that the non-SGML characters are compatible with the concrete
syntax and munge the lexical tables accordingly.  Also make any changes
that are required by the NAMING section.

This function takes no arguments: errors about shunned characters not
being declared non-SGML are always reported here.

On return the temporary NAMING tables nlextran and nlextoke have been
freed and both pointers have been reset to zero.
*/

static VOID setlexical()
{
     int i;
     UNCH **p;
     
     if (nlextoke) {
	  /* Handle characters that were made significant by the
	     NAMING section. */
	  for (i = 0; i < 256; i++)
	       if (nlextoke[i] == NMC || nlextoke[i] == NMS)
		    char_flags[i] |= CHAR_SIGNIFICANT;
     }

     for (i = 0; i < 256; i++)
	  if (char_flags[i] & CHAR_SIGNIFICANT) {
	       /* Significant SGML characters mustn't be non-SGML. */
	       if (char_flags[i] & CHAR_NONSGML) {
		    UNCH buf[2];
		    buf[0] = i;
		    buf[1] = '\0';
		    sderr(E_NONSGML, buf, (UNCH *)0);
		    char_flags[i] &= ~CHAR_NONSGML;
	       }
	  }
	  else {
	       /* Shunned characters that are not significant SGML characters
		  must be non-SGML. */
	       if ((char_flags[i] & (CHAR_SHUNNED | CHAR_NONSGML))
		   == CHAR_SHUNNED) {
		    sderr(E_SHUNNED, ltous((long)i), (UNCH *)0);
		    char_flags[i] |= CHAR_NONSGML;
	       }
	  }

     
     /* Now munge the lexical tables.  In each table the class of a
	canonical character (CANON_*) gives that table's class for the
	corresponding kind of character. */
     for (p = lextabs; *p; p++) {
	  UNCH nonclass = (*p)[CANON_NONSGML];
	  UNCH datclass = (*p)[CANON_DATACHAR];
	  UNCH nmcclass = (*p)[CANON_NMC];
	  UNCH nmsclass = (*p)[CANON_NMS];
	  UNCH minclass = (*p)[CANON_MIN];
	  for (i = 0; i < 256; i++) {
	       if (char_flags[i] & CHAR_NONSGML) {
		    /* We already know that it's not significant. */
		    if (!(char_flags[i] & CHAR_MAGIC))
			 (*p)[i] = nonclass;
	       }
	       else {
		    if (char_flags[i] & CHAR_MAGIC) {
			 /* Magic characters have to be non-SGML. */
			 sderr(E_MUSTBENON, ltous((long)i), (UNCH *)0);
		    }
		    else if (!(char_flags[i] & CHAR_SIGNIFICANT))
			 (*p)[i] = datclass;
		    else if (nlextoke
			     /* This relies on the fact that lextoke
				occurs last in lextabs. */
			     && lextoke[i] != nlextoke[i]) {
			 switch (nlextoke[i]) {
			 case NMC:
			      (*p)[i] = nmcclass;
			      break;
			 case NMS:
			      (*p)[i] = nmsclass;
			      break;
			 case INV:
			      /* This will happen if period is not a
				 name character. */
			      (*p)[i] = minclass;
			      break;
			 default:
			      abort();
			 }
		    }
	       }
	  }
     }
     if (nlextran) {
	  memcpy((UNIV)lextran, (UNIV)nlextran, 256);
	  frem((UNIV)nlextran);
	  /* Don't leave a dangling pointer behind: a later call would
	     otherwise copy from, and free, released memory. */
	  nlextran = 0;
     }
     if (nlextoke) {
	  frem((UNIV)nlextoke);
	  nlextoke = 0;
     }
     
}

/* Munge parse tables so that empty start and end tags are not recognized.
For each of the four content PCBs, every action row that has NET_ or
NST_ in the column for the class of '>' gets that entry replaced by the
entry in the column for the class of '_'. */

static VOID noemptytag()
{
     static struct parse *pcbs[] = { &pcbconm, &pcbcone, &pcbconr, &pcbconc };
     int n;

     for (n = 0; n < SIZEOF(pcbs); n++) {
	  UNCH *plex = pcbs[n]->plex;
	  UNCH **ptab = pcbs[n]->ptab;
	  int tagcclass = plex['>'];	/* Class of '>'. */
	  int nmcclass = plex['_'];	/* Class of '_' (a name character). */
	  int maxclass = 0;
	  int maxstate = 0;
	  int ch, cl, st;

	  /* Figure out the maximum lexical class. */
	  for (ch = 0; ch < 256; ch++) {
	       if (plex[ch] > maxclass)
		    maxclass = plex[ch];
	  }

	  /* Rows come in pairs: row ST supplies the state numbers used to
	     find the maximum state, and row ST+1 the actions.  The upper
	     bound grows as larger state numbers are discovered, so the
	     loop ends once every reachable pair has been visited. */
	  for (st = 0; st <= maxstate; st += 2) {
	       UNCH *nextrow = ptab[st];
	       UNCH *actrow = ptab[st + 1];

	       for (cl = 0; cl <= maxclass; cl++) {
		    if (nextrow[cl] > maxstate)
			 maxstate = nextrow[cl];
	       }
	       /* If the '>' class has an empty start or end tag action,
		  change it to the action that the NMC class has. */
	       if (actrow[tagcclass] == NET_ || actrow[tagcclass] == NST_)
		    actrow[tagcclass] = actrow[nmcclass];
	  }
     }
}

/* Look up KEY in the pmap array PTR, which must be terminated by an
entry whose name is null.  Returns the value of the first entry whose
name equals KEY, or 0 if there is none. */

static UNIV pmaplookup(ptr, key)
struct pmap *ptr;
char *key;
{
     while (ptr->name) {
	  if (!strcmp(key, ptr->name))
	       return ptr->value;
	  ptr++;
     }
     return 0;
}

/* Return the decimal representation of N.  The result is held in a
static buffer, so each call overwrites the string returned by the
previous one. */

static UNCH *ltous(n)
long n;
{
     /* Three characters per byte of a long, plus two more. */
     static char numbuf[sizeof(long)*3 + 2];

     sprintf(numbuf, "%ld", n);
     return (UNCH *)numbuf;
}

VOID sgmlwrsd(fp)
FILE *fp;
{
     int i;
     int changed;
     char *p;
     char uc[256];		/* upper case characters (with different lower
				   case characters) */
     char lcletter[256];	/* LC letters: a-z */

     fprintf(fp, "<!SGML \"%s\"\n", standard);
     fprintf(fp, "CHARSET\nBASESET \"%s//CHARSET %s//%s\"\nDESCSET\n",
	     SYSTEM_CHARSET_OWNER,
	     SYSTEM_CHARSET_DESCRIPTION,
	     SYSTEM_CHARSET_DESIGNATING_SEQUENCE);
     
     if (!done_nonsgml) {
	  done_nonsgml = 1;
	  for (i = 0; i < 256; i++)
	       if ((char_flags[i] & (CHAR_SIGNIFICANT | CHAR_SHUNNED))
		   == CHAR_SHUNNED)
	            char_flags[i] |= CHAR_NONSGML;
     }
     i = 0;
     while (i < 256) {
	  int j;
	  for (j = i + 1; j < 256; j++)
	       if ((char_flags[j] & CHAR_NONSGML)
		   != (char_flags[i] & CHAR_NONSGML))
		    break;
	  if (char_flags[i] & CHAR_NONSGML)
	       fprintf(fp, "%d %d UNUSED\n", i, j - i);
	  else
	       fprintf(fp, "%d %d %d\n", i, j - i, i);
	  i = j;
     }
     fprintf(fp, "CAPACITY\n");
     changed = 0;
     for (i = 0; i < NCAPACITY; i++)
	  if (refcapset[i] != sd.capacity[i]) {
	       if (!changed) {
		    fprintf(fp, "SGMLREF\n");
		    changed = 1;
	       }
	       fprintf(fp, "%s %ld\n", captab[i], sd.capacity[i]);
	  }
     if (!changed)
	  fprintf(fp, "PUBLIC \"%s\"\n", capset_map[0].name);
     fprintf(fp, "SCOPE DOCUMENT\n");
     
     fprintf(fp, "SYNTAX\nSHUNCHAR");
     for (i = 0; i < 256; i++)
	  if (char_flags[i] & CHAR_SHUNNED)
	       fprintf(fp, " %d", i);
     fprintf(fp, "\n");
     fprintf(fp, "BASESET \"%s//CHARSET %s//%s\"\nDESCSET 0 256 0\n",
	     SYSTEM_CHARSET_OWNER,
	     SYSTEM_CHARSET_DESCRIPTION,
	     SYSTEM_CHARSET_DESIGNATING_SEQUENCE);

     fprintf(fp, "FUNCTION\nRE 13\nRS 10\nSPACE 32\nTAB SEPCHAR 9\n");

     MEMZERO((UNIV)uc, 256);
     for (i = 0; i < 256; i++)
	  if (lextran[i] != i)
	       uc[lextran[i]] = 1;

     MEMZERO((UNIV)lcletter, 256);
     for (p = "abcdefghijklmnopqrstuvwxyz"; *p; p++)
	  lcletter[(unsigned char)*p]= 1;

     fprintf(fp, "NAMING\n");
     fputs("LCNMSTRT \"", fp);
     for (i = 0; i < 256; i++)
	  if (lextoke[i] == NMS && !uc[i] && !lcletter[i])
	       fprintf(fp, "&#%d;", i);
     fputs("\"\n", fp);
     fputs("UCNMSTRT \"", fp);
     for (i = 0; i < 256; i++)
	  if (lextoke[i] == NMS && !uc[i] && !lcletter[i])
	       fprintf(fp, "&#%d;", lextran[i]);
     fputs("\"\n", fp);
     fputs("LCNMCHAR \"", fp);
     for (i = 0; i < 256; i++)
	  if (lextoke[i] == NMC && !uc[i])
	       fprintf(fp, "&#%d;", i);
     fputs("\"\n", fp);
     fputs("UCNMCHAR \"", fp);
     for (i = 0; i < 256; i++)
	  if (lextoke[i] == NMC && !uc[i])
	       fprintf(fp, "&#%d;", lextran[i]);
     fputs("\"\n", fp);

     fprintf(fp, "NAMECASE\nGENERAL %s\nENTITY %s\n",
	     sd.namecase[0] ? "YES" : "NO",
	     sd.namecase[1] ? "YES" : "NO");
     fprintf(fp, "DELIM\nGENERAL SGMLREF\nSHORTREF %s\n",
	     sd.shortref ? "SGMLREF" : "NONE");
     fprintf(fp, "NAMES SGMLREF\n");
     if (newkey) {
	  /* The reference key was saved in newkey. */
	  for (i = 0; i < NKEYS; i++)
	       if (newkey[i][0])
		    fprintf(fp, "%s %s\n", newkey[i], key[i]);
     }
     fprintf(fp, "QUANTITY SGMLREF\n");
     if (quantity_changed)
	  for (i = 0; i < NQUANTITY; i++)
	       if (quantity_changed[i])
		    fprintf(fp, "%s %d\n", quantity_names[i], sd.quantity[i]);
     fprintf(fp,
	     "FEATURES\nMINIMIZE\nDATATAG NO OMITTAG %s RANK NO SHORTTAG %s\n",
	     sd.omittag ? "YES" : "NO",
	     sd.shorttag ? "YES" : "NO");
     fprintf(fp, "LINK SIMPLE NO IMPLICIT NO EXPLICIT NO\n");
     fprintf(fp, "OTHER CONCUR NO ");
     if (sd.subdoc > 0)
	  fprintf(fp, "SUBDOC YES %ld ", sd.subdoc);
     else
	  fprintf(fp, "SUBDOC NO ");
     fprintf(fp, "FORMAL %s\n", sd.formal ? "YES" : "NO");
     fprintf(fp, "APPINFO NONE");
     fprintf(fp, ">\n");
}

/*
Local Variables:
c-indent-level: 5
c-continued-statement-offset: 5
c-brace-offset: -5
c-argdecl-indent: 0
c-label-offset: -5
End:
*/