/* sgmls.c:
   Library for reading output of sgmls.

   Written by James Clark (jjc@jclark.com). */

#include "config.h"
#include "std.h"
#include "sgmls.h"
#include "lineout.h"

/* NO_RETURN expands to `volatile' under GNU C and to nothing elsewhere. */
#ifdef __GNUC__
#define NO_RETURN volatile
#else
#define NO_RETURN /* as nothing */
#endif

/* P() wraps the parameter list of a function declaration: the list is
   kept when USE_PROTOTYPES is defined and replaced by `()' otherwise. */
#ifdef USE_PROTOTYPES
#define P(parms) parms
#else
#define P(parms) ()
#endif

/* When __STDC__ is not defined, `const' is defined away. */
#ifndef __STDC__
#define const /* as nothing */
#endif

/* Short local aliases for the struct tags used throughout this file. */
typedef struct sgmls_data data_s;
typedef struct sgmls_notation notation_s;
typedef struct sgmls_internal_entity internal_entity_s;
typedef struct sgmls_external_entity external_entity_s;
typedef struct sgmls_entity entity_s;
typedef struct sgmls_attribute attribute_s;
typedef struct sgmls_event event_s;

/* Generic view of a list node.  The entity and notation lists are cast
   to this type by the list helpers in this file.

   lists are sorted in reverse order of level */
struct list {
  int subdoc_level;             /* -1 if associated with finished subdoc */
  struct list *next;
  char *name;
};

struct entity_list {
Packit 9741aa
  int subdoc_level;
Packit 9741aa
  struct entity_list *next;
Packit 9741aa
  entity_s entity;
Packit 9741aa
};
Packit 9741aa
Packit 9741aa
struct notation_list {
Packit 9741aa
  int subdoc_level;
Packit 9741aa
  struct notation_list *next;
Packit 9741aa
  notation_s notation;
Packit 9741aa
};
Packit 9741aa
Packit 9741aa
struct sgmls {
Packit 9741aa
  FILE *fp;
Packit 9741aa
  char *buf;
Packit 9741aa
  unsigned buf_size;
Packit 9741aa
  struct entity_list *entities;
Packit 9741aa
  struct notation_list *notations;
Packit 9741aa
  attribute_s *attributes;
Packit 9741aa
  unsigned long lineno;
Packit 9741aa
  char *filename;
Packit 9741aa
  unsigned filename_size;
Packit 9741aa
  unsigned long input_lineno;
Packit 9741aa
  int subdoc_level;
Packit 9741aa
  char **files;			/* from `f' commands */
Packit 9741aa
  int nfiles;
Packit 9741aa
  char *sysid;			/* from `s' command */
Packit 9741aa
  char *pubid;			/* from `p' command */
Packit 9741aa
};
Packit 9741aa
Packit 9741aa
/* Error codes passed to error().  The numeric value of each constant is
   its position in this list, starting from 0. */
enum error_code {
  E_ZERO,                       /* Not an error */
  E_NOMEM,                      /* Out of memory */
  E_BADESCAPE,                  /* Bad escape */
  E_NULESCAPE,                  /* \000 other than in data */
  E_NUL,                        /* A null input character */
  E_BADENTITY,                  /* Reference to undefined entity */
  E_INTERNALENTITY,             /* Internal entity when external was needed */
  E_SYSTEM,                     /* System input error */
  E_COMMAND,                    /* Bad command letter */
  E_MISSING,                    /* Missing arguments */
  E_NUMBER,                     /* Not a number */
  E_ATTR,                       /* Bad attribute type */
  E_BADNOTATION,                /* Reference to undefined notation */
  E_BADINTERNAL,                /* Bad internal entity type */
  E_BADEXTERNAL,                /* Bad external entity type */
  E_EOF,                        /* EOF in middle of line */
  E_SDATA,                      /* \| other than in data */
  E_LINELENGTH                  /* line longer than UNSIGNED_MAX */
};

/* Message text for each error code, in the same order as the constants
   of enum error_code (one entry per constant, so that the code can be
   used directly as the index).  Entry 0 is a null pointer because code
   0 is not an error. */
static char *errlist[] = {
  0,
  "Out of memory",
  "Bad escape",
  "\\0 escape not in data",
  "Nul character in input",
  "Reference to undefined entity",
  "Internal entity when external was needed",
  "System input error",
  "Bad command letter",
  "Missing arguments",
  "Not a number",
  "Bad attribute type",
  "Reference to undefined notation",
  "Bad internal entity type",
  "Bad external entity type",
  "EOF in middle of line",
  "\\| other than in data",
  "Input line too long"
};

static void error P((enum error_code));
Packit 9741aa
static int parse_data P((char *, unsigned long *));
Packit 9741aa
static void parse_location P((char *, struct sgmls *));
Packit 9741aa
static void parse_notation P((char *, notation_s *));
Packit 9741aa
static void parse_internal_entity P((char *, internal_entity_s *));
Packit 9741aa
static void parse_external_entity
Packit 9741aa
  P((char *, struct sgmls *, external_entity_s *));
Packit 9741aa
static void parse_subdoc_entity P((char *, external_entity_s *));
Packit 9741aa
static attribute_s *parse_attribute P((struct sgmls *, char *));
Packit 9741aa
static void grow_datav P((void));
Packit 9741aa
static char *unescape P((char *));
Packit 9741aa
static char *unescape_file P((char *));
Packit 9741aa
static int unescape1 P((char *));
Packit 9741aa
static char *scan_token P((char **));
Packit 9741aa
static int count_args P((char *));
Packit 9741aa
static struct list *list_find P((struct list *, char *, int));
Packit 9741aa
static UNIV xmalloc P((unsigned));
Packit 9741aa
static UNIV xrealloc P((UNIV , unsigned));
Packit 9741aa
static char *strsave P((char *));
Packit 9741aa
static int read_line P((struct sgmls *));
Packit 9741aa
static notation_s *lookup_notation P((struct sgmls *, char *));
Packit 9741aa
static entity_s *lookup_entity P((struct sgmls *, char *));
Packit 9741aa
static external_entity_s *lookup_external_entity P((struct sgmls *, char *));
Packit 9741aa
static void define_external_entity P((struct sgmls *, external_entity_s *));
Packit 9741aa
static void define_internal_entity P((struct sgmls *, internal_entity_s *));
Packit 9741aa
static void define_notation P((struct sgmls *, notation_s *));
Packit 9741aa
static data_s *copy_data P((data_s *, int));
Packit 9741aa
static void list_finish_level P((struct list **, int));
Packit 9741aa
static void add_attribute P((attribute_s **, attribute_s *));
Packit 9741aa
static void default_errhandler P((int, char *, unsigned long));
Packit 9741aa
Packit 9741aa
/* Free s unless it is a null pointer.  Wrapped in do/while so that it
   behaves as a single statement.  Note that s is evaluated twice. */
#define xfree(s) do { if (s) free(s); } while (0)

static sgmls_errhandler *errhandler = default_errhandler;
Packit 9741aa
static unsigned long input_lineno = 0;
Packit 9741aa
Packit 9741aa
static data_s *datav = 0;
Packit 9741aa
static int datav_size = 0;
Packit 9741aa
Packit 9741aa
struct sgmls *sgmls_create(fp)
Packit 9741aa
     FILE *fp;
Packit 9741aa
{
Packit 9741aa
  struct sgmls *sp;
Packit 9741aa
Packit 9741aa
  sp = (struct sgmls *)malloc(sizeof(struct sgmls));
Packit 9741aa
  if (!sp)
Packit 9741aa
    return 0;
Packit 9741aa
  sp->fp = fp;
Packit 9741aa
  sp->entities = 0;
Packit 9741aa
  sp->notations = 0;
Packit 9741aa
  sp->attributes = 0;
Packit 9741aa
  sp->lineno = 0;
Packit 9741aa
  sp->filename = 0;
Packit 9741aa
  sp->filename_size = 0;
Packit 9741aa
  sp->input_lineno = 0;
Packit 9741aa
  sp->buf_size = 0;
Packit 9741aa
  sp->buf = 0;
Packit 9741aa
  sp->subdoc_level = 0;
Packit 9741aa
  sp->files = 0;
Packit 9741aa
  sp->nfiles = 0;
Packit 9741aa
  sp->sysid = 0;
Packit 9741aa
  sp->pubid = 0;
Packit 9741aa
  return sp;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
void sgmls_free(sp)
Packit 9741aa
     struct sgmls *sp;
Packit 9741aa
{
Packit 9741aa
  struct entity_list *ep;
Packit 9741aa
  struct notation_list *np;
Packit 9741aa
Packit 9741aa
  if (!sp)
Packit 9741aa
    return;
Packit 9741aa
  xfree(sp->filename);
Packit 9741aa
  sgmls_free_attributes(sp->attributes);
Packit 9741aa
Packit 9741aa
  for (ep = sp->entities; ep;) {
Packit 9741aa
    struct entity_list *tem = ep->next;
Packit 9741aa
    if (ep->entity.is_internal) {
Packit 9741aa
      xfree(ep->entity.u.internal.data.s);
Packit 9741aa
      free(ep->entity.u.internal.name);
Packit 9741aa
    }
Packit 9741aa
    else {
Packit 9741aa
      int i;
Packit 9741aa
      for (i = 0; i < ep->entity.u.external.nfilenames; i++)
Packit 9741aa
	xfree(ep->entity.u.external.filenames[i]);
Packit 9741aa
      xfree(ep->entity.u.external.filenames);
Packit 9741aa
      xfree(ep->entity.u.external.sysid);
Packit 9741aa
      xfree(ep->entity.u.external.pubid);
Packit 9741aa
      sgmls_free_attributes(ep->entity.u.external.attributes);
Packit 9741aa
      free(ep->entity.u.internal.name);
Packit 9741aa
    }
Packit 9741aa
    free(ep);
Packit 9741aa
    ep = tem;
Packit 9741aa
  }
Packit 9741aa
Packit 9741aa
  for (np = sp->notations; np;) {
Packit 9741aa
    struct notation_list *tem = np->next;
Packit 9741aa
    xfree(np->notation.sysid);
Packit 9741aa
    xfree(np->notation.pubid);
Packit 9741aa
    free(np->notation.name);
Packit 9741aa
    free(np);
Packit 9741aa
    np = tem;
Packit 9741aa
  }
Packit 9741aa
Packit 9741aa
  xfree(sp->buf);
Packit 9741aa
  xfree(sp->pubid);
Packit 9741aa
  xfree(sp->sysid);
Packit 9741aa
  if (sp->files) {
Packit 9741aa
    int i;
Packit 9741aa
    for (i = 0; i < sp->nfiles; i++)
Packit 9741aa
      free(sp->files[i]);
Packit 9741aa
    free(sp->files);
Packit 9741aa
  }
Packit 9741aa
  free(sp);
Packit 9741aa
Packit 9741aa
  xfree(datav);
Packit 9741aa
  datav = 0;
Packit 9741aa
  datav_size = 0;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
sgmls_errhandler *sgmls_set_errhandler(handler)
Packit 9741aa
     sgmls_errhandler *handler;
Packit 9741aa
{
Packit 9741aa
  sgmls_errhandler *old = errhandler;
Packit 9741aa
  if (handler)
Packit 9741aa
    errhandler = handler;
Packit 9741aa
  return old;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
int sgmls_next(sp, e)
Packit 9741aa
     struct sgmls *sp;
Packit 9741aa
     event_s *e;
Packit 9741aa
{
Packit 9741aa
  while (read_line(sp)) {
Packit 9741aa
    char *buf = sp->buf;
Packit 9741aa
Packit 9741aa
    e->filename = sp->filename;
Packit 9741aa
    e->lineno = sp->lineno;
Packit 9741aa
Packit 9741aa
    switch (buf[0]) {
Packit 9741aa
    case DATA_CODE:
Packit 9741aa
      e->u.data.n = parse_data(buf + 1, &sp->lineno);
Packit 9741aa
      e->u.data.v = datav;
Packit 9741aa
      e->type = SGMLS_EVENT_DATA;
Packit 9741aa
      return 1;
Packit 9741aa
    case START_CODE:
Packit 9741aa
      {
Packit 9741aa
	char *p;
Packit 9741aa
	e->u.start.attributes = sp->attributes;
Packit 9741aa
	sp->attributes = 0;
Packit 9741aa
	e->type = SGMLS_EVENT_START;
Packit 9741aa
	p = buf + 1;
Packit 9741aa
	e->u.start.gi = scan_token(&p);
Packit 9741aa
	return 1;
Packit 9741aa
      }
Packit 9741aa
    case END_CODE:
Packit 9741aa
      {
Packit 9741aa
	char *p = buf + 1;
Packit 9741aa
	e->type = SGMLS_EVENT_END;
Packit 9741aa
	e->u.end.gi = scan_token(&p);
Packit 9741aa
	return 1;
Packit 9741aa
      }
Packit 9741aa
    case START_SUBDOC_CODE:
Packit 9741aa
    case END_SUBDOC_CODE:
Packit 9741aa
      {
Packit 9741aa
	char *p = buf + 1;
Packit 9741aa
	char *name = scan_token(&p);
Packit 9741aa
	if (buf[0] == START_SUBDOC_CODE) {
Packit 9741aa
	  e->u.entity = lookup_external_entity(sp, name);
Packit 9741aa
	  sp->subdoc_level++;
Packit 9741aa
	  e->type = SGMLS_EVENT_SUBSTART;
Packit 9741aa
	}
Packit 9741aa
	else {
Packit 9741aa
	  e->type = SGMLS_EVENT_SUBEND;
Packit 9741aa
	  list_finish_level((struct list **)&sp->entities, sp->subdoc_level);
Packit 9741aa
	  list_finish_level((struct list **)&sp->notations, sp->subdoc_level);
Packit 9741aa
	  sp->subdoc_level--;
Packit 9741aa
	  e->u.entity = lookup_external_entity(sp, name);
Packit 9741aa
	}
Packit 9741aa
	return 1;
Packit 9741aa
      }
Packit 9741aa
    case ATTRIBUTE_CODE:
Packit 9741aa
      add_attribute(&sp->attributes, parse_attribute(sp, buf + 1));
Packit 9741aa
      break;
Packit 9741aa
    case DATA_ATTRIBUTE_CODE:
Packit 9741aa
      {
Packit 9741aa
	char *p = buf + 1;
Packit 9741aa
	char *name;
Packit 9741aa
	attribute_s *a;
Packit 9741aa
	external_entity_s *ext;
Packit 9741aa
	
Packit 9741aa
	name = scan_token(&p);
Packit 9741aa
	a = parse_attribute(sp, p);
Packit 9741aa
	ext = lookup_external_entity(sp, name);
Packit 9741aa
	add_attribute(&ext->attributes, a);
Packit 9741aa
      }
Packit 9741aa
      break;
Packit 9741aa
    case REFERENCE_ENTITY_CODE:
Packit 9741aa
      {
Packit 9741aa
	char *p = buf + 1;
Packit 9741aa
	char *name;
Packit 9741aa
	name = scan_token(&p);
Packit 9741aa
	e->u.entity = lookup_external_entity(sp, name);
Packit 9741aa
	e->type = SGMLS_EVENT_ENTITY;
Packit 9741aa
	return 1;
Packit 9741aa
      }
Packit 9741aa
    case DEFINE_NOTATION_CODE:
Packit 9741aa
      {
Packit 9741aa
	notation_s notation;
Packit 9741aa
Packit 9741aa
	parse_notation(buf + 1, &notation);
Packit 9741aa
	define_notation(sp, &notation);
Packit 9741aa
      }
Packit 9741aa
      break;
Packit 9741aa
    case DEFINE_EXTERNAL_ENTITY_CODE:
Packit 9741aa
      {
Packit 9741aa
	external_entity_s external;
Packit 9741aa
Packit 9741aa
	parse_external_entity(buf + 1, sp, &external);
Packit 9741aa
	define_external_entity(sp, &external);
Packit 9741aa
      }
Packit 9741aa
      break;
Packit 9741aa
    case DEFINE_SUBDOC_ENTITY_CODE:
Packit 9741aa
      {
Packit 9741aa
	external_entity_s external;
Packit 9741aa
Packit 9741aa
	parse_subdoc_entity(buf + 1, &external);
Packit 9741aa
	define_external_entity(sp, &external);
Packit 9741aa
      }
Packit 9741aa
      break;
Packit 9741aa
    case DEFINE_INTERNAL_ENTITY_CODE:
Packit 9741aa
      {
Packit 9741aa
	internal_entity_s internal;
Packit 9741aa
Packit 9741aa
	parse_internal_entity(buf + 1, &internal);
Packit 9741aa
	define_internal_entity(sp, &internal);
Packit 9741aa
      }
Packit 9741aa
      break;
Packit 9741aa
    case PI_CODE:
Packit 9741aa
      e->u.pi.len = unescape1(buf + 1);
Packit 9741aa
      e->u.pi.s = buf + 1;
Packit 9741aa
      e->type = SGMLS_EVENT_PI;
Packit 9741aa
      return 1;
Packit 9741aa
    case LOCATION_CODE:
Packit 9741aa
      parse_location(buf + 1, sp);
Packit 9741aa
      break;
Packit 9741aa
    case APPINFO_CODE:
Packit 9741aa
      e->u.appinfo = unescape(buf + 1);
Packit 9741aa
      e->type = SGMLS_EVENT_APPINFO;
Packit 9741aa
      return 1;
Packit 9741aa
    case SYSID_CODE:
Packit 9741aa
      sp->sysid = strsave(unescape(buf + 1));
Packit 9741aa
      break;
Packit 9741aa
    case PUBID_CODE:
Packit 9741aa
      sp->pubid = strsave(unescape(buf + 1));
Packit 9741aa
      break;
Packit 9741aa
    case FILE_CODE:
Packit 9741aa
      sp->files = xrealloc(sp->files, (sp->nfiles + 1)*sizeof(char *));
Packit 9741aa
      sp->files[sp->nfiles] = strsave(unescape_file(buf + 1));
Packit 9741aa
      sp->nfiles += 1;
Packit 9741aa
      break;
Packit 9741aa
    case CONFORMING_CODE:
Packit 9741aa
      e->type = SGMLS_EVENT_CONFORMING;
Packit 9741aa
      return 1;
Packit 9741aa
    default:
Packit 9741aa
      error(E_COMMAND);
Packit 9741aa
    }
Packit 9741aa
  }
Packit 9741aa
Packit 9741aa
  return 0;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
int parse_data(p, linenop)
Packit 9741aa
     char *p;
Packit 9741aa
     unsigned long *linenop;
Packit 9741aa
{
Packit 9741aa
  int n = 0;
Packit 9741aa
  char *start = p;
Packit 9741aa
  char *q;
Packit 9741aa
  int is_sdata = 0;
Packit 9741aa
Packit 9741aa
  /* No need to copy before first escape. */
Packit 9741aa
Packit 9741aa
  for (; *p != '\\' && *p != '\0'; p++)
Packit 9741aa
    ;
Packit 9741aa
  q = p;
Packit 9741aa
  while (*p) {
Packit 9741aa
    if (*p == '\\') {
Packit 9741aa
      switch (*++p) {
Packit 9741aa
      case '\\':
Packit 9741aa
	*q++ = *p++;
Packit 9741aa
	break;
Packit 9741aa
      case 'n':
Packit 9741aa
	*q++ = RECHAR;
Packit 9741aa
	*linenop += 1;
Packit 9741aa
	p++;
Packit 9741aa
	break;
Packit 9741aa
      case '0':
Packit 9741aa
      case '1':
Packit 9741aa
      case '2':
Packit 9741aa
      case '3':
Packit 9741aa
      case '4':
Packit 9741aa
      case '5':
Packit 9741aa
      case '6':
Packit 9741aa
      case '7':
Packit 9741aa
	{
Packit 9741aa
	  int val = *p++ - '0';
Packit 9741aa
	  if (*p >= '0' && *p <= '7') {
Packit 9741aa
	    val = val*8 + (*p++ - '0');
Packit 9741aa
	    if (*p >= '0' && *p <= '7')
Packit 9741aa
	      val = val*8 + (*p++ - '0');
Packit 9741aa
	  }
Packit 9741aa
	  *q++ = (char)val;
Packit 9741aa
	}
Packit 9741aa
	break;
Packit 9741aa
      case '|':
Packit 9741aa
	if (q > start || is_sdata) {
Packit 9741aa
	  if (n >= datav_size)
Packit 9741aa
	    grow_datav();
Packit 9741aa
	  datav[n].s = start;
Packit 9741aa
	  datav[n].len = q - start;
Packit 9741aa
	  datav[n].is_sdata = is_sdata;
Packit 9741aa
	  n++;
Packit 9741aa
	}
Packit 9741aa
	is_sdata = !is_sdata;
Packit 9741aa
	start = q;
Packit 9741aa
	p++;
Packit 9741aa
	break;
Packit 9741aa
      default:
Packit 9741aa
	error(E_BADESCAPE);
Packit 9741aa
      }
Packit 9741aa
    }
Packit 9741aa
    else
Packit 9741aa
      *q++ = *p++;
Packit 9741aa
  }
Packit 9741aa
  
Packit 9741aa
  if (q > start || is_sdata) {
Packit 9741aa
    if (n >= datav_size)
Packit 9741aa
      grow_datav();
Packit 9741aa
    datav[n].s = start;
Packit 9741aa
    datav[n].len = q - start;
Packit 9741aa
    datav[n].is_sdata = is_sdata;
Packit 9741aa
    n++;
Packit 9741aa
  }
Packit 9741aa
  return n;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
void grow_datav()
Packit 9741aa
{
Packit 9741aa
  unsigned size = datav_size ? 2*datav_size : 2;
Packit 9741aa
  datav = (data_s *)xrealloc((UNIV)datav, size*sizeof(data_s));
Packit 9741aa
  datav_size = size;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
void parse_location(s, sp)
Packit 9741aa
     char *s;
Packit 9741aa
     struct sgmls *sp;
Packit 9741aa
{
Packit 9741aa
  unsigned size;
Packit 9741aa
Packit 9741aa
  if (*s < '0' || *s > '9' || sscanf(s, "%lu", &sp->lineno) != 1)
Packit 9741aa
    error(E_NUMBER);
Packit 9741aa
  do {
Packit 9741aa
    ++s;
Packit 9741aa
  } while (*s >= '0' && *s <= '9');
Packit 9741aa
Packit 9741aa
  if (*s != ' ')
Packit 9741aa
    return;
Packit 9741aa
  s++;
Packit 9741aa
  s = unescape_file(s);
Packit 9741aa
  size = strlen(s) + 1;
Packit 9741aa
  if (size <= sp->filename_size)
Packit 9741aa
    strcpy(sp->filename, s);
Packit 9741aa
  else {
Packit 9741aa
    sp->filename = xrealloc(sp->filename, size);
Packit 9741aa
    strcpy(sp->filename, s);
Packit 9741aa
    sp->filename_size = size;
Packit 9741aa
  }
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
void parse_notation(s, n)
Packit 9741aa
     char *s;
Packit 9741aa
     notation_s *n;
Packit 9741aa
{
Packit 9741aa
  n->name = strsave(scan_token(&s);;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
void parse_internal_entity(s, e)
Packit 9741aa
     char *s;
Packit 9741aa
     internal_entity_s *e;
Packit 9741aa
{
Packit 9741aa
  char *type;
Packit 9741aa
Packit 9741aa
  e->name = strsave(scan_token(&s);;
Packit 9741aa
  type = scan_token(&s);
Packit 9741aa
  if (strcmp(type, "CDATA") == 0)
Packit 9741aa
    e->data.is_sdata = 0;
Packit 9741aa
  else if (strcmp(type, "SDATA") == 0)
Packit 9741aa
    e->data.is_sdata = 1;
Packit 9741aa
  else
Packit 9741aa
    error(E_BADINTERNAL);
Packit 9741aa
  e->data.len = unescape1(s);
Packit 9741aa
  if (e->data.len == 0)
Packit 9741aa
    e->data.s = 0;
Packit 9741aa
  else {
Packit 9741aa
    e->data.s = xmalloc(e->data.len);
Packit 9741aa
    memcpy(e->data.s, s, e->data.len);
Packit 9741aa
  }
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
void parse_external_entity(s, sp, e)
Packit 9741aa
     char *s;
Packit 9741aa
     struct sgmls *sp;
Packit 9741aa
     external_entity_s *e;
Packit 9741aa
{
Packit 9741aa
  char *type;
Packit 9741aa
  char *notation;
Packit 9741aa
Packit 9741aa
  e->name = strsave(scan_token(&s);;
Packit 9741aa
  type = scan_token(&s);
Packit 9741aa
  if (strcmp(type, "CDATA") == 0)
Packit 9741aa
    e->type = SGMLS_ENTITY_CDATA;
Packit 9741aa
  else if (strcmp(type, "SDATA") == 0)
Packit 9741aa
    e->type = SGMLS_ENTITY_SDATA;
Packit 9741aa
  else if (strcmp(type, "NDATA") == 0)
Packit 9741aa
    e->type = SGMLS_ENTITY_NDATA;
Packit 9741aa
  else
Packit 9741aa
    error(E_BADEXTERNAL);
Packit 9741aa
  notation = scan_token(&s);
Packit 9741aa
  e->notation = lookup_notation(sp, notation);
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
void parse_subdoc_entity(s, e)
Packit 9741aa
     char *s;
Packit 9741aa
     external_entity_s *e;
Packit 9741aa
{
Packit 9741aa
  e->name = strsave(scan_token(&s);;
Packit 9741aa
  e->type = SGMLS_ENTITY_SUBDOC;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
attribute_s *parse_attribute(sp, s)
Packit 9741aa
     struct sgmls *sp;
Packit 9741aa
     char *s;
Packit 9741aa
{
Packit 9741aa
  attribute_s *a;
Packit 9741aa
  char *type;
Packit 9741aa
Packit 9741aa
  a = (attribute_s *)xmalloc(sizeof(*a));
Packit 9741aa
  a->name = strsave(scan_token(&s);;
Packit 9741aa
  type = scan_token(&s);
Packit 9741aa
  if (strcmp(type, "CDATA") == 0) {
Packit 9741aa
    unsigned long lineno = 0;
Packit 9741aa
    a->type = SGMLS_ATTR_CDATA;
Packit 9741aa
    a->value.data.n = parse_data(s, &lineno);
Packit 9741aa
    a->value.data.v = copy_data(datav, a->value.data.n);
Packit 9741aa
  }
Packit 9741aa
  else if (strcmp(type, "IMPLIED") == 0) {
Packit 9741aa
    a->type = SGMLS_ATTR_IMPLIED;
Packit 9741aa
  }
Packit 9741aa
  else if (strcmp(type, "NOTATION") == 0) {
Packit 9741aa
    a->type = SGMLS_ATTR_NOTATION;
Packit 9741aa
    a->value.notation = lookup_notation(sp, scan_token(&s);;
Packit 9741aa
  }
Packit 9741aa
  else if (strcmp(type, "ENTITY") == 0) {
Packit 9741aa
    int n, i;
Packit 9741aa
    a->type = SGMLS_ATTR_ENTITY;
Packit 9741aa
    n = count_args(s);
Packit 9741aa
    if (n == 0)
Packit 9741aa
      error(E_MISSING);
Packit 9741aa
    a->value.entity.v = (entity_s **)xmalloc(n*sizeof(entity_s *));
Packit 9741aa
    a->value.entity.n = n;
Packit 9741aa
    for (i = 0; i < n; i++)
Packit 9741aa
      a->value.entity.v[i] = lookup_entity(sp, scan_token(&s);;
Packit 9741aa
  }
Packit 9741aa
  else if (strcmp(type, "TOKEN") == 0) {
Packit 9741aa
    int n, i;
Packit 9741aa
    a->type = SGMLS_ATTR_TOKEN;
Packit 9741aa
    n = count_args(s);
Packit 9741aa
    if (n == 0)
Packit 9741aa
      error(E_MISSING);
Packit 9741aa
    a->value.token.v = (char **)xmalloc(n * sizeof(char *));
Packit 9741aa
    for (i = 0; i < n; i++)
Packit 9741aa
      a->value.token.v[i] = strsave(scan_token(&s);;
Packit 9741aa
    a->value.token.n = n;
Packit 9741aa
  }
Packit 9741aa
  else
Packit 9741aa
    error(E_ATTR);
Packit 9741aa
  return a;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
void sgmls_free_attributes(p)
Packit 9741aa
     attribute_s *p;
Packit 9741aa
{
Packit 9741aa
  while (p) {
Packit 9741aa
    attribute_s *nextp = p->next;
Packit 9741aa
    switch (p->type) {
Packit 9741aa
    case SGMLS_ATTR_CDATA:
Packit 9741aa
      if (p->value.data.v) {
Packit 9741aa
	free(p->value.data.v[0].s);
Packit 9741aa
	free(p->value.data.v);
Packit 9741aa
      }
Packit 9741aa
      break;
Packit 9741aa
    case SGMLS_ATTR_TOKEN:
Packit 9741aa
      {
Packit 9741aa
	int i;
Packit 9741aa
	for (i = 0; i < p->value.token.n; i++)
Packit 9741aa
	  free(p->value.token.v[i]);
Packit 9741aa
	xfree(p->value.token.v);
Packit 9741aa
      }
Packit 9741aa
      break;
Packit 9741aa
    case SGMLS_ATTR_ENTITY:
Packit 9741aa
      xfree(p->value.entity.v);
Packit 9741aa
      break;
Packit 9741aa
    case SGMLS_ATTR_IMPLIED:
Packit 9741aa
    case SGMLS_ATTR_NOTATION:
Packit 9741aa
      break;
Packit 9741aa
    }
Packit 9741aa
    free(p->name);
Packit 9741aa
    free(p);
Packit 9741aa
    p = nextp;
Packit 9741aa
  }
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
data_s *copy_data(v, n)
Packit 9741aa
     data_s *v;
Packit 9741aa
     int n;
Packit 9741aa
{
Packit 9741aa
  if (n == 0)
Packit 9741aa
    return 0;
Packit 9741aa
  else {
Packit 9741aa
    int i;
Packit 9741aa
    unsigned total;
Packit 9741aa
    char *p;
Packit 9741aa
    data_s *result;
Packit 9741aa
    
Packit 9741aa
    result = (data_s *)xmalloc(n*sizeof(data_s));
Packit 9741aa
    total = 0;
Packit 9741aa
    for (i = 0; i < n; i++)
Packit 9741aa
      total += v[i].len;
Packit 9741aa
    if (!total)
Packit 9741aa
      total++;
Packit 9741aa
    p = xmalloc(total);
Packit 9741aa
    for (i = 0; i < n; i++) {
Packit 9741aa
      result[i].s = p;
Packit 9741aa
      memcpy(result[i].s, v[i].s, v[i].len);
Packit 9741aa
      result[i].len = v[i].len;
Packit 9741aa
      p += v[i].len;
Packit 9741aa
      result[i].is_sdata = v[i].is_sdata;
Packit 9741aa
    }
Packit 9741aa
    return result;
Packit 9741aa
  }
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
/* Unescape s, and return nul-terminated data.  Give an error
Packit 9741aa
if the data contains 0. */
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
char *unescape(s)
Packit 9741aa
     char *s;
Packit 9741aa
{
Packit 9741aa
  int len = unescape1(s);
Packit 9741aa
  if (memchr(s, '\0', len))
Packit 9741aa
    error(E_NULESCAPE);
Packit 9741aa
  s[len] = '\0';
Packit 9741aa
  return s;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
/* Like unescape(), but REs are represented by 012 not 015. */
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
char *unescape_file(s)
Packit 9741aa
     char *s;
Packit 9741aa
{
Packit 9741aa
  char *p;
Packit 9741aa
  p = s = unescape(s);
Packit 9741aa
  while ((p = strchr(p, RECHAR)) != 0)
Packit 9741aa
    *p++ = '\n';
Packit 9741aa
  return s;
Packit 9741aa
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
/* Unescape s, and return length of data.  The data may contain 0. */
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
int unescape1(s)
Packit 9741aa
     char *s;
Packit 9741aa
{
Packit 9741aa
  const char *p;
Packit 9741aa
  char *q;
Packit 9741aa
Packit 9741aa
  q = strchr(s, '\\');
Packit 9741aa
  if (!q)
Packit 9741aa
    return strlen(s);
Packit 9741aa
  p = q;
Packit 9741aa
  while (*p) {
Packit 9741aa
    if (*p == '\\') {
Packit 9741aa
      switch (*++p) {
Packit 9741aa
      case '\\':
Packit 9741aa
	*q++ = *p++;
Packit 9741aa
	break;
Packit 9741aa
      case 'n':
Packit 9741aa
	*q++ = RECHAR;
Packit 9741aa
	p++;
Packit 9741aa
	break;
Packit 9741aa
      case '0':
Packit 9741aa
      case '1':
Packit 9741aa
      case '2':
Packit 9741aa
      case '3':
Packit 9741aa
      case '4':
Packit 9741aa
      case '5':
Packit 9741aa
      case '6':
Packit 9741aa
      case '7':
Packit 9741aa
	{
Packit 9741aa
	  int val = *p++ - '0';
Packit 9741aa
	  if (*p >= '0' && *p <= '7') {
Packit 9741aa
	    val = val*8 + (*p++ - '0');
Packit 9741aa
	    if (*p >= '0' && *p <= '7')
Packit 9741aa
	      val = val*8 + (*p++ - '0');
Packit 9741aa
	  }
Packit 9741aa
	  *q++ = (char)val;
Packit 9741aa
	}
Packit 9741aa
	break;
Packit 9741aa
      case '|':
Packit 9741aa
	error(E_SDATA);
Packit 9741aa
      default:
Packit 9741aa
	error(E_BADESCAPE);
Packit 9741aa
      }
Packit 9741aa
    }
Packit 9741aa
    else
Packit 9741aa
      *q++ = *p++;
Packit 9741aa
  }
Packit 9741aa
  return q - s;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
char *scan_token(pp)
Packit 9741aa
     char **pp;
Packit 9741aa
{
Packit 9741aa
  char *start = *pp;
Packit 9741aa
  while (**pp != '\0') {
Packit 9741aa
    if (**pp == ' ') {
Packit 9741aa
      **pp = '\0';
Packit 9741aa
      *pp += 1;
Packit 9741aa
      break;
Packit 9741aa
    }
Packit 9741aa
    *pp += 1;
Packit 9741aa
  }
Packit 9741aa
  if (!*start)
Packit 9741aa
    error(E_MISSING);
Packit 9741aa
  return start;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
/* Return the number of space-separated arguments in p without
   modifying it.  An empty string has 0 arguments; a single trailing
   space does not start another argument. */
static
int count_args(p)
     char *p;
{
  int n = 0;

  while (*p != '\0') {
    n++;
    /* Skip the rest of this argument and the space that ends it.  The
       first character is never examined, since it is already known to
       belong to the argument. */
    do {
      ++p;
      if (*p == ' ') {
        p++;
        break;
      }
    } while (*p != '\0');
  }
  return n;
}

static
Packit 9741aa
int read_line(sp)
Packit 9741aa
     struct sgmls *sp;
Packit 9741aa
{
Packit 9741aa
  unsigned i = 0;
Packit 9741aa
  FILE *fp = sp->fp;
Packit 9741aa
  int c;
Packit 9741aa
  char *buf = sp->buf;
Packit 9741aa
  unsigned buf_size = sp->buf_size;
Packit 9741aa
Packit 9741aa
  c = getc(fp);
Packit 9741aa
  if (c == EOF) {
Packit 9741aa
    input_lineno = sp->input_lineno;
Packit 9741aa
    if (ferror(fp))
Packit 9741aa
      error(E_SYSTEM);
Packit 9741aa
    return 0;
Packit 9741aa
  }
Packit 9741aa
  
Packit 9741aa
  sp->input_lineno++;
Packit 9741aa
  input_lineno = sp->input_lineno;
Packit 9741aa
  for (;;) {
Packit 9741aa
    if (i >= buf_size) {
Packit 9741aa
      if (buf_size == 0)
Packit 9741aa
	buf_size = 24;
Packit 9741aa
      else if (buf_size > (unsigned)UINT_MAX/2) {
Packit 9741aa
	if (buf_size == (unsigned)UINT_MAX)
Packit 9741aa
	  error(E_LINELENGTH);
Packit 9741aa
	buf_size = (unsigned)UINT_MAX;
Packit 9741aa
      }
Packit 9741aa
      else
Packit 9741aa
	buf_size *= 2;
Packit 9741aa
      buf = xrealloc(buf, buf_size);
Packit 9741aa
      sp->buf = buf;
Packit 9741aa
      sp->buf_size = buf_size;
Packit 9741aa
    }
Packit 9741aa
    if (c == '\0')
Packit 9741aa
      error(E_NUL);
Packit 9741aa
    if (c == '\n') {
Packit 9741aa
      buf[i] = '\0';
Packit 9741aa
      break;
Packit 9741aa
    }
Packit 9741aa
    buf[i++] = c;
Packit 9741aa
    c = getc(fp);
Packit 9741aa
    if (c == EOF) {
Packit 9741aa
      if (ferror(fp))
Packit 9741aa
	error(E_SYSTEM);
Packit 9741aa
      else
Packit 9741aa
	error(E_EOF);
Packit 9741aa
    }
Packit 9741aa
  }
Packit 9741aa
  return 1;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
notation_s *lookup_notation(sp, name)
Packit 9741aa
struct sgmls *sp;
Packit 9741aa
char *name;
Packit 9741aa
{
Packit 9741aa
  struct notation_list *p
Packit 9741aa
    = (struct notation_list *)list_find((struct list *)sp->notations, name,
Packit 9741aa
					sp->subdoc_level);
Packit 9741aa
  if (!p)
Packit 9741aa
    error(E_BADNOTATION);
Packit 9741aa
  return &p->notation;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
entity_s *lookup_entity(sp, name)
Packit 9741aa
struct sgmls *sp;
Packit 9741aa
char *name;
Packit 9741aa
{
Packit 9741aa
  struct entity_list *p
Packit 9741aa
    = (struct entity_list *)list_find((struct list *)sp->entities, name,
Packit 9741aa
				      sp->subdoc_level);
Packit 9741aa
  if (!p)
Packit 9741aa
    error(E_BADENTITY);
Packit 9741aa
  return &p->entity;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
external_entity_s *lookup_external_entity(sp, name)
Packit 9741aa
struct sgmls *sp;
Packit 9741aa
char *name;
Packit 9741aa
{
Packit 9741aa
  entity_s *p = lookup_entity(sp, name);
Packit 9741aa
  if (p->is_internal)
Packit 9741aa
    error(E_INTERNALENTITY);
Packit 9741aa
  return &p->u.external;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
void define_external_entity(sp, e)
Packit 9741aa
struct sgmls *sp;
Packit 9741aa
external_entity_s *e;
Packit 9741aa
{
Packit 9741aa
  struct entity_list *p;
Packit 9741aa
  e->attributes = 0;
Packit 9741aa
  e->filenames = sp->files;
Packit 9741aa
  e->nfilenames = sp->nfiles;
Packit 9741aa
  sp->files = 0;
Packit 9741aa
  sp->nfiles = 0;
Packit 9741aa
  e->pubid = sp->pubid;
Packit 9741aa
  sp->pubid = 0;
Packit 9741aa
  e->sysid = sp->sysid;
Packit 9741aa
  sp->sysid = 0;
Packit 9741aa
  p = (struct entity_list *)xmalloc(sizeof(struct entity_list));
Packit 9741aa
  memcpy((UNIV)&p->entity.u.external, (UNIV)e, sizeof(*e));
Packit 9741aa
  p->entity.is_internal = 0;
Packit 9741aa
  p->subdoc_level = sp->subdoc_level;
Packit 9741aa
  p->next = sp->entities;
Packit 9741aa
  sp->entities = p;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
void define_internal_entity(sp, e)
Packit 9741aa
struct sgmls *sp;
Packit 9741aa
internal_entity_s *e;
Packit 9741aa
{
Packit 9741aa
  struct entity_list *p;
Packit 9741aa
  p = (struct entity_list *)xmalloc(sizeof(struct entity_list));
Packit 9741aa
  memcpy((UNIV)&p->entity.u.internal, (UNIV)e, sizeof(*e));
Packit 9741aa
  p->entity.is_internal = 1;
Packit 9741aa
  p->subdoc_level = sp->subdoc_level;
Packit 9741aa
  p->next = sp->entities;
Packit 9741aa
  sp->entities = p;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
void define_notation(sp, np)
Packit 9741aa
struct sgmls *sp;
Packit 9741aa
notation_s *np;
Packit 9741aa
{
Packit 9741aa
  struct notation_list *p;
Packit 9741aa
  np->sysid = sp->sysid;
Packit 9741aa
  sp->sysid = 0;
Packit 9741aa
  np->pubid = sp->pubid;
Packit 9741aa
  sp->pubid = 0;
Packit 9741aa
  p = (struct notation_list *)xmalloc(sizeof(struct notation_list));
Packit 9741aa
  memcpy((UNIV)&p->notation, (UNIV)np, sizeof(*np));
Packit 9741aa
  p->subdoc_level = sp->subdoc_level;
Packit 9741aa
  p->next = sp->notations;
Packit 9741aa
  sp->notations = p;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
struct list *list_find(p, name, level)
Packit 9741aa
     struct list *p;
Packit 9741aa
     char *name;
Packit 9741aa
     int level;
Packit 9741aa
{
Packit 9741aa
  for (; p && p->subdoc_level == level; p = p->next)
Packit 9741aa
    if (strcmp(p->name, name) == 0)
Packit 9741aa
      return p;
Packit 9741aa
  return 0;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
/* Move all the items in the list whose subdoc level is level to the
Packit 9741aa
end of the list and make their subdoc_level -1. */
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
void list_finish_level(listp, level)
Packit 9741aa
     struct list **listp;
Packit 9741aa
     int level;
Packit 9741aa
{
Packit 9741aa
  struct list **pp, *next_level, *old_level;
Packit 9741aa
  for (pp = listp; *pp && (*pp)->subdoc_level == level; pp = &(*pp)->next)
Packit 9741aa
    (*pp)->subdoc_level = -1;
Packit 9741aa
  next_level = *pp;
Packit 9741aa
  *pp = 0;
Packit 9741aa
  old_level = *listp;
Packit 9741aa
  *listp = next_level;
Packit 9741aa
  for (pp = listp; *pp; pp = &(*pp)->next)
Packit 9741aa
    ;
Packit 9741aa
  *pp = old_level;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
void add_attribute(pp, a)
Packit 9741aa
     attribute_s **pp, *a;
Packit 9741aa
{
Packit 9741aa
  for (; *pp && strcmp((*pp)->name, a->name) < 0; pp = &(*pp)->next)
Packit 9741aa
    ;
Packit 9741aa
  a->next = *pp;
Packit 9741aa
  *pp = a;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
     
Packit 9741aa
/* Return a copy of the string S in storage obtained from xmalloc().
   A null S is handed back unchanged. */

static
char *strsave(s)
char *s;
{
  char *copy;

  if (s == 0)
    return s;
  copy = xmalloc(strlen(s) + 1);	/* + 1 for the terminating null */
  strcpy(copy, s);
  return copy;
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
UNIV xmalloc(n)
Packit 9741aa
  unsigned n;
Packit 9741aa
{
Packit 9741aa
  UNIV p = malloc(n);
Packit 9741aa
  if (!p)
Packit 9741aa
    error(E_NOMEM);
Packit 9741aa
  return p;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
/* ANSI C says first argument to realloc can be NULL, but not everybody
Packit 9741aa
   appears to support this. */
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
UNIV xrealloc(p, n)
Packit 9741aa
     UNIV p;
Packit 9741aa
     unsigned n;
Packit 9741aa
{
Packit 9741aa
  p = p ? realloc(p, n) : malloc(n);
Packit 9741aa
  if (!p)
Packit 9741aa
    error(E_NOMEM);
Packit 9741aa
  return p;
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
static
Packit 9741aa
void error(num)
Packit 9741aa
     enum error_code num;
Packit 9741aa
{
Packit 9741aa
  (*errhandler)((int)num, errlist[num], input_lineno);
Packit 9741aa
  abort();
Packit 9741aa
}
Packit 9741aa
Packit 9741aa
/* Error handler that prints LINENO and MSG on stderr and then exits
   with status 1.  NUM is accepted but not used. */

static
void default_errhandler(num, msg, lineno)
     int num;
     char *msg;
     unsigned long lineno;
{
  FILE *out = stderr;

  fprintf(out, "Line %lu: %s\n", lineno, msg);
  exit(1);
}