/* rast.c
   Translate sgmls output to RAST result format.

   Written by James Clark (jjc@jclark.com). */

#include "config.h"
#include "std.h"
#include "sgmls.h"
#include "getopt.h"

/* P(parms) wraps a prototype parameter list: it expands to the list
   itself when USE_PROTOTYPES is defined and to an empty `()' list
   otherwise. */
#ifdef USE_PROTOTYPES
#define P(parms) parms
#else
#define P(parms) ()
#endif

/* NO_RETURN prefixes the declaration and definition of error().
   Under GCC it expands to `volatile'; elsewhere it expands to nothing.
   NOTE(review): `volatile' on a function's return type looks like the
   old GCC spelling for a function that does not return -- confirm that
   the compilers in use still honour it. */
#ifdef __GNUC__
#define NO_RETURN volatile
#else
#define NO_RETURN /* as nothing */
#endif

/* VP(parms) is the variant of P() used for the variadic error():
   when VARARGS is defined the parameter list is always left empty,
   otherwise it behaves exactly like P(). */
#ifdef VARARGS
#define VP(parms) ()
#else
#define VP(parms) P(parms)
#endif

/* ISASCII(c) is tested before isprint() in output_char().  It calls
   isascii() when USE_ISASCII is defined and is always true otherwise. */
#ifdef USE_ISASCII
#define ISASCII(c) isascii(c)
#else
#define ISASCII(c) (1)
#endif

/* Fatal diagnostic: prints a message on stderr and exits. */
NO_RETURN void error VP((char *,...));

/* Driver helpers. */
static void input_error P((int, char *, unsigned long));
static int do_file P((FILE *));
static void usage P((void));

/* Output routines; all of them write to outfp.
   (output_external_entity_info is declared twice below; the repeated
   declaration is identical.) */
static void output_processing_instruction P((char *, unsigned));
static void output_data P((struct sgmls_data *, int));
static void output_data_lines P((char *, unsigned));
static void output_internal_sdata P((char *, unsigned));
static void output_external_entity P((struct sgmls_external_entity *));
static void output_external_entity_info P((struct sgmls_external_entity *));
static void output_element_start P((char *, struct sgmls_attribute *));
static void output_element_end P((char *));
static void output_attribute P((struct sgmls_attribute *));
static void output_tokens P((char **, int));
static void output_markup_chars P((char *, unsigned));
static void output_markup_string P((char *));
static void output_char P((int, int));
static void output_flush P((int));
static void output_external_id P((char *, char *));
static void output_entity P((struct sgmls_entity *));
static void output_external_entity_info P((struct sgmls_external_entity *));
static void output_internal_entity P((struct sgmls_internal_entity *));

/* Terminate a partially written output line: markup lines are delimited
   with `!', data lines with `|'. */
#define output_flush_markup() output_flush('!')
#define output_flush_data() output_flush('|')

/* Stream all output routines write to; main() points it at the -o file
   or at a temporary file. */
static FILE *outfp;
/* Number of characters written so far on the current delimited line;
   maintained by output_char() and reset by output_flush(). */
static int char_count = 0;
/* argv[0], used as a prefix in diagnostics and in the usage message. */
static char *program_name;

int main(argc, argv)
     int argc;
     char **argv;
{
  int c;
  int opt;
  char *output_file = 0;

  program_name = argv[0];

  while ((opt = getopt(argc, argv, "o:")) != EOF)
    switch (opt) {
    case 'o':
      output_file = optarg;
      break;
    case '?':
      usage();
    default:
      abort();
    }

  if (output_file) {
    errno = 0;
    outfp = fopen(output_file, "w");
    if (!outfp)
      error("couldn't open `%s' for output: %s", strerror(errno));
  }
  else {
    outfp = tmpfile();
    if (!outfp)
      error("couldn't create temporary file: %s", strerror(errno));
  }

  if (argc - optind > 1)
    usage();

  if (argc - optind == 1) {
    if (!freopen(argv[optind], "r", stdin))
      error("couldn't open `%s' for input: %s", argv[optind], strerror(errno));
  }

  (void)sgmls_set_errhandler(input_error);

  if (!do_file(stdin)) {
    fclose(outfp);
    if (output_file) {
      if (!freopen(output_file, "w", stdout))
	error("couldn't reopen `%s' for output: %s", strerror(errno));
    }
    fputs("#ERROR\n", stdout);
    exit(EXIT_FAILURE);
  }

  if (output_file) {
    errno = 0;
    if (fclose(outfp) == EOF)
      error("error closing `%s': %s", output_file, strerror(errno));
  }
  else {
    errno = 0;
    if (fseek(outfp, 0L, SEEK_SET))
      error("couldn't rewind temporary file: %s", strerror(errno));
    while ((c = getc(outfp)) != EOF)
      if (putchar(c) == EOF)
	error("error writing standard output: %s", strerror(errno));
  }
  exit(EXIT_SUCCESS);
}

/* Print a one-line synopsis of the command line on stderr, prefixed
   with the program name, and exit with EXIT_FAILURE.  Never returns. */
static void usage()
{
  fprintf(stderr, "usage: %s [-o output_file] [input_file]\n", program_name);
  exit(EXIT_FAILURE);
}

/* Read events from fp through the sgmls event interface and write the
   corresponding output for each one.

   Data, external entity, processing instruction, element start and
   element end events are each passed to the matching output routine.
   For a SUBSTART event the entity is written and then every event up
   to the matching SUBEND is consumed without producing output (nested
   SUBSTART/SUBEND pairs are counted; attribute lists of skipped START
   events are still freed).  APPINFO events are ignored.

   Returns 1 if a CONFORMING event was seen, 0 otherwise.  Also returns
   0 if the input ends in the middle of a subdocument.  An unknown
   event type aborts the program. */
static
int do_file(fp)
     FILE *fp;
{
  struct sgmls *sp;
  struct sgmls_event e;
  int conforming = 0;

  sp = sgmls_create(fp);
  while (sgmls_next(sp, &e))
    switch (e.type) {
    case SGMLS_EVENT_DATA:
      output_data(e.u.data.v, e.u.data.n);
      break;
    case SGMLS_EVENT_ENTITY:
      output_external_entity(e.u.entity);
      break;
    case SGMLS_EVENT_PI:
      output_processing_instruction(e.u.pi.s, e.u.pi.len);
      break;
    case SGMLS_EVENT_START:
      output_element_start(e.u.start.gi, e.u.start.attributes);
      sgmls_free_attributes(e.u.start.attributes);
      break;
    case SGMLS_EVENT_END:
      output_element_end(e.u.end.gi);
      break;
    case SGMLS_EVENT_SUBSTART:
      {
        int level = 1;
        output_external_entity(e.u.entity);
        while (level > 0) {
          if (!sgmls_next(sp, &e)) {
            /* Input ended inside the subdocument: release the parser
               state before reporting failure. */
            sgmls_free(sp);
            return 0;
          }
          switch (e.type) {
          case SGMLS_EVENT_SUBSTART:
            level++;
            break;
          case SGMLS_EVENT_SUBEND:
            level--;
            break;
          case SGMLS_EVENT_START:
            /* The element is skipped, but its attribute list must
               still be freed. */
            sgmls_free_attributes(e.u.start.attributes);
            break;
          default:
            /* prevent compiler warnings */
            break;
          }
        }
      }
      break;
    case SGMLS_EVENT_APPINFO:
      break;
    case SGMLS_EVENT_CONFORMING:
      conforming = 1;
      break;
    default:
      abort();
    }
  sgmls_free(sp);
  return conforming;
}

/* Write a processing instruction: "[?" followed, when the instruction
   is non-empty, by a newline and its LEN characters from S as data
   lines, and finally "]" and a newline. */
static void output_processing_instruction(s, len)
     char *s;
     unsigned len;
{
  fputs("[?", outfp);

  if (len != 0) {
    putc('\n', outfp);
    output_data_lines(s, len);
    output_flush_data();
  }

  fputs("]\n", outfp);
}

static
void output_data(v, n)
     struct sgmls_data *v;
     int n;
{
  int i;
  for (i = 0; i < n; i++) {
    if (v[i].is_sdata)
      output_internal_sdata(v[i].s, v[i].len);
    else if (v[i].len > 0)
      output_data_lines(v[i].s, v[i].len);
  }
}

/* Write the N characters starting at S as data (delimiter `|') and
   terminate any line left open.  N must be greater than zero. */
static void output_data_lines(s, n)
     char *s;
     unsigned n;
{
  assert(n > 0);

  while (n != 0) {
    output_char((unsigned char)*s, '|');
    s++;
    n--;
  }

  output_flush_data();
}

/* Write the N characters at S as an internal SDATA block: a
   "#SDATA-TEXT" line, the text as markup lines, then "#END-SDATA". */
static void output_internal_sdata(s, n)
     char *s;
     unsigned n;
{
  fputs("#SDATA-TEXT\n", outfp);

  output_markup_chars(s, n);
  output_flush_markup();

  fputs("#END-SDATA\n", outfp);
}

/* Write a reference to external entity E: "[&" and the entity name on
   one line, the entity's description, then a closing "]" line. */
static void output_external_entity(e)
     struct sgmls_external_entity *e;
{
  fprintf(outfp, "[&%s\n", e->name);

  output_external_entity_info(e);

  fputs("]\n", outfp);
}

/* Write the start of element GI: "[" and the element name, then, if
   the attribute list ATT is not empty, a newline followed by each
   attribute in list order, and finally "]" and a newline. */
static void output_element_start(gi, att)
     char *gi;
     struct sgmls_attribute *att;
{
  struct sgmls_attribute *cur = att;

  fprintf(outfp, "[%s", gi);

  if (cur)
    putc('\n', outfp);

  while (cur) {
    output_attribute(cur);
    cur = cur->next;
  }

  fputs("]\n", outfp);
}

/* Write the end of element GI as the single line "[/GI]". */
static void output_element_end(gi)
     char *gi;
{
  fprintf(outfp, "[/%s]\n", gi);
}

static
void output_attribute(p)
     struct sgmls_attribute *p;
{
  fprintf(outfp, "%s=\n", p->name);
  switch (p->type) {
  case SGMLS_ATTR_IMPLIED:
    fputs("#IMPLIED\n", outfp);
    break;
  case SGMLS_ATTR_CDATA:
    {
      struct sgmls_data *v = p->value.data.v;
      int n = p->value.data.n;
      int i;
      for (i = 0; i < n; i++)
	if (v[i].is_sdata)
	  output_internal_sdata(v[i].s, v[i].len);
	else {
	  output_markup_chars(v[i].s, v[i].len);
	  output_flush_markup();
	}
    }
    break;
  case SGMLS_ATTR_TOKEN:
    output_tokens(p->value.token.v, p->value.token.n);
    break;
  case SGMLS_ATTR_ENTITY:
    {
      int i;
      for (i = 0; i < p->value.entity.n; i++) {
	struct sgmls_entity *e = p->value.entity.v[i];
	char *name;

	if (e->is_internal)
	  name = e->u.internal.name;
	else
	  name = e->u.external.name;
	if (i > 0)
	  output_markup_string(" ");
	output_markup_string(name);
      }
      output_flush_markup();
      for (i = 0; i < p->value.entity.n; i++)
	output_entity(p->value.entity.v[i]);
    }
    break;
  case SGMLS_ATTR_NOTATION:
    output_tokens(&p->value.notation->name, 1);
    output_external_id(p->value.notation->pubid, p->value.notation->sysid);
    break;
  }
}

/* Write the N strings in V as markup, separated by single spaces, and
   terminate any line left open.  N must be greater than zero. */
static void output_tokens(v, n)
     char **v;
     int n;
{
  char **tok = v;
  int remaining;

  assert(n > 0);

  output_markup_string(*tok);
  for (remaining = n; remaining > 1; remaining--) {
    output_markup_string(" ");
    output_markup_string(*++tok);
  }

  output_flush_markup();
}

/* Write the N characters starting at S as markup (delimiter `!').
   The current line is left open; the caller flushes it. */
static void output_markup_chars(s, n)
     char *s;
     unsigned n;
{
  while (n != 0) {
    output_char((unsigned char)*s, '!');
    s++;
    n--;
  }
}

/* Write the NUL-terminated string S as markup (delimiter `!').
   The current line is left open; the caller flushes it. */
static void output_markup_string(s)
     char *s;
{
  for (; *s != '\0'; s++)
    output_char((unsigned char)*s, '!');
}

/* Write the single character C using DELIM as the line delimiter.

   A printable character is appended to the current delimited line; the
   opening delimiter is written first if the line is empty, and the
   line is closed automatically once it holds 60 characters.

   Any other character first closes the current line and is then
   written on a line of its own: "#RE", "#RS" or "#TAB" for the
   corresponding special characters, "#" followed by the decimal
   character code for everything else. */
static
void output_char(c, delim)
     int c;
     int delim;
{
  enum { MAX_LINE_CHARS = 60 };

  if (!ISASCII(c) || !isprint(c)) {
    output_flush(delim);
    switch (c) {
    case RECHAR:
      fputs("#RE\n", outfp);
      break;
    case RSCHAR:
      fputs("#RS\n", outfp);
      break;
    case TABCHAR:
      fputs("#TAB\n", outfp);
      break;
    default:
      fprintf(outfp, "#%d\n", c);
      break;
    }
    return;
  }

  if (char_count == 0)
    putc(delim, outfp);
  putc(c, outfp);
  char_count++;

  /* A full line is closed the same way as a flushed one. */
  if (char_count == MAX_LINE_CHARS)
    output_flush(delim);
}

/* Close the current delimited line, if one is open, by writing DELIM
   and a newline and resetting the character count.  Does nothing when
   no characters are pending. */
static void output_flush(delim)
     int delim;
{
  if (char_count <= 0)
    return;

  putc(delim, outfp);
  putc('\n', outfp);
  char_count = 0;
}

/* Write one part of an external identifier: the KEYWORD line followed
   either by the identifier text ID as markup lines or, when ID is the
   empty string, by the line "#EMPTY". */
static
void output_id_section(keyword, id)
     char *keyword;
     char *id;
{
  fputs(keyword, outfp);
  if (*id == '\0') {
    fputs("#EMPTY\n", outfp);
    return;
  }
  output_markup_string(id);
  output_flush_markup();
}

/* Write an external identifier.  PUBID and SYSID are the public and
   system identifiers; either may be a null pointer when absent.  When
   both are absent the lines "#SYSTEM" and "#NONE" are written;
   otherwise each identifier that is present gets its own "#PUBLIC" or
   "#SYSTEM" section, public first. */
static
void output_external_id(pubid, sysid)
  char *pubid;
  char *sysid;
{
  if (!pubid && !sysid) {
    fputs("#SYSTEM\n#NONE\n", outfp);
    return;
  }

  if (pubid)
    output_id_section("#PUBLIC\n", pubid);
  if (sysid)
    output_id_section("#SYSTEM\n", sysid);
}

/* Write the description of entity E, choosing the internal or external
   form according to its is_internal flag, followed by "#END-ENTITY".
   The trailing newline is omitted when ASIS is defined. */
static void output_entity(e)
     struct sgmls_entity *e;
{
  if (!e->is_internal)
    output_external_entity_info(&e->u.external);
  else
    output_internal_entity(&e->u.internal);

  fputs("#END-ENTITY", outfp);
#ifndef ASIS
  putc('\n', outfp);
#endif
}

/* Write the description of external entity E: a line naming its type
   (empty for an unrecognised type), then its external identifier.
   For every type except SUBDOC this is followed by a "#NOTATION=" line,
   the notation's external identifier and the entity's attributes. */
static
void output_external_entity_info(e)
     struct sgmls_external_entity *e;
{
  char *label = 0;
  struct sgmls_attribute *att;

  switch (e->type) {
  case SGMLS_ENTITY_CDATA:
    label = "#CDATA-EXTERNAL";
    break;
  case SGMLS_ENTITY_SDATA:
    label = "#SDATA-EXTERNAL";
    break;
  case SGMLS_ENTITY_NDATA:
    label = "#NDATA-EXTERNAL";
    break;
  case SGMLS_ENTITY_SUBDOC:
    label = "#SUBDOC";
    break;
  default:
    break;
  }
  if (label)
    fputs(label, outfp);
  putc('\n', outfp);

  output_external_id(e->pubid, e->sysid);

  /* A subdocument entity has no notation or attributes to report. */
  if (e->type == SGMLS_ENTITY_SUBDOC)
    return;

  fprintf(outfp, "#NOTATION=%s\n", e->notation->name);
  output_external_id(e->notation->pubid, e->notation->sysid);
  for (att = e->attributes; att; att = att->next)
    output_attribute(att);
}

/* Write the description of internal entity E: a "#SDATA-INTERNAL" or
   "#CDATA-INTERNAL" line, chosen by the is_sdata flag of its data,
   followed by the entity text as markup lines. */
static void output_internal_entity(e)
     struct sgmls_internal_entity *e;
{
  fputs(e->data.is_sdata ? "#SDATA-INTERNAL" : "#CDATA-INTERNAL", outfp);
  putc('\n', outfp);

  output_markup_chars(e->data.s, e->data.len);
  output_flush_markup();
}

/* Error handler installed with sgmls_set_errhandler(): report the
   message STR together with the input line number LINENO through
   error(), which terminates the program.  NUM is not used. */
static void input_error(num, str, lineno)
     int num;
     char *str;
     unsigned long lineno;
{
  error("Error at input line %lu: %s", lineno, str);
}

/* Report a fatal error and terminate.

   Writes the program name and ": " to stderr, then the printf-style
   MESSAGE formatted with the remaining arguments, then a newline;
   flushes stderr and exits with EXIT_FAILURE.  Never returns.

   The definition has two forms: with VARARGS defined it uses the
   va_alist/va_dcl interface and fetches MESSAGE as the first variable
   argument; otherwise it is an ordinary variadic function. */
NO_RETURN
#ifdef VARARGS
void error(va_alist) va_dcl
#else
void error(char *message,...)
#endif
{
#ifdef VARARGS
     char *message;
#endif
     va_list ap;
     
     fprintf(stderr, "%s: ", program_name);
#ifdef VARARGS
     va_start(ap);
     /* In the va_alist form the format string is itself the first
        variable argument. */
     message = va_arg(ap, char *);
#else
     va_start(ap, message);
#endif
     vfprintf(stderr, message, ap);
     va_end(ap);
     fputc('\n', stderr);
     fflush(stderr);
     exit(EXIT_FAILURE);
}