Blame rtf-fix/rtf2rtf.l

Packit 9741aa
/* -*- c -*- */
Packit 9741aa
Packit 9741aa
/* Butchered version of html2html.l to post-process RTF files
Packit 9741aa
 *
Packit 9741aa
 * Steve Tynor (tynor@atlanta.twr.com). I'm not proud of this code. It's a hack
Packit 9741aa
 * foisted upon a hack, nested within another hack or two.  It sort of works
Packit 9741aa
 * well enough for my purposes (generating WinHelp .RTF files for my documents),
Packit 9741aa
 * but it could definitely use a redesign/rewrite. I shamelessly blame the RTF
Packit 9741aa
 * format for most of the hackery here -- RTF is not a _language_ like LaTeX or
Packit 9741aa
 * ROFF -- it's just a file format. So, we can't rely on RTF to do even simple
Packit 9741aa
 * things like "insert a paragraph break here only if the previous token was not
Packit 9741aa
 * also a paragraph break". Since the SGML front end has no conditional processing
Packit 9741aa
 * capabilities, multiple blank lines in the SGML get translated to multiple para
Packit 9741aa
 * breaks in the RTF (actually, that's why I use the <@@par> crap....).
Packit 9741aa
 *
Packit 9741aa
 * XREF: BROKEN_DELIMITERS:  We say "{}K{\footnote .." instead of just
Packit 9741aa
 * "\nK{\footnote .." since the MicroS$ft RTF parser does not (always?) treat
Packit 9741aa
 * white space as a delimiter before a hyperlink markup.
Packit 9741aa
 *
Packit 9741aa
 * ESR reordered the @@indent rules to eliminate a generation-time warning.
Packit 9741aa
 */
Packit 9741aa
Packit 9741aa
%{
Packit 9741aa
Packit 9741aa
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Packit 9741aa
Packit 9741aa
/* #define DEBUG */
Packit 9741aa
Packit 9741aa
  /* NOTE: must be kept in sync with the defs in mapping */
Packit 9741aa
#define LEFTMARGIN 500
Packit 9741aa
#define NEWPAR "\\par\\pard\\plain\\li500\\sb200\\fi0"
Packit 9741aa
#define INDENT_AMT 500 /* twips for each level of indentation */
Packit 9741aa
  
Packit 9741aa
const static char* header =
Packit 9741aa
    "{\\rtf1\\ansi\n" 
Packit 9741aa
    "{\\fonttbl\n" 
Packit 9741aa
    "{\\f0\\fswiss Helvetica;}\n" 
Packit 9741aa
    "{\\f1\\fmodern Courier;}\n" 
Packit 9741aa
    "{\\f2\\froman Times;}\n" 
Packit 9741aa
    "{\\f3\\froman Symbol;}\n" 
Packit 9741aa
    "}\n" 
Packit 9741aa
    "{\\info{\\comment Translated from SGML to RTF w/ Linuxdoc-SGML}}\n"
Packit 9741aa
    ;
Packit 9741aa
Packit 9741aa
/* Page-break policy.  1 (the default, backwards compatible) makes second-level
 * headings continue on the current page; main() clears it for "-2", after
 * which those headings are passed newpage = 1. */
int big_page_mode = 1;

/* <report> style uses <chapt> as the top-level section; <article> uses
 * <sect>.  We need to know whether a <sect> should be numbered as "n"
 * or "n.m".  We assume <article> mode unless we see a <chapt>. */
int chapter_mode = 0;

/* Running counter used to build the TOC_<n> hyperlink targets. */
int section_id = 0;

/* Set once <@@appendix> is seen; put_secnum() then prints letters. */
int in_appendix = 0;

/* Body output stream and table-of-contents stream (both start as stdout). */
FILE *out;
FILE *idx_fp;

/* Scratch buffers.  Only sectname, ssectname, firstname and outname are
 * referenced by the code visible in this file. */
char buf[2048];
char sectname[256];
char labeltxt[256];
char firstname[256];
char outname[256];
char ssectname[256];
char Tmp1[4096];
char Tmp2[256];
char headname[256];
char urlname[256];
char Headbuf[4096];
char refname[256];

/* Scanner state. */
int indent = 0;        /* current nesting depth, in units of INDENT_AMT */
int nofill = 0;        /* non-zero between <@@nofill> and <@@endnofill> */
int filenum = 1;
int filecount = 1;
int tmpfn;
int secnr = 0;         /* top-level section number */
int ssecnr;            /* second-level section number */
int current;           /* CHAPTER, SECTION or SUBSECT of the last heading */
int skipnewline = 0;   /* when non-zero, plain text and newlines are dropped */
Packit 9741aa
Packit 9741aa
extern char *yytext;
Packit 9741aa
Packit 9741aa
#if (YY_FLEX_MAJOR_VERSION == 2 && YY_FLEX_MINOR_VERSION == 5 && YY_FLEX_SUBMINOR_VERSION > 35) \
Packit 9741aa
 || (YY_FLEX_MAJOR_VERSION == 2 && YY_FLEX_MINOR_VERSION == 6 && YY_FLEX_SUBMINOR_VERSION == 0)
Packit 9741aa
extern yy_size_t yyleng;
Packit 9741aa
#else
Packit 9741aa
extern int yyleng;
Packit 9741aa
#endif
Packit 9741aa
Packit 9741aa
#define CHAPTER 1
Packit 9741aa
#define SECTION 2
Packit 9741aa
#define SUBSECT 3
Packit 9741aa
Packit 9741aa
/*
 * Report an unrecoverable error on stderr and terminate with exit status 1.
 *
 *   fmt - printf-style format handed to fprintf(); the caller in this file
 *         passes a literal with a single %s conversion
 *   arg - string substituted into fmt
 *
 * The formatted message is followed by perror()'s description of errno.
 * errno is captured on entry and restored just before perror() because the
 * intervening fprintf() calls are permitted to modify it, which would make
 * the reported reason unrelated to the failure being reported.
 */
void fatal_perror ( char *fmt, char *arg )
  {
    int saved_errno = errno;

    fprintf( stderr, "rtf2rtf: Fatal: " );
    fprintf( stderr, fmt, arg);
    errno = saved_errno;
    perror( "\nReason");
    exit(1);
  }
Packit 9741aa
Packit 9741aa
int getstr( char *str, int n )
Packit 9741aa
  {
Packit 9741aa
#ifdef DEBUG
Packit 9741aa
    fprintf( stderr, "DEBUG: yytext: <<%s>>\n", yytext );
Packit 9741aa
#endif
Packit 9741aa
    if( strlen( yytext ) == n )
Packit 9741aa
      return 0;
Packit 9741aa
    strncpy( str, yytext+n, yyleng-n);
Packit 9741aa
    str[ yyleng-n ] = 0;
Packit 9741aa
    return 1;
Packit 9741aa
  }
Packit 9741aa
Packit 9741aa
/*
 * Write str to fp as a topic id: trailing spaces are removed and every
 * character other than an ASCII letter or digit is written as '_'.
 *
 *   fp  - destination stream
 *   str - text to convert; its trailing spaces are trimmed IN PLACE, so the
 *         caller's buffer is modified
 *
 * An empty or all-space string produces no output.  The length is tracked
 * explicitly so the trim loop can never index before the start of str.
 */
void
put_reference (FILE *fp, char *str)
  {
    static const char legal[] =
      "abcdefghijklmnopqrstuvwxyz"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "0123456789";
    size_t len = strlen (str);
    const char *p;

    /* trim trailing spaces: */
    while (len > 0 && str[len - 1] == ' ') {
      str[--len] = '\0';
    }
    /* replace non-legal topic-id characters with underscores */
    for (p = str; *p != '\0'; p++) {
      fputc (strchr (legal, *p) ? *p : '_', fp);
    }
  }
Packit 9741aa
Packit 9741aa
void 
Packit 9741aa
put_secnum (FILE *fp, int s1, int s2)
Packit 9741aa
  {
Packit 9741aa
    if (in_appendix) {
Packit 9741aa
      if (s2 == 0) {
Packit 9741aa
	fprintf (fp, "%c", 'A'-1+s1);
Packit 9741aa
      } else {
Packit 9741aa
	fprintf (fp, "%c.%d", 'A'-1+s1, s2);
Packit 9741aa
      }
Packit 9741aa
    } else {
Packit 9741aa
      if (s2 == 0) {
Packit 9741aa
	fprintf (fp, "%d", s1);
Packit 9741aa
      } else {
Packit 9741aa
	fprintf (fp, "%d.%d", s1, s2);
Packit 9741aa
      }
Packit 9741aa
    }
Packit 9741aa
  }
Packit 9741aa
   
Packit 9741aa
void
Packit 9741aa
heading (char *str, int s1, int s2, int newpage)
Packit 9741aa
   {
Packit 9741aa
     if (newpage && (out == stdout))
Packit 9741aa
       {
Packit 9741aa
	 sprintf (outname, "%s-0.rtf", firstname);
Packit 9741aa
	 out = fopen (outname, "w");
Packit 9741aa
	 if (! out )
Packit 9741aa
	   {
Packit 9741aa
	     fatal_perror ("Could not open file \"%s\"", outname);
Packit 9741aa
	   }	 
Packit 9741aa
    	 fputs (header, out);
Packit 9741aa
       }  
Packit 9741aa
Packit 9741aa
     /*     fprintf (out, "\n\\page{\\pard\\plain\\s2\\f0\\fs%d\\b\\fi0\\li576\\sb160\\sa160\\tx576 { }",
Packit 9741aa
	    (s2==0 ? 36 : 24) ); */
Packit 9741aa
Packit 9741aa
     if (newpage)
Packit 9741aa
       {
Packit 9741aa
	 fprintf (out, "\n\\page{\\pard\\plain\\keepn\\f0\\fs%d\\b\\tx576{",
Packit 9741aa
		  (s2==0 ? 36 : 24) );
Packit 9741aa
       }
Packit 9741aa
     else
Packit 9741aa
       {
Packit 9741aa
	 fprintf (out, "\n{\\pard\\sb200\\plain\\f0\\fs%d\\b\\tx576{",
Packit 9741aa
		  (s2==0 ? 36 : 24) );
Packit 9741aa
       }
Packit 9741aa
     put_secnum (out, s1, s2);
Packit 9741aa
     fprintf (out, " %s}}", str);
Packit 9741aa
Packit 9741aa
     /* Used as a target for the link on the TOC page. Use a SECT_ prefix
Packit 9741aa
	just in case the document has any explicit <label>'s with the same
Packit 9741aa
	`id' as a section header */
Packit 9741aa
     /* XREF: BROKEN_DELIMITERS: See above for why we say "{}#{\footnote .."
Packit 9741aa
      * instead of just "\n#{\footnote ..".
Packit 9741aa
      */
Packit 9741aa
     fprintf (out, "\n{}#{\\footnote TOC_%d}\n", section_id);
Packit 9741aa
     if (newpage)
Packit 9741aa
       {
Packit 9741aa
	 fprintf (out, "\n${\\footnote ");
Packit 9741aa
	 put_reference (out, str);
Packit 9741aa
	 fprintf (out, "}\n");
Packit 9741aa
	 fprintf (out, "+{\\footnote browse}\n");
Packit 9741aa
	 fprintf (out, "%s\n", NEWPAR);
Packit 9741aa
       }
Packit 9741aa
     /* XREF: BROKEN_DELIMITERS: See above for why we say "{}K{\footnote .."
Packit 9741aa
      * instead of just "\nK{\footnote ..".
Packit 9741aa
      */
Packit 9741aa
     fprintf (out, "{}K{\\footnote %s}\n", str);
Packit 9741aa
/*     fprintf (out, "\\par\\pard\n");*/
Packit 9741aa
Packit 9741aa
     fprintf (idx_fp, "\\li%d\\fi-200{\\f3\\\'B7} ",
Packit 9741aa
	      (s2==0 ? 1 : 2) * 500);
Packit 9741aa
     put_secnum (idx_fp, s1, s2);
Packit 9741aa
     fprintf (idx_fp, " {\\uldb ");
Packit 9741aa
     fprintf (idx_fp, "%s}{\\v TOC_%d}\\par\n", str, section_id);
Packit 9741aa
     section_id++;
Packit 9741aa
   }
Packit 9741aa
  
Packit 9741aa
void
Packit 9741aa
subheading (char *str, int chapter_size)
Packit 9741aa
  {
Packit 9741aa
    /*     fprintf (fp, "\n{\\pard\\plain\\s2\\f0\\fs%d\\b \\fi0\\li576\\sb160\\sa160\\tx576 { }%s}\n", 
Packit 9741aa
	   (chapter_size ? 36 : 24), str);*/
Packit 9741aa
Packit 9741aa
    fprintf (out, "\n{\\pard\\plain\\f0\\sb200\\fs%d\\b\\tx576{%s}}\n", 
Packit 9741aa
	     (chapter_size ? 36 : 24), str);
Packit 9741aa
     /* XREF: BROKEN_DELIMITERS: See above for why we say "{}K{\footnote .."
Packit 9741aa
      * instead of just "\nK{\footnote ..".
Packit 9741aa
      */
Packit 9741aa
    fprintf (out, "{}K{\\footnote %s}%s\n", str, NEWPAR);
Packit 9741aa
  }
Packit 9741aa
  
Packit 9741aa
%}
Packit 9741aa
Packit 9741aa
%s SECT
Packit 9741aa
Packit 9741aa
%%
Packit 9741aa
Packit 9741aa
"<@@indent+>\n"		{
  /* A lone indent-open marker only deepens the nesting; nothing is written. */
  indent += 1;
}
"<@@indent->\n"		{
  /* A lone indent-close marker only reduces the nesting; nothing is written. */
  indent -= 1;
}
"<@@indent>\n"		{
  /* Emit the left-indent control word for the current nesting depth. */
  const int left = LEFTMARGIN + indent * INDENT_AMT;
  fprintf( out, "\\li%d{}", left );
}
Packit 9741aa
Packit 9741aa
(\\par\\pard\\plain\n"<@@indent>"\n\\sb200\\fi0\n)+ {
  /* HACK: a run of one or more identical paragraph-start sequences is
   * replaced by a single one, with the <@@indent> placeholder expanded to
   * the current left indent.  It would be nicer not to generate the
   * duplicates in the first place, but that is too hard upstream.
   */
  const int left = LEFTMARGIN + indent * INDENT_AMT;

  fprintf (out, "\\par\\pard\\plain\n\\li%d{}\n\\sb200\\fi0\n", left);
}
Packit 9741aa
Packit 9741aa
(("<@@par>"\n)|("<@@indent"[\+\-]">"\n))+ {
  /* <@@par>'s and <@@indent+/->'s arrive interleaved at the start of
   * <itemize>'s and <descrip>'s.  The whole run is treated as a single
   * <@@par>: first apply every '+' / '-' found in the matched text to the
   * indent level, then emit one paragraph start at the resulting indent.
   */
  const char *scan;

  for (scan = yytext; *scan != '\0'; scan++) {
    switch (*scan) {
    case '+':
      indent++;
      break;
    case '-':
      indent--;
      break;
    default:
      break;
    }
  }
  fprintf (out, "\\par\\pard\\plain\n\\li%d{}\n\\sb200\\fi0\n",
	   LEFTMARGIN+indent*INDENT_AMT);
}
Packit 9741aa
Packit 9741aa
^"<@@chapt><@@head>".*$		{
  /* A chapter heading switches the document to chapter (<report>) mode,
   * starts a new top-level number and always begins a new page.
   */
  const int marker_len = (int) (sizeof "<@@chapt><@@head>" - 1);

  chapter_mode = 1;
  current = SECTION;
  getstr( sectname, marker_len );
  secnr += 1;
  ssecnr = 0;
  heading( sectname, secnr, 0, 1 );
}
Packit 9741aa
Packit 9741aa
^"<@@sect><@@head>".*$		{
  /* In article mode a <sect> is the top level ("n", always a new page);
   * in chapter mode it is the second level ("n.m", new page only when
   * big_page_mode is off).
   */
  const int marker_len = (int) (sizeof "<@@sect><@@head>" - 1);

  if (!chapter_mode)
    {
      getstr( sectname, marker_len );
      current = CHAPTER;
      secnr += 1;
      ssecnr = 0;
      heading( sectname, secnr, 0, 1 );
    }
  else
    {
      getstr( ssectname, marker_len );
      current = SECTION;
      ssecnr += 1;
      heading( ssectname, secnr, ssecnr, !big_page_mode );
    }
}
Packit 9741aa
Packit 9741aa
^"<@@ssect><@@head>".*$		{
  /* In chapter mode a <ssect> is too deep to be numbered and becomes a
   * plain subheading; in article mode it is the numbered second level.
   */
  const int marker_len = (int) (sizeof "<@@ssect><@@head>" - 1);

  getstr( ssectname, marker_len );
  current = SUBSECT;
  if (chapter_mode)
    {
      subheading( ssectname, 0 );
    }
  else
    {
      ssecnr += 1;
      heading( ssectname, secnr, ssecnr, !big_page_mode );
    }
}
Packit 9741aa
Packit 9741aa
Packit 9741aa
^"<@@appendix>".*$		{
  /* Entering the appendix: restart numbering (put_secnum prints letters
   * from now on) and add a centred "Appendix" divider to the TOC.
   */
  secnr = 0;
  ssecnr = 0;
  in_appendix = 1;
  fputs ("\\par\\pard\\qc{\\b Appendix}\\par\\pard\n", idx_fp);
}
Packit 9741aa
Packit 9741aa
^"<@@part><@@head>".*$		{
  /* A part title only appears in the TOC, as a centred bold divider. */
  const char *title = yytext + (sizeof "<@@part><@@head>" - 1);

  fprintf (idx_fp, "\\par\\pard\\qc{\\b %s}\\par\\pard\n", title);
}
Packit 9741aa
Packit 9741aa
^"<@@head>".*$		{
  /* A bare <@@head> (not preceded by a section marker) becomes an
   * unnumbered, normal-size subheading.
   */
  const int marker_len = (int) (sizeof "<@@head>" - 1);

  current = SUBSECT;
  getstr( ssectname, marker_len );
  subheading( ssectname, 0 );
}
Packit 9741aa
Packit 9741aa
^"<@@label>".*$		{
  /* Emit the label twice: as a '#' footnote and as a 'K' footnote.
   * WinHelp uses K footnotes for references coming from outside the .HLP
   * file (e.g. via the WinHelp API).
   *
   * XREF: BROKEN_DELIMITERS: See the file header for why each one is
   * introduced with "{}" instead of a newline.
   */
  static const char footnote_kinds[] = "#K";
  char *label = yytext + (sizeof "<@@label>" - 1);
  const char *kind;

  for (kind = footnote_kinds; *kind != '\0'; kind++) {
    fprintf (out, "{}%c{\\footnote ", *kind);
    put_reference (out, label);
    fprintf (out, "}\n");
  }
}
Packit 9741aa
Packit 9741aa
^"<@@title>".*$		{
  /* The document title is rendered as a chapter-size subheading. */
  const int marker_len = (int) (sizeof "<@@title>" - 1);

  getstr( ssectname, marker_len );
  subheading( ssectname, 1 );
}
Packit 9741aa
Packit 9741aa
"<@@nofill>\n"		{
  /* Start of verbatim text: indent one level, keep the paragraph together,
   * and make the newline rule emit \line until <@@endnofill>. */
  indent += 1;
  nofill = 1;
  fprintf( out, "\\par\\pard\\sb200\\keep\\li%d{}", LEFTMARGIN+indent*INDENT_AMT);
}
"<@@endnofill>\n"	{
  /* End of verbatim text: undo the extra indent and resume normal filling. */
  indent -= 1;
  nofill = 0;
  fprintf( out, "\\par\\pard\\sb200\\li%d{}", LEFTMARGIN+indent*INDENT_AMT);
}
Packit 9741aa
Packit 9741aa
..*			{
  /* Any other non-empty run of text on a line is copied through unchanged,
   * unless skipnewline is set, in which case it is dropped.
   */
  if (!skipnewline) {
    if (out == stdout) {
      ECHO;
    } else {
      fprintf( out, "%s", yytext );
    }
  }
}
Packit 9741aa
Packit 9741aa
\n			{
  /* End of an input line.  Inside a nofill block a \line control word is
   * written first so the break survives in the output.  The newline itself
   * is then copied through unless skipnewline is set.
   *
   * (Appending a space here was tried and disabled: RTF does not treat EOL
   * as whitespace and will concatenate tokens spanning lines.)
   */
  if (nofill)
    {
      fprintf( out, "\\line");
    }
  if (!skipnewline)
    {
      if (out == stdout)
	{
	  ECHO;
	}
      else
	{
	  fprintf( out, "%s", yytext );
	}
    }
}
Packit 9741aa
Packit 9741aa
Packit 9741aa
%%
Packit 9741aa
Packit 9741aa
int
Packit 9741aa
main( int argc, char **argv )
Packit 9741aa
{
Packit 9741aa
  int i;
Packit 9741aa
Packit 9741aa
  out = stdout;
Packit 9741aa
  idx_fp = stdout;
Packit 9741aa
Packit 9741aa
  strcpy( firstname, "RTF" );
Packit 9741aa
Packit 9741aa
  for (i = 1; i < argc; i++)
Packit 9741aa
    {
Packit 9741aa
      if (0 == strcmp("-2", argv[i]))
Packit 9741aa
	{
Packit 9741aa
	  big_page_mode = 0;
Packit 9741aa
	}
Packit 9741aa
      else if (argv[i][0] != '-')
Packit 9741aa
	{
Packit 9741aa
	  strncpy( firstname, argv[i], 256 );
Packit 9741aa
	}
Packit 9741aa
      else
Packit 9741aa
	{
Packit 9741aa
	  fprintf (stderr,
Packit 9741aa
		   "%s: Warning: unhandled command line option \"%s\"\n",
Packit 9741aa
		   argv[0], argv[i]);
Packit 9741aa
	}
Packit 9741aa
    }
Packit 9741aa
Packit 9741aa
  fputs (header, out);
Packit 9741aa
  fprintf (out, "+{\\footnote browse}\n");
Packit 9741aa
  
Packit 9741aa
Packit 9741aa
  secnr=0;
Packit 9741aa
  yylex();
Packit 9741aa
Packit 9741aa
  if( out != stdout )
Packit 9741aa
    {
Packit 9741aa
      fclose( out );      
Packit 9741aa
    }
Packit 9741aa
Packit 9741aa
  fputs ("\n}\n", idx_fp);
Packit 9741aa
  fclose( idx_fp );      
Packit 9741aa
Packit 9741aa
  exit( 0 );
Packit 9741aa
}
Packit 9741aa

Packit 9741aa
/* Use the existing indentation style:
Packit 9741aa
 Local variables:
Packit 9741aa
 c-indent-level: 2
Packit 9741aa
 c-continued-statement-offset: 2
Packit 9741aa
 c-brace-offset: 0
Packit 9741aa
 c-label-offset: -2
Packit 9741aa
 End:
Packit 9741aa
 */