=head1 NAME
docbook2texi-spec - convert DocBook Books to a Texinfo document
=head1 DESCRIPTION
This is an sgmlspl spec file that produces a Texinfo file
from DocBook markup.
=head1 LIMITATIONS
Trying docbook2info on non-DocBook or non-conformant SGML results in
undefined behavior. :-)
This program is a slow, dodgy Perl script.
=head1 COPYRIGHT
Copyright (C) 1998-1999 Steve Cheng <steve@ggi-project.org>
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.
You should have received a copy of the GNU General Public License along with
this program; see the file COPYING. If not, please write to the Free
Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
=cut
# $Id: docbook2texi-spec.pl,v 1.1 2000/07/21 20:22:30 rosalia Exp $
use SGMLS; # Use the SGMLS package.
use SGMLS::Output; # Use stack-based output.
use SGMLS::Refs;
########################################################################
# SGMLSPL script produced automatically by the script sgmlspl.pl
#
# Document Type: BOOK
# Edited by: me :)
########################################################################
# Converter state shared by the handlers in this spec.
$nocollapse_whitespace = 0;    # Current whitespace collapse counter.
$newline_last          = 1;    # At beginning of line?
$skip_inline           = 0;    # Non-zero suppresses the inline @-commands.
$raw_cdata             = 0;    # Non-zero while save_cdata() is capturing text.
$id_counter            = 1;    # Next number handed out by generate_id().

# Base name of the ".refs" file; falls back to "db2texi" when no (true)
# argument is left to shift.
$basename = shift || 'db2texi';

# Open the reference store when parsing starts ...
sgml('start', sub {
    $Refs = SGMLS::Refs->new($basename . '.refs');
});

# ... and call its warn() method once parsing has ended.
sgml('end', sub {
    $Refs->warn();
});
########################################################################
#
# Output helpers
#
########################################################################
# Start capturing output into a string instead of emitting it.
#
# Bumps $raw_cdata (which the handlers installed by _inline() check) and
# pushes a 'string' output target.  The matching end handler is expected
# to call pop_output() and decrement $raw_cdata again.
sub save_cdata {
    $raw_cdata += 1;
    push_output('string');
}
# Copied from docbook2man.
# Texinfo's newline rules aren't so stringent, so
# perhaps we can do away with at least some of these cases for speed.
# Register a handler with sgml() while keeping $newline_last accurate.
#
#   $_[0]  event specification, handed to sgml() unchanged
#   $_[1]  either a CODE reference (registered as-is) or a literal string
#          to emit when the event fires
#
# For a literal string, a leading "\n" means "start at the beginning of a
# line": it is stripped, and a newline is only written when the output is
# not already at the start of a line.  A trailing "\n" leaves the output
# at the start of a line.
#
# Returns whatever sgml() returns.
#
# The handlers are ordinary closures; no source text is built and passed
# to eval, so the literal needs no quoting of backslashes or apostrophes.
sub texi_sgml
{
    my ($spec, $action) = @_;

    # Code handlers look after $newline_last themselves.
    return sgml($spec, $action) if ref($action) eq 'CODE';

    my $text = $action;
    my $handler;

    if ($text =~ s/^\n//) {
        if ($text eq '') {
            # Only "go to the beginning of a line".
            $handler = sub {
                output("\n") unless $newline_last++;
            };
        }
        elsif ($text =~ /\n$/) {
            # Ends with a newline: the post-increment above already leaves
            # $newline_last true.
            $handler = sub {
                output("\n") unless $newline_last++;
                output($text);
            };
        }
        else {
            $handler = sub {
                output("\n") unless $newline_last;
                output($text);
                $newline_last = 0;
            };
        }
    }
    elsif ($text =~ /\n$/) {
        $handler = sub {
            output($text);
            $newline_last = 1;
        };
    }
    else {
        $handler = sub {
            output($text);
            $newline_last = 0;
        };
    }

    return sgml($spec, $handler);
}
# Emit text through output() and keep $newline_last up to date.
#
#   $_[0]     first string; a leading "\n" means "start at the beginning
#             of a line" and is only written when the output is not
#             already there
#   $_[1..]   further strings, written verbatim after the first one
#
# If nothing is left of the first string after removing that leading
# newline, the function returns immediately and any further arguments are
# not written.  Otherwise $newline_last reflects whether the last string
# written ends in a newline.
#
# Works on lexical copies, so the caller's $_ (and anything $_ is aliased
# to, e.g. a foreach element) is left untouched.
sub texi_output
{
    my ($first, @rest) = @_;

    if ($first =~ s/^\n//) {
        output("\n") unless $newline_last++;
    }
    return if $first eq '';

    output($first);
    output(@rest) if @rest;

    my $last_written = @rest ? $rest[-1] : $first;
    $newline_last = ($last_written =~ /\n$/);
}
# Fold lines into one.
#
#   $_[0]  the string to fold (an undefined value is treated as '')
#
# Returns a copy in which every tab and newline has become a space and
# leading and trailing spaces have been removed.  The caller's $_ is not
# modified.
#
# No Texinfo escaping is done here: text collected through save_cdata()
# has already had '@', '{' and '}' escaped by the 'cdata' handler, so
# escaping it a second time would double every '@'.
# NOTE(review): attribute values passed in (e.g. XREFLABEL) therefore
# reach the output unescaped, as they did before -- confirm whether they
# can contain '@', '{' or '}'.
sub fold_string
{
    my ($text) = @_;
    $text = '' unless defined $text;

    # Join continuation lines: tabs and newlines become plain spaces.
    $text =~ tr/\t\n/  /;

    # Trim whitespace from beginning and end.
    $text =~ s/^ +//;
    $text =~ s/ +$//;

    return $text;
}
# Another version of sgml(), for 'inline' @-commands,
# and prevent nesting them.
#
#   $gi    start-tag specification, e.g. '<EMPHASIS>'
#   $tcmd  Texinfo command to wrap the content in, e.g. '@emph'
#
# Registers a start handler that writes "$tcmd{" and an end handler (for
# the same specification with '</' in place of '<') that writes "}".
# Nothing is written while $raw_cdata is set; otherwise $skip_inline
# counts the nesting depth and only the outermost element is wrapped.
#
# The handlers are closures rather than eval'ed source text, so $tcmd
# needs no quoting.
sub _inline
{
    my ($gi, $tcmd) = @_;

    my $opening = $tcmd . '{';
    sgml($gi, sub { output($opening) unless $raw_cdata or $skip_inline++ });

    my $end_gi = $gi;
    $end_gi =~ s/^</<\//;
    sgml($end_gi, sub { output('}') unless $raw_cdata or --$skip_inline });
}
# Hand out a fresh identifier of the form "ID<n>", using and then
# advancing the global $id_counter.
sub generate_id {
    my $serial = $id_counter++;
    return 'ID' . $serial;
}
########################################################################
#
# Metadata
#
########################################################################
# Start handler for the credit elements: inside BOOKINFO, capture the
# element's text instead of emitting it.
sub author_start {
    my ($element) = @_;
    return unless $element->within('BOOKINFO');
    save_cdata();
}

# End handler for the credit elements: inside BOOKINFO, turn the captured
# text into an "@author{...}" line and stop capturing.
sub author_end {
    my ($element) = @_;
    return unless $element->within('BOOKINFO');
    my $credit = fold_string(pop_output());
    texi_output('@author{' . $credit . "}\n");
    $raw_cdata--;
}

# Every kind of credit is rendered the same way.
for my $gi (qw(AUTHOR EDITOR COLLAB CORPAUTHOR OTHERCREDIT)) {
    sgml("<$gi>",  \&author_start);
    sgml("</$gi>", \&author_end);
}

# Separate the parts of a name with a single space.
for my $gi (qw(FIRSTNAME SURNAME HONORIFIC LINEAGE OTHERNAME AFFILIATION)) {
    sgml("</$gi>", ' ');
}
# Ignore content: everything inside these elements is sent to a 'nul'
# output target, which is popped again at the end tag.
for my $gi (qw(
    CONTRIB AUTHORBLURB DOCINFO
    REFSECT1INFO REFSECT2INFO REFSECT3INFO
    TITLEABBREV
)) {
    sgml("<$gi>",  sub { push_output('nul') });
    sgml("</$gi>", sub { pop_output() });
}
# The legal notice starts a new page and is pushed down with @vskip.
texi_sgml('<LEGALNOTICE>', "\n\@page\n\@vskip 0pt plus 1fill\n");

# BOOKINFO opens the title page, titled with $doc_title.
sgml('<BOOKINFO>', sub {
    texi_output("\n\@titlepage\n");
    texi_output('@title{', $doc_title, "}\n");
});

#texi_sgml('</BOOKINFO>', "\n\@end titlepage\n");
# End of BOOKINFO: close the title page, open the Top node, and prepare
# the enclosing element for its children.
texi_sgml('</BOOKINFO>', sub {
    my $book = $_[0]->parent;

    texi_output("\n\@end titlepage\n\n");
    texi_output("\@node Top\n\@top\n\n");

    # Generate top menu for BOOK.
    $book->ext->{'output_toc'} = 1;

    # This is so that elements below can find me.
    $book->ext->{'id'} = $book->attribute('ID')->value || generate_id();
});
# A title is captured as a string at <TITLE> and dispatched at </TITLE>,
# depending on where it appears:
#   - directly in BOOKINFO: written as an @subtitle line;
#   - parent has a 'title' slot in its ext hash: stored there;
#   - parent was opened by start_node() ('nodesection' is set): a new
#     @node plus its sectioning heading is written and the node is
#     recorded in $Refs;
#   - anything else: the title text is written followed by a newline
#     (and remembered in $doc_title when directly inside BOOK).
sgml('<TITLE>', \&save_cdata);
sgml('</TITLE>', sub {
    my $title = fold_string(pop_output());
    $raw_cdata--;

    if($_[0]->in('BOOKINFO')) {
        texi_output("\n\@subtitle{", $title, "\}\n");
    }
    elsif(exists $_[0]->parent->ext->{'title'}) {
        # By far the easiest case.  Just fold the string as
        # above, and then set the parent element's variable.
        $_[0]->parent->ext->{'title'} = $title;
    }
    elsif(exists $_[0]->parent->ext->{'nodesection'}) {
        # Start new node, since we now know its title.

        # This node.  (Lexical: it must not leak into other handlers.)
        my $me = $_[0]->parent;

        my $nodename = fold_string($me->attribute('XREFLABEL')->value) || $title;
        # Replace every comma, not just the first: the $Refs records
        # written below are comma-separated, and Texinfo does not accept
        # commas in node names.
        $nodename =~ s/,/ /g;

        texi_output("\n\n\@node $nodename\n");

        # The heading such as @chapter $title.
        texi_output($me->ext->{'nodesection'}, " $title\n");

        # This is so that elements below can find me.
        my $id = $me->attribute('ID')->value || generate_id();
        $me->ext->{'id'} = $id;

        # Don't overwrite info from previous parse.
        if($Refs->get($id) eq '') {
            $Refs->put($id, $nodename);

            # Add myself to parent node's list of nodes.
            my $pid = $me->parent->ext->{'id'};
            $Refs->put($pid, $Refs->get($pid) . ",$id");
        }
    }
    else {
        if($_[0]->in('BOOK')) {
            $doc_title = $title;
        }
        output($title, "\n");
        $newline_last++;
    }
});
# Appendix
# Article
########################################################################
#
# Major sectioning elements
#
########################################################################
# Open the Texinfo file.  The "@settitle " line is left unterminated;
# presumably the BOOK's TITLE text is written next and completes it.
sgml('<BOOK>', "\\input texinfo\n\@settitle ");
# Close the document: "@bye" goes on a line of its own after an empty line.
texi_sgml('</BOOK>', "\n\n\@bye\n");
# Start a new section with new node.
# Most of the handling is at </TITLE>.
#
#   $_[0]  the element that opens the section
#   $_[1]  the Texinfo sectioning command to use for its heading
#
# Remembers the sectioning command on the element and, if this is the
# first child node of its parent, writes the parent's @menu from the node
# list stored in $Refs.
sub start_node
{
    my ($element, $heading) = @_;
    my $parent = $element->parent;

    $element->ext->{'nodesection'} = $heading;

    # Flag that the first child node of this element ...
    $element->ext->{'output_toc'} = 1;

    # ... needs to output a list of nodes.  Do so now for the parent,
    # if it is still waiting for one.
    return unless $parent->ext->{'output_toc'};
    $parent->ext->{'output_toc'} = 0;

    # The stored record is comma-separated; its first field is dropped
    # and the remaining fields are the ids of the child nodes.
    my ($first_field, @child_ids) = split(/,/, $Refs->get($parent->ext->{'id'}));

    texi_output("\n\n\@menu\n");
    for my $child_id (@child_ids) {
        my ($nodename) = split(/,/, $Refs->get($child_id));
        output('* ', $nodename, "::\n");
    }
    texi_output("\n\n\@end menu\n");
}

# Sectioning elements and the Texinfo heading each one maps to.
{
    my @heading_for = (
        [ PREFACE => '@chapter' ],
        [ CHAPTER => '@chapter' ],
        [ SECT1   => '@section' ],
        [ SECT2   => '@subsection' ],
        [ SECT3   => '@subsubsection' ],
        [ SECT4   => '@subsubsection' ],
        [ SECT5   => '@subsubsection' ],
    );
    for my $entry (@heading_for) {
        my ($gi, $heading) = @$entry;
        sgml("<$gi>", sub { start_node($_[0], $heading); });
    }
}
########################################################################
#
# Reference pages
#
########################################################################
# A reference page becomes a node one level below whatever contains it.
sgml('<REFENTRY>', sub {
    my ($entry) = @_;

    # Determine what it is under ...
    # FIXME! Add more of these parents!
    my $heading = $entry->in('CHAPTER') ? '@section'
                : $entry->in('SECT1')   ? '@subsection'
                :                         '@subsubsection';  # From Sect2 and after

    start_node($entry, $heading);
});
# Inside REFMETA the entry title names the node of the element two levels
# up (the parent of REFMETA); the title is captured as a string and the
# node is written when the end tag arrives.  Elsewhere the element gets
# no special treatment here.
sgml('<REFENTRYTITLE>', sub {
    if($_[0]->in('REFMETA')) {
        save_cdata();
    }
});
sgml('</REFENTRYTITLE>', sub {
    if($_[0]->in('REFMETA')) {
        my $title = fold_string(pop_output());
        $raw_cdata--;

        # This node.
        my $me = $_[0]->parent->parent;

        my $nodename = fold_string($me->attribute('XREFLABEL')->value) || $title;
        # Replace every comma, not just the first: the $Refs records
        # written below are comma-separated, and Texinfo does not accept
        # commas in node names.
        $nodename =~ s/,/ /g;

        texi_output("\n\n\@node $nodename\n");

        # The heading such as @chapter $title.
        texi_output($me->ext->{'nodesection'}, " $title\n");

        # This is so that elements below can find me.
        my $id = $me->attribute('ID')->value || generate_id();
        $me->ext->{'id'} = $id;

        # Don't overwrite info from previous parse.
        if($Refs->get($id) eq '') {
            $Refs->put($id, $nodename);

            # Add myself to parent node's list of nodes.
            my $pid = $me->parent->ext->{'id'};
            $Refs->put($pid, $Refs->get($pid) . ",$id");
        }
    }
});
# The volume number is dropped inside REFMETA and put in parentheses
# everywhere else.
sgml('<MANVOLNUM>', sub {
    my ($vol) = @_;
    return push_output('nul') if $vol->in('REFMETA');
    texi_output('(');
});
sgml('</MANVOLNUM>', sub {
    my ($vol) = @_;
    return pop_output() if $vol->in('REFMETA');
    texi_output(')');
});

# REFMISCINFO content is discarded.
sgml('<REFMISCINFO>',  sub { push_output('nul'); });
sgml('</REFMISCINFO>', sub { pop_output(); });
# "Name" heading of a reference page; its level depends on whether the
# element containing this division sits directly in a CHAPTER.
sgml('<REFNAMEDIV>', sub {
    my ($div) = @_;
    $div->ext->{'first_refname'} = 1;

    # FIXME! Add more of these parents!
    my $cmd = $div->parent->in('CHAPTER') ? '@unnumberedsubsec'
                                          : '@unnumberedsubsubsec';
    texi_output("\n\n$cmd Name\n");
});

# Names after the first one are separated by ", ".
sgml('<REFNAME>', sub {
    my $div = $_[0]->parent;
    if ($div->ext->{'first_refname'}) {
        $div->ext->{'first_refname'} = 0;
    }
    else {
        output(', ');
    }
});

sgml('<REFPURPOSE>', ' --- ');

# RefDescriptor

# "Synopsis" heading, chosen the same way as the "Name" heading.
sgml('<REFSYNOPSISDIV>', sub {
    my ($div) = @_;

    # FIXME! Add more of these parents!
    my $cmd = $div->parent->in('CHAPTER') ? '@unnumberedsubsec'
                                          : '@unnumberedsubsubsec';
    texi_output("\n$cmd Synopsis\n");
});
# A REFSECT1 only writes the sectioning command and a trailing space; the
# text written next (its title) then lands on the same line.  The level
# depends on whether the element containing the section sits directly in
# a CHAPTER.
sgml('<REFSECT1>', sub {
    # FIXME! Add more of these parents!
    if($_[0]->parent->in('CHAPTER')) {
        texi_output("\n\n\@unnumberedsubsec ");
    } else {
        # Two real newlines, as in the branch above (this used to write
        # a literal backslash followed by "nn").
        texi_output("\n\n\@unnumberedsubsubsec ");
    }
});

# Deeper reference sections are handled as nodes by start_node().
sgml('<REFSECT2>', sub { start_node($_[0], '@unnumberedsubsubsec') });
sgml('<REFSECT3>', sub { start_node($_[0], '@unnumberedsubsubsec') });
########################################################################
#
# Synopses
#
########################################################################
# No inline @-commands inside synopses: $skip_inline is raised for the
# duration of the element.
for my $gi (qw(FUNCSYNOPSIS CMDSYNOPSIS)) {
    sgml("<$gi>",  sub { $skip_inline++ });
    sgml("</$gi>", sub { $skip_inline-- });
}

sgml('<FUNCPROTOTYPE>', "\n\n");

# Arguments to functions.  This is C convention.
texi_sgml('<PARAMDEF>',  '(');
texi_sgml('</PARAMDEF>', ');');
texi_sgml('<VOID>',      '(void);');
## ARG: should be bold, but not needed.
#
########################################################################
#
# 'Useful highlighting'
#
########################################################################
# Inline elements and the Texinfo command each one is wrapped in.
# Registered in the order listed.
{
    my @command_for = (
        [ CITETITLE   => '@cite'  ],
        [ EMAIL       => '@email' ],
        [ FIRSTTERM   => '@dfn'   ],
        [ FILENAME    => '@file'  ],
        [ ACCEL       => '@key'   ],
        [ KEYCAP      => '@key'   ],
        [ KEYCOMBO    => '@key'   ],
        [ KEYSYM      => '@key'   ],
        [ USERINPUT   => '@kbd'   ],
        [ LITERAL     => '@samp'  ],
        [ MARKUP      => '@samp'  ],
        [ SGMLTAG     => '@samp'  ],
        [ TOKEN       => '@samp'  ],
        [ CLASSNAME   => '@code'  ],
        [ ENVAR       => '@code'  ],
        [ FUNCTION    => '@code'  ],
        [ PARAMETER   => '@code'  ],    # not always
        [ RETURNVALUE => '@code'  ],
        [ STRUCTFIELD => '@code'  ],
        [ STRUCTNAME  => '@code'  ],
        [ SYMBOL      => '@code'  ],
        [ TYPE        => '@code'  ],
        [ ACRONYM     => '@sc'    ],
        [ EMPHASIS    => '@emph'  ],
    );
    for my $entry (@command_for) {
        my ($gi, $tcmd) = @$entry;
        _inline("<$gi>", $tcmd);
    }
}
# Only in certain contexts
#sgml('<REPLACEABLE>'
# OPTIONAL content is wrapped in square brackets.
sgml('<OPTIONAL>',  '[');
sgml('</OPTIONAL>', ']');

# COMMENT content stays in the output, bracketed and labelled.
sgml('<COMMENT>',  '[Comment: ');
sgml('</COMMENT>', ']');
# ACTION
# ALT
# AUTHOR
# AUTHORINITIALS
# ABBREV
# CITATION
# FOREIGNPHRASE
# PHRASE
# QUOTE
# WORDASWORD
# COMPUTEROUTPUT
# LITERAL
# MARKUP
# PROMPT
# RETURNVALUE
# SGMLTAG
# TOKEN
# DATABASE
# HARDWARE
# INTERFACE
# MEDIALABEL
# SYSTEMITEM
# Raise $skip_inline for the duration of a CITEREFENTRY, so that the
# handlers installed by _inline() write no @-commands inside it.
sgml('<CITEREFENTRY>', sub { $skip_inline++ });
sgml('</CITEREFENTRY>', sub { $skip_inline-- });
########################################################################
#
# 'Block' elements
#
########################################################################
# Start handler for paragraphs: move to the beginning of a line and then
# write an empty line as the paragraph break -- unless the parent element
# has set 'nobreak', in which case the flag is cleared and the empty line
# is skipped.
sub para_start {
    output("\n") unless $newline_last++;

    if($_[0]->parent->ext->{'nobreak'}) {
        # Usually this is the FIRST element of
        # an @item, so we MUST not do a full
        # paragraph break.
        $_[0]->parent->ext->{'nobreak'} = 0;
    } else {
        output("\n");
    }
}

# End handler for paragraphs: make sure the output is at the beginning
# of a line.
# Actually applies to a few other block elements as well
sub para_end {
    output("\n") unless $newline_last++;
}

# The handler references here had been mangled into a non-ASCII character
# (a syntax error), and <SIMPARA> was bound to the end helper with no end
# tags registered at all.  Both paragraph elements now use the start
# helper on the start tag and the end helper on the end tag.
sgml('<PARA>', \&para_start);
sgml('</PARA>', \&para_end);
sgml('<SIMPARA>', \&para_start);
sgml('</SIMPARA>', \&para_end);

# An EXAMPLE writes an unnumbered heading command; the text written next
# continues on that line.
texi_sgml('<EXAMPLE>', "\n\@unnumberedsubsubsec ");
texi_sgml('</EXAMPLE>', "\n\n");
########################################################################
#
# Verbatim displays.
#
########################################################################
# Enter a verbatim display.  "@example" is only written for the outermost
# verbatim element; $nocollapse_whitespace counts the nesting depth.
# (The counter used to be a misspelt, otherwise unused variable, so the
# global initialised at the top of the file never changed.)
sub verbatim_on {
    texi_output("\n\n");
    texi_output("\@example\n") unless $nocollapse_whitespace++;
}

# Leave a verbatim display; "@end example" is written when the outermost
# verbatim element closes.
sub verbatim_off {
    texi_output("\n\n");
    texi_output("\@end example\n") unless --$nocollapse_whitespace;
}

# All of these are rendered as @example blocks.
for my $gi (qw(PROGRAMLISTING SCREEN LITERALLAYOUT SYNOPSIS)) {
    texi_sgml("<$gi>",  \&verbatim_on);
    texi_sgml("</$gi>", \&verbatim_off);
}
########################################################################
#
# Admonitions and other 'outside flow'
#
########################################################################
## These are supposed to be set off from the text, so
## proper indentation is not so important.
#
#sub admonition_end {
# output "\n" unless $newline_last++;
# output ".sp\n";
#}
#
#$admonition_end = *admonition_end{CODE};
#
# FIXME! Lotsa things!
#sgml('<NOTE>', "\n\n\@cartouche\nNote: ");
#sgml('</NOTE>', "\n\n\@end cartouche\n");
#sgml('<WARNING>', "\n\n\@cartouche\nWarning: ");
#sgml('</WARNING>', "\n\n\@end cartouche\n");
#sgml('<TIP>', "\n\n\@cartouche\nTip: ");
#sgml('</TIP>', "\n\n\@end cartouche\n");
#sgml('<CAUTION>', "\n\n\@cartouche\nCaution: ");
#sgml('</CAUTION>', "\n\n\@end cartouche\n");
#sgml('<IMPORTANT>', "\n\n\@cartouche\nImportant: ");
#sgml('</IMPORTANT>', "\n\n\@end cartouche\n");
#sgml('<NOTE>', sub {
# output "\n" unless $newline_last++;
# output ".PP\n";
# output ".B Note: \n";
# $listitem_first = 1;
#});
#sgml('</NOTE>', $admonition_end);
########################################################################
#
# Lists
#
########################################################################
# List elements map onto Texinfo environments.  Each entry gives the
# element, the text following '@' on the opening line, and the
# environment name used on the "@end" line.
{
    my @environment_for = (
        [ 'VARIABLELIST', 'table @asis',     'table'     ],
        [ 'ORDEREDLIST',  'enumerate',       'enumerate' ],
        [ 'ITEMIZEDLIST', 'itemize @bullet', 'itemize'   ],
    );
    for my $entry (@environment_for) {
        my ($gi, $begin, $env) = @$entry;
        texi_sgml("<$gi>",  "\n\@" . $begin . "\n");
        texi_sgml("</$gi>", "\n\@end " . $env . "\n");
    }
}
# Each entry of a variable list remembers whether its first TERM is
# still to come.
sgml('<VARLISTENTRY>', sub {
    my ($entry) = @_;
    $entry->ext->{'first_term'} = 1;
});

# The first TERM of an entry is written with @item, later ones with
# @itemx.  The term text itself is captured and written at </TERM>.
sgml('<TERM>', sub {
    my $entry = $_[0]->parent;
    my $item_cmd = '@itemx';
    if ($entry->ext->{'first_term'}) {
        $entry->ext->{'first_term'} = 0;
        $item_cmd = '@item';
    }
    texi_output("\n\n" . $item_cmd . ' ');
    save_cdata();
});

# Write the captured term, folded onto a single line.
sgml('</TERM>', sub {
    my $term = fold_string(pop_output());
    output($term . "\n");
    $newline_last = 1;
    $raw_cdata--;
});

# Outside a VARLISTENTRY a LISTITEM writes its own @item line.  In both
# cases 'nobreak' is set so that para_start() skips the paragraph break
# for the first paragraph.
sgml('<LISTITEM>', sub {
    my ($item) = @_;
    if (!$item->in('VARLISTENTRY')) {
        texi_output("\n\n\@item\n");
    }
    $item->ext->{'nobreak'} = 1;
});
# A simple list remembers whether its first MEMBER is still to come.
sgml('<SIMPLELIST>', sub {
    $_[0]->ext->{'first_member'} = 1;
});

# How a MEMBER is separated from the previous one depends on the TYPE
# attribute of the enclosing list: types matching /Inline/i are separated
# by ", ", types matching /Vert/i start a new paragraph, and any other
# type writes nothing.
sgml('<MEMBER>', sub {
    my $list = $_[0]->parent;

    # The attribute name is quoted; it used to be a bareword.
    my $listtype = $list->attribute('TYPE')->value;

    if($listtype =~ /Inline/i) {
        if($list->ext->{'first_member'}) {
            # If this is the first member don't put any commas
            $list->ext->{'first_member'} = 0;
        } else {
            output(', ');
        }
    } elsif($listtype =~ /Vert/i) {
        texi_output("\n\n");
    }
});
########################################################################
#
# Stuff we don't know how to handle (yet)
#
########################################################################
# Address blocks:
# Credit stuff:
# ACKNO
# ADDRESS
# AFFILIATION
# ARTPAGENUMS
# ATTRIBUTION
# AUTHORBLURB
# AUTHORGROUP
# OTHERCREDIT
# HONORIFIC
# Other document metadata:
# Element: BOOKINFO
# Element: RELEASEINFO
# DocInfo
# ArtHeader
# Areas:
# AREA
# AREASET
# AREASPEC
# A BEGINPAGE element becomes a Texinfo "@page" command on a line of its own.
texi_sgml('<BEGINPAGE>', "\n\@page\n");
#BeginPage
#IndexTerm
########################################################################
#
# Linkage, cross references
#
########################################################################
## Print the URL
# A ULINK becomes "@uref{URL, content}".  The link is only written for
# the outermost inline element; $skip_inline counts the nesting.
sgml('<ULINK>', sub {
    if($skip_inline++) { return; } # hopefully doesn't happen

    # Use the attribute's value (not the attribute object), and write it
    # after the "@uref{" prefix in a single output() call.
    my $url = $_[0]->attribute('URL')->value;

    # The URL does not pass through the 'cdata' handler, so the
    # characters that handler escapes are escaped here.
    $url =~ s/([\@{}])/\@$1/g;

    output('@uref{', $url, ', ');
});
sgml('</ULINK>', sub {
    output('}') unless --$skip_inline;
});
# An XREF is resolved through $Refs: the first comma-separated field of
# the record stored under the LINKEND id is used as the node name.  When
# there is none, a placeholder is written and $blank_xrefs is counted up.
sgml('<XREF>', sub {
    my $target = $_[0]->attribute('LINKEND')->value;
    my ($nodename) = split(/,/, $Refs->get($target));

    unless ($nodename) {
        $blank_xrefs++;
        texi_output('[XRef to ' . $target . ']');
        return;
    }

    texi_output('@xref{' . $nodename . '}');
});
# Anchor
########################################################################
#
# Other handlers
#
########################################################################
# Write the plain characters for the lt, gt and amp handlers.
# NOTE(review): the '|[... ]|' specifications presumably name SDATA
# strings -- confirm against the sgmlspl documentation.
sgml('|[lt ]|', "<");
sgml('|[gt ]|', ">");
sgml('|[amp ]|', "&");
#
# Default handlers (uncomment these if needed). Right now, these are set
# up to gag on any unrecognised elements, sdata, processing-instructions,
# or entities.
#
# sgml('start_element',sub { die "Unknown element: " . $_[0]->name; });
# sgml('end_element','');
# Character data: escape the characters '@', '{' and '}' with a leading
# '@', then write the text.  The handler's argument is modified in place.
sgml('cdata', sub
{
    $_[0] =~ s/([\@{}])/\@$1/g;

    $newline_last = 0;
    output($_[0]);
});
# Record end: write a newline unless the output is already at the start
# of a line (always write it while $nocollapse_whitespace is set), and
# note that the output is now at the start of a line.
sgml('re', sub
{
if($nocollapse_whitespace || !$newline_last) {
output "\n";
}
$newline_last = 1;
});
# NOTE(review): second registration for the same 're' event.  It
# presumably replaces the handler above, in which case every record end
# writes a newline and $newline_last is not updated -- confirm which of
# the two is intended.
sgml('re', "\n");
# Abort on anything this spec has no rule for.
sgml('sdata',sub { die "Unknown SDATA: " . $_[0]; });
sgml('pi',sub { die "Unknown processing instruction: " . $_[0]; });
sgml('entity',sub { die "Unknown external entity: " . $_[0]->name; });
sgml('start_subdoc',sub { die "Unknown subdoc entity: " . $_[0]->name; });
# The end of a subdocument is registered with an empty string.
sgml('end_subdoc','');
# sgml('conforming','');
1;