#!/bin/sh
set -e

# This script comes from docbook2X.
# Copyright (C) 2004 Steve Cheng <stevecheng@users.sourceforge.net>
# MIT license.

sed_script='

# This sed script automatically adds the XML ISO entities
# to the output from sgml2xml/sx/osx, so that
# XML tools can be used readily on SGML (DocBook) documents.
#
# NOTE: this whole program lives inside a single-quoted shell string,
# so no comment or command in it may contain a single quote.

# Work around osx misfeature:
# it has no option to just echo SDATA entities,
# but insists on using PIs if one wants to preserve them.
# Each such PI is turned back into an entity reference using the first
# word after "sdataEntity" as the entity name.  The g flag is required
# because one output line can carry several of these PIs.
s/<?sdataEntity \([^ ]\{1,\}\)[^?]*?>/\&\1;/g

# Note that this DOCTYPE parsing is not really correct.
# For example it cannot handle a DOCTYPE declaration
# that is split across several lines.
# But it works with the output from sx.

/^<!DOCTYPE/{
# Lowercase the document element tag name,
# because sx does not do it (it is a bug that is fixed in osx)
#
# h      save the original DOCTYPE line in the hold space
# s, y   reduce the pattern space to the element name and lowercase it
# x, G   pattern space becomes: original line, newline, lowercased name
# s      splice the lowercased name back in place of the original one
# p      print the corrected DOCTYPE line
h
s/<!DOCTYPE \{1,\}\([^ ]\{1,\}\).*/\1/
y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/
x
G
s/<!DOCTYPE \{1,\}\([^ ]\{1,\}\)\(.*\)\n\(.*\)/<!DOCTYPE \3\2/
p

# Insert the ISO entity declarations
# (the c command replaces the pattern space, which was already printed
# above, with the text below, so the text lands right after the DOCTYPE
# line)
c\
<!ENTITY % ISOamsa PUBLIC\
    "ISO 8879:1986//ENTITIES Added Math Symbols: Arrow Relations//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOamsa.ent">\
<!ENTITY % ISOamsb PUBLIC\
    "ISO 8879:1986//ENTITIES Added Math Symbols: Binary Operators//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOamsb.ent">\
<!ENTITY % ISOamsc PUBLIC\
    "ISO 8879:1986//ENTITIES Added Math Symbols: Delimiters//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOamsc.ent">\
<!ENTITY % ISOamsn PUBLIC\
    "ISO 8879:1986//ENTITIES Added Math Symbols: Negated Relations//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOamsn.ent">\
<!ENTITY % ISOamso PUBLIC\
    "ISO 8879:1986//ENTITIES Added Math Symbols: Ordinary//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOamso.ent">\
<!ENTITY % ISOamsr PUBLIC\
    "ISO 8879:1986//ENTITIES Added Math Symbols: Relations//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOamsr.ent">\
<!ENTITY % ISObox  PUBLIC\
    "ISO 8879:1986//ENTITIES Box and Line Drawing//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISObox.ent">\
<!ENTITY % ISOcyr1 PUBLIC\
    "ISO 8879:1986//ENTITIES Russian Cyrillic//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOcyr1.ent">\
<!ENTITY % ISOcyr2 PUBLIC\
    "ISO 8879:1986//ENTITIES Non-Russian Cyrillic//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOcyr2.ent">\
<!ENTITY % ISOdia  PUBLIC\
    "ISO 8879:1986//ENTITIES Diacritical Marks//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOdia.ent">\
<!ENTITY % ISOgrk1 PUBLIC\
    "ISO 8879:1986//ENTITIES Greek Letters//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOgrk1.ent">\
<!ENTITY % ISOgrk2 PUBLIC\
    "ISO 8879:1986//ENTITIES Monotoniko Greek//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOgrk2.ent">\
<!ENTITY % ISOgrk3 PUBLIC\
    "ISO 8879:1986//ENTITIES Greek Symbols//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOgrk3.ent">\
<!ENTITY % ISOgrk4 PUBLIC\
    "ISO 8879:1986//ENTITIES Alternative Greek Symbols//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOgrk4.ent">\
<!ENTITY % ISOlat1 PUBLIC\
    "ISO 8879:1986//ENTITIES Added Latin 1//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOlat1.ent">\
<!ENTITY % ISOlat2 PUBLIC\
    "ISO 8879:1986//ENTITIES Added Latin 2//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOlat2.ent">\
<!ENTITY % ISOnum  PUBLIC\
    "ISO 8879:1986//ENTITIES Numeric and Special Graphic//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOnum.ent">\
<!ENTITY % ISOpub  PUBLIC\
    "ISO 8879:1986//ENTITIES Publishing//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOpub.ent">\
<!ENTITY % ISOtech PUBLIC\
    "ISO 8879:1986//ENTITIES General Technical//EN//XML"\
    "http://www.docbook.org/xml/4.3/ent/ISOtech.ent">\
\
%ISOamsa;\
%ISOamsb;\
%ISOamsc;\
%ISOamsn;\
%ISOamso;\
%ISOamsr;\
%ISObox;\
%ISOcyr1;\
%ISOcyr2;\
%ISOdia;\
%ISOgrk1;\
%ISOgrk2;\
%ISOgrk3;\
%ISOgrk4;\
%ISOlat1;\
%ISOlat2;\
%ISOnum;\
%ISOpub;\
%ISOtech;
}'

# Warning that sx emits for SDATA entity references.  Lines containing it
# are dropped from the diagnostics in the sx branch below.
sx_whining='W: reference to internal SDATA entity'

# The purpose of the following slew of redirections
# is to get the error messages in the right place
# and to return sx/osx exit status.
#
# File descriptors:
#   3  copy of this script's stdout; sed writes the converted XML there
#   4  copy of this script's stderr; diagnostics end up there
#   5  stdout of the command substitution; carries the converter's
#      exit status into $status
#
# This script runs under "set -e", so the converter status is collected
# with "|| rc=$?".  A plain "cmd; echo $?" aborts the subshell before the
# echo whenever the converter fails, which loses the status.

# Prefer sx (sgml2xml) over osx because it is faster

if test -x "@SX@"; then
    exec 3>&1 4>&2
    # grep -v exits 1 when it selects no lines, which is the normal case of
    # sx printing nothing but the expected warning.  That must not trip
    # "set -e"; any other non-zero grep status is still treated as a failure.
    status=$(
        {
            {
                {
                    rc=0
                    "@SX@" -xid -xlower "$@" || rc=$?
                    echo "$rc" >&5
                } | sed -e "$sed_script"
            } 2>&1 1>&3 | {
                grep -v "$sx_whining" 1>&4 || test "$?" -eq 1
            }
        } 5>&1
    )
    # Fall back to a failure status if, against expectation, none was captured.
    exit "${status:-1}"

elif test -x "@OSX@"; then
    exec 3>&1 4>&2
    # osx is asked to emit SDATA entities as processing instructions
    # (-xsdata-as-pis); the sed script converts them to entity references.
    status=$(
        {
            {
                rc=0
                "@OSX@" -xid -xlower -xsdata-as-pis "$@" || rc=$?
                echo "$rc" >&5
            } | sed -e "$sed_script"
        } 5>&1 2>&4 1>&3
    )
    # Fall back to a failure status if, against expectation, none was captured.
    exit "${status:-1}"
else
    # Diagnostics belong on stderr so they never mix with the XML on stdout.
    printf '%s\n' "$0: cannot find sx(sgml2xml) or osx to convert SGML to XML.  Cannot continue." >&2
    exit 255
fi