Blame buildtools/makeman

Packit Service 72ec4e
#!/usr/libexec/platform-python
Packit 78deda
#
Packit 78deda
# makeman -- compile netpbm's stereotyped HTML to troff markup
Packit 78deda
#
Packit 78deda
# This approach works because we control the entire document universe 
Packit 78deda
# this is going to convert and can reinforce useful stereotypes.
Packit 78deda
#
Packit 78deda
# The output of this tool uses cliches parseable by doclifter,
Packit 78deda
# which should thus be able to recover all the semantic information
Packit 78deda
# it looks like this thing is losing.
Packit 78deda
#
Packit 78deda
# Known bugs:
Packit 78deda
#  * Ordered lists are smashed into unordered lists
Packit 78deda
#
Packit 78deda
# Limitations:
Packit 78deda
#  * IMG tags are issued as .IMG preceded by a bolded caption containing
Packit 78deda
#    the alt content.  This will only work if the page is formatted with
Packit 78deda
#    mwww macros.
Packit 78deda
#  * Loses summary information from tables.
Packit 78deda
#  * Only permits one <HR> in the HTML, right before the index.
Packit 78deda
#
Packit 78deda
# You can use the <?makeman ?> PI to pass text directly through to the
Packit 78deda
# generated manual page.  A major use is to insert format lines for tables.
Packit 78deda
#
Packit 78deda
# By Eric S. Raymond <esr@thyrsus.com>
Packit 78deda
# Version 1.0, July 26 2004
Packit 78deda
#
Packit 78deda
# Modified by Akira F. Urushibata <afu@wta.att.ne.jp>
Packit 78deda
# Version 1.1, February 11 2016
Packit 78deda
#
Packit 78deda
#   Added ability to process &mdash; &minus;
Packit 78deda
#   Added footer message to clarify original source. 
Packit 78deda
#
Packit 78deda
Packit 78deda
import os, sys, re
Packit 78deda
Packit 78deda
# Fourth field of the .TH line that makeman() emits for every page.
source = "netpbm documentation"
# Default manual section; makeman() declares this global and resets it
# for each page it converts.
section = 1

# Comment header that makeman() prepends to every generated page.
# NOTE: this is a raw string, so the backslash right after the opening
# quotes is kept literally (it is not a line continuation); the text
# therefore starts with a line holding a single backslash.
warning = r'''\
.\" This man page was generated by the Netpbm tool 'makeman' from HTML source.
.\" Do not hand-hack it!  If you have bug fixes or improvements, please find
.\" the corresponding HTML page on the Netpbm website, generate a patch
.\" against that, and send it to the Netpbm maintainer.
'''

# Start of the trailing DOCUMENT SOURCE section.  makeman() appends the
# base name of the HTML file directly after this text, completing the URL
# on the last line.
footerprefix = '''.SH DOCUMENT SOURCE
This manual page was generated by the Netpbm tool 'makeman' from HTML
source.  The master documentation is at
.IP
.B http://netpbm.sourceforge.net/doc/'''
Packit 78deda
Packit 78deda
class LiftException(Exception):
    """Fatal conversion error carrying a message and an exit status.

    message -- human-readable description; main() prints it after the
               "makeman: " prefix.
    retval  -- value main() returns when it catches this exception
               (defaults to 1).
    """
    def __init__(self, message, retval=1):
        # Hand only the message to Exception so that str(e) and tracebacks
        # show it, rather than the raw (message, retval) argument tuple.
        Exception.__init__(self, message)
        self.message = message
        self.retval = retval
Packit 78deda
Packit 78deda
def makeman(name, file, indoc):
Packit 78deda
    "Transform a string representing an HTML document into man markup."
Packit 78deda
    global section, sectmap
Packit 78deda
    # Dot at left margin confuses troff.
Packit 78deda
    # This program generates these,
Packit 78deda
    indoc = indoc.replace("\n.", "\n@%@%@")
Packit 78deda
    # Protect escapes before we try generating font changes.
Packit 78deda
    indoc = indoc.replace("\\", r"\e")
Packit 78deda
    # Header-bashing
Packit 78deda
    indoc = re.sub('(?i)]*>', "", indoc)
Packit 78deda
    indoc = indoc.replace('<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">', "")
Packit 78deda
    indoc = indoc.replace('<meta http-equiv="Content-Type" content="text/html; charset=us-ascii"/>', "")
Packit 78deda
    indoc = indoc.replace('\n',"")
Packit 78deda
    indoc = indoc.replace('<html xmlns="http://www.w3.org/1999/xhtml">', "")
Packit 78deda
    indoc = indoc.replace('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">', "")
Packit 78deda
    indoc = indoc.replace("<HEAD>", "").replace("</HEAD>", "")
Packit 78deda
    indoc = indoc.replace("<head>", "").replace("</head>", "")
Packit 78deda
    indoc = re.sub('(?i)Table Of Contents', "", indoc)
Packit 78deda
    datematch = re.compile("Updated: (.*)\n")
Packit 78deda
    match = datematch.search(indoc)
Packit 78deda
    if match:
Packit 78deda
        date = match.group(1)
Packit 78deda
    else:
Packit 78deda
        date = ""
Packit 78deda
    indoc = datematch.sub("", indoc)
Packit 78deda
    namematch = re.compile("

(.*)

", re.I)
Packit 78deda
    match = namematch.search(indoc)
Packit 78deda
    if match:
Packit 78deda
        name = match.group(1)
Packit 78deda
    else:
Packit 78deda
        name = None
Packit 78deda
    section = 1
Packit 78deda
    meta = re.compile('(?i)<META NAME="manual_section" CONTENT="([0-9])">')
Packit 78deda
    match = meta.search(indoc)
Packit 78deda
    if match:
Packit 78deda
        section = int(match.group(1))
Packit 78deda
        indoc = meta.sub("", indoc)
Packit 78deda
    else:
Packit 78deda
        section = sectmap.get(name, 0)
Packit 78deda
    indoc = namematch.sub("", indoc)
Packit 78deda
    indoc = re.sub("(?i)<BODY[^>]*>", "", indoc)
Packit 78deda
    indoc = re.sub("(?i)<HTML>", "", indoc)
Packit 78deda
    # Remove more superfluous headers
Packit 78deda
    titlematch = re.compile("<TITLE>(.*)</TITLE>\n+", re.I)
Packit 78deda
    match = titlematch.search(indoc)
Packit 78deda
    if match:
Packit 78deda
        title = match.group(1)
Packit 78deda
    else:
Packit 78deda
        title = None
Packit 78deda
    indoc = titlematch.sub("", indoc)
Packit 78deda
    indoc = re.sub("(?i)\n*
\n+", "\n", indoc)
Packit 78deda
    indoc = re.sub("(?i)
", "\n", indoc)
Packit 78deda
    indoc = ('.TH "%s" %d "%s" "%s"\n' % (title,section,date,source)) + indoc
Packit 78deda
    # Literal layout
Packit 78deda
    indoc = re.sub("(?i)\n *
", "\n.nf", indoc)
Packit 78deda
    indoc = re.sub("(?i)\n *", "\n.fi", indoc)
Packit 78deda
    indoc = re.sub("(?i)\n *
", "\n.RS", indoc)
Packit 78deda
    indoc = re.sub("(?i)\n *", "\n.RE", indoc)
Packit 78deda
    # Highlight processing
Packit 78deda
    indoc = re.sub("(?i)", r"\\fB", indoc)
Packit 78deda
    indoc = re.sub("(?i)", r"\\fP", indoc)
Packit 78deda
    indoc = re.sub("(?i)", r"\\fI", indoc)
Packit 78deda
    indoc = re.sub("(?i)", r"\\fP", indoc)
Packit 78deda
    indoc = re.sub("(?i)", r"\\fI", indoc)
Packit 78deda
    indoc = re.sub("(?i)", r"\\fP", indoc)
Packit 78deda
    indoc = re.sub("(?i)", r"\\fI", indoc)
Packit 78deda
    indoc = re.sub("(?i)", r"\\fP", indoc)
Packit 78deda
    indoc = re.sub("(?i)<TT>", r"\\f(CW", indoc)
Packit 78deda
    indoc = re.sub("(?i)</TT>", r"\\fP", indoc)
Packit 78deda
    indoc = re.sub("(?i)", r"\\f(CW", indoc)
Packit 78deda
    indoc = re.sub("(?i)", r"\\fP", indoc)
Packit 78deda
    indoc = re.sub("(?i)", r"\\f(CW", indoc)
Packit 78deda
    indoc = re.sub("(?i)", r"\\fP", indoc)
Packit 78deda
    indoc = re.sub("(?i)", r"\\fB", indoc)
Packit 78deda
    indoc = re.sub("(?i)", r"\\fP", indoc)
Packit 78deda
    indoc = re.sub("(?i)<SUP>", r"\\u", indoc)
Packit 78deda
    indoc = re.sub("(?i)</SUP>", r"\\d", indoc)
Packit 78deda
    # Paragraph handling
Packit 78deda
    indoc = re.sub("(?i)\n*

\n*", r"\n.PP\n", indoc)

Packit 78deda
    indoc = re.sub("(?i)
", r"\n.PP\n", indoc)
Packit 78deda
    indoc = re.sub("(?i)

", "", indoc)
Packit 78deda
    indoc = re.sub("(?i)", "", indoc)
Packit 78deda
    indoc = re.sub("(?i)<meta[^>]*>", "", indoc)
Packit 78deda
    lines = indoc.split("\n")
Packit 78deda
    listdepth = 0
Packit 78deda
    for i in range(len(lines)):
Packit 78deda
        lowered = lines[i].lower()
Packit 78deda
        if "
Packit 78deda
            listdepth += 1
Packit 78deda
        if listdepth:
Packit 78deda
            lines[i] = lines[i].replace(".PP", ".sp")
Packit 78deda
        if "" in lowered or "" in lowered or "" in lowered:
Packit 78deda
            listdepth -= 1
Packit 78deda
    indoc = "\n".join(lines)
Packit 78deda
    indoc = re.sub(r"\s*\.sp", "\n.sp", indoc)
Packit 78deda
    # Format email addresses as italic
Packit 78deda
    indoc = re.sub('(?i)<A[ \n]+HREF="mailto:[^>]+">([^<]+)', r'\\fI\1\\fP', indoc)    
Packit 78deda
    # Format manual crossreferences
Packit 78deda
    def xrefmatch(match):
Packit 78deda
        xrefto = match.group(2)
Packit 78deda
        xrefurl = match.group(1)
Packit 78deda
        xrefsection = sectmap.get(xrefurl, 1)
Packit 78deda
        if xrefsection == 0:
Packit 78deda
            return "\n.I " + xrefto
Packit 78deda
        else:
Packit 78deda
            return '\n.BR "%s" (%d)\\c\n\\&' % (xrefto, xrefsection)
Packit 78deda
    indoc = re.sub(r'(?i)\n* *(?:\\fB)?<A[ \n]+HREF="?([^>]+.html)"?>([^<]+)(?:\\fP)?',
Packit 78deda
                   xrefmatch, indoc)
Packit 78deda
    # Format URLs
Packit 78deda
    def urlmatch(match):
Packit 78deda
        url = match.group(1).replace('\n', ' ')
Packit 78deda
        txt = match.group(2).replace('\n', ' ')
Packit 78deda
        return "\n.UR %s\n%s\n.UE\n\\&" % (url, txt)
Packit 78deda
    indoc = re.sub(r'(?i)\n*(?:<)?<A[ \n]+HREF *= *"([^>]+)">([^<]+)(?:>)?',
Packit 78deda
                  urlmatch, indoc)
Packit 78deda
    # Turn some entities into harmless cookies
Packit 78deda
    indoc = indoc.replace("<", "@#!#@").replace(">", "#@!@#").replace("&", "#!@!@!#")
Packit 78deda
    indoc = indoc.replace("×", r"\(mu")
Packit 78deda
    indoc = indoc.replace("®", r"\*R")
Packit 78deda
    indoc = indoc.replace("©", r"\(co")
Packit 78deda
    # Turn anchors into .UN tags
Packit 78deda
    indoc = re.sub('(?i)(?: )*\s*', ".UN \\1\n", indoc)
Packit 78deda
    # Strip off the index trailer
Packit 78deda
    trailer = re.compile('
.*', re.DOTALL | re.IGNORECASE)
Packit 78deda
    indoc = re.sub(trailer, "", indoc)
Packit 78deda
    # If there was no index trailer, we still need to strip these
Packit 78deda
    indoc = indoc.replace("</BODY>", "").replace("</HTML>", "")
Packit 78deda
    indoc = indoc.replace("</body>", "").replace("</html>", "")
Packit 78deda
    # Recognize sections with IDs
Packit 78deda
    indoc = re.sub('(?i)

([^><]*)

',
Packit 78deda
                   ".UN \\1\n.SH \\2", indoc)
Packit 78deda
    indoc = re.sub('(?i)

([^><]*)

',
Packit 78deda
                   ".UN \\1\n.SS \\2", indoc)
Packit 78deda
    indoc = re.sub('(?i)

([^><]*)

',
Packit 78deda
                   ".UN \\1\n.B \\2", indoc)
Packit 78deda
    indoc = re.sub('(?i)

([^><]*)

',
Packit 78deda
                   ".UN \\1\n.SH \\2", indoc)
Packit 78deda
    indoc = re.sub('(?i)

([^><]*)

',
Packit 78deda
                   ".UN \\1\n.SS \\2", indoc)
Packit 78deda
    indoc = re.sub('(?i)

([^><]*)

',
Packit 78deda
                   ".UN \\1\n.B \\2", indoc)
Packit 78deda
    # Sections without IDs
Packit 78deda
    indoc = re.sub('(?i)

([^><]*)

', ".SH \\1", indoc)
Packit 78deda
    indoc = re.sub('(?i)

([^><]*)

', ".SS \\1", indoc)
Packit 78deda
    indoc = re.sub('(?i)

([^><]*)

', ".B \\1", indoc)
Packit 78deda
    # 
Packit 78deda
    # Process definition lists -- just turn them into .TPs
Packit 78deda
    indoc = re.sub("(?i) *
", "", indoc)
Packit 78deda
    indoc = re.sub("(?i) *", "", indoc)
Packit 78deda
    indoc = re.sub("(?i) *
", ".TP\n", indoc)
Packit 78deda
    indoc = re.sub("(?i) *", "", indoc)
Packit 78deda
    indoc = re.sub("(?i)\n*
\n*", "\n", indoc)
Packit 78deda
    indoc = re.sub("(?i) *", "", indoc)
Packit 78deda
    # Process unordered lists -- just turn them into .TPs
Packit 78deda
    indoc = re.sub("(?i)</?[UO]L *(COMPACT)?>", "", indoc)
Packit 78deda
    indoc = re.sub("(?i) *
  • ", ".IP \(bu\n", indoc)
  • Packit 78deda
        indoc = re.sub("(?i) *", "", indoc)
    Packit 78deda
        # No-print tags
    Packit 78deda
        indoc = re.sub(".*", "", indoc)
    Packit 78deda
        # Passthrough
    Packit 78deda
        indoc = re.sub(r"<\?makeman (.*) \?>", r'\1', indoc)
    Packit 78deda
        # Comments
    Packit 78deda
        indoc = re.sub("", r'.\"\1', indoc)
    Packit 78deda
        # Acronyms
    Packit 78deda
        indoc = re.sub('', "", indoc)
    Packit 78deda
        indoc = re.sub("", "", indoc)
    Packit 78deda
        # Image tags
    Packit 78deda
        indoc = re.sub(' *([^', ".B \\2\n.IMG -C \\1", indoc)
    Packit 78deda
        # Special characters
    Packit 78deda
        indoc = indoc.replace(""", "'")
    Packit 78deda
        indoc = indoc.replace(" ", "\\ ")
    Packit 78deda
        indoc = indoc.replace("−", "-")
    Packit 78deda
        indoc = indoc.replace("—", "-")
    Packit 78deda
        indoc = indoc.replace("μ", "mu")
    Packit 78deda
        indoc = indoc.replace("σ", "sigma")
    Packit 78deda
        # Tables
    Packit 78deda
        # This will not handle rowspan
    Packit 78deda
        indoc = re.sub('(?i) *<table[^>]*>.*', ".TS", indoc)
    Packit 78deda
        indoc = re.sub("(?i) *.*", ".TE", indoc)
    Packit 78deda
        # First the single-line case
    Packit 78deda
        indoc = re.sub("(?i) *", "\t", indoc)
    Packit 78deda
        indoc = re.sub("(?i) *", "", indoc)
    Packit 78deda
        indoc = re.sub("(?i) *", "", indoc)
    Packit 78deda
        # Then the multiline case
    Packit 78deda
        indoc = re.sub(r'(?i)\s*<t[hd][^>]*>([^<\n]*)</t[dh]>\s*', '\t\\1', indoc)
    Packit 78deda
        indoc = re.sub(r'(?i)\s*<t[hd][^>]*>([^<]*)</t[dh]>\s*', '\tT{\n\\1T}', indoc)
    Packit 78deda
        indoc = indoc.replace("\n\\&T}", "\nT}")
    Packit 78deda
        indoc = re.sub("(?i) *", "", indoc)
    Packit 78deda
        indoc = re.sub("(?i) *<tr[^>]*>\t*", "", indoc)
    Packit 78deda
        indoc = re.sub(r"\.TS\s+<[Cc][Aa][Pp][Tt][Ii][Oo][Nn]>([^<]*)</[Cc][Aa][Pp][Tt][Ii][Oo][Nn]>\s*", ".B \\1\n.TS\n", indoc)
    Packit 78deda
        # Debugging
    Packit 78deda
        #sys.stderr.write("Name: %s, Title: %s, Date: %s\n" % (name, title, date))
    Packit 78deda
        # Time for error checking now
    Packit 78deda
        badlines = []
    Packit 78deda
        for line in indoc.split("\n"):
    Packit 78deda
            if "<" in line or ">" in line.replace(" >", "") or re.search(r'(?
    Packit 78deda
                badlines.append(line)
    Packit 78deda
        if badlines:
    Packit 78deda
            sys.stderr.write(("Bad lines from %s:\n-----------------\n" % file) + "\n".join(badlines) + "\n-----------------\n")
    Packit 78deda
        # Goes after bad-line check so we don't misinterpret it as an error
    Packit 78deda
        indoc = indoc.replace("@#!#@", "<").replace("#@!@#", ">").replace("#!@!@!#", "&")
    Packit 78deda
        indoc = re.sub("\n+$", "\n", indoc)
    Packit 78deda
        # Single-quote at left margin confuses troff.
    Packit 78deda
        # This program never generates these.
    Packit 78deda
        indoc = indoc.replace("\n'", "\n\\&'")
    Packit 78deda
        # Finish guarding against leading dots.
    Packit 78deda
        indoc = indoc.replace("\n@%@%@", "\n\\&.")
    Packit 78deda
        # Mark these generated pages so people won't hand-hack them.
    Packit 78deda
        indoc = warning + indoc
    Packit 78deda
        indoc = indoc + footerprefix + os.path.basename(file) +"\n.PP"
    Packit 78deda
        return indoc
    Packit 78deda
    Packit 78deda
    def main(args, mainout=sys.stdout, mainerr=sys.stderr):
        """Convert each HTML file named on the command line to a man page.

        args    -- argument list without the program name.  Options:
                   -d DIR  read the HTML files from DIR
                   -v      verbose: name each file as it is converted and
                           let unexpected exceptions propagate
        mainout -- accepted for interface compatibility; not written to.
        mainerr -- stream that receives every diagnostic.

        Works in two passes: the first reads every file and fills the global
        sectmap (title and file name -> manual section) used for cross
        references; the second runs makeman() on each document and writes
        the result to "<stem>.<section>" in the current directory, where
        <stem> is the file name up to its first dot.

        Returns None on success, 2 for a bad option, True if a temporary
        output file cannot be created, the exception's retval for a
        LiftException, 3 for an I/O error, 4 on keyboard interrupt and
        5 for any other error (unless -v is given, in which case the
        exception is re-raised).
        """
        global sectmap
        import getopt

        try:
            (options, arguments) = getopt.getopt(args, "vd:")
        except getopt.GetoptError as e:
            mainerr.write("makeman: %s\n" % e)
            return 2
        dirprefix = ""
        verbosity = 0
        for (switch, val) in options:
            if switch == '-d':      # Set HTML input directory
                dirprefix = val
            elif switch == '-v':    # Enable verbose error reporting
                verbosity += 1

        # Compiled once here instead of once per input file.
        namere = re.compile("<H1>(.*)</H1>", re.I)
        titlere = re.compile("<TITLE>(.*)</TITLE>", re.I)
        meta = re.compile('(?i)<META NAME="manual_section" CONTENT="([0-9])">')
        hr = re.compile("(?i)<HR>")

        def read_document(file):
            "Return the text of file under dirprefix, or None if it can't be opened."
            try:
                infp = open(os.path.join(dirprefix, file))
            except (IOError, OSError):
                mainerr.write("makeman: can't open %s\n" % file)
                return None
            with infp:
                return infp.read()

        try:
            # First pass: gather locations for crossreferences.  The text of
            # each readable file is kept so the second pass converts exactly
            # what was indexed here.
            sectmap = {}
            documents = []
            for file in arguments:
                indoc = read_document(file)
                if indoc is None:
                    continue
                if not namere.search(indoc):
                    raise LiftException("name missing from %s" % file)
                titlematch = titlere.search(indoc)
                if not titlematch:
                    raise LiftException("title missing from %s" % file)
                title = titlematch.group(1)
                # NOTE(review): the name is taken from the TITLE match, not
                # from the H1 match; kept as-is because makeman() looks up
                # sectmap by H1 text and the generated .TH section depends
                # on that lookup missing.  Confirm before changing.
                name = title
                match = meta.search(indoc)
                section = int(match.group(1)) if match else 1
                sectmap[title] = sectmap[file] = sectmap[name] = section
                firsthr = hr.search(indoc)
                if firsthr and hr.search(indoc, firsthr.end(0)):
                    # makeman() discards everything from the first <HR> on,
                    # so a second one means content would be lost.
                    raise LiftException("%s has two <HR> tags!" % file)
                documents.append((file, name, indoc))
            # Second pass: do formatting
            for (file, name, indoc) in documents:
                if verbosity:
                    mainerr.write("makeman: %s\n" % file)
                outdoc = makeman(name, file, indoc)
                if outdoc is None or outdoc == indoc:
                    # Nothing to write for this page.
                    continue
                tempfile = file + ".~%s-%d~" % (name, os.getpid())
                try:
                    outfp = open(tempfile, "w")
                except (IOError, OSError):
                    mainerr.write("%s: can't open tempfile\n" % name)
                    return True
                try:
                    with outfp:
                        outfp.write(outdoc)
                    # The file is closed before renaming: under Windows you
                    # can't rename an open file.
                    stem = file.partition(".")[0]
                    os.rename(tempfile, "%s.%d" % (stem, sectmap[file]))
                except BaseException:
                    # Don't leave a half-written tempfile behind.
                    try:
                        os.remove(tempfile)
                    except OSError:
                        pass
                    raise
        except LiftException as e:
            mainerr.write("makeman: " + e.message + "\n")
            return e.retval
        except IOError as e:
            mainerr.write("makeman: file I/O error: %s\n" % e)
            return 3
        except KeyboardInterrupt:
            mainerr.write("makeman: bailing out...\n")
            return 4
        except Exception:
            if verbosity:
                raise
            mainerr.write("makeman: internal error!\n")
            return 5
    Packit 78deda
    Packit 78deda
    if __name__ == "__main__":
        # Run the main sequence; whatever main() returns becomes the
        # process exit status (None counts as success).
        raise SystemExit(main(sys.argv[1:]))
    Packit 78deda
    Packit 78deda
    # The following sets edit modes for GNU EMACS
    Packit 78deda
    # Local Variables:
    Packit 78deda
    # mode:python
    Packit 78deda
    # End: