Tree - source-git/python-lxml - CentOS Git server

source-git / python-lxml

Files

Commit: d9acb67c9b615a0ad85ff8316fa45a8f0c5dd043
Blob Blame History Raw
# The script builds the LaTeX documentation.
# Testing:
#    python mklatex.py latex .. 1.0

from docstructure import SITE_STRUCTURE, BASENAME_MAP
import os, shutil, re, sys, datetime

TARGET_FILE = "lxmldoc.tex"

RST2LATEX_OPTIONS = " ".join([
#    "--no-toc-backlinks",
    "--strip-comments",
    "--language en",
#    "--date",
    "--use-latex-footnotes",
    "--use-latex-citations",
    "--use-latex-toc",
    "--font-encoding=T1",
    "--output-encoding=utf-8",
    "--input-encoding=utf-8",
    "--graphicx-option=pdftex",
    ])

htmlnsmap = {"h" : "http://www.w3.org/1999/xhtml"}

replace_invalid = re.compile(r'[-_/.\s\\]').sub
replace_content = re.compile("\{[^\}]*\}").sub

replace_epydoc_macros = re.compile(r'(,\s*amssymb|dvips\s*,\s*)').sub
replace_rst_macros = re.compile(r'(\\usepackage\{color}|\\usepackage\[[^]]*]\{hyperref})').sub

BASENAME_MAP = BASENAME_MAP.copy()
BASENAME_MAP.update({'api' : 'lxmlapi'})

# LaTeX snippets

DOCUMENT_CLASS = r"""
\documentclass[10pt,english]{report}
\usepackage[a4paper]{geometry}
\usepackage{tabularx}
\usepackage{ifthen}
\usepackage[pdftex]{graphicx}
\parindent0pt
\parskip1ex

%%% Fallback definitions for Docutils-specific commands

% providelength (provide a length variable and set default, if it is new)
\providecommand*{\DUprovidelength}[2]{
  \ifthenelse{\isundefined{#1}}{\newlength{#1}\setlength{#1}{#2}}{}
}

% docinfo (width of docinfo table)
\DUprovidelength{\DUdocinfowidth}{0.9\textwidth}

% titlereference role
\providecommand*{\DUroletitlereference}[1]{\textsl{#1}}

"""

PYGMENTS_IMPORT = r"""
\usepackage{fancyvrb}
\input{_part_pygments.tex}
"""

EPYDOC_IMPORT = r"""
\input{_part_epydoc.tex}
"""

def write_chapter(master, title, filename):
    filename = os.path.join(os.path.dirname(filename),
                            "_part_%s" % os.path.basename(filename))
    master.write(r"""
\chapter{%s}
\label{%s}
\input{%s}
""" % (title, filename, filename))


# the program ----

def rest2latex(script, source_path, dest_path):
    command = ('%s %s %s  %s > %s' %
               (sys.executable, script, RST2LATEX_OPTIONS,
                source_path, dest_path))
    os.system(command)

def build_pygments_macros(filename):
    from pygments.formatters import LatexFormatter
    text = LatexFormatter().get_style_defs()
    with open(filename, "w") as f:
        f.write(text)
        f.write('\n')

def copy_epydoc_macros(src, dest, existing_header_lines):
    doc = open(src, 'r')
    out = open(dest, "w")
    for line in doc:
        if line.startswith('%% generator') \
                or line.startswith('% generated by ') \
                or '\\begin{document}' in line \
                or '\\makeindex' in line:
            break
        if line.startswith('%') or \
                r'\documentclass' in line or \
                r'\makeindex' in line or \
                r'{inputenc}' in line:
            continue
        if line.startswith(r'\usepackage'):
            if line in existing_header_lines:
                continue
            if '{hyperref}' in line:
                line = line.replace('black', 'blue')
        out.write( replace_epydoc_macros('', line) )
    out.close()
    doc.close()

def noop(input):
    return input

counter_no = 0

def tex_postprocess(src_path, dest_path, want_header=False, process_line=noop):
    """
    Postprocessing of the LaTeX file generated from ReST.

    Reads file src_path and saves to dest_path only the true content
    (without the document header and final) - so it is suitable
    to be used as part of the longer document.

    Returns the title of document

    If want_header is set, returns also the document header (as
    the list of lines).
    """
    header = []
    add_header_line = header.append
    global counter_no
    counter_no = counter_no + 1
    counter_text = "listcnt%d" % counter_no

    search_title = re.compile(r'\\title{([^{}]*(?:{[^}]*})*)}').search
    skipping = re.compile(r'(\\end{document}|\\tableofcontents|^%)').search

    with open(src_path) as src:
        src_text = src.read()

    dest = open(dest_path, "w")

    title = search_title(src_text)
    if title:
        # remove any commands from the title
        title = re.sub(r'\\\w+({[^}]*})?', '', title.group(1))

    iter_lines = iter(src_text.splitlines())
    for l in iter_lines:
        l = process_line(l)
        if not l:
            continue
        if want_header:
            add_header_line(replace_rst_macros('', l))
        if l.startswith("\\maketitle"):
            break

    for l in iter_lines:
        l = process_line(l)
        if skipping(l):
            # To-Do minitoc instead of tableofcontents
            continue
        elif "\hypertarget{old-versions}" in l:
            break
        elif "listcnt0" in l:
            l = l.replace("listcnt0", counter_text)
        dest.write(l + '\n')

    dest.close()

    if not title:
        raise Exception("Bueee, no title in %s" % src_path)
    return title, header

def publish(dirname, lxml_path, release):
    if not os.path.exists(dirname):
        os.mkdir(dirname)

    book_title = "lxml %s" % release

    doc_dir = os.path.join(lxml_path, 'doc')
    script = os.path.join(doc_dir, 'rest2latex.py')
    pubkey = os.path.join(doc_dir, 'pubkey.asc')

    shutil.copy(pubkey, dirname)

    # build pygments macros
    build_pygments_macros(os.path.join(dirname, '_part_pygments.tex'))

    # Used in postprocessing of generated LaTeX files
    header = []
    titles = {}

    replace_interdoc_hyperrefs = re.compile(
        r'\\href\{([^/}]+)[.]([^./}]+)\}').sub
    replace_docinternal_hyperrefs = re.compile(
        r'\\href\{\\#([^}]+)\}').sub
    replace_image_paths = re.compile(
        r'^(\\includegraphics{)').sub
    def build_hyperref(match):
        basename, extension = match.groups()
        outname = BASENAME_MAP.get(basename, basename)
        if '#' in extension:
            anchor = extension.split('#')[-1]
            return r"\hyperref[%s]" % anchor
        elif extension != 'html':
            return r'\href{http://lxml.de/%s.%s}' % (
                outname, extension)
        else:
            return r"\hyperref[_part_%s.tex]" % outname
    def fix_relative_hyperrefs(line):
        line = replace_image_paths(r'\1../html/', line)
        if r'\href' not in line:
            return line
        line = replace_interdoc_hyperrefs(build_hyperref, line)
        return replace_docinternal_hyperrefs(r'\hyperref[\1]', line)

    # Building pages
    for section, text_files in SITE_STRUCTURE:
        for filename in text_files:
            if filename.startswith('@'):
                continue
                #page_title = filename[1:]
                #url = href_map[page_title]
                #build_menu_entry(page_title, url, section_head)

            basename = os.path.splitext(os.path.basename(filename))[0]
            basename = BASENAME_MAP.get(basename, basename)
            outname = basename + '.tex'
            outpath = os.path.join(dirname, outname)
            path = os.path.join(doc_dir, filename)

            print("Creating %s" % outname)
            rest2latex(script, path, outpath)

            final_name = os.path.join(dirname, os.path.dirname(outname),
                                      "_part_%s" % os.path.basename(outname))

            title, hd = tex_postprocess(outpath, final_name,
                                        want_header = not header,
                                        process_line=fix_relative_hyperrefs)
            if not header:
                header = hd
            titles[outname] = title

    # integrate generated API docs

    print("Integrating API docs")
    apidocsname = 'api.tex'
    apipath = os.path.join(dirname, apidocsname)
    tex_postprocess(apipath, os.path.join(dirname, "_part_%s" % apidocsname),
                    process_line=fix_relative_hyperrefs)
    copy_epydoc_macros(apipath, os.path.join(dirname, '_part_epydoc.tex'),
                       set(header))

    # convert CHANGES.txt

    print("Integrating ChangeLog")
    find_version_title = re.compile(
        r'(.*\\section\{)([0-9][^\} ]*)\s+\(([^)]+)\)(\}.*)').search
    def fix_changelog(line):
        m = find_version_title(line)
        if m:
            line = "%sChanges in version %s, released %s%s" % m.groups()
        else:
            line = line.replace(r'\subsection{', r'\subsection*{')
        return line

    chgname = 'changes-%s.tex' % release
    chgpath = os.path.join(dirname, chgname)
    rest2latex(script,
               os.path.join(lxml_path, 'CHANGES.txt'),
               chgpath)
    tex_postprocess(chgpath, os.path.join(dirname, "_part_%s" % chgname),
                    process_line=fix_changelog)

    # Writing a master file
    print("Building %s\n" % TARGET_FILE)
    master = open( os.path.join(dirname, TARGET_FILE), "w")
    for hln in header:
        if hln.startswith(r"\documentclass"):
            #hln = hln.replace('article', 'book')
            hln = DOCUMENT_CLASS + EPYDOC_IMPORT
        elif hln.startswith(r"\begin{document}"):
            # pygments and epydoc support
            master.write(PYGMENTS_IMPORT)
        elif hln.startswith(r"\title{"):
            hln = replace_content(
                r'{%s\\\\\\vspace{1cm}\\includegraphics[width=2.5cm]{../html/tagpython-big.png}}' % book_title, hln)
        elif hln.startswith(r"\date{"):
            hln = replace_content(
                r'{%s}' % datetime.date.today().isoformat(), hln)
        elif hln.startswith("pdftitle"):
            hln = replace_content(
                r'{%s}' % book_title, hln)
        master.write(hln + '\n')

    master.write("\\setcounter{page}{2}\n")
    master.write("\\tableofcontents\n")

    for section, text_files in SITE_STRUCTURE:
        master.write("\n\n\\part{%s}\n" % section)
        for filename in text_files:
            if filename.startswith('@'):
                continue
                #print "Not yet implemented: %s" % filename[1:]
                #page_title = filename[1:]
                #url = href_map[page_title]
                #build_menu_entry(page_title, url, section_head)
            else:
                basename = os.path.splitext(os.path.basename(filename))[0]
                basename = BASENAME_MAP.get(basename, basename)
                outname = basename + '.tex'
            write_chapter(master, titles[outname], outname)

    master.write("\\appendix\n")
    master.write("\\begin{appendix}\n")

    write_chapter(master, "Changes", chgname)
    write_chapter(master, "Generated API documentation", apidocsname)

    master.write("\\end{appendix}\n")
    master.write("\\end{document}\n")
                

if __name__ == '__main__':
    publish(sys.argv[1], sys.argv[2], sys.argv[3])
source-git / python-lxml

Source Code

Files