# The script builds the LaTeX documentation.
# Testing:
# python mklatex.py latex .. 1.0
from docstructure import SITE_STRUCTURE, BASENAME_MAP
import os, shutil, re, sys, datetime
# Name of the master LaTeX file that publish() writes into the output directory.
TARGET_FILE = "lxmldoc.tex"

# Command line options handed to the rst2latex script, joined into a single
# space separated string.  The commented-out entries are currently disabled.
RST2LATEX_OPTIONS = " ".join([
#    "--no-toc-backlinks",
    "--strip-comments",
    "--language en",
#    "--date",
    "--use-latex-footnotes",
    "--use-latex-citations",
    "--use-latex-toc",
    "--font-encoding=T1",
    "--output-encoding=utf-8",
    "--input-encoding=utf-8",
    "--graphicx-option=pdftex",
    ])

# XHTML namespace prefix map.
htmlnsmap = {"h" : "http://www.w3.org/1999/xhtml"}

# Bound ``sub`` methods of pre-compiled patterns; call as helper(repl, text).
# Matches '-', '_', '/', '.', whitespace and backslash characters.
replace_invalid = re.compile(r'[-_/.\s\\]').sub
# Matches one brace-delimited group "{...}" (no nesting).  Written as a raw
# string so that "\{" and "\}" are not parsed as (invalid) string escapes.
replace_content = re.compile(r"\{[^\}]*\}").sub
# Matches the ", amssymb" and "dvips, " fragments of package/option lists.
replace_epydoc_macros = re.compile(r'(,\s*amssymb|dvips\s*,\s*)').sub
# Matches "\usepackage{color}" and "\usepackage[...]{hyperref}".
replace_rst_macros = re.compile(r'(\\usepackage\{color}|\\usepackage\[[^]]*]\{hyperref})').sub
# Work on a module-private copy so that the mapping imported from
# docstructure stays untouched, then map the 'api' basename to 'lxmlapi'.
BASENAME_MAP = BASENAME_MAP.copy()
BASENAME_MAP['api'] = 'lxmlapi'
# LaTeX snippets
# Preamble start of the master document.  publish() writes this (followed by
# EPYDOC_IMPORT) in place of the "\documentclass" line of the header taken
# from the first converted document.
DOCUMENT_CLASS = r"""
\documentclass[10pt,english]{report}
\usepackage[a4paper]{geometry}
\usepackage{tabularx}
\usepackage{ifthen}
\usepackage[pdftex]{graphicx}
\parindent0pt
\parskip1ex
%%% Fallback definitions for Docutils-specific commands
% providelength (provide a length variable and set default, if it is new)
\providecommand*{\DUprovidelength}[2]{
\ifthenelse{\isundefined{#1}}{\newlength{#1}\setlength{#1}{#2}}{}
}
% docinfo (width of docinfo table)
\DUprovidelength{\DUdocinfowidth}{0.9\textwidth}
% titlereference role
\providecommand*{\DUroletitlereference}[1]{\textsl{#1}}
"""
# Written by publish() immediately before the "\begin{document}" header line;
# pulls in the macro file that build_pygments_macros() is asked to create.
PYGMENTS_IMPORT = r"""
\usepackage{fancyvrb}
\input{_part_pygments.tex}
"""
# Appended to DOCUMENT_CLASS by publish(); pulls in the macro file that
# copy_epydoc_macros() is asked to create.
EPYDOC_IMPORT = r"""
\input{_part_epydoc.tex}
"""
def write_chapter(master, title, filename):
    """Append a chapter that includes a postprocessed part file to *master*.

    master   -- writable file-like object of the master document
    title    -- chapter title
    filename -- name of the generated .tex file; the chapter inputs the
                sibling file whose base name is prefixed with "_part_",
                and that prefixed path is also used as the chapter label
    """
    directory, leaf = os.path.split(filename)
    part_path = os.path.join(directory, "_part_%s" % leaf)
    # An empty first and last element yields the leading and trailing
    # newline around the three LaTeX commands.
    chapter_lines = (
        "",
        r"\chapter{%s}" % title,
        r"\label{%s}" % part_path,
        r"\input{%s}" % part_path,
        "",
    )
    master.write("\n".join(chapter_lines))
# the program ----
def rest2latex(script, source_path, dest_path):
    """Convert a reST file to LaTeX by running *script* in a subprocess.

    script      -- path of the converter script, run with the current
                   Python interpreter (sys.executable)
    source_path -- reST input file, passed as the last command argument
    dest_path   -- file that receives the standard output of the script

    The command is passed as an argument list instead of a shell string, so
    paths containing spaces or shell metacharacters are handled correctly.
    A failing conversion does not raise; a warning is written to stderr.
    """
    import subprocess

    # RST2LATEX_OPTIONS is one space separated string ("--language en"
    # holds two arguments), so it has to be split into single arguments.
    command = [sys.executable, script]
    command.extend(RST2LATEX_OPTIONS.split())
    command.append(source_path)

    # Binary mode: the bytes emitted by the script are stored unchanged,
    # just like the former shell redirection did.
    with open(dest_path, "wb") as dest:
        status = subprocess.call(command, stdout=dest)
    if status != 0:
        sys.stderr.write("WARNING: converting %s failed with exit status %s\n"
                         % (source_path, status))
def build_pygments_macros(filename):
    """Write the LaTeX style definitions of Pygments to *filename*.

    The text comes from ``LatexFormatter().get_style_defs()`` and is
    followed by a single newline.
    """
    from pygments.formatters import LatexFormatter

    style_defs = LatexFormatter().get_style_defs()
    with open(filename, "w") as macro_file:
        macro_file.writelines((style_defs, '\n'))
def copy_epydoc_macros(src, dest, existing_header_lines):
    """Copy the preamble macros of the LaTeX file *src* to *dest*.

    src   -- LaTeX file to read
    dest  -- file to write the retained preamble lines to
    existing_header_lines -- container of header lines that are already
             part of the master document; "\\usepackage" lines found in it
             are not copied again

    Copying stops at the first line that starts with "%% generator" or
    "% generated by ", or that contains "\\begin{document}" or
    "\\makeindex".  Comment lines and lines containing "\\documentclass"
    or "{inputenc}" are dropped.  In "\\usepackage" lines mentioning
    "{hyperref}", 'black' is replaced by 'blue'.  Every written line is
    passed through replace_epydoc_macros().
    """
    stop_prefixes = ('%% generator', '% generated by ')
    stop_markers = ('\\begin{document}', '\\makeindex')

    # Context managers close both files even if an error occurs half-way.
    with open(src, 'r') as doc, open(dest, "w") as out:
        for line in doc:
            if line.startswith(stop_prefixes) \
                    or any(marker in line for marker in stop_markers):
                break
            if line.startswith('%') \
                    or r'\documentclass' in line \
                    or r'{inputenc}' in line:
                continue
            if line.startswith(r'\usepackage'):
                # Lines read from the file still carry their line ending,
                # whereas collected header lines may not; compare both forms
                # so that duplicates are recognised either way.
                if line in existing_header_lines \
                        or line.rstrip('\r\n') in existing_header_lines:
                    continue
                if '{hyperref}' in line:
                    line = line.replace('black', 'blue')
            out.write(replace_epydoc_macros('', line))
def noop(input):
    """Return *input* unchanged; default ``process_line`` hook."""
    return input


# Number of tex_postprocess() calls so far.  It is used to replace the
# "listcnt0" name in each processed document by a call-specific one.
counter_no = 0


def tex_postprocess(src_path, dest_path, want_header=False, process_line=noop):
    """
    Postprocessing of the LaTeX file generated from ReST.

    Reads file src_path and saves to dest_path only the true content
    (without the document header and footer) - so it is suitable
    to be used as part of a longer document.

    Everything up to and including the first line that starts with
    "\\maketitle" is treated as the header and is not written.  Of the
    remaining lines, those containing "\\end{document}" or
    "\\tableofcontents" and those starting with "%" are dropped, and
    writing stops at the first line containing
    "\\hypertarget{old-versions}".  Each line is passed through
    process_line() first, and "listcnt0" is replaced by a name that is
    specific to this call.

    Returns the tuple (title, header).  The header is the list of
    non-empty header lines (after process_line() and
    replace_rst_macros()) if want_header is set, and an empty list
    otherwise.

    Raises Exception if no (non-empty) title was found in src_path; the
    destination file has been written by then.
    """
    header = []
    add_header_line = header.append

    global counter_no
    counter_no = counter_no + 1
    counter_text = "listcnt%d" % counter_no

    search_title = re.compile(r'\\title{([^{}]*(?:{[^}]*})*)}').search
    skipping = re.compile(r'(\\end{document}|\\tableofcontents|^%)').search

    with open(src_path) as src:
        src_text = src.read()

    title = search_title(src_text)
    if title:
        # remove any commands from the title
        title = re.sub(r'\\\w+({[^}]*})?', '', title.group(1))

    iter_lines = iter(src_text.splitlines())

    # The context manager closes the output file even when process_line()
    # raises in the middle of the document.
    with open(dest_path, "w") as dest:
        # First pass: consume the document header up to "\maketitle".
        for l in iter_lines:
            l = process_line(l)
            if not l:
                continue
            if want_header:
                add_header_line(replace_rst_macros('', l))
            if l.startswith("\\maketitle"):
                break

        # Second pass: the same iterator continues behind the header.
        for l in iter_lines:
            l = process_line(l)
            if skipping(l):
                # To-Do minitoc instead of tableofcontents
                continue
            elif r"\hypertarget{old-versions}" in l:
                break
            elif "listcnt0" in l:
                l = l.replace("listcnt0", counter_text)
            dest.write(l + '\n')

    if not title:
        raise Exception("Bueee, no title in %s" % src_path)
    return title, header
def publish(dirname, lxml_path, release):
    """Generate the LaTeX sources of the complete documentation.

    dirname   -- output directory; created if it does not exist
    lxml_path -- root of the source tree; the reST files, 'rest2latex.py'
                 and 'pubkey.asc' are read from its 'doc' subdirectory and
                 'CHANGES.txt' from the root itself
    release   -- release string used for the book title and for the name
                 of the generated change log file

    Steps: copy the public key, write the Pygments macros, convert and
    postprocess every file listed in SITE_STRUCTURE, postprocess the API
    documentation ('api.tex', which must already exist in dirname),
    convert the change log and finally write the master file TARGET_FILE.
    """
    if not os.path.exists(dirname):
        os.mkdir(dirname)

    book_title = "lxml %s" % release

    doc_dir = os.path.join(lxml_path, 'doc')
    script = os.path.join(doc_dir, 'rest2latex.py')
    pubkey = os.path.join(doc_dir, 'pubkey.asc')

    shutil.copy(pubkey, dirname)

    # build pygments macros
    build_pygments_macros(os.path.join(dirname, '_part_pygments.tex'))

    # Used in postprocessing of generated LaTeX files
    header = []
    titles = {}

    replace_interdoc_hyperrefs = re.compile(
        r'\\href\{([^/}]+)[.]([^./}]+)\}').sub
    replace_docinternal_hyperrefs = re.compile(
        r'\\href\{\\#([^}]+)\}').sub
    replace_image_paths = re.compile(
        r'^(\\includegraphics{)').sub

    def build_hyperref(match):
        # Turn a relative "\href{name.ext}" into a reference that works
        # inside the single combined document.
        basename, extension = match.groups()
        outname = BASENAME_MAP.get(basename, basename)
        if '#' in extension:
            anchor = extension.split('#')[-1]
            return r"\hyperref[%s]" % anchor
        elif extension != 'html':
            return r'\href{http://lxml.de/%s.%s}' % (
                outname, extension)
        else:
            return r"\hyperref[_part_%s.tex]" % outname

    def fix_relative_hyperrefs(line):
        line = replace_image_paths(r'\1../html/', line)
        if r'\href' not in line:
            return line
        line = replace_interdoc_hyperrefs(build_hyperref, line)
        # The backslash must be doubled in the replacement template:
        # a bare "\h" is an unknown escape, which re rejects with
        # "bad escape" on Python 3.7 and later.
        return replace_docinternal_hyperrefs(r'\\hyperref[\1]', line)

    def tex_name(filename):
        # Name of the .tex file generated for a SITE_STRUCTURE entry.
        basename = os.path.splitext(os.path.basename(filename))[0]
        return BASENAME_MAP.get(basename, basename) + '.tex'

    # Building pages
    for section, text_files in SITE_STRUCTURE:
        for filename in text_files:
            if filename.startswith('@'):
                # entries prefixed with '@' are not converted
                continue

            outname = tex_name(filename)
            outpath = os.path.join(dirname, outname)
            path = os.path.join(doc_dir, filename)

            print("Creating %s" % outname)
            rest2latex(script, path, outpath)

            final_name = os.path.join(dirname, os.path.dirname(outname),
                                      "_part_%s" % os.path.basename(outname))

            # Only the header of the first converted document is kept; it
            # becomes the header of the master file.
            title, hd = tex_postprocess(outpath, final_name,
                                        want_header=not header,
                                        process_line=fix_relative_hyperrefs)
            if not header:
                header = hd
            titles[outname] = title

    # integrate generated API docs
    print("Integrating API docs")
    apidocsname = 'api.tex'
    apipath = os.path.join(dirname, apidocsname)
    tex_postprocess(apipath, os.path.join(dirname, "_part_%s" % apidocsname),
                    process_line=fix_relative_hyperrefs)
    copy_epydoc_macros(apipath, os.path.join(dirname, '_part_epydoc.tex'),
                       set(header))

    # convert CHANGES.txt
    print("Integrating ChangeLog")
    find_version_title = re.compile(
        r'(.*\\section\{)([0-9][^\} ]*)\s+\(([^)]+)\)(\}.*)').search

    def fix_changelog(line):
        # Rewrite "\section{<version> (<date>)}" headings and turn all
        # subsections into unnumbered ones.
        m = find_version_title(line)
        if m:
            line = "%sChanges in version %s, released %s%s" % m.groups()
        else:
            line = line.replace(r'\subsection{', r'\subsection*{')
        return line

    chgname = 'changes-%s.tex' % release
    chgpath = os.path.join(dirname, chgname)
    rest2latex(script,
               os.path.join(lxml_path, 'CHANGES.txt'),
               chgpath)
    tex_postprocess(chgpath, os.path.join(dirname, "_part_%s" % chgname),
                    process_line=fix_changelog)

    # Writing a master file
    print("Building %s\n" % TARGET_FILE)
    # The context manager makes sure the master file is flushed and closed.
    with open(os.path.join(dirname, TARGET_FILE), "w") as master:
        for hln in header:
            if hln.startswith(r"\documentclass"):
                hln = DOCUMENT_CLASS + EPYDOC_IMPORT
            elif hln.startswith(r"\begin{document}"):
                # pygments and epydoc support
                master.write(PYGMENTS_IMPORT)
            elif hln.startswith(r"\title{"):
                hln = replace_content(
                    r'{%s\\\\\\vspace{1cm}\\includegraphics[width=2.5cm]{../html/tagpython-big.png}}' % book_title, hln)
            elif hln.startswith(r"\date{"):
                hln = replace_content(
                    r'{%s}' % datetime.date.today().isoformat(), hln)
            elif hln.startswith("pdftitle"):
                hln = replace_content(
                    r'{%s}' % book_title, hln)
            master.write(hln + '\n')

        master.write("\\setcounter{page}{2}\n")
        master.write("\\tableofcontents\n")

        for section, text_files in SITE_STRUCTURE:
            master.write("\n\n\\part{%s}\n" % section)
            for filename in text_files:
                if filename.startswith('@'):
                    # entries prefixed with '@' have no chapter
                    continue
                outname = tex_name(filename)
                write_chapter(master, titles[outname], outname)

        master.write("\\appendix\n")
        master.write("\\begin{appendix}\n")

        write_chapter(master, "Changes", chgname)
        write_chapter(master, "Generated API documentation", apidocsname)

        master.write("\\end{appendix}\n")
        master.write("\\end{document}\n")
if __name__ == '__main__':
    # Expected arguments: output directory, lxml source path and release,
    # e.g. "python mklatex.py latex .. 1.0".  Additional arguments are
    # ignored.
    if len(sys.argv) < 4:
        sys.exit("Usage: %s <output_dir> <lxml_path> <release>" % sys.argv[0])
    publish(sys.argv[1], sys.argv[2], sys.argv[3])