diff --git a/lib/dbtexmf/core/commander.py b/lib/dbtexmf/core/commander.py index 6319200..5efa074 100644 --- a/lib/dbtexmf/core/commander.py +++ b/lib/dbtexmf/core/commander.py @@ -1,5 +1,6 @@ import os from subprocess import Popen, PIPE +from io import open class Command: """Contains the needed data to run a command""" @@ -59,7 +60,7 @@ class CommandRunner: if cmd.stdout == "PIPE": stdout = PIPE elif cmd.stdout: - stdout = open(cmd.stdout % kw, "w") + stdout = open(cmd.stdout % kw, "wb") if kw: args = [a % kw for a in cmd.arguments] else: args = cmd.arguments diff --git a/lib/dbtexmf/core/commander.py.enable-python3 b/lib/dbtexmf/core/commander.py.enable-python3 new file mode 100644 index 0000000..6319200 --- /dev/null +++ b/lib/dbtexmf/core/commander.py.enable-python3 @@ -0,0 +1,85 @@ +import os +from subprocess import Popen, PIPE + +class Command: + """Contains the needed data to run a command""" + def __init__(self, args, stdin=None, stdout=None, shell=False): + self.arguments = args + self.stdin = stdin + self.stdout = stdout + self.shell = shell + +class CommandRunner: + """ + Execute the differents registered commands in the specified order, + either independantly or as a pipe process chain if required + """ + def __init__(self, module_name="", log=None): + self.module_name = module_name + self.commands = [] + self.processes = [] + self.log = log + + def info(self, text): + if self.log: self.log.info(text) + + def set_name(self, module_name): + self.module_name = module_name + + def add_command(self, args, stdin=None, stdout=None, shell=False): + command = Command(args, stdin, stdout, shell) + self.commands.append(command) + return command + + def shcmd(self, args): + nargs = [] + for arg in args: + if len(arg.split()) > 1: arg = '"%s"' % arg + nargs.append(arg) + return " ".join(nargs) + + def run(self, kw=None): + if not(self.commands): + return + if not(kw): kw = {} + pipe_top = False + rc = 0 + for cmd in self.commands: + if rc != 0: + break + stdin, 
stdout = None, None + prev_pipe = None + if cmd.stdin == "PIPE": + if self.processes: + prev_pipe = self.processes[-1] + stdin = prev_pipe.stdout + else: + pipe_top = True + stdin = PIPE + if cmd.stdout == "PIPE": + stdout = PIPE + elif cmd.stdout: + stdout = open(cmd.stdout % kw, "w") + + if kw: args = [a % kw for a in cmd.arguments] + else: args = cmd.arguments + + self.info(" ".join(args)) + # Some commands work only in shell env (e.g. links), so take care + if cmd.shell: + p = Popen(self.shcmd(args), stdin=stdin, stdout=stdout, + shell=True) + else: + p = Popen(args, stdin=stdin, stdout=stdout) + self.processes.append(p) + + if stdin and prev_pipe: + # Close stdout to allow receiving SIGPIPE + prev_pipe.stdout.close() + if stdout != PIPE: + # Wait until the process is finished if not in a pipe chain + rc = p.wait() + if stdout: stdout.close() + + return rc + diff --git a/lib/dbtexmf/core/confparser.py b/lib/dbtexmf/core/confparser.py index bf0b931..fc01786 100644 --- a/lib/dbtexmf/core/confparser.py +++ b/lib/dbtexmf/core/confparser.py @@ -1,11 +1,13 @@ +from __future__ import print_function + import os import sys from xml.etree.ElementTree import ParseError -from xmlparser import XmlConfig -from txtparser import TextConfig -from imagedata import ImageConverterPool, ImageConverter -from imagedata import ImageFormatPool, FormatRule -from imagedata import image_setup +from dbtexmf.core.xmlparser import XmlConfig +from dbtexmf.core.txtparser import TextConfig +from dbtexmf.core.imagedata import ImageConverterPool, ImageConverter +from dbtexmf.core.imagedata import ImageFormatPool, FormatRule +from dbtexmf.core.imagedata import image_setup from dbtexmf.xslt.xsltconf import XsltCommandPool, XsltEngine from dbtexmf.xslt import xslt_setup @@ -76,16 +78,16 @@ class DbtexConfig: self.style_exts = ["", ".xml", ".specs", ".conf"] def warn(self, text): - print >>sys.stderr, text + print(text, file=sys.stderr) def fromfile(self, filename): try: self.fromxmlfile(filename) - 
except ParseError, e: + except ParseError as e: self.warn("Text configuration files are deprecated. "\ "Use the XML format instead") self.fromtxtfile(filename) - except Exception, e: + except Exception as e: raise e def fromxmlfile(self, filename): diff --git a/lib/dbtexmf/core/confparser.py.enable-python3 b/lib/dbtexmf/core/confparser.py.enable-python3 new file mode 100644 index 0000000..bf0b931 --- /dev/null +++ b/lib/dbtexmf/core/confparser.py.enable-python3 @@ -0,0 +1,117 @@ +import os +import sys +from xml.etree.ElementTree import ParseError +from xmlparser import XmlConfig +from txtparser import TextConfig +from imagedata import ImageConverterPool, ImageConverter +from imagedata import ImageFormatPool, FormatRule +from imagedata import image_setup +from dbtexmf.xslt.xsltconf import XsltCommandPool, XsltEngine +from dbtexmf.xslt import xslt_setup + + +class ConfigFactory: + """ + Build the actual objects that configure the other modules from the XML + parsed configuration, and publish them to the related modules + """ + def __init__(self, xmlconfig): + self.xmlconfig = xmlconfig + + def publish(self): + pool = self.imagedata_converter_config() + if pool: image_setup().converter_pool.prepend_pool(pool) + pool = self.imagedata_format_config() + if pool: image_setup().format_pool.prepend_pool(pool) + pool = self.xslt_config() + if pool: xslt_setup().prepend_pool(pool) + + def imagedata_format_config(self): + rules = self.xmlconfig.get("imagedata").get("formatrule", None) + if not(rules): + return None + pool = ImageFormatPool() + for rul in rules: + fmt = FormatRule(rul.imgsrc, rul.imgdst, rul.docformat, rul.backend) + pool.add_rule(fmt) + return pool + + def imagedata_converter_config(self): + converters = self.xmlconfig.get("imagedata").get("converter", None) + if not(converters): + return None + pool = ImageConverterPool() + for cv in converters: + imc = ImageConverter(cv.imgsrc, cv.imgdst, cv.docformat, cv.backend) + for cmd in cv.commands: + 
imc.add_command(cmd.args, stdin=cmd.stdin, stdout=cmd.stdout, + shell=cmd.shell) + pool.add_converter(imc) + return pool + + def xslt_config(self): + engines = self.xmlconfig.get("xslt").get("engine", None) + if not(engines): + return None + pool = XsltCommandPool() + for proc in engines: + if not(proc.commands): + continue + eng = XsltEngine(param_format=proc.param_format) + for cmd in proc.commands: + eng.add_command(cmd.args, stdin=cmd.stdin, stdout=cmd.stdout, + shell=cmd.shell) + pool.add_command_run(eng) + return pool + + +class DbtexConfig: + """ + Main configuration object, in charge to parse the configuration files + and populate the setup. + """ + def __init__(self): + self.options = [] + self.paths = [] + self.style_exts = ["", ".xml", ".specs", ".conf"] + + def warn(self, text): + print >>sys.stderr, text + + def fromfile(self, filename): + try: + self.fromxmlfile(filename) + except ParseError, e: + self.warn("Text configuration files are deprecated. "\ + "Use the XML format instead") + self.fromtxtfile(filename) + except Exception, e: + raise e + + def fromxmlfile(self, filename): + xmlconfig = XmlConfig() + xmlconfig.fromfile(filename) + self.options += xmlconfig.options() + factory = ConfigFactory(xmlconfig) + factory.publish() + + def fromtxtfile(self, filename): + txtconfig = TextConfig() + txtconfig.fromfile(filename) + self.options += txtconfig.options() + + def fromstyle(self, style, paths=None): + # First, find the related config file + if not paths: + paths = self.paths + + for p in paths: + for e in self.style_exts: + file = os.path.join(p, style + e) + if os.path.isfile(file): + self.fromfile(file) + return + + # If we are here nothing found + raise ValueError("'%s': style not found" % style) + diff --git a/lib/dbtexmf/core/dbtex.py b/lib/dbtexmf/core/dbtex.py index 92b84f2..b12fafe 100644 --- a/lib/dbtexmf/core/dbtex.py +++ b/lib/dbtexmf/core/dbtex.py @@ -2,16 +2,18 @@ # DbTex base class handling the compilation of a DocBook file via # XSL 
Transformation and some TeX engine compilation. # +from __future__ import print_function + import sys import os import re import shlex import tempfile import shutil -import urllib import glob import imp from optparse import OptionParser +from io import open from dbtexmf.core.txtparser import texinputs_parse, texstyle_parse from dbtexmf.core.confparser import DbtexConfig @@ -19,6 +21,11 @@ from dbtexmf.xslt import xslt import dbtexmf.core.logger as logger from dbtexmf.core.error import signal_error, failed_exit, dump_stack +try: + from urllib import pathname2url +except ImportError: + from urllib.request import pathname2url + def suffix_replace(path, oldext, newext=""): (root, ext) = os.path.splitext(path) @@ -29,9 +36,9 @@ def suffix_replace(path, oldext, newext=""): def path_to_uri(path): if os.name == 'nt': - return 'file:' + urllib.pathname2url(path).replace('|', ':', 1) + return 'file:' + pathname2url(path).replace('|', ':', 1) else: - return urllib.pathname2url(path) + return pathname2url(path) class Document: @@ -48,24 +55,36 @@ class Document: def has_subext(self, ext): return (os.path.splitext(self.basename)[1] == ext) - def __cmp__(self, other): - """ - Comparaison method mainly to check if the document is in a list - """ - if cmp(self.rawfile, other) == 0: - return 0 - if cmp(self.texfile, other) == 0: - return 0 - if cmp(self.binfile, other) == 0: - return 0 - return -1 + def __eq__(self, other): + if self.rawfile == other: + return True + if self.texfile == other: + return True + if self.binfile == other: + return True + return False + + def __ne__(self, other): + return not self.__eq__(other) + + def __lt__(self, other): + return self.__ne__(other) + + def __le__(self, other): + return False + + def __gt__(self, other): + return False + + def __ge__(self, other): + return False class DbTex: USE_MKLISTINGS = 1 xsl_header = \ -""" +u""" @@ -183,7 +202,7 @@ class DbTex: self.flags &= ~what def get_version(self): - f = file(os.path.join(self.topdir, "xsl", 
"version.xsl")) + f = open(os.path.join(self.topdir, "xsl", "version.xsl"), "rt", encoding="latin-1") versions = re.findall("]*>([^<]*)<", f.read()) f.close() if versions: @@ -196,11 +215,11 @@ class DbTex: self.xslbuild = self.xslmain return - f = file(wrapper, "w") + f = open(wrapper, "wt", encoding="latin-1") f.write(self.xsl_header) - f.write('\n' % path_to_uri(self.xslmain)) + f.write(u'\n' % path_to_uri(self.xslmain)) for xsluser in self.xslusers: - f.write('\n' % path_to_uri(xsluser)) + f.write(u'\n' % path_to_uri(xsluser)) # Reverse to set the latest parameter first (case of overriding) self.xslparams.reverse() @@ -231,8 +250,8 @@ class DbTex: self.listings, opts=self.xslopts, params=param) else: self.log.info("No external file support") - f = file(self.listings, "w") - f.write("\n") + f = open(self.listings, "wt", encoding="latin-1") + f.write(u"\n") f.close() def _single_setup(self): @@ -254,7 +273,7 @@ class DbTex: "Use the working directory") self.outputdir = self.cwdir - f = open(doclist) + f = open(doclist, "rt", encoding="latin-1") books = f.readlines() f.close() @@ -268,11 +287,11 @@ class DbTex: # set list self.log.info("Build the book set list...") xslset = "doclist.xsl" - f = file(xslset, "w") + f = open(xslset, "wt", encoding="latin-1") f.write(self.xsl_header) - f.write('\n' % path_to_uri(self.xslbuild)) - f.write('\n' % path_to_uri(self.xslset)) - f.write('\n') + f.write(u'\n' % path_to_uri(self.xslbuild)) + f.write(u'\n' % path_to_uri(self.xslset)) + f.write(u'\n') f.close() doclist = os.path.join(self.tmpdir, "doclist.txt") @@ -369,7 +388,7 @@ class DbTex: # Need to dump the stdin input, because of the two passes self.input = os.path.join(self.tmpdir, "stdin.xml") - f = open(self.input, "w") + f = open(self.input, "wt", encoding="latin-1") for line in sys.stdin: f.write(line) f.close() @@ -395,15 +414,15 @@ class DbTex: self.update_texinputs() # For easy debug - if self.debug and os.environ.has_key("TEXINPUTS"): + if self.debug and 
"TEXINPUTS" in os.environ: if os.name != "nt": - f = file("env_tex", "w") - f.write("TEXINPUTS=%s\nexport TEXINPUTS\n" % \ + f = open("env_tex", "wt") + f.write(u"TEXINPUTS=%s\nexport TEXINPUTS\n" % \ os.environ["TEXINPUTS"]) f.close() else: - f = file("env_tex.bat", "w") - f.write("set TEXINPUTS=%s\n" % os.environ["TEXINPUTS"]) + f = open("env_tex.bat", "wt") + f.write(u"set TEXINPUTS=%s\n" % os.environ["TEXINPUTS"]) f.close() # Build the tex file(s), and compile it(them) @@ -544,13 +563,13 @@ class DbTexCommand: if options.format: try: run.set_format(options.format) - except Exception, e: + except Exception as e: failed_exit("Error: %s" % e) # Always set the XSLT (default or not) try: run.set_xslt(options.xslt) - except Exception, e: + except Exception as e: failed_exit("Error: %s" % e) if options.xslopts: @@ -575,7 +594,7 @@ class DbTexCommand: if options.texstyle: try: xslparam, texpath = texstyle_parse(options.texstyle) - except Exception, e: + except Exception as e: failed_exit("Error: %s" % e) run.xslparams.append(xslparam) if texpath: run.texinputs.append(texpath) @@ -630,7 +649,7 @@ class DbTexCommand: if not(os.path.exists(options.tmpdir)): try: os.mkdir(options.tmpdir) - except Exception, e: + except Exception as e: failed_exit("Error: %s" % e) run.tmpdir_user = os.path.abspath(options.tmpdir) @@ -665,7 +684,7 @@ class DbTexCommand: if options.version: version = run.get_version() - print "%s version %s" % (self.prog, version) + print("%s version %s" % (self.prog, version)) if not(args): sys.exit(0) @@ -682,14 +701,14 @@ class DbTexCommand: try: conf.paths = self.get_config_paths() conf.fromstyle(options.style) - except Exception, e: + except Exception as e: failed_exit("Error: %s" % e) if options.config: try: for config in options.config: conf.fromfile(config) - except Exception, e: + except Exception as e: failed_exit("Error: %s" % e) if conf.options: @@ -735,7 +754,7 @@ class DbTexCommand: # Try to buid the file try: run.compile() - except Exception, 
e: + except Exception as e: signal_error(self, e) failed_exit("Error: %s" % e) diff --git a/lib/dbtexmf/core/dbtex.py.enable-python3 b/lib/dbtexmf/core/dbtex.py.enable-python3 new file mode 100644 index 0000000..92b84f2 --- /dev/null +++ b/lib/dbtexmf/core/dbtex.py.enable-python3 @@ -0,0 +1,741 @@ +# +# DbTex base class handling the compilation of a DocBook file via +# XSL Transformation and some TeX engine compilation. +# +import sys +import os +import re +import shlex +import tempfile +import shutil +import urllib +import glob +import imp +from optparse import OptionParser + +from dbtexmf.core.txtparser import texinputs_parse, texstyle_parse +from dbtexmf.core.confparser import DbtexConfig +from dbtexmf.xslt import xslt +import dbtexmf.core.logger as logger +from dbtexmf.core.error import signal_error, failed_exit, dump_stack + + +def suffix_replace(path, oldext, newext=""): + (root, ext) = os.path.splitext(path) + if ext == oldext: + return (root+newext) + else: + return (path+newext) + +def path_to_uri(path): + if os.name == 'nt': + return 'file:' + urllib.pathname2url(path).replace('|', ':', 1) + else: + return urllib.pathname2url(path) + + +class Document: + """ + Wrapper structure of the files built during the compilation per document + """ + def __init__(self, filename, binfmt="pdf"): + self.inputname = filename + self.basename = os.path.splitext(filename)[0] + self.rawfile = self.basename + ".rtex" + self.texfile = self.basename + ".tex" + self.binfile = self.basename + "." 
+ binfmt + + def has_subext(self, ext): + return (os.path.splitext(self.basename)[1] == ext) + + def __cmp__(self, other): + """ + Comparaison method mainly to check if the document is in a list + """ + if cmp(self.rawfile, other) == 0: + return 0 + if cmp(self.texfile, other) == 0: + return 0 + if cmp(self.binfile, other) == 0: + return 0 + return -1 + + +class DbTex: + USE_MKLISTINGS = 1 + + xsl_header = \ +""" + + \n""" + + def __init__(self, base=""): + self.name = None + self.debug = 0 + self.verbose = 0 + if base: + self.set_base(base) + self.xslopts = [] + self.xslparams = [] + self.xslusers = [] + self.flags = self.USE_MKLISTINGS + self.stdindir = "" + self.inputdir = "" + self.input = "" + self.input_format = "xml" + self.outputdir = "" + self.output = "" + self.format = "pdf" + self.tmpdir = "" + self.tmpdir_user = None + self.fig_paths = [] + self.bib_paths = [] + self.bst_paths = [] + self.texinputs = [] + self.texbatch = 1 + self.texpost = "" + self.fig_format = "" + self.backend = "" + + # Temporary files + self.documents = [] + self.interms = [] + self.included = [] + self.basefile = "" + self.rawfile = "" + + # Engines to use + self.runtex = None + self.rawtex = None + self.xsltproc = None + self.sgmlxml = None + + def set_base(self, topdir): + self.topdir = os.path.realpath(topdir) + self.xslmain = os.path.join(self.topdir, "xsl", "docbook.xsl") + self.xsllist = os.path.join(self.topdir, "xsl", "common", "mklistings.xsl") + self.xslset = os.path.join(self.topdir, "xsl", "common", "mkdoclist.xsl") + self.texdir = os.path.join(self.topdir, "texstyle") + self.texlocal = "" + self.confdir = os.path.join(self.topdir, "confstyle") + + def update_texinputs(self): + sep = os.pathsep + # Get a uniform list of paths (not a list of lists) + ti = [] + for t in self.texinputs: + ti += t.split(sep) + + # Systematically put the package style in TEXINPUTS + ti_opts = ti + [self.texdir + "//"] + + # The original environment variable + texinputs = 
os.getenv("TEXINPUTS") or "" + ti_env = texinputs.split(sep) + + # Find where system default is in the paths + try: + syspos = ti_env.index('') + except: + # By default system has precedence (i.e. is the first one) + ti_env = [''] + ti_env + syspos = 0 + + ti_before = ti_env[:syspos] + ti_after = ti_env[syspos+1:] + + # Paths passed by options have no precedence over the system + ti_after = ti_opts + ti_after + + # Texlocal has precedence over the standard (and added) paths + if self.texlocal: + ti_before = [ self.texlocal + "//" ] + ti_before + + # Export the whole paths + texinputs = sep.join(ti_before + [''] + ti_after) + os.environ["TEXINPUTS"] = texinputs + + def set_xslt(self, xsltmod=None): + # Set the XSLT to use. Set a default XSLT if none specified. + # One can replace an already defined XSLT if explicitely required. + if not(xsltmod): + if self.xsltproc: + return + xsltmod = "xsltproc" + self.xsltproc = xslt.load(xsltmod) + + def set_backend(self): + # Set the backend to use or retrieve the default one + if self.backend: + self.runtex.set_backend(self.backend) + else: + self.backend = self.runtex.get_backend() + + def set_format(self, format): + if not(format in ("rtex", "tex", "dvi", "ps", "pdf")): + raise ValueError("unknown format '%s'" % format) + else: + self.format = format + + def unset_flags(self, what): + self.flags &= ~what + + def get_version(self): + f = file(os.path.join(self.topdir, "xsl", "version.xsl")) + versions = re.findall("]*>([^<]*)<", f.read()) + f.close() + if versions: + return versions[0].strip() + else: + return "unknown" + + def build_stylesheet(self, wrapper="custom.xsl"): + if not(self.xslparams or self.xslusers): + self.xslbuild = self.xslmain + return + + f = file(wrapper, "w") + f.write(self.xsl_header) + f.write('\n' % path_to_uri(self.xslmain)) + for xsluser in self.xslusers: + f.write('\n' % path_to_uri(xsluser)) + + # Reverse to set the latest parameter first (case of overriding) + self.xslparams.reverse() + for param 
in self.xslparams: + v = param.split("=", 1) + f.write('' % v[0]) + if len(v) == 2: + f.write('%s' % v[1]) + f.write('\n') + + f.write('\n') + f.close() + self.xslbuild = os.path.realpath(wrapper) + + def make_xml(self): + self.log.info("Build the XML file...") + xmlfile = self.basefile + ".xml" + self.sgmlxml.run(self.input, xmlfile) + self.input = xmlfile + + def make_listings(self): + self.listings = os.path.join(self.tmpdir, "listings.xml") + if (self.flags & self.USE_MKLISTINGS): + self.log.info("Build the listings...") + param = {"current.dir": self.inputdir} + self.xsltproc.use_catalogs = 0 + self.xsltproc.run(self.xsllist, self.input, + self.listings, opts=self.xslopts, params=param) + else: + self.log.info("No external file support") + f = file(self.listings, "w") + f.write("\n") + f.close() + + def _single_setup(self): + # If not specified the output name can be deduced from the input one: + # /path/to/input.{xml|sgml} -> /path/to/input.{tex|pdf|dvi|ps} + if not(self.output): + output = suffix_replace(self.input, "."+self.input_format, + ".%s" % self.format) + self.output = output + + self.documents.append(Document(self.basefile + \ + "." + self.input_format, + binfmt=self.format)) + + def _multiple_setup(self, doclist): + # If not specified, output the chunked books in the working dir + if not(self.outputdir): + self.log.info("No specified output dir (-O). "\ + "Use the working directory") + self.outputdir = self.cwdir + + f = open(doclist) + books = f.readlines() + f.close() + + for b in books: + d = Document(b.strip() + ".tex", binfmt=self.format) + self.documents.append(d) + + def build_doclist(self): + # The stylesheet must include the building stylesheets to have the + # actual parameter values (e.g. 
set.book.num) needed to give the book + # set list + self.log.info("Build the book set list...") + xslset = "doclist.xsl" + f = file(xslset, "w") + f.write(self.xsl_header) + f.write('\n' % path_to_uri(self.xslbuild)) + f.write('\n' % path_to_uri(self.xslset)) + f.write('\n') + f.close() + + doclist = os.path.join(self.tmpdir, "doclist.txt") + self.xsltproc.use_catalogs = 0 + self.xsltproc.run(xslset, self.input, doclist, opts=self.xslopts) + + # If is missing or is empty, there's no set, or only one + # book from the set is compiled + if os.path.isfile(doclist) and os.path.getsize(doclist) > 0: + self._multiple_setup(doclist) + else: + self._single_setup() + + def make_rawtex(self): + if len(self.documents) == 1: + self.rawfile = self.documents[0].rawfile + else: + self.rawfile = "output.rtex" + + param = {"listings.xml": self.listings, + "current.dir": self.inputdir} + self.xsltproc.use_catalogs = 1 + self.xsltproc.run(self.xslbuild, self.input, + self.rawfile, opts=self.xslopts, params=param) + + # Now, find the intermediate raw files + rawfiles = glob.glob("*.rtex") + for rawfile in rawfiles: + if not(rawfile in self.documents): + d = Document(rawfile, binfmt=self.format) + if d.has_subext(".input"): + self.included.append(d) + else: + self.interms.append(d) + + def make_tex(self): + self.rawtex.set_format(self.format, self.backend) + if self.fig_format: + self.rawtex.fig_format(self.fig_format) + + # By default figures are relative to the source file directory + self.rawtex.set_fig_paths([self.inputdir] + self.fig_paths) + + for d in self.documents + self.interms + self.included: + self.rawtex.parse(d.rawfile, d.texfile) + + def make_bin(self): + self.runtex.texpost = self.texpost + self.runtex.set_fig_paths([self.inputdir] + self.fig_paths) + self.runtex.set_bib_paths([self.inputdir] + self.bib_paths, + [self.inputdir] + self.bst_paths) + + # Build the intermediate files and (after) the main documents + for d in self.interms + self.documents: + 
self.log.info("Build %s" % d.binfile) + self.runtex.compile(d.texfile, d.binfile, self.format, + batch=self.texbatch) + # Only reinit, to not lose the produced working files + # used to track the dependencies on other documents + self.runtex.reinit() + + def compile(self): + self.set_xslt() + self.set_backend() + self.cwdir = os.getcwd() + self.tmpdir = self.tmpdir_user or tempfile.mkdtemp() + if self.input: + self.inputdir = os.path.dirname(self.input) + else: + self._stdin_write() + os.chdir(self.tmpdir) + try: + donefiles = self._compile() + if len(donefiles) == 1: + shutil.move(donefiles[0], self.output) + self.log.info("'%s' successfully built" % \ + os.path.basename(self.output)) + else: + for d in donefiles: + shutil.move(d, self.outputdir) + donefiles.sort() + self.log.info("Files successfully built in '%s':\n%s" % \ + (self.outputdir, "\n".join(donefiles))) + finally: + os.chdir(self.cwdir) + if not(self.debug): + shutil.rmtree(self.tmpdir) + else: + self.log.info("%s not removed" % self.tmpdir) + + def _stdin_write(self): + # Find out the stdin working directory + self.inputdir = self.stdindir or self.cwdir + + # Need to dump the stdin input, because of the two passes + self.input = os.path.join(self.tmpdir, "stdin.xml") + f = open(self.input, "w") + for line in sys.stdin: + f.write(line) + f.close() + + def _compile(self): + # The temporary output file + tmpout = os.path.basename(self.input) + for s in (" ", "\t"): + tmpout = tmpout.replace(s, "_") + self.basefile = suffix_replace(tmpout, "." 
+ self.input_format) + + # Convert SGML to XML if needed + if self.input_format == "sgml": + self.make_xml() + + # Build the user XSL stylesheet if needed + self.build_stylesheet() + + # List the documents to build + self.build_doclist() + + # Refresh the TEXINPUTS + self.update_texinputs() + + # For easy debug + if self.debug and os.environ.has_key("TEXINPUTS"): + if os.name != "nt": + f = file("env_tex", "w") + f.write("TEXINPUTS=%s\nexport TEXINPUTS\n" % \ + os.environ["TEXINPUTS"]) + f.close() + else: + f = file("env_tex.bat", "w") + f.write("set TEXINPUTS=%s\n" % os.environ["TEXINPUTS"]) + f.close() + + # Build the tex file(s), and compile it(them) + self.make_listings() + self.make_rawtex() + if self.format == "rtex": + return [ d.rawfile for d in self.documents ] + + self.make_tex() + if self.format == "tex": + return [ d.texfile for d in self.documents ] + + self.make_bin() + return [ d.binfile for d in self.documents ] + + +# +# Command entry point +# +class DbTexCommand: + def __init__(self, base): + prog = os.path.splitext(os.path.basename(sys.argv[0]))[0] + usage = "%s [options] file" % prog + parser = OptionParser(usage=usage) + parser.add_option("-b", "--backend", + help="Backend driver to use. The available drivers" + " are 'pdftex' (default), 'dvips' and 'xetex'.") + parser.add_option("-B", "--no-batch", action="store_true", + help="All the tex output is printed") + parser.add_option("-c", "-S", "--config", action="append", + help="Configuration file") + parser.add_option("-C", "--changedir", + help="Standard input working directory") + parser.add_option("-d", "--debug", action="store_true", + help="Debug mode. 
Keep the temporary directory in " + "which %s actually works" % prog) + parser.add_option("-D", "--dump", action="store_true", + help="Dump error stack (debug purpose)") + parser.add_option("-e", "--indexstyle", + help="Index Style file to pass to makeindex") + parser.add_option("-f", "--fig-format", + help="Input figure format, used when not deduced " + "from figure extension") + parser.add_option("-F", "--input-format", + help="Input file format: sgml, xml. (default=xml)") + parser.add_option("-i", "--texinputs", action="append", + help="Path added to TEXINPUTS") + parser.add_option("-I", "--fig-path", action="append", + dest="fig_paths", metavar="FIG_PATH", + help="Additional lookup path of the figures") + parser.add_option("-l", "--bst-path", action="append", + dest="bst_paths", metavar="BST_PATH", + help="Bibliography style file path") + parser.add_option("-L", "--bib-path", action="append", + dest="bib_paths", metavar="BIB_PATH", + help="BibTeX database path") + parser.add_option("-m", "--xslt", + help="XSLT engine to use. (default=xsltproc)") + parser.add_option("-o", "--output", dest="output", + help="Output filename. " + "When not used, the input filename " + "is used, with the suffix of the output format") + parser.add_option("-O", "--output-dir", + help="Output directory for the built books." + " When not defined, the current working " + "directory is used. Option used only for " + "a document having a ") + parser.add_option("-p", "--xsl-user", action="append", + help="XSL user configuration file to use") + parser.add_option("-P", "--param", dest="xslparams", + action="append", metavar="PARAM=VALUE", + help="Set an XSL parameter value from command line") + parser.add_option("-q", "--quiet", action="store_true", + help="Less verbose, showing only error messages") + parser.add_option("-r", "--texpost", metavar="SCRIPT", + help="Script called at the very end of the tex " + "compilation. 
Its role is to modify the tex file " + "or one of the compilation file before the last " + "round.") + parser.add_option("-s", "--texstyle", metavar="STYFILE", + help="Latex style to apply. It can be a package name, or " + "directly a package path that must ends with " + "'.sty'") + parser.add_option("-t", "--type", dest="format", + help="Output format. Available formats:\n" + "tex, dvi, ps, pdf (default=pdf)") + parser.add_option("--dvi", action="store_true", dest="format_dvi", + help="DVI output. Equivalent to -tdvi") + parser.add_option("--pdf", action="store_true", dest="format_pdf", + help="PDF output. Equivalent to -tpdf") + parser.add_option("--ps", action="store_true", dest="format_ps", + help="PostScript output. Equivalent to -tps") + parser.add_option("-T", "--style", + help="Predefined output style") + parser.add_option("--tmpdir", + help="Temporary working directory to use (for debug only)") + parser.add_option("-v", "--version", action="store_true", + help="Print the %s version" % prog) + parser.add_option("-V", "--verbose", action="store_true", + help="Verbose mode, showing the running commands") + parser.add_option("-x", "--xslt-opts", dest="xslopts", + action="append", metavar="XSLT_OPTIONS", + help="Arguments directly passed to the XSLT engine") + parser.add_option("-X", "--no-external", action="store_true", + help="Disable the external text file support used for " + "some callout processing") + + self.parser = parser + self.base = base + self.prog = prog + # The actual engine to use is unknown + self.run = None + + def load_plugin(self, pathname): + moddir, modname = os.path.split(pathname) + try: + filemod, path, descr = imp.find_module(modname, [moddir]) + except ImportError: + try: + filemod, path, descr = imp.find_module(modname) + except ImportError: + failed_exit("Error: '%s' module not found" % modname) + mod = imp.load_module(modname, filemod, path, descr) + filemod.close() + return mod + + def run_setup(self, options): + run = self.run + 
+ if not(options.format): + if options.format_pdf: + options.format = "pdf" + elif options.format_ps: + options.format = "ps" + elif options.format_dvi: + options.format = "dvi" + + if options.format: + try: + run.set_format(options.format) + except Exception, e: + failed_exit("Error: %s" % e) + + # Always set the XSLT (default or not) + try: + run.set_xslt(options.xslt) + except Exception, e: + failed_exit("Error: %s" % e) + + if options.xslopts: + for o in options.xslopts: + run.xslopts += shlex.split(o) + + if options.xslparams: + run.xslparams += options.xslparams + + if options.debug: + run.debug = options.debug + + if options.fig_paths: + run.fig_paths += [os.path.realpath(p) for p in options.fig_paths] + + if options.bib_paths: + run.bib_paths += [os.path.realpath(p) for p in options.bib_paths] + + if options.bst_paths: + run.bst_paths += [os.path.realpath(p) for p in options.bst_paths] + + if options.texstyle: + try: + xslparam, texpath = texstyle_parse(options.texstyle) + except Exception, e: + failed_exit("Error: %s" % e) + run.xslparams.append(xslparam) + if texpath: run.texinputs.append(texpath) + + if options.indexstyle: + run.runtex.index_style = os.path.abspath(options.indexstyle) + + if options.texinputs: + for texinputs in options.texinputs: + run.texinputs += texinputs_parse(texinputs) + + if options.fig_format: + run.fig_format = options.fig_format + + if options.input_format: + run.input_format = options.input_format + + if options.no_batch: + run.texbatch = 0 + + if options.backend: + run.backend = options.backend + + if options.xsl_user: + for xfile in options.xsl_user: + xsluser = os.path.realpath(xfile) + if not(os.path.isfile(xsluser)): + failed_exit("Error: '%s' does not exist" % options.xsl_user) + run.xslusers.append(xsluser) + + if options.texpost: + is_plugin = options.texpost.startswith("plugin:") + if is_plugin: + path = self.load_plugin(options.texpost[len("plugin:"):]) + else: + path = os.path.realpath(options.texpost) + if 
not(os.path.isfile(path)): + failed_exit("Error: '%s' does not exist" % options.texpost) + run.texpost = path + + if options.no_external: + run.unset_flags(run.USE_MKLISTINGS) + + if options.verbose: + run.verbose = options.verbose + + if options.quiet: + run.verbose = logger.QUIET + run.xslparams.append("output.quietly=1") + + if options.tmpdir: + if not(os.path.exists(options.tmpdir)): + try: + os.mkdir(options.tmpdir) + except Exception, e: + failed_exit("Error: %s" % e) + run.tmpdir_user = os.path.abspath(options.tmpdir) + + if options.dump: + dump_stack() + + def get_config_paths(self): + # Allows user directories where to look for configuration files + paths = [os.getcwd()] + paths.append(os.path.expanduser(os.path.join("~", "."+self.prog))) + + # Unix specific system-wide config files + if "posix" in sys.builtin_module_names: + paths.append(os.path.join("/etc", self.prog)) + + # Last but not least, the tool config dir + paths.append(self.run.confdir) + + # Optionally the paths from an environment variable + conf_paths = os.getenv("DBLATEX_CONFIG_FILES") + if not(conf_paths): + return paths + + paths += conf_paths.split(os.pathsep) + return paths + + def main(self): + (options, args) = self.parser.parse_args() + + run = self.run + parser = self.parser + + if options.version: + version = run.get_version() + print "%s version %s" % (self.prog, version) + if not(args): + sys.exit(0) + + # At least the input file is expected + if not(args): + parser.parse_args(args=["-h"]) + + # Load the specified configurations + conf = DbtexConfig() + if options.dump: + dump_stack() + + if options.style: + try: + conf.paths = self.get_config_paths() + conf.fromstyle(options.style) + except Exception, e: + failed_exit("Error: %s" % e) + + if options.config: + try: + for config in options.config: + conf.fromfile(config) + except Exception, e: + failed_exit("Error: %s" % e) + + if conf.options: + options2, args2 = parser.parse_args(conf.options) + self.run_setup(options2) + + # 
Now apply the command line setup + self.run_setup(options) + + # Verbose mode + run.log = logger.logger(self.prog, run.verbose) + + # Data from standard input? + if args[0] == "-": + if not(options.output): + failed_exit("Error: -o expected when input from stdin") + input = "" + if options.changedir: + run.stdindir = os.path.realpath(options.changedir) + else: + input = os.path.realpath(args[0]) + + # Output file in case of single document (main case) + if not(options.output): + output = None + else: + output = os.path.realpath(options.output) + + # Output directory in case of chunked books (from a set) + if not(options.output_dir): + outputdir = None + else: + # Check the output dir is OK + outputdir = os.path.realpath(options.output_dir) + if not(os.path.isdir(outputdir)): + failed_exit("Error: '%s' is not a directory" %\ + options.output_dir) + + run.input = input + run.output = output + run.outputdir = outputdir + + # Try to buid the file + try: + run.compile() + except Exception, e: + signal_error(self, e) + failed_exit("Error: %s" % e) + diff --git a/lib/dbtexmf/core/error.py b/lib/dbtexmf/core/error.py index 4bf44ee..2d71599 100644 --- a/lib/dbtexmf/core/error.py +++ b/lib/dbtexmf/core/error.py @@ -4,6 +4,8 @@ # error handler. # - A general API. # +from __future__ import print_function + import sys import traceback @@ -49,7 +51,7 @@ def signal_error(*args, **kwargs): def failure_track(msg): global _dump_stack - print >>sys.stderr, (msg) + print((msg), file=sys.stderr) if _dump_stack: traceback.print_exc() diff --git a/lib/dbtexmf/core/error.py.enable-python3 b/lib/dbtexmf/core/error.py.enable-python3 new file mode 100644 index 0000000..4bf44ee --- /dev/null +++ b/lib/dbtexmf/core/error.py.enable-python3 @@ -0,0 +1,63 @@ +# +# Dblatex Error Handler wrapper providing: +# - The ErrorHandler class definition, that must be the parent of any actual +# error handler. +# - A general API. 
+# +import sys +import traceback + +class ErrorHandler(object): + """ + Object in charge to handle any error occured during the dblatex + transformation process. The first mandatory argument is the + that signaled the error. + """ + def __init__(self): + pass + + def signal(self, object, *args, **kwargs): + failure_track("Unexpected error occured") + + +_current_handler = None +_dump_stack = False + + +# +# Dblatex Error Handler API +# +# In a complex use of the API, a locking mechanism (thread.lock) should +# be used. The current implementation assumes that setup is done before +# any get(). +# +def get_errhandler(): + global _current_handler + # If nothing set, use a default handler that does nothing + if not(_current_handler): + _current_handler = ErrorHandler() + return _current_handler + +def set_errhandler(handler): + global _current_handler + if not(isinstance(handler, ErrorHandler)): + raise ValueError("%s is not an ErrorHandler" % handler) + _current_handler = handler + +def signal_error(*args, **kwargs): + get_errhandler().signal(*args, **kwargs) + +def failure_track(msg): + global _dump_stack + print >>sys.stderr, (msg) + if _dump_stack: + traceback.print_exc() + +def failed_exit(msg, rc=1): + failure_track(msg) + sys.exit(rc) + +def dump_stack(): + global _dump_stack + _dump_stack = True + diff --git a/lib/dbtexmf/core/imagedata.py b/lib/dbtexmf/core/imagedata.py index ebbd5c7..d01d93a 100644 --- a/lib/dbtexmf/core/imagedata.py +++ b/lib/dbtexmf/core/imagedata.py @@ -3,9 +3,13 @@ import os import re import shutil import logging -import urllib from dbtexmf.core.error import signal_error -from commander import CommandRunner +from dbtexmf.core.commander import CommandRunner + +try: + from urllib import url2pathname +except ImportError: + from urllib.request import url2pathname class ObjectFilter: """ @@ -258,8 +262,10 @@ class Imagedata: backend=backend) def convert(self, fig): + fig = fig.decode("utf-8") + # Translate the URL to an actual local path - fig 
= urllib.url2pathname(fig) + fig = url2pathname(fig) # Always use '/' in path: work even on windows and is required by tex if os.path.sep != '/': fig = fig.replace(os.path.sep, '/') @@ -273,7 +279,7 @@ class Imagedata: return fig # Check if this image has been already converted - if self.converted.has_key(realfig): + if realfig in self.converted: self.log.info("Image '%s' already converted as %s" % \ (fig, self.converted[realfig])) return self.converted[realfig] @@ -306,7 +312,7 @@ class Imagedata: conv.log = self.log conv.convert(realfig, newfig, self.output_format) self.converted[realfig] = newfig - return newfig + return self._path_encode(newfig) def _safe_file(self, fig, realfig, ext): """ @@ -316,7 +322,7 @@ class Imagedata: # Encode to expected output format. If encoding is OK and # supported by tex, just return the encoded path newfig = self._path_encode(fig) - if newfig and newfig.find(" ") == -1: + if newfig and newfig.find(b" ") == -1: return newfig # Added to the converted list @@ -326,17 +332,17 @@ class Imagedata: # Do the copy shutil.copyfile(realfig, newfig) - return newfig + return self._path_encode(newfig) def _path_encode(self, fig): # Actually, only ASCII characters are sure to match filesystem encoding # so let's be conservative - if self.output_encoding == "utf8": - return fig + if self.output_encoding == "utf-8": + return fig.encode("utf-8") try: - newfig = fig.decode("utf8").encode("ascii") + newfig = fig.encode("ascii") except: - newfig = "" + newfig = b"" return newfig def scanformat(self, fig): diff --git a/lib/dbtexmf/core/imagedata.py.enable-python3 b/lib/dbtexmf/core/imagedata.py.enable-python3 new file mode 100644 index 0000000..ebbd5c7 --- /dev/null +++ b/lib/dbtexmf/core/imagedata.py.enable-python3 @@ -0,0 +1,383 @@ +import sys +import os +import re +import shutil +import logging +import urllib +from dbtexmf.core.error import signal_error +from commander import CommandRunner + +class ObjectFilter: + """ + Its purpose is to select 
some objects from a list according to specified + criterions. It assumes that '*' applied to a criterion means 'any'. + """ + def __init__(self): + pass + + def _re_multi_or_star(self, searched): + if not(searched): + searched = r"\w*" + else: + s = searched.split() + #searched = "|".join(["(?<=[/ ])%s" % p for p in s]) + searched = "|".join(["%s" % p for p in s]) + searched += r"|\*" + return "("+searched+")" + + def select(self, object_list, **filter_criterions): + for criterion, value in filter_criterions.items(): + filter_criterions[criterion] = self._re_multi_or_star(value) + + founds = [] + for obj in object_list: + object_criterions = obj.criterions() + for criterion, re_expr in filter_criterions.items(): + data = object_criterions.get(criterion, "") + m = re.search(re_expr, data) + #print "Lookup2:", criterion, re_expr, data, not(m is None) + if not(m): break + + if m: founds.append(obj) + #print "Lookup2: found %d" % len(founds) + return founds + + +class PoolManager: + def __init__(self): + self._used_pool = None + self._pending_pools = [] + + def set_pool(self, pool): + self._used_pool = pool + for p in self._pending_pools: + pool.preprend(p) + self._pending_pools = [] + + def prepend_pool(self, pool): + if self._used_pool: + self._used_pool.prepend(pool) + else: + self._pending_pools.append(pool) + +class ImageSetup: + """ + Central imagedata setup, filled by default object configurations and + by the XML configuration + """ + def __init__(self): + self.converter_pool = PoolManager() + self.format_pool = PoolManager() + +_image_setup = ImageSetup() + +def image_setup(): + global _image_setup + return _image_setup + + +# +# Objects to convert an image format to another. Actually use the underlying +# tools. 
+# +class ImageConverter: + _log = logging.getLogger("dblatex") + + def __init__(self, imgsrc, imgdst="", docformat="", backend=""): + self.imgsrc = imgsrc + self.imgdst = imgdst or "*" + self.docformat = docformat or "*" + self.backend = backend or "*" + self.command = CommandRunner(log=self._log) + + def criterions(self): + return { "imgsrc": self.imgsrc, + "imgdst": self.imgdst, + "docformat": self.docformat, + "backend": self.backend } + + def add_command(self, *args, **kwargs): + self.command.add_command(*args, **kwargs) + + def convert(self, input, output, format, doexec=1): + rc = self.command.run(kw={"input": input, "output": output, + "dst": format}) + if rc != 0: signal_error(self, "") + +class ImageConverterPool: + def __init__(self): + self.converters = [] + self._filter = ObjectFilter() + + def add_converter(self, converter): + self.converters.append(converter) + + def extend(self, other): + self.converters.extend(other.converters) + + def prepend(self, other): + self.converters = other.converters + self.converters + + def get_converters(self, imgsrc="", imgdst="", docformat="", backend=""): + founds = self._filter.select(self.converters, + imgsrc=imgsrc, + imgdst=imgdst, + docformat=docformat, + backend=backend) + return founds + + +class ImageConverters(ImageConverterPool): + def __init__(self): + ImageConverterPool.__init__(self) + # Default setup + self.add_converter(GifConverter("gif")) + self.add_converter(EpsConverter("eps", "pdf")) + self.add_converter(EpsConverter("eps", "png")) + self.add_converter(FigConverter("fig", "pdf")) + self.add_converter(FigConverter("fig", "png")) + self.add_converter(SvgConverter("svg")) + + # Register as main pool + image_setup().converter_pool.set_pool(self) + + +class GifConverter(ImageConverter): + def __init__(self, imgsrc, imgdst="", docformat="", backend=""): + ImageConverter.__init__(self, imgsrc="gif bmp", imgdst="*") + self.add_command(["convert", "%(input)s", "%(output)s"]) + +class 
EpsConverter(ImageConverter): + def __init__(self, imgsrc, imgdst="", docformat="", backend=""): + ImageConverter.__init__(self, imgsrc="eps", imgdst=imgdst) + if imgdst == "pdf": + self.add_command(["epstopdf", "--outfile=%(output)s", "%(input)s"], + shell=True) + elif imgdst == "png": + self.add_command(["convert", "%(input)s", "%(output)s"]) + +class FigConverter(ImageConverter): + def __init__(self, imgsrc, imgdst="", docformat="", backend=""): + ImageConverter.__init__(self, imgsrc="fig", imgdst=imgdst) + self.add_command(["fig2dev", "-L", "eps", "%(input)s"], + stdout="%(output)s") + if imgdst != "eps": + self.conv_next = EpsConverter("eps", imgdst=imgdst) + else: + self.conv_next = None + + def convert(self, input, output, format): + if self.conv_next: + epsfile = "tmp_fig.eps" + else: + epsfile = output + ImageConverter.convert(self, input, epsfile, "eps") + if self.conv_next: + self.conv_next.convert(epsfile, output, format) + +class SvgConverter(ImageConverter): + def __init__(self, imgsrc, imgdst="", docformat="", backend=""): + ImageConverter.__init__(self, imgsrc="svg", imgdst=imgdst) + self.add_command(["inkscape", "-z", "-D", "--export-%(dst)s=%(output)s", + "%(input)s"]) + + +class FormatRule: + def __init__(self, imgsrc="", imgdst="", docformat="", backend=""): + self.imgsrc = imgsrc or "*" + self.imgdst = imgdst or "*" + self.docformat = docformat or "*" + self.backend = backend or "*" + + def criterions(self): + return { "imgsrc": self.imgsrc, + "imgdst": self.imgdst, + "docformat": self.docformat, + "backend": self.backend } + +class ImageFormatPool: + def __init__(self): + self.rules = [] + self._filter = ObjectFilter() + + def add_rule(self, rule): + self.rules.append(rule) + + def prepend(self, other): + self.rules = other.rules + self.rules + + def output_format(self, imgsrc="", docformat="", backend=""): + founds = self._filter.select(self.rules, + imgsrc=imgsrc, + docformat=docformat, + backend=backend) + if founds: + return 
founds[0].imgdst + else: + return "" + +class ImageFormatRuleset(ImageFormatPool): + def __init__(self): + ImageFormatPool.__init__(self) + # There can be a mismatch between PDF-1.4 images and PDF-1.3 + # document produced by XeTeX + self.add_rule(FormatRule(docformat="pdf", backend="xetex", + imgdst="png")) + self.add_rule(FormatRule(docformat="pdf", imgdst="pdf")) + self.add_rule(FormatRule(docformat="dvi", imgdst="eps")) + self.add_rule(FormatRule(docformat="ps", imgdst="eps")) + + # Register as main pool + image_setup().format_pool.set_pool(self) + +# +# The Imagedata class handles all the image transformation +# process, from the discovery of the actual image involved to +# the conversion process. +# +class Imagedata: + def __init__(self): + self.paths = [] + self.input_format = "png" + self.output_format = "pdf" + self.docformat = "pdf" + self.backend = "" + self.rules = ImageFormatRuleset() + self.converters = ImageConverters() + self.converted = {} + self.log = logging.getLogger("dblatex") + self.output_encoding = "" + + def set_encoding(self, output_encoding): + self.output_encoding = output_encoding + + def set_format(self, docformat, backend): + self.docformat = docformat + self.backend = backend + self.output_format = self.rules.output_format(docformat=docformat, + backend=backend) + + def convert(self, fig): + # Translate the URL to an actual local path + fig = urllib.url2pathname(fig) + + # Always use '/' in path: work even on windows and is required by tex + if os.path.sep != '/': fig = fig.replace(os.path.sep, '/') + + # First, scan the available formats + (realfig, ext) = self.scanformat(fig) + + # No real file found, give up + if not(realfig): + self.log.warning("Image '%s' not found" % fig) + return fig + + # Check if this image has been already converted + if self.converted.has_key(realfig): + self.log.info("Image '%s' already converted as %s" % \ + (fig, self.converted[realfig])) + return self.converted[realfig] + + # No format found, take the 
default one + if not(ext): + ext = self.input_format + + # Natively supported format? + if (ext == self.output_format): + return self._safe_file(fig, realfig, ext) + + # Try to convert + count = len(self.converted) + newfig = "fig%d.%s" % (count, self.output_format) + + conv = self.converters.get_converters(imgsrc=ext, + imgdst=self.output_format, + backend=self.backend) + if not(conv): + self.log.debug("Cannot convert '%s' to %s" % (fig, + self.output_format)) + # Unknown conversion to do, or nothing to do + return self._safe_file(fig, realfig, ext) + else: + # Take the first converter that does the trick + conv = conv[0] + + # Convert the image and put it in the cache + conv.log = self.log + conv.convert(realfig, newfig, self.output_format) + self.converted[realfig] = newfig + return newfig + + def _safe_file(self, fig, realfig, ext): + """ + Copy the file in the working directory if its path contains characters + unsupported by latex, like spaces. + """ + # Encode to expected output format. 
If encoding is OK and + # supported by tex, just return the encoded path + newfig = self._path_encode(fig) + if newfig and newfig.find(" ") == -1: + return newfig + + # Added to the converted list + count = len(self.converted) + newfig = "figcopy%d.%s" % (count, ext) + self.converted[realfig] = newfig + + # Do the copy + shutil.copyfile(realfig, newfig) + return newfig + + def _path_encode(self, fig): + # Actually, only ASCII characters are sure to match filesystem encoding + # so let's be conservative + if self.output_encoding == "utf8": + return fig + try: + newfig = fig.decode("utf8").encode("ascii") + except: + newfig = "" + return newfig + + def scanformat(self, fig): + (root, ext) = os.path.splitext(fig) + + if (ext): + realfig = self.find(fig) + return (realfig, ext[1:]) + + # Lookup for the best suited available figure + if (self.output_format == "pdf"): + formats = ("png", "pdf", "jpg", "eps", "gif", "fig", "svg") + else: + formats = ("eps", "fig", "pdf", "png", "svg") + + for format in formats: + realfig = self.find("%s.%s" % (fig, format)) + if realfig: + self.log.info("Found %s for '%s'" % (format, fig)) + break + + # Maybe a figure with no extension + if not(realfig): + realfig = self.find(fig) + format = "" + + return (realfig, format) + + def find(self, fig): + # First, the obvious absolute path case + if os.path.isabs(fig): + if os.path.isfile(fig): + return fig + else: + return None + + # Then, look for the file in known paths + for path in self.paths: + realfig = os.path.join(path, fig) + if os.path.isfile(realfig): + return realfig + + return None + diff --git a/lib/dbtexmf/core/sgmlxml.py b/lib/dbtexmf/core/sgmlxml.py index 30e6b84..80b28b3 100644 --- a/lib/dbtexmf/core/sgmlxml.py +++ b/lib/dbtexmf/core/sgmlxml.py @@ -7,6 +7,7 @@ import sys import re import logging from subprocess import call +from io import open class Osx: def __init__(self): @@ -20,7 +21,7 @@ class Osx: def replace_entities(self, entfile, mapfile, outfile=None): # Find out the 
SDATA entities to replace re_ent = re.compile('') - f = open(entfile) + f = open(entfile, "rt", encoding="latin-1") lines = f.readlines() f.close() @@ -37,7 +38,7 @@ class Osx: entpat = "^(%s)\s+[^\s]+\s+0(x[^\s]+)" % "|".join([x for x, y in ents]) re_map = re.compile(entpat) entmap = [] - f = open(mapfile) + f = open(mapfile, "rt", encoding="latin-1") for line in f: entmap += re_map.findall(line.split("#")[0]) f.close() @@ -63,18 +64,18 @@ class Osx: del entdict[ent] if not(outfile): outfile = entfile - f = open(outfile, "w") + f = open(outfile, "wt", encoding="latin-1") f.writelines(nlines) f.close() def run(self, sgmlfile, xmlfile): errfile = "errors.osx" - f = open(xmlfile, "w") + f = open(xmlfile, "wb") rc = call(["osx"] + self.opts + ["-f", errfile, sgmlfile], stdout=f) f.close() if rc != 0: i = 0 - f = open(errfile) + f = open(errfile, "rt") for line in f: sys.stderr.write(line) i += 1 diff --git a/lib/dbtexmf/core/sgmlxml.py.enable-python3 b/lib/dbtexmf/core/sgmlxml.py.enable-python3 new file mode 100644 index 0000000..30e6b84 --- /dev/null +++ b/lib/dbtexmf/core/sgmlxml.py.enable-python3 @@ -0,0 +1,88 @@ +# +# Basic class handling osx() call. It tries to replace the entities by +# the equivalent unicode characters. 
+# +import os +import sys +import re +import logging +from subprocess import call + +class Osx: + def __init__(self): + self.opts = ["-xlower", + "-xno-nl-in-tag", + "-xempty", + "-xno-expand-internal", + "-xid"] # To have id() working without a DTD + self.log = logging.getLogger("dblatex") + + def replace_entities(self, entfile, mapfile, outfile=None): + # Find out the SDATA entities to replace + re_ent = re.compile('') + f = open(entfile) + lines = f.readlines() + f.close() + + # Trivial case where no entities to map + if not(lines): + return + + ents = [] + for line in lines: + ents += re_ent.findall(line) + self.log.debug("Entities to map: %s" % ents) + + # Now, get their Unicode mapping + entpat = "^(%s)\s+[^\s]+\s+0(x[^\s]+)" % "|".join([x for x, y in ents]) + re_map = re.compile(entpat) + entmap = [] + f = open(mapfile) + for line in f: + entmap += re_map.findall(line.split("#")[0]) + f.close() + self.log.debug("Entity map: %s" % entmap) + + # Replace the entity definitions by their Unicode equivalent + entdict = {} + for ent, uval in entmap: + entdict[ent] = \ + (re.compile('' % ent), + '' % (ent, uval)) + + nlines = [] + for line in lines: + mapped = [] + for ent in entdict: + reg, rep = entdict[ent] + line, n = reg.subn(rep, line) + if n: + mapped.append(ent) + nlines.append(line) + for ent in mapped: + del entdict[ent] + + if not(outfile): outfile = entfile + f = open(outfile, "w") + f.writelines(nlines) + f.close() + + def run(self, sgmlfile, xmlfile): + errfile = "errors.osx" + f = open(xmlfile, "w") + rc = call(["osx"] + self.opts + ["-f", errfile, sgmlfile], stdout=f) + f.close() + if rc != 0: + i = 0 + f = open(errfile) + for line in f: + sys.stderr.write(line) + i += 1 + if i == 10: break + f.close() + raise OSError("osx failed") + + # Now, replace the SDATA entities + sgmlmap = os.path.join(os.path.dirname(__file__), "sgmlent.txt") + self.replace_entities("intEntities.dtf", sgmlmap) + diff --git a/lib/dbtexmf/core/txtparser.py 
b/lib/dbtexmf/core/txtparser.py index 709d877..cc0c4bd 100644 --- a/lib/dbtexmf/core/txtparser.py +++ b/lib/dbtexmf/core/txtparser.py @@ -3,6 +3,7 @@ # import os import re +from io import open # # Functions used by the config parsers and by the dbtex command parser @@ -86,7 +87,7 @@ class TextConfig: def fromfile(self, file): dir = os.path.dirname(os.path.realpath(file)) - f = open(file) + f = open(file, "rt") for line in f: # Remove the comment @@ -96,7 +97,7 @@ class TextConfig: continue key = m.group(1) value = m.group(2).strip() - if not self.conf_mapping.has_key(key): + if key not in self.conf_mapping: continue o = self.conf_mapping[key] diff --git a/lib/dbtexmf/core/txtparser.py.enable-python3 b/lib/dbtexmf/core/txtparser.py.enable-python3 new file mode 100644 index 0000000..709d877 --- /dev/null +++ b/lib/dbtexmf/core/txtparser.py.enable-python3 @@ -0,0 +1,107 @@ +# +# DbTex configuration parser. Maybe we could use or extend ConfigParser. +# +import os +import re + +# +# Functions used by the config parsers and by the dbtex command parser +# +def texinputs_parse(strpath, basedir=None): + """ + Transform the TEXINPUTS string to absolute normalized paths, + but keep intact the '//' suffix if any. The absolute paths are + computed from current one or from when specified. 
+ """ + paths = [] + for p in strpath.split(os.pathsep): + if not(os.path.isabs(p)): + if not(basedir): + d = os.path.realpath(p) + else: + d = os.path.normpath(os.path.join(basedir, p)) + else: + d = os.path.normpath(p) + if p.endswith("//"): + d += "//" + paths.append(d) + return paths + +def texstyle_parse(texstyle): + sty = os.path.basename(texstyle) + dir = os.path.dirname(texstyle) + if sty.endswith(".sty"): + path = os.path.realpath(dir) + sty = sty[:-4] + if not(os.path.isfile(texstyle)): + raise ValueError("Latex style '%s' not found" % texstyle) + elif (dir): + raise ValueError("Invalid latex style path: missing .sty") + else: + path = "" + return ("latex.style=%s" % sty, path) + + + +class OptMap: + def __init__(self, option): + self.option = option + + def format(self, dir, value): + return ["%s=%s" % (self.option, value)] + +class PathMap(OptMap): + def format(self, dir, value): + if not(os.path.isabs(value)): + value = os.path.normpath(os.path.join(dir, value)) + return OptMap.format(self, dir, value) + +class TexMap(OptMap): + def format(self, dir, value): + paths = texinputs_parse(value, basedir=dir) + return OptMap.format(self, dir, ":".join(paths)) + +class NoneMap(OptMap): + def format(self, dir, value): + return value.split() + + +class TextConfig: + conf_mapping = { + 'TexInputs' : TexMap('--texinputs'), + #'PdfInputs' : OptMap('--pdfinputs'), + 'TexPost' : PathMap('--texpost'), + 'FigPath' : PathMap('--fig-path'), + 'XslParam' : PathMap('--xsl-user'), + 'TexStyle' : OptMap('--texstyle'), + 'Options' : NoneMap('') + } + + def __init__(self): + self._options = [] + self.reparam = re.compile("^\s*([^:=\s]+)\s*:\s*(.*)") + + def options(self): + return self._options + + def fromfile(self, file): + dir = os.path.dirname(os.path.realpath(file)) + f = open(file) + + for line in f: + # Remove the comment + line = line.split("#")[0] + m = self.reparam.match(line) + if not(m): + continue + key = m.group(1) + value = m.group(2).strip() + if not 
self.conf_mapping.has_key(key): + continue + o = self.conf_mapping[key] + + # The paths can be relative to the config file + self._options += o.format(dir, value) + + f.close() + diff --git a/lib/dbtexmf/core/xmlparser.py b/lib/dbtexmf/core/xmlparser.py index 501b7f7..25b1230 100644 --- a/lib/dbtexmf/core/xmlparser.py +++ b/lib/dbtexmf/core/xmlparser.py @@ -1,7 +1,7 @@ import os import re import xml.etree.ElementTree as ET -from txtparser import texinputs_parse +from dbtexmf.core.txtparser import texinputs_parse class BaseOption: def __init__(self, config, optname): diff --git a/lib/dbtexmf/core/xmlparser.py.enable-python3 b/lib/dbtexmf/core/xmlparser.py.enable-python3 new file mode 100644 index 0000000..501b7f7 --- /dev/null +++ b/lib/dbtexmf/core/xmlparser.py.enable-python3 @@ -0,0 +1,325 @@ +import os +import re +import xml.etree.ElementTree as ET +from txtparser import texinputs_parse + +class BaseOption: + def __init__(self, config, optname): + self.config = config + self.optname = optname + self._value = None + + def optvalue(self): + return self._value + + def get(self, what, default=None): + return None + + def options(self): + value = self.optvalue() + if self.optname and value: + return ["%s=%s" % (self.optname, value)] + else: + return [] + + def fromnode(self, xmlnode): + self._value = xmlnode.text + + def modules(self): + return {} + +class CommandConfig: + def __init__(self, config, type="command"): + self.config = config + self.type = type + self.args = [] + self.stdin = None + self.stdout = None + self.shell = False + + def options(self): + return self.args + + def modules(self): + return {} + + def fromnode(self, xmlnode): + self.stdin = xmlnode.get("input") + self.stdout = xmlnode.get("output") + self.shell = xmlnode.get("shell") + args = (xmlnode.text or "").split() + for arg in xmlnode: + if arg.text: args.append(arg.text) + args.extend((arg.tail or "").split()) + self.args = args + +class TexStyle(BaseOption): + def __init__(self, config, 
optname): + BaseOption.__init__(self, config, optname) + self.filepath = "" + + def optvalue(self): + return self.filepath + + def fromnode(self, xmlnode): + self.filepath = xmlnode.get("fileref") or xmlnode.get("use") + +class TexPath(BaseOption): + def __init__(self, config, optname): + BaseOption.__init__(self, config, optname) + self.paths = [] + + def optvalue(self): + return os.pathsep.join(self.paths) + + def fromnode(self, xmlnode): + if not(xmlnode.text): return + self.paths = texinputs_parse(xmlnode.text, self.config.basedir) + +class FilePath(BaseOption): + def __init__(self, config, optname): + BaseOption.__init__(self, config, optname) + self.filepath = "" + + def optvalue(self): + return self.filepath + + def fromnode(self, xmlnode): + filepath = xmlnode.get("fileref") + if not(filepath): + return + if not(os.path.isabs(filepath)): + filepath = os.path.normpath(os.path.join(self.config.basedir, + filepath)) + self.filepath = filepath + + +class ModuleConfig(BaseOption): + def __init__(self, config, optname): + BaseOption.__init__(self, config, optname) + self.commands = [] + self.extra_args = None + self.module_name = "" + self.module_file = "" + + def optvalue(self): + return self.module_name or self.module_file + + def modules(self): + if self.module_name: + return {self.module_name: self} + else: + return {} + + def fromnode(self, xmlnode): + ns = { "x": self.config.xmlns } + self._handle_location(xmlnode) + xmlopts = xmlnode.find("x:options", ns) + xmlcmds = xmlnode.find("x:command", ns) + xmlchain = xmlnode.find("x:commandchain", ns) + if not(xmlchain is None): + xmlcmds = xmlchain.findall("x:command", ns) + for cmd in xmlcmds: + args = CommandConfig(self.config) + args.fromnode(cmd) + self.commands.append(args) + elif not(xmlcmds is None): + args = CommandConfig(self.config) + args.fromnode(xmlcmds) + self.commands.append(args) + elif not(xmlopts is None): + # FIXME + self.extra_args = CommandConfig(self.config, type="option") + 
self.extra_args.fromnode(xmlopts) + + def _handle_location(self, xmlnode): + self.module_name = xmlnode.get("use") + self.module_file = xmlnode.get("fileref") + if not(self.module_name) and self.module_file: + p = FilePath(self.config, "") + p.fromnode(xmlnode) + self.module_file = p.filepath + +class ImageConverterConfig(ModuleConfig): + def __init__(self, config, optname): + ModuleConfig.__init__(self, config, optname) + + def __repr__(self): + return self.module_name + + def fromnode(self, xmlnode): + self.imgsrc = xmlnode.get("src") + self.imgdst = xmlnode.get("dst") + self.docformat = xmlnode.get("docformat") or "*" + self.backend = xmlnode.get("backend") or "*" + ModuleConfig.fromnode(self, xmlnode) + name = "%s/%s/%s/%s" % (self.imgsrc, self.imgdst, + self.docformat, self.backend) + self.module_name = name + +class ImageFormatConfig(BaseOption): + def __init__(self, config, optname): + BaseOption.__init__(self, config, optname) + self.imgsrc = "" + self.imgdst = "" + self.docformat = "" + self.backend = "" + + def fromnode(self, xmlnode): + self.imgsrc = xmlnode.get("src") + self.imgdst = xmlnode.get("dst") + self.docformat = xmlnode.get("docformat") or "*" + self.backend = xmlnode.get("backend") or "*" + +class XsltEngineConfig(ModuleConfig): + def __init__(self, config, optname): + ModuleConfig.__init__(self, config, optname) + + def __repr__(self): + return self.module_name + + def fromnode(self, xmlnode): + self.param_format = xmlnode.get("param-format") + ModuleConfig.fromnode(self, xmlnode) + if not(self.module_name or self.module_file): + self.module_name = "xsltconf" + +class XmlConfigGroup: + node_parsers = {} + + def __init__(self, config): + self.config = config + self.tagname = "" + self.infos = {} + + def get(self, tag, default=""): + if default == "": default = BaseOption(self, "") + return self.infos.get(tag, default) + + def _register(self, xmlnode, info): + tag = self.strip_ns(xmlnode.tag) + taglist = self.infos.get(tag, []) + 
taglist.append(info) + self.infos[tag] = taglist + + def strip_ns(self, tag): + return self.config.strip_ns(tag) + + def options(self): + opts = [] + for parsers in self.infos.values(): + for parser in parsers: + opts.extend(parser.options()) + return opts + + def modules(self): + mods = {} + for parsers in self.infos.values(): + for parser in parsers: + mods.update(parser.modules()) + return mods + + def fromnode(self, xmlnode): + self.tagname = xmlnode.tag + for child in xmlnode: + found = self.node_parsers.get(self.strip_ns(child.tag)) + if found: + optname, parser_cls = found + parser = parser_cls(self.config, optname) + parser.fromnode(child) + self._register(child, parser) + +class LatexConfig(XmlConfigGroup): + node_parsers = { + "texinputs": ("--texinputs", TexPath), + "bibinputs": ("--bib-path", TexPath), + "bstinputs": ("--bst-path", TexPath), + "texstyle": ("--texstyle", TexStyle), + "indexstyle": ("--indexstyle", FilePath), + "backend": ("--backend", ModuleConfig), + "texpost": ("--texpost", ModuleConfig) + } + +class XsltConfig(XmlConfigGroup): + node_parsers = { + "stylesheet": ("--xsl-user", FilePath), + "engine": ("--xslt", XsltEngineConfig) + } + +class ImageConfig(XmlConfigGroup): + node_parsers = { + "figpath": ("--fig-path", FilePath), + "figformat": ("--fig-format", BaseOption), + "converter": ("", ImageConverterConfig), + "formatrule": ("", ImageFormatConfig) + } + + +class XmlConfig: + """ + Parses an XML configuration file and stores its data in + configuration objects. 
+ """ + node_parsers = { + "latex": LatexConfig, + "xslt": XsltConfig, + "imagedata": ImageConfig, + "options": CommandConfig + } + xmlns = "http://dblatex.sourceforge.net/config" + root_tag = "config" + + def __init__(self): + self.basedir = "" + self.infos = {} + + def _register(self, xmlnode, info): + self.infos[self.strip_ns(xmlnode.tag)] = info + + def get(self, tag, default=""): + if default == "": default = BaseOption(self, "") + return self.infos.get(tag, default) + + def options(self): + opts = [] + for parser in self.infos.values(): + opts.extend(parser.options()) + return opts + + def modules(self): + mods = {} + for parser in self.infos.values(): + mods.update(parser.modules()) + return mods + + def strip_ns(self, tag): + return tag.replace("{%s}" % self.xmlns, "", 1) + + def fromfile(self, filename): + self.basedir = os.path.dirname(os.path.realpath(filename)) + + document = ET.parse(filename) + root = document.getroot() + self._check_root(root.tag) + for child in root: + parser_cls = self.node_parsers.get(self.strip_ns(child.tag)) + if parser_cls: + parser = parser_cls(self) + parser.fromnode(child) + self._register(child, parser) + + def _check_root(self, root): + xmlns, tag = self._split_xmlns(root) + if tag != self.root_tag: + raise ValueError("Expect the XML config root element being '%s'" % \ + self.root_tag) + if xmlns and xmlns != self.xmlns: + raise ValueError("Invalid XML config xmlns: '%s'" % xmlns) + + def _split_xmlns(self, tag): + m = re.match("{([^}]+)}(.*)", tag) + if m: + return m.group(1), m.group(2) + else: + return "", tag + diff --git a/lib/dbtexmf/dblatex/dblatex.py b/lib/dbtexmf/dblatex/dblatex.py index 0e9ae71..e22b965 100644 --- a/lib/dbtexmf/dblatex/dblatex.py +++ b/lib/dbtexmf/dblatex/dblatex.py @@ -7,8 +7,8 @@ import os from dbtexmf.core.sgmlxml import Osx from dbtexmf.core.dbtex import DbTex, DbTexCommand -from rawtex import RawLatex -from runtex import RunLatex +from dbtexmf.dblatex.rawtex import RawLatex +from 
dbtexmf.dblatex.runtex import RunLatex class DbLatex(DbTex): diff --git a/lib/dbtexmf/dblatex/dblatex.py.enable-python3 b/lib/dbtexmf/dblatex/dblatex.py.enable-python3 new file mode 100644 index 0000000..0e9ae71 --- /dev/null +++ b/lib/dbtexmf/dblatex/dblatex.py.enable-python3 @@ -0,0 +1,46 @@ +# +# DbLatex main class handling the compilation of a DocBook file via +# XSL Transformation and LaTeX compilation. +# +import os + +from dbtexmf.core.sgmlxml import Osx +from dbtexmf.core.dbtex import DbTex, DbTexCommand + +from rawtex import RawLatex +from runtex import RunLatex + + +class DbLatex(DbTex): + + def __init__(self, base=""): + DbTex.__init__(self, base=base) + self.name = "dblatex" + + # Engines to use + self.runtex = RunLatex() + self.runtex.index_style = os.path.join(self.topdir, + "latex", "scripts", "doc.ist") + self.rawtex = RawLatex() + self.sgmlxml = Osx() + + def set_base(self, topdir): + DbTex.set_base(self, topdir) + self.xslmain = os.path.join(self.topdir, "xsl", "latex_book_fast.xsl") + self.xsllist = os.path.join(self.topdir, + "xsl", "common", "mklistings.xsl") + self.texdir = os.path.join(self.topdir, "latex") + self.texlocal = os.path.join(self.topdir, "latex", "style") + self.confdir = os.path.join(self.topdir, "latex", "specs") + + +# +# Command entry point +# +def main(base=""): + command = DbTexCommand(base) + command.run = DbLatex(base=base) + command.main() + +if __name__ == "__main__": + main() diff --git a/lib/dbtexmf/dblatex/grubber/bibtex.py b/lib/dbtexmf/dblatex/grubber/bibtex.py index 7615cfc..296a992 100644 --- a/lib/dbtexmf/dblatex/grubber/bibtex.py +++ b/lib/dbtexmf/dblatex/grubber/bibtex.py @@ -18,11 +18,12 @@ import os, sys from os.path import * import re, string import subprocess +from io import open #from grubber import _ #from grubber import * -from msg import _, msg -from plugins import TexModule +from dbtexmf.dblatex.grubber.msg import _, msg +from dbtexmf.dblatex.grubber.plugins import TexModule re_bibdata = 
re.compile(r"\\bibdata{(?P.*)}") re_citation = re.compile(r"\\citation{(?P.*)}") @@ -111,7 +112,7 @@ class BibTex(TexModule): """ if self.style: old_bst = self.style + ".bst" - if exists(old_bst) and self.doc.sources.has_key(old_bst): + if exists(old_bst) and old_bst in self.doc.sources: del self.doc.sources[old_bst] self.style = style @@ -174,7 +175,7 @@ class BibTex(TexModule): pkg="bibtex") return 1 - blg = open(self.blgfile) + blg = open(self.blgfile, "rt", encoding="latin-1") for line in blg.readlines(): if re_error.search(line): blg.close() @@ -202,12 +203,12 @@ class BibTex(TexModule): auxfiles.append(self.auxfile) for auxname in auxfiles: - aux = open(auxname) + aux = open(auxname, "rt", encoding="latin-1") for line in aux: m = re_citation.match(line) if m: cite = m.group("cite") - if not cites.has_key(cite): + if cite not in cites: last = last + 1 cites[cite] = last continue @@ -358,7 +359,7 @@ class BibTex(TexModule): """ if not exists(self.blgfile): return 0 - log = open(self.blgfile) + log = open(self.blgfile, "rt", encoding="latin-1") line = log.readline() while line != "": if line.startswith("The style file: "): @@ -376,7 +377,7 @@ class BibTex(TexModule): """ if not exists(self.blgfile): return - log = open(self.blgfile) + log = open(self.blgfile, "rt", encoding="latin-1") last_line = "" for line in log: m = re_error.search(line) @@ -400,9 +401,9 @@ class BibTex(TexModule): file = d["file"] if file[-4:] == ".bib": file = file[:-4] - if self.db.has_key(file): + if file in self.db: d["file"] = self.db[file] - elif self.db.has_key(file + ".bib"): + elif file + ".bib" in self.db: d["file"] = self.db[file + ".bib"] yield d last_line = line diff --git a/lib/dbtexmf/dblatex/grubber/bibtex.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/bibtex.py.enable-python3 new file mode 100644 index 0000000..7615cfc --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/bibtex.py.enable-python3 @@ -0,0 +1,415 @@ +# This file is part of Rubber and thus covered by the GPL +# (c) 
Emmanuel Beffara, 2002--2006 +""" +BibTeX support for Rubber + +This module is a special one: it is triggered by the macros \\bibliography and +\\bibliographystyle and not as a package, so the main system knows about it. +The module provides the following commands: + + path = adds to the search path for databases + stylepath = adds to the search path for styles +""" + +# Stop python 2.2 from calling "yield" statements syntax errors. +from __future__ import generators + +import os, sys +from os.path import * +import re, string +import subprocess + +#from grubber import _ +#from grubber import * +from msg import _, msg +from plugins import TexModule + +re_bibdata = re.compile(r"\\bibdata{(?P.*)}") +re_citation = re.compile(r"\\citation{(?P.*)}") +re_undef = re.compile("LaTeX Warning: Citation `(?P.*)' .*undefined.*") + +# The regular expression that identifies errors in BibTeX log files is heavily +# heuristic. The remark is that all error messages end with a text of the form +# "---line xxx of file yyy" or "---while reading file zzz". The actual error +# is either the text before the dashes or the text on the previous line. + +re_error = re.compile( + "---(line (?P[0-9]+) of|while reading) file (?P.*)") + +class BibTex(TexModule): + """ + This class is the module that handles BibTeX in Rubber. It provides the + funcionality required when compiling documents as well as material to + parse blg files for diagnostics. + """ + def __init__ (self, doc, dict, base=None): + """ + Initialize the state of the module and register appropriate functions + in the main process. The extra arugment 'base' can be used to specify + the base name of the aux file, it defaults to the document name. 
+ """ + self.doc = doc + self.env = doc.env + + if not(base): + self.base = doc.src_base + else: + self.base = base + + self.bblfile = self.base + ".bbl" + self.blgfile = self.base + ".blg" + self.auxfile = self.base + ".aux" + +# cwd = self.env.vars["cwd"] +# cwd = "" +# self.bib_path = [cwd] +# if doc.src_path != cwd: +# self.bib_path.append(doc.src_path) +# self.bst_path = [cwd] + self.bib_path = [] + self.bst_path = [] + + self.undef_cites = None + self.used_cites = None + self.style = None + self.set_style("plain") + self.db = {} + self.sorted = 1 + self.run_needed = 0 + + # + # The following method are used to specify the various datafiles that + # BibTeX uses. + # + + def do_path (self, path): + self.bib_path.append(self.doc.abspath(path)) + + def do_stylepath (self, path): + self.bst_path.append(self.doc.abspath(path)) + + def do_sorted (self, mode): + self.sorted = mode in ("true", "yes", "1") + + def add_db (self, name): + """ + Register a bibliography database file. + """ + for dir in self.bib_path: + bib = join(dir, name + ".bib") + if exists(bib): + self.db[name] = bib + self.doc.sources[bib] = DependLeaf(self.env, bib) + self.doc.not_included.append(bib) + return + + def set_style (self, style): + """ + Define the bibliography style used. This method is called when + \\bibliographystyle is found. If the style file is found in the + current directory, it is considered a dependency. + """ + if self.style: + old_bst = self.style + ".bst" + if exists(old_bst) and self.doc.sources.has_key(old_bst): + del self.doc.sources[old_bst] + + self.style = style + for dir in self.bst_path: + new_bst = join(dir, style + ".bst") + if exists(new_bst): + self.bst_file = new_bst + self.doc.sources[new_bst] = DependLeaf(self.env, new_bst) + return + self.bst_file = None + + # + # The following methods are responsible of detecting when running BibTeX + # is needed and actually running it. 
+ # + + def pre_compile (self): + """ + Run BibTeX if needed before the first compilation. This function also + checks if BibTeX has been run by someone else, and in this case it + tells the system that it should recompile the document. + """ + if exists(self.doc.auxfile): + self.used_cites, self.prev_dbs = self.parse_aux() + else: + self.prev_dbs = None + if self.doc.log.lines: + self.undef_cites = self.list_undefs() + + self.run_needed = self.first_run_needed() + if self.doc.must_compile: + # If a LaTeX compilation is going to happen, it is not necessary + # to bother with BibTeX yet. + return 0 + if self.run_needed: + return self.run() + + if (exists(self.bblfile) and + getmtime(self.bblfile) > getmtime(self.doc.logfile)): + self.doc.must_compile = 1 + return 0 + + def first_run_needed (self): + """ + The condition is only on the database files' modification dates, but + it would be more clever to check if the results have changed. + BibTeXing is also needed when the last run of BibTeX failed, and in + the very particular case when the style has changed since last + compilation. + """ + if not exists(self.auxfile): + return 0 + if not exists(self.blgfile): + return 1 + + dtime = getmtime(self.blgfile) + for db in self.db.values(): + if getmtime(db) > dtime: + msg.log(_("bibliography database %s was modified") % db, + pkg="bibtex") + return 1 + + blg = open(self.blgfile) + for line in blg.readlines(): + if re_error.search(line): + blg.close() + msg.log(_("last BibTeXing failed"), pkg="bibtex") + return 1 + blg.close() + + if self.style_changed(): + return 1 + if self.bst_file and getmtime(self.bst_file) > dtime: + msg.log(_("the bibliography style file was modified"), pkg="bibtex") + return 1 + return 0 + + def parse_aux (self): + """ + Parse the aux files and return the list of all defined citations and + the list of databases used. 
+ """ + last = 0 + cites = {} + dbs = [] + auxfiles = [self.doc.auxfile] + if self.auxfile != self.doc.auxfile: + auxfiles.append(self.auxfile) + + for auxname in auxfiles: + aux = open(auxname) + for line in aux: + m = re_citation.match(line) + if m: + cite = m.group("cite") + if not cites.has_key(cite): + last = last + 1 + cites[cite] = last + continue + m = re_bibdata.match(line) + if m: + dbs.extend(m.group("data").split(",")) + aux.close() + dbs.sort() + + if self.sorted: + list = cites.keys() + list.sort() + return list, dbs + else: + list = [(n,c) for (c,n) in cites.items()] + list.sort() + return [c for (n,c) in list], dbs + + def list_undefs (self): + """ + Return the list of all undefined citations. + """ + cites = {} + for line in self.doc.log.lines: + match = re_undef.match(line) + if match: + cites[match.group("cite")] = None + list = cites.keys() + list.sort() + return list + + def post_compile (self): + """ + This method runs BibTeX if needed to solve undefined citations. If it + was run, then force a new LaTeX compilation. + """ + if not self.bibtex_needed(): + msg.log(_("no BibTeXing needed"), pkg="bibtex") + return 0 + return self.run() + + def run (self): + """ + This method actually runs BibTeX with the appropriate environment + variables set. + """ + msg.progress(_("running BibTeX on %s") % self.base) + doc = {} + if len(self.bib_path) != 1: + os.environ["BIBINPUTS"] = string.join(self.bib_path + + [os.getenv("BIBINPUTS", "")], ":") + if len(self.bst_path) != 1: + os.environ["BSTINPUTS"] = string.join(self.bst_path + + [os.getenv("BSTINPUTS", "")], ":") + rc = subprocess.call(["bibtex", self.base], stdout=msg.stdout) + if rc != 0: + msg.error(_("There were errors making the bibliography.")) + return 1 + self.run_needed = 0 + self.doc.must_compile = 1 + return 0 + + def bibtex_needed (self): + """ + Return true if BibTeX must be run. 
+ """ + if self.run_needed: + return 1 + msg.log(_("checking if BibTeX must be run..."), pkg="bibtex") + + newcites, dbs = self.parse_aux() + + # If there was a list of used citations, we check if it has + # changed. If it has, we have to rerun. + + if self.prev_dbs is not None and self.prev_dbs != dbs: + msg.log(_("the set of databases changed"), pkg="bibtex") + self.prev_dbs = dbs + self.used_cites = newcites + self.undef_cites = self.list_undefs() + return 1 + self.prev_dbs = dbs + + # If there was a list of used citations, we check if it has + # changed. If it has, we have to rerun. + + if self.used_cites and newcites != self.used_cites: + msg.log(_("the list of citations changed"), pkg="bibtex") + self.used_cites = newcites + self.undef_cites = self.list_undefs() + return 1 + self.used_cites = newcites + + # If there was a list of undefined citations, we check if it has + # changed. If it has and it is not empty, we have to rerun. + + if self.undef_cites: + new = self.list_undefs() + if new == []: + msg.log(_("no more undefined citations"), pkg="bibtex") + self.undef_cites = new + else: + for cite in new: + if cite in self.undef_cites: + continue + msg.log(_("there are new undefined citations"), pkg="bibtex") + self.undef_cites = new + return 1 + msg.log(_("there is no new undefined citation"), pkg="bibtex") + self.undef_cites = new + return 0 + else: + self.undef_cites = self.list_undefs() + + # At this point we don't know if undefined citations changed. If + # BibTeX has not been run before (i.e. there is no log file) we know + # that it has to be run now. + + if not exists(self.blgfile): + msg.log(_("no BibTeX log file"), pkg="bibtex") + return 1 + + # Here, BibTeX has been run before but we don't know if undefined + # citations changed. 
+ + if self.undef_cites == []: + msg.log(_("no undefined citations"), pkg="bibtex") + return 0 + + if getmtime(self.blgfile) < getmtime(self.doc.logfile): + msg.log(_("BibTeX's log is older than the main log"), pkg="bibtex") + return 1 + + return 0 + + def clean (self): + self.doc.remove_suffixes([".bbl", ".blg"]) + + # + # The following method extract information from BibTeX log files. + # + + def style_changed (self): + """ + Read the log file if it exists and check if the style used is the one + specified in the source. This supposes that the style is mentioned on + a line with the form 'The style file: foo.bst'. + """ + if not exists(self.blgfile): + return 0 + log = open(self.blgfile) + line = log.readline() + while line != "": + if line.startswith("The style file: "): + if line.rstrip()[16:-4] != self.style: + msg.log(_("the bibliography style was changed"), pkg="bibtex") + log.close() + return 1 + line = log.readline() + log.close() + return 0 + + def get_errors (self): + """ + Read the log file, identify error messages and report them. + """ + if not exists(self.blgfile): + return + log = open(self.blgfile) + last_line = "" + for line in log: + m = re_error.search(line) + if m: + # TODO: it would be possible to report the offending code. + if m.start() == 0: + text = string.strip(last_line) + else: + text = string.strip(line[:m.start()]) + line = m.group("line") + if line: line = int(line) + d = { + "pkg": "bibtex", + "kind": "error", + "text": text + } + d.update( m.groupdict() ) + + # BibTeX does not report the path of the database in its log. 
+ + file = d["file"] + if file[-4:] == ".bib": + file = file[:-4] + if self.db.has_key(file): + d["file"] = self.db[file] + elif self.db.has_key(file + ".bib"): + d["file"] = self.db[file + ".bib"] + yield d + last_line = line + log.close() + + +class Module(BibTex): + """ + Module to load to handle a bibtex + """ diff --git a/lib/dbtexmf/dblatex/grubber/bibtopic.py b/lib/dbtexmf/dblatex/grubber/bibtopic.py index 4c1ef9f..af36830 100644 --- a/lib/dbtexmf/dblatex/grubber/bibtopic.py +++ b/lib/dbtexmf/dblatex/grubber/bibtopic.py @@ -1,6 +1,6 @@ -from plugins import TexModule -from bibtex import BibTex +from dbtexmf.dblatex.grubber.plugins import TexModule +from dbtexmf.dblatex.grubber.bibtex import BibTex class BibSect(BibTex): diff --git a/lib/dbtexmf/dblatex/grubber/bibtopic.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/bibtopic.py.enable-python3 new file mode 100644 index 0000000..4c1ef9f --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/bibtopic.py.enable-python3 @@ -0,0 +1,66 @@ + +from plugins import TexModule +from bibtex import BibTex + + +class BibSect(BibTex): + def __init__(self, doc, bibaux, bibfiles, bibstyle): + self.bibfiles = bibfiles.split(",") + self.bibstyle = bibstyle + self.bibaux = bibaux + BibTex.__init__(self, doc, {}, bibaux) + for bib in self.bibfiles: + self.add_db(bib) + if self.bibstyle: + self.set_style(self.bibstyle) + +class BibNull(BibSect): + """ + Null biblio section having no bibfile + """ + def __init__(self, doc, bibaux): + pass + + def pre_compile(self): + return 0 + + def post_compile(self): + return 0 + + +class Bibtopic(TexModule): + def __init__(self, doc, dict): + self.doc = doc + self.btsects = [] + doc.parser.add_hook("begin{btSect}", self.add_sect) + + # If loaded from a sect, register this sect too + if dict["name"] == "begin{btSect}": + self.add_sect(dict) + + def add_sect(self, dict): + bibaux = "%s%d" % (self.doc.src_base, len(self.btsects) + 1) + if dict["arg"]: + btsect = BibSect(self.doc, bibaux, dict["arg"], 
dict["opt"]) + else: + btsect = BibNull(self.doc, bibaux) + self.btsects.append(btsect) + + def pre_compile(self): + rc = 0 + for bib in self.btsects: + rc += bib.pre_compile() + return rc + + def post_compile(self): + rc = 0 + for bib in self.btsects: + rc += bib.post_compile() + return rc + + +class Module(Bibtopic): + """ + Module to load to handle bibtopic + """ + diff --git a/lib/dbtexmf/dblatex/grubber/dvips.py b/lib/dbtexmf/dblatex/grubber/dvips.py index 83bafa7..43f9326 100644 --- a/lib/dbtexmf/dblatex/grubber/dvips.py +++ b/lib/dbtexmf/dblatex/grubber/dvips.py @@ -13,9 +13,9 @@ import os from os.path import * import subprocess -from msg import _ , msg -from plugins import TexModule -from maker import Depend +from dbtexmf.dblatex.grubber.msg import _ , msg +from dbtexmf.dblatex.grubber.plugins import TexModule +from dbtexmf.dblatex.grubber.maker import Depend class Dep (Depend): def __init__ (self, doc, target, source, node): diff --git a/lib/dbtexmf/dblatex/grubber/dvips.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/dvips.py.enable-python3 new file mode 100644 index 0000000..83bafa7 --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/dvips.py.enable-python3 @@ -0,0 +1,62 @@ +# This file is part of Rubber and thus covered by the GPL +# (c) Emmanuel Beffara, 2002--2006 +""" +PostScript generation through dvips with Rubber. + +This module has specific support for Omega: when the name of the main compiler +is "Omega" (instead of "TeX" for instance), then "odvips" is used instead of +"dvips". 
+""" + +import sys +import os +from os.path import * +import subprocess + +from msg import _ , msg +from plugins import TexModule +from maker import Depend + +class Dep (Depend): + def __init__ (self, doc, target, source, node): + self.doc = doc + self.env = doc.env + self.source = source + self.target = target + Depend.__init__(self, doc.env, prods=[target], sources={source: node}) + self.options = [] + if self.doc.engine == "Omega": + self.cmdexec = "odvips" + else: + self.cmdexec = "dvips" + self.options.append("-R0") + + def run (self): + cmd = [self.cmdexec] + msg.progress(_("running %s on %s") % (cmd[0], self.source)) + for opt in self.doc.paper.split(): + cmd.extend(["-t", opt]) + cmd.extend(self.options + ["-o", self.target, self.source]) + msg.debug(" ".join(cmd)) + rc = subprocess.call(cmd, stdout=msg.stdout) + if rc != 0: + msg.error(_("%s failed on %s") % (cmd[0], self.source)) + return 1 + return 0 + +class Module (TexModule): + def __init__ (self, doc, dict): + self.doc = doc + lastdep = doc.env.dep_last() + dvi = lastdep.prods[0] + root, ext = os.path.splitext(dvi) + if ext != ".dvi": + msg.error(_("I can't use dvips when not producing a DVI")) + sys.exit(2) + ps = root + ".ps" + self.dep = Dep(doc, ps, dvi, lastdep) + doc.env.dep_append(self.dep) + + def do_options (self, *args): + self.dep.options.extend(args) + diff --git a/lib/dbtexmf/dblatex/grubber/index.py b/lib/dbtexmf/dblatex/grubber/index.py index 2194b02..564ff8f 100644 --- a/lib/dbtexmf/dblatex/grubber/index.py +++ b/lib/dbtexmf/dblatex/grubber/index.py @@ -38,6 +38,7 @@ this argument, they apply to all indices declared at the point where they occur. 
""" +import sys import os from os.path import * import re, string @@ -45,9 +46,11 @@ import subprocess import xml.dom.minidom from subprocess import Popen, PIPE -from msg import _, msg -from plugins import TexModule -from util import md5_file +from io import open + +from dbtexmf.dblatex.grubber.msg import _, msg +from dbtexmf.dblatex.grubber.plugins import TexModule +from dbtexmf.dblatex.grubber.util import md5_file class Xindy: @@ -133,7 +136,7 @@ class Xindy: # Texindy produces latin-* indexes. Try to find out which one from # the modules loaded by the script (language dependent) re_lang = re.compile("loading module \"lang/.*/(latin[^.-]*)") - logfile = open(logname) + logfile = open(logname, "rt", encoding="latin-1") encoding = "" for line in logfile: m = re_lang.search(line) @@ -145,7 +148,7 @@ class Xindy: return encoding def _index_is_unicode(self): - f = file(self.target, "r") + f = open(self.target, "rb") is_unicode = True for line in f: try: @@ -162,20 +165,20 @@ class Xindy: # with Xindy. If not, the following error is raised by Xindy: # "WARNING: unknown cross-reference-class `hyperindexformat'! 
(ignored)" # - f = file(self.idxfile, "r") + f = open(self.idxfile, "rt", encoding="latin-1") data = f.read() f.close() data, nsub = self._re_hyperindex.subn(r"\1}{", data) if not(nsub): return msg.debug("Remove %d unsupported 'hyperindexformat' calls" % nsub) - f = file(self.idxfile, "w") + f = open(self.idxfile, "wt", encoding="latin-1") f.write(data) f.close() def _fix_invalid_ranges(self): if not(self.invalid_index_ranges): return - f = open(self.idxfile) + f = open(self.idxfile, "rt", encoding="latin-1") lines = f.readlines() f.close() @@ -199,7 +202,7 @@ class Xindy: skip_lines.reverse() for line_num in skip_lines: del lines[line_num] - f = open(self.idxfile, "w") + f = open(self.idxfile, "wt", encoding="latin-1") f.writelines(lines) f.close() @@ -232,9 +235,11 @@ class Xindy: # Collect the script output, and errors logname = join(dirname(self.target), "xindy.log") - logfile = open(logname, "w") + logfile = open(logname, "wb") p = Popen(cmd, stdout=logfile, stderr=PIPE) errdata = p.communicate()[1] + if isinstance(errdata, bytes): + errdata = errdata.decode(sys.getdefaultencoding()) rc = p.wait() if msg.stdout: msg.stdout.write(errdata) @@ -264,7 +269,7 @@ class Indexentry: """ Index entry wrapper from idxfile. 
Its role is to detect range anomalies """ - _re_entry = re.compile("\indexentry{(.*)\|([\(\)]?).*}{(\d+)}", re.DOTALL) + _re_entry = re.compile(r"\\indexentry{(.*)\|([\(\)]?).*}{(\d+)}", re.DOTALL) def __init__(self, index_key): self.index_key = index_key @@ -330,7 +335,7 @@ class Makeindex: return cmd def _index_is_unicode(self): - f = file(self.target, "r") + f = open(self.target, "rb") is_unicode = True for line in f: try: @@ -514,7 +519,7 @@ class Module (TexModule): index = self.indices[name] = Index(self.doc, idx, ind, ilg) for cmd in self.defaults: index.command(*cmd) - if self.commands.has_key(name): + if name in self.commands: for cmd in self.commands[name]: index.command(*cmd) @@ -548,9 +553,9 @@ class Module (TexModule): self.defaults.append([cmd, args]) names = indices.keys() for index in names: - if indices.has_key(index): + if index in indices: indices[index].command(cmd, args[1:]) - elif self.commands.has_key(index): + elif index in self.commands: self.commands[index].append([cmd, args]) else: self.commands[index] = [[cmd, args]] diff --git a/lib/dbtexmf/dblatex/grubber/index.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/index.py.enable-python3 new file mode 100644 index 0000000..2194b02 --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/index.py.enable-python3 @@ -0,0 +1,568 @@ +# This file is part of Rubber and thus covered by the GPL +# (c) Emmanuel Beffara, 2004--2006 +""" +Indexing support with package 'index'. + +This module handles the processing of the document's indices using a tool like +makeindex or xindy. It stores an MD5 sum of the source (.idx) file between two +runs, in order to detect modifications. + +The following directives are provided to specify options for makeindex: + + tool = + Choose which indexing tool should be used. Currently this can be either + "makeindex" (by default) or "xindy". + + language = + Choose the language used for sorting the index (xindy only). + + modules ... 
= + Specify which modules xindy should use for the index. + + order = + Modify the ordering to be used (makeindex only, supported by xindy with + warnings). The argument must be a space separated list of: + - standard = use default ordering (no options, this is the default) + - german = use German ordering (option "-g") + - letter = use letter instead of word ordering (option "-l") + + path = + Add the specified directory to the search path for styles. + + style = + Use the specified style file. + +They all accept an optional argument first, enclosed in parentheses as in +"index.path (foo,bar) here/", to specify which index they apply to. Without +this argument, they apply to all indices declared at the point where they +occur. +""" + +import os +from os.path import * +import re, string +import subprocess +import xml.dom.minidom + +from subprocess import Popen, PIPE +from msg import _, msg +from plugins import TexModule +from util import md5_file + + +class Xindy: + """ + Xindy command wrapper + """ + def __init__(self, doc, idxfile, target, transcript="", + opts=None, modules=None, + index_lang="", style=""): + self.doc = doc + self.idxfile = idxfile + self.target = target + self.transcript = transcript + self.opts = opts or [] + self.modules = modules or [] + self.index_lang = index_lang + self.path_var = "XINDY_SEARCHPATH" + mapfile = os.path.join(os.path.dirname(__file__), "xindylang.xml") + self.languages = self.map_languages(mapfile) + self._re_hyperindex = re.compile(r"hyperindexformat{\\(.*?)}}{", + re.M|re.DOTALL) + self.invalid_index_ranges = [] + + def map_languages(self, mapfile): + languages = {} + dom_document = xml.dom.minidom.parse(mapfile) + for dom_fontspec in dom_document.getElementsByTagName('map'): + lang = dom_fontspec.getAttribute('lang') + xindylang = dom_fontspec.getAttribute('xindylang') + if xindylang: + languages[lang] = xindylang + dom_document.unlink() + return languages + + def command(self): + cmd = [] + if self.doc.program == 
"xelatex": + # If raw index is in UTF-8 the texindy command cannot be used + cmd.extend(["xindy", "-M", "texindy", "-C", self.doc.encoding]) + # To behave even more like texindy + cmd.extend(["-q", "-M", "page-ranges"]) + else: + # Call texindy to handle LICR encoded raw index + # Equivalent to xindy arguments (beware of module order): + # "xindy", "-M", "tex/inputenc/latin", + # "-M", "texindy", "-C", "latin", + # "-I", "latex" + cmd.extend(["texindy"]) + + # Specific output files? + if self.target: + cmd.extend(["-o", self.target]) + if self.transcript: + cmd.extend(["-t", self.transcript]) + + # Find out which language to use + if self.index_lang: + lang = self.index_lang + elif self.doc.lang: + lang = self.languages.get(self.doc.lang) + if not(lang): + msg.warn(_("xindy: lang '%s' not found" % \ + self.doc.lang), pkg="index") + else: + msg.log(_("xindy: lang '%s' mapped to '%s'" % \ + (self.doc.lang, lang)), pkg="index") + else: + lang = None + + if lang: + cmd.extend(["-L", lang]) + + for mod in self.modules: + cmd.extend(["-M", mod]) + + if self.opts: + cmd.extend(self.opts) + + cmd.append(self.idxfile) + return cmd + + def _find_index_encoding(self, logname): + # Texindy produces latin-* indexes. Try to find out which one from + # the modules loaded by the script (language dependent) + re_lang = re.compile("loading module \"lang/.*/(latin[^.-]*)") + logfile = open(logname) + encoding = "" + for line in logfile: + m = re_lang.search(line) + if m: + encoding = m.group(1) + break + + logfile.close() + return encoding + + def _index_is_unicode(self): + f = file(self.target, "r") + is_unicode = True + for line in f: + try: + line.decode("utf8") + except: + is_unicode = False + break + f.close() + return is_unicode + + def _sanitize_idxfile(self): + # + # Remove the 'hyperindexformat' of the new hyperref that makes a mess + # with Xindy. If not, the following error is raised by Xindy: + # "WARNING: unknown cross-reference-class `hyperindexformat'! 
(ignored)" + # + f = file(self.idxfile, "r") + data = f.read() + f.close() + data, nsub = self._re_hyperindex.subn(r"\1}{", data) + if not(nsub): + return + msg.debug("Remove %d unsupported 'hyperindexformat' calls" % nsub) + f = file(self.idxfile, "w") + f.write(data) + f.close() + + def _fix_invalid_ranges(self): + if not(self.invalid_index_ranges): return + f = open(self.idxfile) + lines = f.readlines() + f.close() + + # Track the lines with the wrong index ranges + for i, line in enumerate(lines): + for entry in self.invalid_index_ranges: + if entry.index_key in line: + entry.add_line(i, line) + + # Summary of the lines to remove in order to fix the ranges + skip_lines = [] + for entry in self.invalid_index_ranges: + skip_lines.extend(entry.skip_lines) + entry.reinit() + if not(skip_lines): return + + # Remove the lines starting from the end to always have valid line num + msg.debug("xindy: lines to remove from %s to fix ranges: %s" %\ + (self.idxfile, skip_lines)) + skip_lines.sort() + skip_lines.reverse() + for line_num in skip_lines: + del lines[line_num] + f = open(self.idxfile, "w") + f.writelines(lines) + f.close() + + def _detect_invalid_ranges(self, data): + # Look for warnings like this: + # + # WARNING: Found a :close-range in the index that wasn't opened before! + # Location-reference is 76 in keyword (Statute of Anne (1710)) + # I'll continue and ignore this. + # + # Do it only once on the first run to find wrong indexes. 
+ if (self.invalid_index_ranges): return + blocks = re.split("(WARNING:|ERROR:)", data, re.M) + check_next_block = False + for block in blocks: + if "WARNING" in block: + check_next_block = True + elif check_next_block: + m = re.search("Found.*?-range .*"\ + "Location-reference is \d+ in keyword \((.*)\)", + block, re.M|re.DOTALL) + if m: self.invalid_index_ranges.append(Indexentry(m.group(1))) + check_next_block = False + + def run(self): + self._sanitize_idxfile() + self._fix_invalid_ranges() + cmd = self.command() + msg.debug(" ".join(cmd)) + + # Collect the script output, and errors + logname = join(dirname(self.target), "xindy.log") + logfile = open(logname, "w") + p = Popen(cmd, stdout=logfile, stderr=PIPE) + errdata = p.communicate()[1] + rc = p.wait() + if msg.stdout: + msg.stdout.write(errdata) + else: + msg.warn(_(errdata.strip())) + logfile.close() + if (rc != 0): + msg.error(_("could not make index %s") % self.target) + return 1 + + self._detect_invalid_ranges(errdata) + + # Now convert the built index to UTF-8 if required + if cmd[0] == "texindy" and self.doc.encoding == "utf8": + if not(self._index_is_unicode()): + encoding = self._find_index_encoding(logname) + tmpindex = join(dirname(self.target), "new.ind") + cmd = ["iconv", "-f", encoding, "-t", "utf8", + "-o", tmpindex, self.target] + msg.debug(" ".join(cmd)) + rc = subprocess.call(cmd) + if rc == 0: os.rename(tmpindex, self.target) + + return rc + +class Indexentry: + """ + Index entry wrapper from idxfile. 
Its role is to detect range anomalies + """ + _re_entry = re.compile("\indexentry{(.*)\|([\(\)]?).*}{(\d+)}", re.DOTALL) + + def __init__(self, index_key): + self.index_key = index_key + self.skip_lines = [] + self.last_range_page = 0 + self.last_range_line = -1 + self.last_range_open = False + + def reinit(self): + self.__init__(self.index_key) + + def add_line(self, line_num, indexentry): + m = self._re_entry.search(indexentry) + if not(m): + return + index_key = m.group(1).split("!")[-1] + if index_key != self.index_key: + return + range_state = m.group(2) + page = int(m.group(3)) + + #print "Found %s at %d" % (index_key, page) + if range_state == "(": + # If a starting range overlap the previous range remove + # this intermediate useless range close/open + if page <= self.last_range_page: + self.skip_lines += [self.last_range_line, line_num] + self.last_range_page = page + self.last_range_line = line_num + self.last_range_open = True + elif range_state == ")": + self.last_range_page = page + self.last_range_line = line_num + self.last_range_open = False + elif range_state == "": + # If a single indexentry is within a range, skip it + if self.last_range_open == True: + self.skip_lines += [line_num] + + +class Makeindex: + """ + Makeindex command wrapper + """ + def __init__(self, doc, idxfile, target, transcript="", + opts=None, modules=None, + index_lang="", style=""): + self.doc = doc + self.idxfile = idxfile + self.target = target + self.transcript = transcript + self.opts = opts or [] + self.path_var = "INDEXSTYLE" + self.style = style + + def command(self): + cmd = ["makeindex", "-o", self.target] + self.opts + if self.transcript: + cmd.extend(["-t", self.transcript]) + if self.style: + cmd.extend(["-s", self.style]) + cmd.append(self.idxfile) + return cmd + + def _index_is_unicode(self): + f = file(self.target, "r") + is_unicode = True + for line in f: + try: + line.decode("utf8") + except: + is_unicode = False + break + f.close() + return is_unicode + + 
def run(self): + cmd = self.command() + msg.debug(" ".join(cmd)) + + # Makeindex outputs everything to stderr, even progress messages + rc = subprocess.call(cmd, stderr=msg.stdout) + if (rc != 0): + msg.error(_("could not make index %s") % self.target) + return 1 + + # Beware with UTF-8 encoding, makeindex with headings can be messy + # because it puts in the headings the first 8bits char of the words + # under the heading which can be an invalid character in UTF-8 + if (self.style and self.doc.encoding == "utf8"): + if not(self._index_is_unicode()): + # Retry without style to avoid headings + msg.warn(_("makeindex on UTF8 failed. Retry...")) + self.style = "" + return self.run() + + return rc + + +class Index(TexModule): + """ + This class represents a single index. + """ + def __init__ (self, doc, source, target, transcript): + """ + Initialize the index, by specifying the source file (generated by + LaTeX), the target file (the output of makeindex) and the transcript + (e.g. .ilg) file. Transcript is used by glosstex.py. + """ + self.paranoid = True + self.doc = doc + self.pbase = doc.src_base + self.source = doc.src_base + "." + source + self.target = doc.src_base + "." + target + self.transcript = doc.src_base + "." 
+ transcript + + # In paranoid mode, can output only in current working dir + if self.paranoid and (os.path.dirname(self.target) == os.getcwd()): + self.target = os.path.basename(self.target) + self.transcript = os.path.basename(self.transcript) + + if os.path.exists(self.source): + self.md5 = md5_file(self.source) + else: + self.md5 = None + + self.tool = "makeindex" + self.tool_obj = None + self.lang = None # only for xindy + self.modules = [] # only for xindy + self.opts = [] + self.path = [] + self.style = None # only for makeindex + + + def do_language (self, lang): + self.lang = lang + + def do_modules (self, *args): + self.modules.extend(args) + + def do_order (self, *args): + for opt in args: + if opt == "standard": self.opts = [] + elif opt == "german": self.opts.append("-g") + elif opt == "letter": self.opts.append("-l") + else: msg.warn( + _("unknown option '%s' for 'makeidx.order'") % opt) + + def do_path (self, path): + self.path.append(self.doc.abspath(path)) + + def do_style (self, style): + self.style = style + + def do_tool (self, tool): + if tool not in ("makeindex", "xindy"): + msg.error(_("unknown indexing tool '%s'") % tool) + self.tool = tool + + + def post_compile (self): + """ + Run the indexer tool + """ + if not os.path.exists(self.source): + msg.log(_("strange, there is no %s") % self.source, pkg="index") + return 0 + if not self.run_needed(): + return 0 + + msg.progress(_("processing index %s") % self.source) + + if not(self.tool_obj): + if self.tool == "makeindex": + index_cls = Makeindex + elif self.tool == "xindy": + index_cls = Xindy + + self.tool_obj = index_cls(self.doc, + self.source, + self.target, + transcript=self.transcript, + opts=self.opts, + modules=self.modules, + index_lang=self.lang, + style=self.style) + + rc = self.tool_obj.run() + if rc != 0: + return rc + + self.doc.must_compile = 1 + return 0 + + def run_needed (self): + """ + Check if makeindex has to be run. 
This is the case either if the + target file does not exist or if the source file has changed. + """ + if os.path.getsize(self.source) == 0: + msg.log(_("the index file %s is empty") % self.source, pkg="index") + return 0 + new = md5_file(self.source) + if not os.path.exists(self.target): + self.md5 = new + return 1 + if not self.md5: + self.md5 = new + msg.log(_("the index file %s is new") % self.source, pkg="index") + return 1 + if self.md5 == new: + msg.log(_("the index %s did not change") % self.source, pkg="index") + return 0 + self.md5 = new + msg.log(_("the index %s has changed") % self.source, pkg="index") + return 1 + + def clean (self): + """ + Remove all generated files related to the index. + """ + for file in self.source, self.target, self.transcript: + if exists(file): + msg.log(_("removing %s") % file, pkg="index") + os.unlink(file) + +re_newindex = re.compile(" *{(?P[^{}]*)} *{(?P[^{}]*)}") +re_optarg = re.compile("\((?P[^()]*)\) *") + +class Module (TexModule): + def __init__ (self, doc, dict): + """ + Initialize the module with no index. + """ + self.doc = doc + self.indices = {} + self.defaults = [] + self.commands = {} + doc.parser.add_hook("makeindex", self.makeindex) + doc.parser.add_hook("newindex", self.newindex) + + def register (self, name, idx, ind, ilg): + """ + Register a new index. + """ + index = self.indices[name] = Index(self.doc, idx, ind, ilg) + for cmd in self.defaults: + index.command(*cmd) + if self.commands.has_key(name): + for cmd in self.commands[name]: + index.command(*cmd) + + def makeindex (self, dict): + """ + Register the standard index. + """ + self.register("default", "idx", "ind", "ilg") + + def newindex (self, dict): + """ + Register a new index. 
+ """ + m = re_newindex.match(dict["line"]) + if not m: + return + index = dict["arg"] + d = m.groupdict() + self.register(index, d["idx"], d["ind"], "ilg") + msg.log(_("index %s registered") % index, pkg="index") + + def command (self, cmd, args): + indices = self.indices + names = None + if len(args) > 0: + m = re_optarg.match(args[0]) + if m: + names = m.group("list").split(",") + args = args[1:] + if names is None: + self.defaults.append([cmd, args]) + names = indices.keys() + for index in names: + if indices.has_key(index): + indices[index].command(cmd, args[1:]) + elif self.commands.has_key(index): + self.commands[index].append([cmd, args]) + else: + self.commands[index] = [[cmd, args]] + + def post_compile (self): + for index in self.indices.values(): + if index.post_compile(): + return 1 + return 0 + + def clean (self): + for index in self.indices.values(): + index.clean() + return 0 + diff --git a/lib/dbtexmf/dblatex/grubber/latex.py b/lib/dbtexmf/dblatex/grubber/latex.py index 1708cd9..8524fd8 100644 --- a/lib/dbtexmf/dblatex/grubber/latex.py +++ b/lib/dbtexmf/dblatex/grubber/latex.py @@ -10,13 +10,14 @@ import os import sys import time import subprocess +from io import open -from msg import _, msg -from util import Watcher -from logparser import LogParser -from texparser import TexParser -from plugins import Modules -from maker import Depend +from dbtexmf.dblatex.grubber.msg import _, msg +from dbtexmf.dblatex.grubber.util import Watcher +from dbtexmf.dblatex.grubber.logparser import LogParser +from dbtexmf.dblatex.grubber.texparser import TexParser +from dbtexmf.dblatex.grubber.plugins import Modules +from dbtexmf.dblatex.grubber.maker import Depend class Latex(Depend): @@ -122,7 +123,7 @@ class Latex(Depend): Prepare the compilation by parsing the source file. The parsing loads all the necessary modules required by the packages used, etc. 
""" - f = open(self.srcfile) + f = open(self.srcfile, "rt", encoding="latin-1") self.parser.parse(f, exclude_mods=exclude_mods) f.close() diff --git a/lib/dbtexmf/dblatex/grubber/latex.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/latex.py.enable-python3 new file mode 100644 index 0000000..1708cd9 --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/latex.py.enable-python3 @@ -0,0 +1,279 @@ +# This file is part of Rubber and thus covered by the GPL +# (c) Emmanuel Beffara, 2002--2006 +""" +LaTeX document building system for Rubber. + +This module contains all the code in Rubber that actually does the job of +building a LaTeX document from start to finish. +""" +import os +import sys +import time +import subprocess + +from msg import _, msg +from util import Watcher +from logparser import LogParser +from texparser import TexParser +from plugins import Modules +from maker import Depend + + +class Latex(Depend): + def __init__(self, maker): + Depend.__init__(self, maker) + self.logfile = None + self.auxfile = None + self.srcfile = None + self.srcbase = None + self.outfile = None + self.program = "latex" + self.engine = "TeX" + self.paper = "" + self.prods = [] + self.must_compile = 0 + self.something_done = 0 + self.failed_module = None + self.watcher = Watcher() + self.log = LogParser() + self.modules = Modules(self) + self.parser = TexParser(self) + self.date = None + # Is the final output expected? + self.draft_only = False + self.draft_support = False + self.batch = 1 + self.opts = [] + + def reinit(self): + # Restart with a clean module set, parser and logger + self.__init__(self.env) + + def set_source(self, input): + self.srcfile = os.path.realpath(input) + self.srcbase = os.path.splitext(self.srcfile)[0] + self.src_base = self.srcbase + self.logfile = self.srcbase + ".log" + self.auxfile = self.srcbase + ".aux" + self.set_format("dvi") + + def set_format(self, format): + self.outfile = self.srcbase + "." 
+ format + self.prods = [self.outfile] + + def compile_needed (self): + """ + Returns true if a first compilation is needed. This method supposes + that no compilation was done (by the script) yet. + """ + if self.must_compile: + return 1 + msg.log(_("checking if compiling is necessary...")) + if not self.draft_support and not os.path.exists(self.outfile): + msg.debug(_("the output file doesn't exist")) + return 1 + if not os.path.exists(self.logfile): + msg.debug(_("the log file does not exist")) + return 1 + if (not self.draft_support and + (os.path.getmtime(self.outfile) < os.path.getmtime(self.srcfile))): + msg.debug(_("the source is younger than the output file")) + return 1 + if self.log.read(self.logfile): + msg.debug(_("the log file is not produced by TeX")) + return 1 + return self.recompile_needed() + + def recompile_needed (self): + """ + Returns true if another compilation is needed. This method is used + when a compilation has already been done. + """ + changed = self.watcher.update() + if self.must_compile: + return 1 + if self.log.errors(): + msg.debug(_("last compilation failed")) + return 1 +# if self.deps_modified(os.path.getmtime(self.outfile)): +# msg.debug(_("dependencies were modified")) +# return 1 + if changed and (len(changed) > 1 or changed[0] != self.auxfile): + msg.debug(_("the %s file has changed") % changed[0]) + return 1 + if self.log.run_needed(): + msg.debug(_("LaTeX asks to run again")) + if (not(changed)): + msg.debug(_("but the aux files are unchanged")) + return 0 + return 1 + if changed: + msg.debug(_("the %s file has changed but no re-run required?") \ + % changed[0]) + if self.program == "xelatex": + msg.debug(_("force recompilation (XeTeX engine)")) + return 1 + + msg.debug(_("no new compilation is needed")) + return 0 + + def prepare(self, exclude_mods=None): + """ + Prepare the compilation by parsing the source file. The parsing + loads all the necessary modules required by the packages used, etc. 
+ """ + f = open(self.srcfile) + self.parser.parse(f, exclude_mods=exclude_mods) + f.close() + + def force_run(self): + self.run(force=1) + + def run(self, force=0): + """ + Run the building process until the last compilation, or stop on error. + This method supposes that the inputs were parsed to register packages + and that the LaTeX source is ready. If the second (optional) argument + is true, then at least one compilation is done. As specified by the + class Depend, the method returns 0 on success and 1 on failure. + """ + if self.pre_compile(force): + return 1 + + # If an error occurs after this point, it will be while LaTeXing. + self.failed_dep = self + self.failed_module = None + + if self.batch: + self.opts.append("-interaction=batchmode") + + need_compile = force or self.compile_needed() + while need_compile: + if self.compile(): return 1 + if self.post_compile(): return 1 + need_compile = self.recompile_needed() + + # Finally there was no error. + self.failed_dep = None + + if self.last_compile(): + return 1 + + if self.something_done: + self.date = int(time.time()) + return 0 + + def pre_compile(self, force): + """ + Prepare the source for compilation using package-specific functions. + This function must return true on failure. This function sets + `must_compile' to 1 if we already know that a compilation is needed, + because it may avoid some unnecessary preprocessing (e.g. BibTeXing). + """ + # Watch for the changes of these working files + for ext in ("aux", "toc", "lot", "lof"): + self.watcher.watch(self.srcbase + "." + ext) + + msg.log(_("building additional files...")) + for mod in self.modules.objects.values(): + if mod.pre_compile(): + self.failed_module = mod + return 1 + return 0 + + def post_compile(self): + """ + Run the package-specific operations that are to be performed after + each compilation of the main source. Returns true on failure. 
+ """ + msg.log(_("running post-compilation scripts...")) + + for mod in self.modules.objects.values(): + if mod.post_compile(): + self.failed_module = mod + return 1 + return 0 + + def last_compile(self): + """ + Run the module-specific operations that are to be performed after + the last compilation of the main source. Returns true on failure. + """ + msg.log(_("running last-compilation scripts...")) + + for mod in self.modules.objects.values(): + if mod.last_compile(): + self.failed_module = mod + return 1 + return 0 + + def compile(self): + self.must_compile = 0 + cmd = [self.program] + self.opts + [os.path.basename(self.srcfile)] + msg.log(" ".join(cmd)) + rc = subprocess.call(cmd, stdout=msg.stdout) + if rc != 0: + msg.error(_("%s failed") % self.program) + # Whatever the result is, read the log file + if self.log.read(self.logfile): + msg.error(_("Could not run %s.") % self.program) + return 1 + if self.log.errors(): + return 1 + return rc + + def clean(self): + """ + Remove all files that are produced by compilation. + """ + self.remove_suffixes([".log", ".aux", ".toc", ".lof", ".lot", + ".out", ".glo", ".cb"]) + + msg.log(_("cleaning additional files...")) + # for dep in self.sources.values(): + # dep.clean() + + for mod in self.modules.objects.values(): + mod.clean() + + def remove_suffixes (self, list): + """ + Remove all files derived from the main source with one of the + specified suffixes. + """ + for suffix in list: + file = self.src_base + suffix + if os.path.exists(file): + msg.log(_("removing %s") % file) + os.unlink(file) + + def get_errors (self): + if not(self.failed_module): + return self.log.get_errors() + else: + return self.failed_module.get_errors() + + def print_misschars(self): + """ + Sort the characters not handled by the selected font, + and print them as a warning. 
+ """ + missed_chars = [] + for c in self.log.get_misschars(): + missed_chars.append((c["uchar"], c["font"])) + # Strip redundant missed chars + missed_chars = list(set(missed_chars)) + missed_chars.sort() + for m in missed_chars: + # The log file is encoded in UTF8 (xetex) or in latin1 (pdftex) + try: + uchar = m[0].decode("utf8") + except: + uchar = m[0].decode("latin1") + # Check we have a real char (e.g. not something like '^^a3') + if len(uchar) == 1: + msg.warn("Character U+%X (%s) not in font '%s'" % \ + (ord(uchar), m[0], m[1])) + else: + msg.warn("Character '%s' not in font '%s'" % (m[0], m[1])) + diff --git a/lib/dbtexmf/dblatex/grubber/logparser.py b/lib/dbtexmf/dblatex/grubber/logparser.py index d7a00b9..e490d9f 100644 --- a/lib/dbtexmf/dblatex/grubber/logparser.py +++ b/lib/dbtexmf/dblatex/grubber/logparser.py @@ -8,8 +8,9 @@ This module defines the class that parses the LaTeX log files. from __future__ import generators import re +from io import open -from msg import _, msg +from dbtexmf.dblatex.grubber.msg import _, msg class LogParser: """ @@ -51,7 +52,7 @@ class LogParser: """ self.lines = [] try: - file = open(name) + file = open(name, "rt") except IOError: return 2 line = file.readline() @@ -188,7 +189,7 @@ class LogParser: m = self.re_ignored.search(error) if m: d["file"] = last_file - if d.has_key("code"): + if "code" in d: del d["code"] d.update( m.groupdict() ) elif pos[-1] is None: diff --git a/lib/dbtexmf/dblatex/grubber/logparser.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/logparser.py.enable-python3 new file mode 100644 index 0000000..d7a00b9 --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/logparser.py.enable-python3 @@ -0,0 +1,374 @@ +# This file is part of Rubber and thus covered by the GPL +# (c) Emmanuel Beffara, 2002--2006 +""" +LaTeX document building system for Rubber. + +This module defines the class that parses the LaTeX log files. 
+""" +from __future__ import generators + +import re + +from msg import _, msg + +class LogParser: + """ + This class performs all the extraction of information from the log file. + For efficiency, the instances contain the whole file as a list of strings + so that it can be read several times with no disk access. + """ + + re_loghead = re.compile("This is [0-9a-zA-Z-]*(TeX|Omega)") + re_rerun = re.compile( + "(LaTeX|Package longtable|Package bibtopic) Warning:.*Rerun") + re_rerun2 = re.compile("\(Changebar\).*Rerun") + re_file = re.compile("(\\((?P[^ \n\t(){}]*)|\\))") + re_badbox = re.compile(r"(Ov|Und)erfull \\[hv]box ") + re_line = re.compile(r"(l\.(?P[0-9]+)( (?P.*))?$|<\*>)") + re_cseq = re.compile(r".*(?P\\[^ ]*) ?$") + re_page = re.compile("\[(?P[0-9]+)\]") + re_atline = re.compile( + "( detected| in paragraph)? at lines? (?P[0-9]*)(--(?P[0-9]*))?") + re_reference = re.compile("LaTeX Warning: Reference `(?P.*)' \ + on page (?P[0-9]*) undefined on input line (?P[0-9]*)\\.$") + re_label = re.compile("LaTeX Warning: (?PLabel .*)$") + re_warning = re.compile( + "(LaTeX|Package)( (?P.*))? Warning: (?P.*)$") + re_online = re.compile("(; reported)? on input line (?P[0-9]*)") + re_ignored = re.compile("; all text was ignored after line (?P[0-9]*).$") + re_misschar = re.compile("Missing character: There is no (?P[^ ]*) in font (?P.*)!") + + #-- Initialization {{{2 + + def __init__ (self): + self.lines = [] + + def read (self, name): + """ + Read the specified log file, checking that it was produced by the + right compiler. Returns true if the log file is invalid or does not + exist. + """ + self.lines = [] + try: + file = open(name) + except IOError: + return 2 + line = file.readline() + if not line: + file.close() + return 1 + if not self.re_loghead.match(line): + file.close() + return 1 + self.lines = file.readlines() + file.close() + return 0 + + #-- Process information {{{2 + + def errors (self): + """ + Returns true if there was an error during the compilation. 
+ """ + skipping = 0 + for line in self.lines: + if line.strip() == "": + skipping = 0 + continue + if skipping: + continue + m = self.re_badbox.match(line) + if m: + skipping = 1 + continue + if line[0] == "!": + # We check for the substring "pdfTeX warning" because pdfTeX + # sometimes issues warnings (like undefined references) in the + # form of errors... + + if line.find("pdfTeX warning") == -1: + return 1 + return 0 + + def run_needed (self): + """ + Returns true if LaTeX indicated that another compilation is needed. + """ + for line in self.lines: + if self.re_rerun.match(line): + return 1 + if self.re_rerun2.match(line): + return 1 + return 0 + + #-- Information extraction {{{2 + + def continued (self, line): + """ + Check if a line in the log is continued on the next line. This is + needed because TeX breaks messages at 79 characters per line. We make + this into a method because the test is slightly different in Metapost. + """ + return len(line) == 79 + + def parse (self, errors=0, boxes=0, refs=0, warnings=0, misschars=0): + """ + Parse the log file for relevant information. The named arguments are + booleans that indicate which information should be extracted: + - errors: all errors + - boxes: bad boxes + - refs: warnings about references + - warnings: all other warnings + The function returns a generator. Each generated item is a dictionary + that contains (some of) the following entries: + - kind: the kind of information ("error", "box", "ref", "warning") + - text: the text of the error or warning + - code: the piece of code that caused an error + - file, line, last, pkg: as used by Message.format_pos. 
+ """ + if not self.lines: + return + last_file = None + pos = [last_file] + page = 1 + parsing = 0 # 1 if we are parsing an error's text + skipping = 0 # 1 if we are skipping text until an empty line + something = 0 # 1 if some error was found + prefix = None # the prefix for warning messages from packages + accu = "" # accumulated text from the previous line + for line in self.lines: + line = line[:-1] # remove the line feed + + # TeX breaks messages at 79 characters, just to make parsing + # trickier... + + if self.continued(line): + accu += line + continue + line = accu + line + accu = "" + + # Text that should be skipped (from bad box messages) + + if prefix is None and line == "": + skipping = 0 + continue + + if skipping: + continue + + # Errors (including aborted compilation) + + if parsing: + if error == "Undefined control sequence.": + # This is a special case in order to report which control + # sequence is undefined. + m = self.re_cseq.match(line) + if m: + error = "Undefined control sequence %s." 
% m.group("seq") + m = self.re_line.match(line) + if m: + parsing = 0 + skipping = 1 + pdfTeX = error.find("pdfTeX warning") != -1 + if (pdfTeX and warnings) or (errors and not pdfTeX): + if pdfTeX: + d = { + "kind": "warning", + "pkg": "pdfTeX", + "text": error[error.find(":")+2:] + } + else: + d = { + "kind": "error", + "text": error + } + d.update( m.groupdict() ) + m = self.re_ignored.search(error) + if m: + d["file"] = last_file + if d.has_key("code"): + del d["code"] + d.update( m.groupdict() ) + elif pos[-1] is None: + d["file"] = last_file + else: + d["file"] = pos[-1] + yield d + elif line[0] == "!": + error = line[2:] + elif line[0:3] == "***": + parsing = 0 + skipping = 1 + if errors: + yield { + "kind": "abort", + "text": error, + "why" : line[4:], + "file": last_file + } + elif line[0:15] == "Type X to quit ": + parsing = 0 + skipping = 0 + if errors: + yield { + "kind": "error", + "text": error, + "file": pos[-1] + } + continue + + if len(line) > 0 and line[0] == "!": + error = line[2:] + parsing = 1 + continue + + if line == "Runaway argument?": + error = line + parsing = 1 + continue + + # Long warnings + + if prefix is not None: + if line[:len(prefix)] == prefix: + text.append(line[len(prefix):].strip()) + else: + text = " ".join(text) + m = self.re_online.search(text) + if m: + info["line"] = m.group("line") + text = text[:m.start()] + text[m.end():] + if warnings: + info["text"] = text + d = { "kind": "warning" } + d.update( info ) + yield d + prefix = None + continue + + # Undefined references + + m = self.re_reference.match(line) + if m: + if refs: + d = { + "kind": "warning", + "text": _("Reference `%s' undefined.") % m.group("ref"), + "file": pos[-1] + } + d.update( m.groupdict() ) + yield d + continue + + m = self.re_label.match(line) + if m: + if refs: + d = { + "kind": "warning", + "file": pos[-1] + } + d.update( m.groupdict() ) + yield d + continue + + # Other warnings + + if line.find("Warning") != -1: + m = self.re_warning.match(line) + 
if m: + info = m.groupdict() + info["file"] = pos[-1] + info["page"] = page + if info["pkg"] is None: + del info["pkg"] + prefix = "" + else: + prefix = ("(%s)" % info["pkg"]) + prefix = prefix.ljust(m.start("text")) + text = [info["text"]] + continue + + # Bad box messages + + m = self.re_badbox.match(line) + if m: + if boxes: + mpos = { "file": pos[-1], "page": page } + m = self.re_atline.search(line) + if m: + md = m.groupdict() + for key in "line", "last": + if md[key]: mpos[key] = md[key] + line = line[:m.start()] + d = { + "kind": "warning", + "text": line + } + d.update( mpos ) + yield d + skipping = 1 + continue + + # Missing characters in a font + if misschars: + m = self.re_misschar.match(line) + if m: + d = { + "kind": "warning", + "uchar": m.group("uchar"), + "font": m.group("font"), + } + yield d + continue + + # If there is no message, track source names and page numbers. + + last_file = self.update_file(line, pos, last_file) + page = self.update_page(line, page) + + def get_errors (self): + return self.parse(errors=1) + def get_boxes (self): + return self.parse(boxes=1) + def get_references (self): + return self.parse(refs=1) + def get_warnings (self): + return self.parse(warnings=1) + def get_misschars (self): + return self.parse(misschars=1) + + def update_file (self, line, stack, last): + """ + Parse the given line of log file for file openings and closings and + update the list `stack'. Newly opened files are at the end, therefore + stack[1] is the main source while stack[-1] is the current one. The + first element, stack[0], contains the value None for errors that may + happen outside the source. Return the last file from which text was + read (the new stack top, or the one before the last closing + parenthesis). 
+ """ + m = self.re_file.search(line) + while m: + if line[m.start()] == '(': + last = m.group("file") + stack.append(last) + else: + last = stack[-1] + del stack[-1] + line = line[m.end():] + m = self.re_file.search(line) + return last + + def update_page (self, line, before): + """ + Parse the given line and return the number of the page that is being + built after that line, assuming the current page before the line was + `before'. + """ + ms = self.re_page.findall(line) + if ms == []: + return before + return int(ms[-1]) + 1 + diff --git a/lib/dbtexmf/dblatex/grubber/makeidx.py b/lib/dbtexmf/dblatex/grubber/makeidx.py index d4c5f18..1157832 100644 --- a/lib/dbtexmf/dblatex/grubber/makeidx.py +++ b/lib/dbtexmf/dblatex/grubber/makeidx.py @@ -24,7 +24,7 @@ The following directives are provided to specify options for makeindex: """ import sys -from index import Index +from dbtexmf.dblatex.grubber.index import Index class Module (Index): def __init__ (self, doc, dict): diff --git a/lib/dbtexmf/dblatex/grubber/makeidx.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/makeidx.py.enable-python3 new file mode 100644 index 0000000..d4c5f18 --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/makeidx.py.enable-python3 @@ -0,0 +1,34 @@ +# This file is part of Rubber and thus covered by the GPL +# (c) Emmanuel Beffara, 2002--2006 +""" +Indexing support with package 'makeidx'. + +This module handles the processing of the document's index using makeindex. +It stores an MD5 sum of the .idx file between two runs, in order to detect +modifications. + +The following directives are provided to specify options for makeindex: + + order = + Modify the ordering to be used. The argument must be a space separated + list of: + - standard = use default ordering (no options, this is the default) + - german = use German ordering (option "-g") + - letter = use letter instead of word ordering (option "-l") + + path = + Add the specified directory to the search path for styles. 
+ + style = + Use the specified style file. +""" +import sys + +from index import Index + +class Module (Index): + def __init__ (self, doc, dict): + """ + Initialize the module, checking if there is already an index. + """ + Index.__init__(self, doc, "idx", "ind", "ilg") diff --git a/lib/dbtexmf/dblatex/grubber/maker.py b/lib/dbtexmf/dblatex/grubber/maker.py index 14408f8..75d1dc3 100644 --- a/lib/dbtexmf/dblatex/grubber/maker.py +++ b/lib/dbtexmf/dblatex/grubber/maker.py @@ -4,11 +4,12 @@ This module contains all the classes used to manage the building dependencies. """ +from __future__ import print_function import os import time import subprocess -from msg import _, msg +from dbtexmf.dblatex.grubber.msg import _, msg class Depend (object): #{{{2 """ @@ -81,7 +82,7 @@ class Depend (object): #{{{2 on this one have to be remade) """ if self.making: - print "FIXME: cyclic make" + print("FIXME: cyclic make") return 1 self.making = 1 diff --git a/lib/dbtexmf/dblatex/grubber/maker.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/maker.py.enable-python3 new file mode 100644 index 0000000..14408f8 --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/maker.py.enable-python3 @@ -0,0 +1,249 @@ +# This file is part of Rubber and thus covered by the GPL +# (c) Emmanuel Beffara, 2002--2006 +""" +This module contains all the classes used to manage the building +dependencies. +""" +import os +import time +import subprocess + +from msg import _, msg + +class Depend (object): #{{{2 + """ + This is a base class to represent file dependencies. It provides the base + functionality of date checking and recursive making, supposing the + existence of a method `run()' in the object. This method is supposed to + rebuild the files of this node, returning zero on success and something + else on failure. + """ + def __init__ (self, env, prods=None, sources={}, loc={}): + """ + Initialize the object for a given set of output files and a given set + of sources. 
The argument `prods' is a list of file names, and the + argument `sources' is a dictionary that associates file names with + dependency nodes. The optional argument `loc' is a dictionary that + describes where in the sources this dependency was created. + """ + self.env = env + if prods: + self.prods = prods + else: + self.prods = [] + self.set_date() + self.sources = sources + self.making = 0 + self.failed_dep = None + self.loc = loc + + def set_date (self): + """ + Define the date of the last build of this node as that of the most + recent file among the products. If some product does not exist or + there are ne products, the date is set to None. + """ + if self.prods == []: + # This is a special case used in rubber.Environment + self.date = None + else: + try: + # We set the node's date to that of the most recently modified + # product file, assuming all other files were up to date then + # (though not necessarily modified). + self.date = max(map(os.path.getmtime, self.prods)) + except OSError: + # If some product file does not exist, set the last + # modification date to None. + self.date = None + + def should_make (self): + """ + Check the dependencies. Return true if this node has to be recompiled, + i.e. if some dependency is modified. Nothing recursive is done here. + """ + if not self.date: + return 1 + for src in self.sources.values(): + if src.date > self.date: + return 1 + return 0 + + def make (self, force=0): + """ + Make the destination file. This recursively makes all dependencies, + then compiles the target if dependencies were modified. 
The semantics + of the return value is the following: + - 0 means that the process failed somewhere (in this node or in one of + its dependencies) + - 1 means that nothing had to be done + - 2 means that something was recompiled (therefore nodes that depend + on this one have to be remade) + """ + if self.making: + print "FIXME: cyclic make" + return 1 + self.making = 1 + + # Make the sources + self.failed_dep = None + must_make = force + for src in self.sources.values(): + ret = src.make() + if ret == 0: + self.making = 0 + self.failed_dep = src.failed_dep + return 0 + if ret == 2: + must_make = 1 + + # Make this node if necessary + + if must_make or self.should_make(): + if force: + ret = self.force_run() + else: + ret = self.run() + if ret: + self.making = 0 + self.failed_dep = self + return 0 + + # Here we must take the integer part of the value returned by + # time.time() because the modification times for files, returned + # by os.path.getmtime(), is an integer. Keeping the fractional + # part could lead to errors in time comparison with the main log + # file when the compilation of the document is shorter than one + # second... + + self.date = int(time.time()) + self.making = 0 + return 2 + self.making = 0 + return 1 + + def force_run (self): + """ + This method is called instead of 'run' when rebuilding this node was + forced. By default it is equivalent to 'run'. + """ + return self.run() + + def failed (self): + """ + Return a reference to the node that caused the failure of the last + call to "make". If there was no failure, return None. + """ + return self.failed_dep + + def get_errors (self): + """ + Report the errors that caused the failure of the last call to run. + """ + if None: + yield None + + def clean (self): + """ + Remove the files produced by this rule and recursively clean all + dependencies. 
+ """ + for file in self.prods: + if os.path.exists(file): + msg.log(_("removing %s") % file) + os.unlink(file) + for src in self.sources.values(): + src.clean() + self.date = None + + def reinit (self): + """ + Reinitializing depends on actual dependency leaf + """ + pass + + def leaves (self): + """ + Return a list of all source files that are required by this node and + cannot be built, i.e. the leaves of the dependency tree. + """ + if self.sources == {}: + return self.prods + ret = [] + for dep in self.sources.values(): + ret.extend(dep.leaves()) + return ret + + +class DependLeaf (Depend): #{{{2 + """ + This class specializes Depend for leaf nodes, i.e. source files with no + dependencies. + """ + def __init__ (self, env, *dest, **args): + """ + Initialize the node. The arguments of this method are the file + names, since one single node may contain several files. + """ + Depend.__init__(self, env, prods=list(dest), **args) + + def run (self): + # FIXME + if len(self.prods) == 1: + msg.error(_("%r does not exist") % self.prods[0], **self.loc) + else: + msg.error(_("one of %r does not exist") % self.prods, **self.loc) + return 1 + + def clean (self): + pass + + +class DependShell (Depend): #{{{2 + """ + This class specializes Depend for generating files using shell commands. + """ + def __init__ (self, env, cmd, **args): + Depend.__init__(self, env, **args) + self.cmd = cmd + + def run (self): + msg.progress(_("running %s") % self.cmd[0]) + rc = subprocess.call(self.cmd, stdout=msg.stdout) + if rc != 0: + msg.error(_("execution of %s failed") % self.cmd[0]) + return 1 + return 0 + + +class Maker: + """ + Very simple builder environment. Much simpler than the original rubber + Environment. 
+ """ + def __init__(self): + self.dep_nodes = [] + + def dep_last(self): + if not(self.dep_nodes): + return None + else: + return self.dep_nodes[-1] + + def dep_append(self, dep): + self.dep_nodes.append(dep) + + def make(self, force=0): + if not(self.dep_nodes): + return 0 + # Just ask the last one to compile + rc = self.dep_nodes[-1].make(force=force) + if (rc == 0): + return -1 + else: + return 0 + + def reinit(self): + # Forget the old dependency nodes + self.__init__() + diff --git a/lib/dbtexmf/dblatex/grubber/msg.py b/lib/dbtexmf/dblatex/grubber/msg.py index 4ebb38f..8c2fffc 100644 --- a/lib/dbtexmf/dblatex/grubber/msg.py +++ b/lib/dbtexmf/dblatex/grubber/msg.py @@ -4,9 +4,12 @@ This module defines the messages diplay class, and creates the application-wide msg object. """ +from __future__ import print_function + import os, os.path import sys import logging +from io import open def _(txt): return txt @@ -32,14 +35,14 @@ class Message (object): self._log = logging.getLogger("dblatex") level = self._log.getEffectiveLevel() if level >= logging.WARNING: - self.stdout = open(os.devnull, "w") + self.stdout = open(os.devnull, "wb") else: self.stdout = None def write_stdout(self, text, level=0): - print text + print(text) def write_stderr(self, text, level=0): - print >>sys.stderr, text + print(text, file=sys.stderr) def push_pos (self, pos): self.pos.append(pos) @@ -66,7 +69,7 @@ class Message (object): if text[0:13] == "LaTeX Error: ": text = text[13:] self._log.error(self.format_pos(info, text)) - if info.has_key("code") and info["code"] and not self.short: + if "code" in info and info["code"] and not self.short: self._log.error(self.format_pos(info, _("leading text: ") + info["code"])) @@ -100,24 +103,24 @@ class Message (object): the dictionary given as first argument. 
""" if len(self.pos) > 0: - if where is None or not where.has_key("file"): + if where is None or "file" not in where: where = self.pos[-1] elif where is None or where == {}: return text - if where.has_key("file") and where["file"] is not None: + if "file" in where and where["file"] is not None: pos = self.simplify(where["file"]) - if where.has_key("line") and where["line"]: + if "line" in where and where["line"]: pos = "%s:%d" % (pos, int(where["line"])) - if where.has_key("last"): + if "last" in where: if where["last"] != where["line"]: pos = "%s-%d" % (pos, int(where["last"])) pos = pos + ": " else: pos = "" - if where.has_key("page"): + if "page" in where: text = "%s (page %d)" % (text, int(where["page"])) - if where.has_key("pkg"): + if "pkg" in where: text = "[%s] %s" % (where["pkg"], text) return pos + text diff --git a/lib/dbtexmf/dblatex/grubber/msg.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/msg.py.enable-python3 new file mode 100644 index 0000000..4ebb38f --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/msg.py.enable-python3 @@ -0,0 +1,147 @@ +# This file is part of Rubber and thus covered by the GPL +# (c) Emmanuel Beffara, 2002--2006 +""" +This module defines the messages diplay class, and creates the application-wide +msg object. +""" +import os, os.path +import sys +import logging + +def _(txt): return txt + +class Message (object): + """ + All messages in the program are output using the `msg' object in the + main package. This class defines the interface for this object. + """ + def __init__ (self, level=1, write=None): + """ + Initialize the object with the specified verbosity level and an + optional writing function. If no such function is specified, no + message will be output until the 'write' field is changed. 
+ """ + self.level = level + self.write = self.write_stdout + if write: + self.write = write + self.short = 0 + self.path = "" + self.cwd = "./" + self.pos = [] + self._log = logging.getLogger("dblatex") + level = self._log.getEffectiveLevel() + if level >= logging.WARNING: + self.stdout = open(os.devnull, "w") + else: + self.stdout = None + + def write_stdout(self, text, level=0): + print text + def write_stderr(self, text, level=0): + print >>sys.stderr, text + + def push_pos (self, pos): + self.pos.append(pos) + def pop_pos (self): + del self.pos[-1] + + def __call__ (self, level, text): + """ + This is the low level printing function, it receives a line of text + with an associated verbosity level, so that output can be filtered + depending on command-line options. + """ + if self.write and level <= self.level: + self.write(text, level=level) + + def display (self, kind, text, **info): + """ + Print an error or warning message. The argument 'kind' indicates the + kind of message, among "error", "warning", "abort", the argument + 'text' is the main text of the message, the other arguments provide + additional information, including the location of the error. 
+ """ + if kind == "error": + if text[0:13] == "LaTeX Error: ": + text = text[13:] + self._log.error(self.format_pos(info, text)) + if info.has_key("code") and info["code"] and not self.short: + self._log.error(self.format_pos(info, + _("leading text: ") + info["code"])) + + elif kind == "abort": + if self.short: + msg = _("compilation aborted ") + info["why"] + else: + msg = _("compilation aborted: %s %s") % (text, info["why"]) + self._log.error(self.format_pos(info, msg)) + +# elif kind == "warning": +# self._log.warning(self.format_pos(info, text)) + + def error (self, text, **info): + self.display(kind="error", text=text, **info) + def warn (self, what, **where): + self._log.warning(self.format_pos(where, what)) + def progress (self, what, **where): + self._log.info(self.format_pos(where, what + "...")) + def info (self, what, **where): + self._log.info(self.format_pos(where, what)) + def log (self, what, **where): + self._log.debug(self.format_pos(where, what)) + def debug (self, what, **where): + self._log.debug(self.format_pos(where, what)) + + def format_pos (self, where, text): + """ + Format the given text into a proper error message, with file and line + information in the standard format. Position information is taken from + the dictionary given as first argument. 
+ """ + if len(self.pos) > 0: + if where is None or not where.has_key("file"): + where = self.pos[-1] + elif where is None or where == {}: + return text + + if where.has_key("file") and where["file"] is not None: + pos = self.simplify(where["file"]) + if where.has_key("line") and where["line"]: + pos = "%s:%d" % (pos, int(where["line"])) + if where.has_key("last"): + if where["last"] != where["line"]: + pos = "%s-%d" % (pos, int(where["last"])) + pos = pos + ": " + else: + pos = "" + if where.has_key("page"): + text = "%s (page %d)" % (text, int(where["page"])) + if where.has_key("pkg"): + text = "[%s] %s" % (where["pkg"], text) + return pos + text + + def simplify (self, name): + """ + Simplify an path name by removing the current directory if the + specified path is in a subdirectory. + """ + path = os.path.normpath(os.path.join(self.path, name)) + if path[:len(self.cwd)] == self.cwd: + return path[len(self.cwd):] + return path + + def display_all (self, generator, writer=None): + if writer: + write = self.write + self.write = writer + something = 0 + for msg in generator: + self.display(**msg) + something = 1 + if writer: + self.write = write + return something + +msg = Message() + diff --git a/lib/dbtexmf/dblatex/grubber/pdftex.py b/lib/dbtexmf/dblatex/grubber/pdftex.py index b10b2e2..f5f6635 100644 --- a/lib/dbtexmf/dblatex/grubber/pdftex.py +++ b/lib/dbtexmf/dblatex/grubber/pdftex.py @@ -9,13 +9,14 @@ using pdfTeX. The module optimizes the pdflatex calls by setting -draftmode and apply a last call to build the final PDF output. 
""" +import sys import os import re import subprocess from subprocess import Popen, PIPE -from msg import _, msg -from plugins import TexModule +from dbtexmf.dblatex.grubber.msg import _, msg +from dbtexmf.dblatex.grubber.plugins import TexModule class Module (TexModule): @@ -73,6 +74,8 @@ class Module (TexModule): # Grab the major version number p = Popen("pdflatex -version", shell=True, stdout=PIPE) data = p.communicate()[0] + if isinstance(data, bytes): + data = data.decode(sys.getdefaultencoding()) m = re.search("pdfTeX.*3.14[^-]*-(\d*.\d*)", data, re.M) if not(m): return "" diff --git a/lib/dbtexmf/dblatex/grubber/pdftex.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/pdftex.py.enable-python3 new file mode 100644 index 0000000..b10b2e2 --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/pdftex.py.enable-python3 @@ -0,0 +1,81 @@ +# This file is part of Rubber and thus covered by the GPL +# (c) Emmanuel Beffara, 2002--2006 +""" +pdfLaTeX support for Rubber. + +When this module loaded with the otion 'dvi', the document is compiled to DVI +using pdfTeX. + +The module optimizes the pdflatex calls by setting -draftmode and apply a last +call to build the final PDF output. +""" +import os +import re +import subprocess +from subprocess import Popen, PIPE + +from msg import _, msg +from plugins import TexModule + + +class Module (TexModule): + def __init__ (self, doc, dict): + self.doc = doc + doc.program = "pdflatex" + doc.engine = "pdfTeX" + # FIXME: how to handle opt=dvi with file.tex passed? + # FIXME: can we add commands after the file? 
+ doc.set_format("pdf") + + # Check the version to know if -draftmode is supported + if (self._draft_is_supported()): + self.doc.draft_support = True + else: + self.doc.draft_support = False + #self.draft_support = False + + def _draft_is_supported(self): + # FIXME: find a clean method to pass these options + opts = os.getenv("DBLATEX_PDFTEX_OPTIONS", "") + if not("-draftmode" in opts): + return False + return (self._get_version() == "1.40") + + def pre_compile(self): + if not(self.doc.draft_support): + return + + # Add -draftmode to prevent intermediate pdf output + self.doc.opts.append("-draftmode") + + def last_compile(self): + # If pdftex has no ability to work in draftmode, or if no final PDF + # is required, do nothing + if not(self.doc.draft_support) or self.doc.draft_only: + return + + # Remove the -draftmode to have the PDF output, and compile again + self.doc.opts.remove("-draftmode") + rc = self.doc.compile() + return rc + + def _get_version(self): + """ + Parse something like: + + pdfTeX using libpoppler 3.141592-1.40.3-2.2 (Web2C 7.5.6) + kpathsea version 3.5.6 + Copyright 2007 Peter Breitenlohner (eTeX)/Han The Thanh (pdfTeX). + Kpathsea is copyright 2007 Karl Berry and Olaf Weber. + ... + and return '1.40' + """ + # Grab the major version number + p = Popen("pdflatex -version", shell=True, stdout=PIPE) + data = p.communicate()[0] + m = re.search("pdfTeX.*3.14[^-]*-(\d*.\d*)", data, re.M) + if not(m): + return "" + else: + return m.group(1) + diff --git a/lib/dbtexmf/dblatex/grubber/plugins.py b/lib/dbtexmf/dblatex/grubber/plugins.py index f72bd13..01264fc 100644 --- a/lib/dbtexmf/dblatex/grubber/plugins.py +++ b/lib/dbtexmf/dblatex/grubber/plugins.py @@ -7,7 +7,7 @@ All the modules must be derived from the TexModule class. import imp from os.path import * -from msg import _, msg +from dbtexmf.dblatex.grubber.msg import _, msg import sys @@ -106,7 +106,7 @@ class Plugins (object): dictionary. 
Return 0 if no module was found, 1 if a module was found and loaded, and 2 if the module was found but already loaded. """ - if self.modules.has_key(name): + if name in self.modules: return 2 try: file, path, descr = imp.find_module(name, [""]) @@ -151,11 +151,11 @@ class Modules (Plugins): """ return self.objects[name] - def has_key (self, name): + def __contains__ (self, name): """ Check if a given module is loaded. """ - return self.objects.has_key(name) + return name in self.objects def register (self, name, dict={}): """ @@ -165,7 +165,7 @@ class Modules (Plugins): delayed commands for this module. The dictionary describes the command that caused the registration. """ - if self.has_key(name): + if name in self: msg.debug(_("module %s already registered") % name) return 2 @@ -191,7 +191,7 @@ class Modules (Plugins): # Run any delayed commands. - if self.commands.has_key(name): + if name in self.commands: for (cmd, args, vars) in self.commands[name]: msg.push_pos(vars) try: @@ -219,10 +219,10 @@ class Modules (Plugins): Send a command to a particular module. If this module is not loaded, store the command so that it will be sent when the module is register. """ - if self.objects.has_key(mod): + if mod in self.objects: self.objects[mod].command(cmd, args) else: - if not self.commands.has_key(mod): + if mod not in self.commands: self.commands[mod] = [] self.commands[mod].append((cmd, args, self.env.vars.copy())) diff --git a/lib/dbtexmf/dblatex/grubber/plugins.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/plugins.py.enable-python3 new file mode 100644 index 0000000..f72bd13 --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/plugins.py.enable-python3 @@ -0,0 +1,228 @@ +# This file is part of Rubber and thus covered by the GPL +# (c) Emmanuel Beffara, 2002--2006 +""" +Mechanisms to dynamically load extra modules to help the LaTeX compilation. +All the modules must be derived from the TexModule class. 
+""" +import imp + +from os.path import * +from msg import _, msg + +import sys + +class TexModule (object): + """ + This is the base class for modules. Each module should define a class + named 'Module' that derives from this one. The default implementation + provides all required methods with no effects. + """ + def __init__ (self, env, dict): + """ + The constructor receives two arguments: 'env' is the compiling + environment, 'dict' is a dictionary that describes the command that + caused the module to load. + """ + + def pre_compile (self): + """ + This method is called before the first LaTeX compilation. It is + supposed to build any file that LaTeX would require to compile the + document correctly. The method must return true on failure. + """ + return 0 + + def post_compile (self): + """ + This method is called after each LaTeX compilation. It is supposed to + process the compilation results and possibly request a new + compilation. The method must return true on failure. + """ + return 0 + + def last_compile (self): + """ + This method is called after the last LaTeX compilation. + It is supposed to terminate the compilation for its specific needs. + The method must return true on failure. + """ + return 0 + + def clean (self): + """ + This method is called when cleaning the compiled files. It is supposed + to remove all the files that this modules generates. + """ + + def command (self, cmd, args): + """ + This is called when a directive for the module is found in the source. + The method can raise 'AttributeError' when the directive does not + exist and 'TypeError' if the syntax is wrong. By default, when called + with argument "foo" it calls the method "do_foo" if it exists, and + fails otherwise. + """ + getattr(self, "do_" + cmd)(*args) + + def get_errors (self): + """ + This is called if something has failed during an operation performed + by this module. The method returns a generator with items of the same + form as in LaTeXDep.get_errors. 
+ """ + if None: + yield None + + +class Plugins (object): + """ + This class gathers operations related to the management of external Python + modules. Modules are requested through the `register' method, and + they are searched for first in the current directory, then in the + (possibly) specified Python package (using Python's path). + """ + def __init__ (self, path=None): + """ + Initialize the module set, possibly setting a path name in which + modules will be searched for. + """ + self.modules = {} + if not path: + self.path = [dirname(__file__)] + sys.path.append(self.path[0]) + else: + self.path = path + + def __getitem__ (self, name): + """ + Return the module object of the given name. + """ + return self.modules[name] + + def register (self, name): + """ + Attempt to register a module with the specified name. If an + appropriate module is found, load it and store it in the object's + dictionary. Return 0 if no module was found, 1 if a module was found + and loaded, and 2 if the module was found but already loaded. + """ + if self.modules.has_key(name): + return 2 + try: + file, path, descr = imp.find_module(name, [""]) + except ImportError: + if not self.path: + return 0 + try: + file, path, descr = imp.find_module(name, self.path) + except ImportError: + return 0 + module = imp.load_module(name, file, path, descr) + file.close() + self.modules[name] = module + return 1 + + def clear(self): + """ + Empty the module table, unregistering every module registered. No + modules are unloaded, however, but this has no other effect than + speeding the registration if the modules are loaded again. + """ + self.modules.clear() + + +class Modules (Plugins): + """ + This class gathers all operations related to the management of modules. + The modules are searched for first in the current directory, then as + scripts in the 'modules' directory in the program's data directort, then + as a Python module in the package `rubber.latex'. 
+ """ + def __init__ (self, env): + #Plugins.__init__(self, rubber.rules.latex.__path__) + Plugins.__init__(self) + self.env = env + self.objects = {} + self.commands = {} + + def __getitem__ (self, name): + """ + Return the module object of the given name. + """ + return self.objects[name] + + def has_key (self, name): + """ + Check if a given module is loaded. + """ + return self.objects.has_key(name) + + def register (self, name, dict={}): + """ + Attempt to register a package with the specified name. If a module is + found, create an object from the module's class called `Module', + passing it the environment and `dict' as arguments, and execute all + delayed commands for this module. The dictionary describes the + command that caused the registration. + """ + if self.has_key(name): + msg.debug(_("module %s already registered") % name) + return 2 + + # First look for a script + + moddir = "" + mod = None + for path in "", join(moddir, "modules"): + file = join(path, name + ".rub") + if exists(file): + mod = ScriptModule(self.env, file) + msg.log(_("script module %s registered") % name) + break + + # Then look for a Python module + + if not mod: + if Plugins.register(self, name) == 0: + msg.debug(_("no support found for %s") % name) + return 0 + mod = self.modules[name].Module(self.env, dict) + msg.log(_("built-in module %s registered") % name) + + # Run any delayed commands. + + if self.commands.has_key(name): + for (cmd, args, vars) in self.commands[name]: + msg.push_pos(vars) + try: + mod.command(cmd, args) + except AttributeError: + msg.warn(_("unknown directive '%s.%s'") % (name, cmd)) + except TypeError: + msg.warn(_("wrong syntax for '%s.%s'") % (name, cmd)) + msg.pop_pos() + del self.commands[name] + + self.objects[name] = mod + return 1 + + def clear (self): + """ + Unregister all modules. + """ + Plugins.clear(self) + self.objects = {} + self.commands = {} + + def command (self, mod, cmd, args): + """ + Send a command to a particular module. 
If this module is not loaded, + store the command so that it will be sent when the module is register. + """ + if self.objects.has_key(mod): + self.objects[mod].command(cmd, args) + else: + if not self.commands.has_key(mod): + self.commands[mod] = [] + self.commands[mod].append((cmd, args, self.env.vars.copy())) + diff --git a/lib/dbtexmf/dblatex/grubber/ps2pdf.py b/lib/dbtexmf/dblatex/grubber/ps2pdf.py index d7e1f33..02dfa60 100644 --- a/lib/dbtexmf/dblatex/grubber/ps2pdf.py +++ b/lib/dbtexmf/dblatex/grubber/ps2pdf.py @@ -7,9 +7,9 @@ PostScript to PDF conversion using GhostScript. import sys import os -from msg import _, msg -from maker import DependShell -from plugins import TexModule +from dbtexmf.dblatex.grubber.msg import _, msg +from dbtexmf.dblatex.grubber.maker import DependShell +from dbtexmf.dblatex.grubber.plugins import TexModule class Module (TexModule): diff --git a/lib/dbtexmf/dblatex/grubber/ps2pdf.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/ps2pdf.py.enable-python3 new file mode 100644 index 0000000..d7e1f33 --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/ps2pdf.py.enable-python3 @@ -0,0 +1,30 @@ +# This file is part of Rubber and thus covered by the GPL +# (c) Emmanuel Beffara, 2004--2006 +""" +PostScript to PDF conversion using GhostScript. 
+""" + +import sys +import os + +from msg import _, msg +from maker import DependShell +from plugins import TexModule + + +class Module (TexModule): + def __init__ (self, doc, dict): + env = doc.env + ps = env.dep_last().prods[0] + root, ext = os.path.splitext(ps) + if ext != ".ps": + msg.error(_("I can't use ps2pdf when not producing a PS")) + sys.exit(2) + pdf = root + ".pdf" + cmd = ["ps2pdf"] + for opt in doc.paper.split(): + cmd.append("-sPAPERSIZE=" + opt) + cmd.extend([ps, pdf]) + dep = DependShell(env, cmd, prods=[pdf], sources={ ps: env.dep_last() }) + env.dep_append(dep) + diff --git a/lib/dbtexmf/dblatex/grubber/texbuilder.py b/lib/dbtexmf/dblatex/grubber/texbuilder.py index bcb02ba..adce529 100644 --- a/lib/dbtexmf/dblatex/grubber/texbuilder.py +++ b/lib/dbtexmf/dblatex/grubber/texbuilder.py @@ -9,9 +9,9 @@ of the rubber internals. import subprocess import os import shlex -from msg import _, msg -from maker import Maker -from latex import Latex +from dbtexmf.dblatex.grubber.msg import _, msg +from dbtexmf.dblatex.grubber.maker import Maker +from dbtexmf.dblatex.grubber.latex import Latex class IndexBuilder: @@ -90,7 +90,7 @@ class LatexBuilder: self.tex.prepare() # Set the index configuration - if self.tex.modules.has_key("makeidx"): + if "makeidx" in self.tex.modules: idx = self.tex.modules["makeidx"] if self.index.style: idx.do_style(self.index.style) if self.index.tool: idx.do_tool(self.index.tool) diff --git a/lib/dbtexmf/dblatex/grubber/texbuilder.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/texbuilder.py.enable-python3 new file mode 100644 index 0000000..bcb02ba --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/texbuilder.py.enable-python3 @@ -0,0 +1,134 @@ +# This file is part of Rubber and thus covered by the GPL +# (c) Emmanuel Beffara, 2002--2006 +""" +LaTeX document building system for Grubber. + +This module is specific to Grubber and provides a class that encapsulates some +of the rubber internals. 
+""" +import subprocess +import os +import shlex +from msg import _, msg +from maker import Maker +from latex import Latex + + +class IndexBuilder: + """ + Index configuration data to set the index tool maker to use + """ + def __init__(self): + self.style = "" + self.tool = "" + self.lang = "" + +class LatexBuilder: + """ + Main (g)rubber wrapper hiding all the internals and compiling the + required tex file. + """ + def __init__(self): + # The actual workers + self.maker = Maker() + self.tex = Latex(self.maker) + self.maker.dep_append(self.tex) + + # What to do + self.backend = "pdftex" + self.format = "pdf" + self.index_style = "" + self.batch = 1 + self.encoding = "latin-1" + self.texpost = "" + self.options = "" + self.lang = "" + self.index = IndexBuilder() + + def set_format(self, format): + # Just record it + self.format = format + + def set_backend(self, backend): + self.backend = backend + +# def set_index_style(self, index_style): +# self.index_style = index_style + + def _texpost_call(self, source, msg): + if isinstance(self.texpost, str): + # Expect an external script + cmd = [self.texpost, source] + msg.log(" ".join(cmd)) + rc = subprocess.call(cmd, stdout=msg.stdout) + else: + # Expect a loaded python module + rc = self.texpost.main(source, msg.stdout) + return rc + + def compile(self, source): + self.tex.batch = self.batch + self.tex.encoding = self.encoding + self.tex.lang = self.lang + self.tex.set_source(source) + if self.options: + self.tex.opts += shlex.split(self.options) + + # Load the modules needed to produce the expected output format + if (self.format == "pdf"): + if (self.backend == "pdftex"): + self.tex.modules.register("pdftex") + elif (self.backend == "xetex"): + self.tex.modules.register("xetex") + else: + self.tex.modules.register("dvips") + self.tex.modules.register("ps2pdf") + elif (self.format == "ps"): + self.tex.modules.register("dvips") + + # Now load other the modules required to compile this file + self.tex.prepare() + + # 
Set the index configuration + if self.tex.modules.has_key("makeidx"): + idx = self.tex.modules["makeidx"] + if self.index.style: idx.do_style(self.index.style) + if self.index.tool: idx.do_tool(self.index.tool) + if self.index.lang: idx.do_language(self.index.lang) + + # Let's go... + rc = self.maker.make() + if rc != 0: + raise OSError("%s compilation failed" % self.tex.program) + + # Post process script to call? + if not(self.texpost): + return + + os.environ["LATEX"] = self.tex.program + rc = self._texpost_call(source, msg) + if rc == 1: + return + if rc != 0: + raise OSError("%s texpost failed" % self.texpost) + + rc = self.maker.make(force=1) + if rc != 0: + raise OSError("%s post compilation failed" % self.tex.program) + + def clean(self): + self.tex.clean() + self.reinit() + + def reinit(self): + self.tex.reinit() + self.maker.reinit() + self.maker.dep_append(self.tex) + + def print_errors(self): + msg.display_all(self.tex.get_errors(), writer=msg.write_stderr) + + def print_misschars(self): + # Show the characters not handled by fonts + self.tex.print_misschars() + diff --git a/lib/dbtexmf/dblatex/grubber/util.py b/lib/dbtexmf/dblatex/grubber/util.py index fa3bda6..289acaf 100644 --- a/lib/dbtexmf/dblatex/grubber/util.py +++ b/lib/dbtexmf/dblatex/grubber/util.py @@ -11,7 +11,9 @@ except ImportError: # Fallback for python 2.4: import md5 as hashlib import os -from msg import _, msg +from io import open + +from dbtexmf.dblatex.grubber.msg import _, msg def md5_file(fname): @@ -19,7 +21,7 @@ def md5_file(fname): Compute the MD5 sum of a given file. 
""" m = hashlib.md5() - file = open(fname) + file = open(fname, "rb") for line in file.readlines(): m.update(line) file.close() diff --git a/lib/dbtexmf/dblatex/grubber/util.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/util.py.enable-python3 new file mode 100644 index 0000000..fa3bda6 --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/util.py.enable-python3 @@ -0,0 +1,57 @@ +# This file is part of Rubber and thus covered by the GPL +# (c) Emmanuel Beffara, 2002--2006 +""" +This module contains utility functions and classes used by the main system and +by the modules for various tasks. +""" + +try: + import hashlib +except ImportError: + # Fallback for python 2.4: + import md5 as hashlib +import os +from msg import _, msg + + +def md5_file(fname): + """ + Compute the MD5 sum of a given file. + """ + m = hashlib.md5() + file = open(fname) + for line in file.readlines(): + m.update(line) + file.close() + return m.digest() + + +class Watcher: + """ + Watch for any changes of the files to survey, by checking the file MD5 sums. + """ + def __init__(self): + self.files = {} + + def watch(self, file): + if os.path.exists(file): + self.files[file] = md5_file(file) + else: + self.files[file] = None + + def update(self): + """ + Update the MD5 sums of all files watched, and return the name of one + of the files that changed, or None of they didn't change. + """ + changed = [] + for file in self.files.keys(): + if os.path.exists(file): + new = md5_file(file) + if self.files[file] != new: + msg.debug(_("%s MD5 checksum changed") % \ + os.path.basename(file)) + changed.append(file) + self.files[file] = new + return changed + diff --git a/lib/dbtexmf/dblatex/grubber/xetex.py b/lib/dbtexmf/dblatex/grubber/xetex.py index f39a941..63bfe9d 100644 --- a/lib/dbtexmf/dblatex/grubber/xetex.py +++ b/lib/dbtexmf/dblatex/grubber/xetex.py @@ -2,7 +2,7 @@ XeTeX support for Rubber. 
""" -from plugins import TexModule +from dbtexmf.dblatex.grubber.plugins import TexModule class Module (TexModule): def __init__ (self, doc, dict): diff --git a/lib/dbtexmf/dblatex/grubber/xetex.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/xetex.py.enable-python3 new file mode 100644 index 0000000..f39a941 --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/xetex.py.enable-python3 @@ -0,0 +1,13 @@ +""" +XeTeX support for Rubber. +""" + +from plugins import TexModule + +class Module (TexModule): + def __init__ (self, doc, dict): + doc.program = "xelatex" + doc.engine = "dvipdfmx" + doc.encoding = "utf8" + doc.set_format("pdf") + diff --git a/lib/dbtexmf/dblatex/grubber/xr-hyper.py b/lib/dbtexmf/dblatex/grubber/xr-hyper.py index d7bee5c..36f4761 100644 --- a/lib/dbtexmf/dblatex/grubber/xr-hyper.py +++ b/lib/dbtexmf/dblatex/grubber/xr-hyper.py @@ -9,9 +9,9 @@ file, so this support package registers these files as dependencies. """ import os -from msg import _, msg -from plugins import TexModule -from latex import Latex +from dbtexmf.dblatex.grubber.msg import _, msg +from dbtexmf.dblatex.grubber.plugins import TexModule +from dbtexmf.dblatex.grubber.latex import Latex class Module(TexModule): def __init__ (self, doc, dict): @@ -23,7 +23,7 @@ class Module(TexModule): # remember the engine used to build the main latex document self.texmodules = [] for m in ("pdftex", "xetex"): - if doc.modules.has_key(m): + if m in doc.modules: self.texmodules.append(m) # want to track each external document whose .aux is required diff --git a/lib/dbtexmf/dblatex/grubber/xr-hyper.py.enable-python3 b/lib/dbtexmf/dblatex/grubber/xr-hyper.py.enable-python3 new file mode 100644 index 0000000..d7bee5c --- /dev/null +++ b/lib/dbtexmf/dblatex/grubber/xr-hyper.py.enable-python3 @@ -0,0 +1,57 @@ +# This file is part of Rubber and thus covered by the GPL +# (c) Emmanuel Beffara, 2002--2006 +""" +Dependency analysis for the xr package. 
+ +The xr package allows one to put references in one document to other +(external) LaTeX documents. It works by reading the external document's .aux +file, so this support package registers these files as dependencies. +""" +import os + +from msg import _, msg +from plugins import TexModule +from latex import Latex + +class Module(TexModule): + def __init__ (self, doc, dict): + # is the main Latex() document to compile + # is the maker engine + self.doc = doc + self.env = doc.env + + # remember the engine used to build the main latex document + self.texmodules = [] + for m in ("pdftex", "xetex"): + if doc.modules.has_key(m): + self.texmodules.append(m) + + # want to track each external document whose .aux is required + doc.parser.add_hook("externaldocument", self.externaldocument) + + def externaldocument (self, dict): + # .aux document needed to cross-ref with xr + auxfile = dict["arg"] + ".aux" + texfile = dict["arg"] + ".tex" + + # Ignore the dependency if no tex source found + if not(os.path.isfile(texfile)): + msg.log(_("file %s is required by xr package but not found")\ + % texfile, pkg="xr") + return + + # Ask to compile the related .tex file to have the .aux + texdep = Latex(self.env) + texdep.set_source(texfile) + texdep.batch = self.doc.batch + texdep.encoding = self.doc.encoding + texdep.draft_only = True # Final output not required here + for m in self.texmodules: + texdep.modules.register(m) + # Load other modules from source, except xr to avoid loops + texdep.prepare(exclude_mods=["xr-hyper"]) + + # Add the .aux as an expected input for compiling the doc + self.doc.sources[auxfile] = texdep + msg.log(_( + "dependency %s added for external references") % auxfile, pkg="xr") diff --git a/lib/dbtexmf/dblatex/rawparse.py b/lib/dbtexmf/dblatex/rawparse.py index a06a61d..9af17fa 100644 --- a/lib/dbtexmf/dblatex/rawparse.py +++ b/lib/dbtexmf/dblatex/rawparse.py @@ -1,7 +1,7 @@ import re -from texcodec import LatexCodec, TexCodec -from texhyphen import 
BasicHyphenator, UrlHyphenator +from dbtexmf.dblatex.texcodec import LatexCodec, TexCodec +from dbtexmf.dblatex.texhyphen import BasicHyphenator, UrlHyphenator def utf8(u): @@ -31,7 +31,7 @@ class RawLatexParser: self.hypof = re.compile(utf8(u"\u0371h")) def parse(self, line): - lout = "" + lout = b"" while (line): self.key_in.pos = line.find(self.key_in.key) self.key_out.pos = line.find(self.key_out.key) @@ -48,14 +48,14 @@ class RawLatexParser: line = line[key.pos + key.len:] else: text = line - line = "" + line = b"" if (text): if self.depth > 0: lout += self.translate(text) else: - text, hon = self.hypon.subn("", text) - text, hof = self.hypof.subn("", text) + text, hon = self.hypon.subn(b"", text) + text, hof = self.hypof.subn(b"", text) self.hyphenate += (hon - hof) lout += text diff --git a/lib/dbtexmf/dblatex/rawparse.py.enable-python3 b/lib/dbtexmf/dblatex/rawparse.py.enable-python3 new file mode 100644 index 0000000..a06a61d --- /dev/null +++ b/lib/dbtexmf/dblatex/rawparse.py.enable-python3 @@ -0,0 +1,89 @@ +import re + +from texcodec import LatexCodec, TexCodec +from texhyphen import BasicHyphenator, UrlHyphenator + + +def utf8(u): + return u.encode("utf8") + +class RawKey: + def __init__(self, key, incr): + self.key = key + self.depth = incr + self.pos = -1 + self.len = len(key) + +class RawLatexParser: + def __init__(self, + key_in=utf8(u"\u0370t"), key_out=utf8(u"\u0371t"), + codec=None, output_encoding="latin-1"): + self.key_in = RawKey(key_in, 1) + self.key_out = RawKey(key_out, -1) + self.depth = 0 + self.hyphenate = 0 + self.codec = codec or LatexCodec(output_encoding=output_encoding) + #self.hyphenator = BasicHyphenator(codec=self.codec) + self.hyphenator = UrlHyphenator(codec=self.codec) + + # hyphenation patterns + self.hypon = re.compile(utf8(u"\u0370h")) + self.hypof = re.compile(utf8(u"\u0371h")) + + def parse(self, line): + lout = "" + while (line): + self.key_in.pos = line.find(self.key_in.key) + self.key_out.pos = 
line.find(self.key_out.key) + + if (self.key_out.pos == -1 or + (self.key_in.pos >= 0 and + (self.key_in.pos < self.key_out.pos))): + key = self.key_in + else: + key = self.key_out + + if key.pos != -1: + text = line[:key.pos] + line = line[key.pos + key.len:] + else: + text = line + line = "" + + if (text): + if self.depth > 0: + lout += self.translate(text) + else: + text, hon = self.hypon.subn("", text) + text, hof = self.hypof.subn("", text) + self.hyphenate += (hon - hof) + lout += text + + if key.pos != -1: + self.depth += key.depth + + return lout + + def translate(self, text): + # Now hyphenate if needed + if self.hyphenate: + text = self.hyphenator.hyphenate(text) + else: + text = self.codec.decode(text) + text = self.codec.encode(text) + return text + + +class RawUtfParser(RawLatexParser): + "Just encode from UTF-8 without latex escaping" + + def __init__(self, codec=None, output_encoding="latin-1"): + texcodec = codec or TexCodec(output_encoding=output_encoding) + RawLatexParser.__init__(self, utf8(u"\u0370u"), utf8(u"\u0371u"), + texcodec) + + def translate(self, text): + # Currently no hyphenation stuff, just encode + text = self.codec.decode(text) + return self.codec.encode(text) + diff --git a/lib/dbtexmf/dblatex/rawtex.py b/lib/dbtexmf/dblatex/rawtex.py index cc70d25..be3daee 100644 --- a/lib/dbtexmf/dblatex/rawtex.py +++ b/lib/dbtexmf/dblatex/rawtex.py @@ -7,10 +7,11 @@ import sys import os import re +from io import open -from rawparse import RawLatexParser, RawUtfParser -from rawverb import VerbParser -from xetex.codec import XetexCodec +from dbtexmf.dblatex.rawparse import RawLatexParser, RawUtfParser +from dbtexmf.dblatex.rawverb import VerbParser +from dbtexmf.dblatex.xetex.codec import XetexCodec from dbtexmf.core.imagedata import * @@ -18,9 +19,9 @@ class RawLatex: "Main latex file parser" def __init__(self): self.figre = \ - re.compile(r"(\\includegraphics[\[]?|"\ - r"\\begin{overpic}|"\ - r"\\imgexits)[^{]*{([^}]*)}") + 
re.compile(br"(\\includegraphics[\[]?|"\ + br"\\begin{overpic}|"\ + br"\\imgexits)[^{]*{([^}]*)}") self.image = Imagedata() self.parsers = [] self.format = None @@ -35,7 +36,7 @@ class RawLatex: output_encoding = "utf8" codec = XetexCodec() elif not(output_encoding): - f = file(input) + f = open(input, "rt", encoding="latin-1") params = {} started = 0 for line in f: @@ -68,8 +69,8 @@ class RawLatex: def parse(self, input, output): self.set_parsers(input) - f = file(input) - o = file(output, "w") + f = open(input, "rb") + o = open(output, "wb") for line in f: if self.format: line = self.figconvert(line) @@ -95,7 +96,7 @@ class RawLatex: # If something done, replace the figure in the tex file if newfig != fig: - line = re.sub(r"{%s}" % fig, r"{%s}" % newfig, line) + line = re.sub(br"{%s}" % fig, br"{%s}" % newfig, line) return line diff --git a/lib/dbtexmf/dblatex/rawtex.py.enable-python3 b/lib/dbtexmf/dblatex/rawtex.py.enable-python3 new file mode 100644 index 0000000..cc70d25 --- /dev/null +++ b/lib/dbtexmf/dblatex/rawtex.py.enable-python3 @@ -0,0 +1,109 @@ +# +# Dblatex parser. 
Its role is to: +# - encode the raw tex file to the expected output encoding, taking care about +# the special characters to escape, +# - convert the document images to the appropriate format if needed +# +import sys +import os +import re + +from rawparse import RawLatexParser, RawUtfParser +from rawverb import VerbParser +from xetex.codec import XetexCodec +from dbtexmf.core.imagedata import * + + +class RawLatex: + "Main latex file parser" + def __init__(self): + self.figre = \ + re.compile(r"(\\includegraphics[\[]?|"\ + r"\\begin{overpic}|"\ + r"\\imgexits)[^{]*{([^}]*)}") + self.image = Imagedata() + self.parsers = [] + self.format = None + self.backend = None + + def set_fig_paths(self, paths): + self.image.paths = paths + + def set_parsers(self, input, output_encoding=""): + codec = None + if self.backend == "xetex": + output_encoding = "utf8" + codec = XetexCodec() + elif not(output_encoding): + f = file(input) + params = {} + started = 0 + for line in f: + if not(started): + if line.startswith("%%"): started = 1 + continue + if line.startswith("%%"): + break + p = line.split() + params[p[1]] = p[2] + output_encoding = params.get("latex.encoding", "latin-1") + + self.parsers = [VerbParser(output_encoding=output_encoding), + RawLatexParser(codec=codec, + output_encoding=output_encoding), + RawUtfParser(output_encoding=output_encoding)] + self.image.set_encoding(output_encoding or "latin-1") + + def set_format(self, format, backend=None): + # Adjust the actual format from backend + if (format == "pdf" and backend == "dvips"): + format = "ps" + self.format = format + self.backend = backend + self.image.set_format(format, backend) + + def fig_format(self, format): + # TODO: consistency check? 
+ self.image.input_format = format + + def parse(self, input, output): + self.set_parsers(input) + f = file(input) + o = file(output, "w") + for line in f: + if self.format: + line = self.figconvert(line) + for p in self.parsers: + line = p.parse(line) + if not(line): + break + if line: + o.write(line) + o.close() + f.close() + + def figconvert(self, line): + # Is there one or more images included here + mlist = self.figre.findall(line) + if not(mlist): + return line + + # Try to convert each found image + for m in mlist: + fig = m[1] + newfig = self.image.convert(fig) + + # If something done, replace the figure in the tex file + if newfig != fig: + line = re.sub(r"{%s}" % fig, r"{%s}" % newfig, line) + + return line + + +def main(): + c = RawLatex() + c.set_fig_paths([os.getcwd()]) + c.parse(sys.argv[1], sys.argv[2]) + +if __name__ == "__main__": + main() diff --git a/lib/dbtexmf/dblatex/rawverb.py b/lib/dbtexmf/dblatex/rawverb.py index 36b30cf..49c0d0a 100644 --- a/lib/dbtexmf/dblatex/rawverb.py +++ b/lib/dbtexmf/dblatex/rawverb.py @@ -10,10 +10,11 @@ # if necessary. 
# import re +from io import open -from texcodec import TexCodec -from texcodec import tex_handler_counter -from rawparse import RawUtfParser +from dbtexmf.dblatex.texcodec import TexCodec +from dbtexmf.dblatex.texcodec import tex_handler_counter +from dbtexmf.dblatex.rawparse import RawUtfParser class VerbCodec(TexCodec): @@ -49,13 +50,13 @@ class VerbParser: def __init__(self, output_encoding="latin-1"): # The listing environment can be different from 'lstlisting' # but the rule is that it must begin with 'lst' - self.start_re = re.compile(r"\\begin{lst[^}]*}") - self.stop_re = re.compile(r"\\end{lst[^}]*}") - self.esc_re = re.compile(r"escapeinside={([^}]*)}{([^}]*)}") - self.block = "" + self.start_re = re.compile(br"\\begin{lst[^}]*}") + self.stop_re = re.compile(br"\\end{lst[^}]*}") + self.esc_re = re.compile(br"escapeinside={([^}]*)}{([^}]*)}") + self.block = b"" self.encoding = output_encoding - self.default_esc_start = "<:" - self.default_esc_stop = ":>" + self.default_esc_start = b"<:" + self.default_esc_stop = b":>" self.default_codec = VerbCodec(self.default_esc_start, self.default_esc_stop, output_encoding=output_encoding) @@ -79,13 +80,13 @@ class VerbParser: self.command = line[m.start():m.end()] line = line[m.end():] # By default, no escape sequence defined yet - self.esc_start = "" - self.esc_stop = "" - self.options = "" + self.esc_start = b"" + self.esc_stop = b"" + self.options = b"" # If there are some options, look for escape specs - if line[0] == "[": - e = line.find("]")+1 + if line[0] == b"[": + e = line.find(b"]")+1 self.options = line[:e] line = line[e:] m = self.esc_re.search(self.options) @@ -109,28 +110,28 @@ class VerbParser: # Add the escape option if necessary if not(self.esc_start) and c.get_errors() != 0: - escopt = "escapeinside={%s}{%s}" % (c.pre, c.post) + escopt = b"escapeinside={%s}{%s}" % (c.pre, c.post) if self.options: if self.options[-2] != ",": - escopt = "," + escopt + escopt = b"," + escopt self.options = 
self.options[:-1] + escopt + "]" else: - self.options = "[" + escopt + "]" + self.options = b"[" + escopt + b"]" block = self.command + self.options + text + line[m.start():] - self.block = "" + self.block = b"" return block def block_grow(self, line): self.block += line - return "" + return b"" def get_codec(self): # Something already specified if (self.esc_start): if self.esc_start != self.default_esc_start: return VerbCodec(self.esc_start, self.esc_stop, - "verbtex" + self.esc_start, + b"verbtex" + self.esc_start, output_encoding=self.encoding) else: return self.default_codec @@ -140,7 +141,7 @@ class VerbParser: iter = 0 i = self.block.find(s) while (i != -1): - s = "<" + str(iter) + ":" + s = b"<" + bytes(iter) + b":" i = self.block.find(s) iter += 1 @@ -148,16 +149,17 @@ class VerbParser: if (s == self.default_esc_start): return self.default_codec - return VerbCodec(s, self.default_esc_stop, "verbtex" + s, + return VerbCodec(s, self.default_esc_stop, b"verbtex" + s, output_encoding=self.encoding) if __name__ == "__main__": import sys v = VerbParser() - f = open(sys.argv[1]) + buf = getattr(sys.stdout, "buffer", sys.stdout) + f = open(sys.argv[1], "rb") for line in f: text = v.parse(line) if text: - sys.stdout.write(text) + buf.write(text) diff --git a/lib/dbtexmf/dblatex/rawverb.py.enable-python3 b/lib/dbtexmf/dblatex/rawverb.py.enable-python3 new file mode 100644 index 0000000..36b30cf --- /dev/null +++ b/lib/dbtexmf/dblatex/rawverb.py.enable-python3 @@ -0,0 +1,163 @@ +# +# The role of the verbatim parser is to encode properly from UTF-8 verbatim +# text to valid latin-1 text. Two goals must be met: +# +# - Just encode the characters, but don't escape latex characters like in normal +# text. This is why a dedicated latex encoder is used. +# - When the characters are translated to macros, escape the whole sequence +# to allow tex execute the macro embedded in verbatim text. 
+# - When the escape sequence is required, update the listing environment options +# if necessary. +# +import re + +from texcodec import TexCodec +from texcodec import tex_handler_counter +from rawparse import RawUtfParser + + +class VerbCodec(TexCodec): + def __init__(self, pre, post, errors="verbtex", + input_encoding="utf8", output_encoding="latin-1"): + self.pre = pre + self.post = post + self.output_encoding = output_encoding + TexCodec.__init__(self, input_encoding, output_encoding, + errors=errors, pre=pre, post=post) + + def decode(self, text): + global tex_handler_counter + ntext = TexCodec.decode(self, text) + if self.output_encoding != "utf8": + return ntext + + # Funnily listings cannot handle unicode characters greater than 255. + # The loop just escapes them by wrapping with
+        # self.pre and self.post and
+        # emulates the corresponding encoding exception
+        text = ""
+        n = tex_handler_counter[self._errors]
+        for c in ntext:
+            if ord(c) > 255:
+                c = self.pre + c + self.post
+                n += 1
+            text += c
+        tex_handler_counter[self._errors] = n
+        return text
+
+
+class VerbParser:
+    def __init__(self, output_encoding="latin-1"):
+        # The listing environment can be different from 'lstlisting'
+        # but the rule is that it must begin with 'lst'
+        self.start_re = re.compile(r"\\begin{lst[^}]*}")
+        self.stop_re = re.compile(r"\\end{lst[^}]*}")
+        self.esc_re = re.compile(r"escapeinside={([^}]*)}{([^}]*)}")
+        self.block = ""
+        self.encoding = output_encoding
+        self.default_esc_start = "<:"
+        self.default_esc_stop = ":>"
+        self.default_codec = VerbCodec(self.default_esc_start,
+                                       self.default_esc_stop,
+                                       output_encoding=output_encoding)
+
+    def parse(self, line):
+        if not(self.block):
+            m = self.start_re.search(line)
+            if not(m):
+                return line
+            else:
+                return self.parse_begin(line, m)
+        else:
+            m = self.stop_re.search(line)
+            if not(m):
+                return self.block_grow(line)
+            else:
+                return self.parse_end(line, m)
+
+    def parse_begin(self, line, m):
+        preblock = line[:m.start()]
+        self.command = line[m.start():m.end()]
+        line = line[m.end():]
+        # By default, no escape sequence defined yet
+        self.esc_start = ""
+        self.esc_stop = ""
+        self.options = ""
+
+        # If there are some options, look for escape specs
+        if line[0] == "[":
+            e = line.find("]")+1
+            self.options = line[:e]
+            line = line[e:]
+            m = self.esc_re.search(self.options)
+            if m:
+                self.esc_start = m.group(1)
+                self.esc_stop = m.group(2)
+
+        self.block_grow(line)
+        return preblock
+
+    def parse_end(self, line, m):
+        self.block_grow(line[:m.start()])
+
+        # The block is complete, find out the codec with escape sequence
+        c = self.get_codec()
+        c.clear_errors()
+
+        # Now, parse/encode the block
+        p = RawUtfParser(codec=c)
+        text = p.parse(self.block)
+
+        # Add the escape option if necessary
+        if not(self.esc_start) and c.get_errors() != 0:
+            escopt = "escapeinside={%s}{%s}" % (c.pre, c.post)
+            if self.options:
+                if self.options[-2] != ",":
+                    escopt = "," + escopt
+                self.options = self.options[:-1] + escopt + "]"
+            else:
+                self.options = "[" + escopt + "]"
+
+        block = self.command + self.options + text + line[m.start():]
+        self.block = ""
+        return block
+
+    def block_grow(self, line):
+        self.block += line
+        return ""
+
+    def get_codec(self):
+        # Something already specified
+        if (self.esc_start):
+            if self.esc_start != self.default_esc_start:
+                return VerbCodec(self.esc_start, self.esc_stop,
+                                 "verbtex" + self.esc_start,
+                                 output_encoding=self.encoding)
+            else:
+                return self.default_codec
+
+        # Find the starting escape sequence that does not occur in verbatim text
+        s = self.default_esc_start
+        iter = 0
+        i = self.block.find(s)
+        while (i != -1):
+            s = "<" + str(iter) + ":"
+            i = self.block.find(s)
+            iter += 1
+
+        # By luck the default is enough
+        if (s == self.default_esc_start):
+            return self.default_codec
+
+        return VerbCodec(s, self.default_esc_stop, "verbtex" + s,
+                         output_encoding=self.encoding)
+
+
+if __name__ == "__main__":
+    import sys
+    v = VerbParser()
+    f = open(sys.argv[1])
+    for line in f:
+        text = v.parse(line)
+        if text:
+            sys.stdout.write(text)
+
diff --git a/lib/dbtexmf/dblatex/runtex.py b/lib/dbtexmf/dblatex/runtex.py
index c89e43d..91a88ae 100644
--- a/lib/dbtexmf/dblatex/runtex.py
+++ b/lib/dbtexmf/dblatex/runtex.py
@@ -4,8 +4,9 @@
 import os
 import re
 import shutil
+from io import open
 
-from grubber.texbuilder import LatexBuilder
+from dbtexmf.dblatex.grubber.texbuilder import LatexBuilder
 
 
 class RunLatex:
@@ -86,16 +87,16 @@ class RunLatex:
         texout = root + "." + format
 
         # The temporary file contains the extra paths
-        f = file(tmptex, "w")
+        f = open(tmptex, "wt", encoding="latin-1")
         if self.fig_paths:
             paths = "{" + "//}{".join(self.fig_paths) + "//}"
-            f.write("\\makeatletter\n")
-            f.write("\\def\\input@path{%s}\n" % paths)
-            f.write("\\makeatother\n")
+            f.write(u"\\makeatletter\n")
+            f.write(u"\\def\\input@path{%s}\n" % paths)
+            f.write(u"\\makeatother\n")
 
         # Copy the original file and collect parameters embedded in the tex file
         self._clear_params()
-        input = file(texfile)
+        input = open(texfile, "rt", encoding="latin-1")
         for line in input:
             self._set_params(line)
             f.write(line)
diff --git a/lib/dbtexmf/dblatex/runtex.py.enable-python3 b/lib/dbtexmf/dblatex/runtex.py.enable-python3
new file mode 100644
index 0000000..c89e43d
--- /dev/null
+++ b/lib/dbtexmf/dblatex/runtex.py.enable-python3
@@ -0,0 +1,136 @@
+#
+# Basic module compiling a file with LaTeX
+#
+import os
+import re
+import shutil
+
+from grubber.texbuilder import LatexBuilder
+
+
+class RunLatex:
+    def __init__(self):
+        self.fig_paths = []
+        self.index_style = ""
+        self.backend = "pdftex"
+        self.texpost = ""
+        self.texer = LatexBuilder()
+
+    def set_fig_paths(self, paths):
+        # Assume the paths are already absolute
+        if not(paths):
+            return
+
+        # Use TEXINPUTS to handle paths containing spaces
+        paths_blank = []
+        paths_input = []
+        for p in paths:
+            if p.find(" ") != -1:
+                paths_blank.append(p + "//")
+            else:
+                paths_input.append(p)
+
+        if paths_blank:
+            texinputs = os.pathsep.join(paths_blank)
+            os.environ["TEXINPUTS"] = os.getenv("TEXINPUTS") + os.pathsep + \
+                                      texinputs
+
+        paths = paths_input
+
+        # Unixify the paths when under Windows
+        if os.sep != "/":
+            paths = [p.replace(os.sep, "/") for p in paths]
+
+        # Protect from tilde active char (maybe others?)
+        self.fig_paths = [p.replace("~", r"\string~") for p in paths]
+
+    def set_bib_paths(self, bibpaths, bstpaths=None):
+        # Just set BIBINPUTS and/or BSTINPUTS
+        if bibpaths:
+            os.environ["BIBINPUTS"] = os.pathsep.join(bibpaths +
+                                                   [os.getenv("BIBINPUTS", "")])
+        if bstpaths:
+            os.environ["BSTINPUTS"] = os.pathsep.join(bstpaths +
+                                                   [os.getenv("BSTINPUTS", "")])
+
+    def set_backend(self, backend):
+        if not(backend in ("dvips", "pdftex", "xetex")):
+            raise ValueError("'%s': invalid backend" % backend)
+        self.backend = backend
+
+    def get_backend(self):
+        return self.backend
+
+    def _clear_params(self):
+        self._param_started = 0
+        self._param_ended = 0
+        self._params = {}
+
+    def _set_params(self, line):
+        # FIXME
+        if self._param_ended:
+            return
+        if not(self._param_started):
+            if line.startswith("%%"): self._param_started = 1
+            return
+        if line.startswith("%%"):
+            self._param_ended = 1
+            return
+        # Expected format is: '%% <param_name> <param_value>\n'
+        p = line.split(" ", 2)
+        self._params[p[1]] = p[2].strip()
+
+    def compile(self, texfile, binfile, format, batch=1):
+        root = os.path.splitext(texfile)[0]
+        tmptex = root + "_tmp" + ".tex"
+        texout = root + "." + format
+
+        # The temporary file contains the extra paths
+        f = file(tmptex, "w")
+        if self.fig_paths:
+            paths = "{" + "//}{".join(self.fig_paths) + "//}"
+            f.write("\\makeatletter\n")
+            f.write("\\def\\input@path{%s}\n" % paths)
+            f.write("\\makeatother\n")
+
+        # Copy the original file and collect parameters embedded in the tex file
+        self._clear_params()
+        input = file(texfile)
+        for line in input:
+            self._set_params(line)
+            f.write(line)
+        f.close()
+        input.close()
+
+        # Replace the original file with the new one
+        shutil.move(tmptex, texfile)
+
+        # Build the output file
+        try:
+            self.texer.batch = batch
+            self.texer.texpost = self.texpost
+            self.texer.encoding = self._params.get("latex.encoding", "latin-1")
+            self.texer.options = self._params.get("latex.engine.options")
+            self.texer.lang = self._params.get("document.language")
+            self.texer.set_format(format)
+            self.texer.set_backend(self.backend)
+            if self.index_style:
+                self.texer.index.style = self.index_style
+            self.texer.index.tool = self._params.get("latex.index.tool")
+            self.texer.index.lang = self._params.get("latex.index.language")
+            self.texer.compile(texfile)
+            self.texer.print_misschars()
+        except:
+            # On error, dump the log errors and raise again
+            self.texer.print_errors()
+            raise
+
+        if texout != binfile:
+            shutil.move(texout, binfile)
+
+    def clean(self):
+        self.texer.clean()
+
+    def reinit(self):
+        self.texer.reinit()
+ 
diff --git a/lib/dbtexmf/dblatex/texcodec.py b/lib/dbtexmf/dblatex/texcodec.py
index 3dfab6b..67e28b8 100644
--- a/lib/dbtexmf/dblatex/texcodec.py
+++ b/lib/dbtexmf/dblatex/texcodec.py
@@ -2,9 +2,11 @@
 # The Latex Codec handles the encoding from UFT-8 text to latin1
 # latex compatible text.
 #
+from __future__ import print_function
+
 import re
 import codecs
-import unient
+import dbtexmf.dblatex.unient as unient
 
 # Dictionnary of the handlers installed
 tex_handler_installed = {}
@@ -21,7 +23,7 @@ def latex_char_replace(exc, pre, post, name):
         try:
             l.append(unient.unicode_map[ord(c)])
         except KeyError:
-            print "Missing character &#x%x;" % ord(c)
+            print("Missing character &#x%x;" % ord(c))
             l.append(u"\&\#x%x;" % ord(c))
         if post: l.append(post)
         n = n + 1
@@ -32,21 +34,21 @@ def latex_char_replace(exc, pre, post, name):
 class TexCodec:
     # This mapping for characters < 256 seems enough for latin1 output
     charmap = {
-              "\xa0": r"~",
-              # "\xa2": r"\textcent{}",
-              # "\xa4": r"\textcurrency{}",
-              "\xa5": r"$\yen$",
-              # "\xa6": r"\textbrokenbar{}",
-              "\xac": r"\ensuremath{\lnot}",
-              # "\xad": r"", # FIXME: bug around soft hyphen...
-              "\xb0": r"\textdegree{}",
-              "\xb1": r"\ensuremath{\pm}",
-              "\xb2": r"$^2$",
-              "\xb3": r"$^3$",
-              "\xb5": r"$\mathrm{\mu}$",
-              "\xb9": r"$^1$",
-              "\xd7": r"$\times$",
-              "\xf7": r"$\div$"
+              b"\xa0": br"~",
+              # b"\xa2": br"\textcent{}",
+              # b"\xa4": br"\textcurrency{}",
+              b"\xa5": br"$\yen$",
+              # b"\xa6": br"\textbrokenbar{}",
+              b"\xac": br"\ensuremath{\lnot}",
+              # b"\xad": br"", # FIXME: bug around soft hyphen...
+              b"\xb0": br"\textdegree{}",
+              b"\xb1": br"\ensuremath{\pm}",
+              b"\xb2": br"$^2$",
+              b"\xb3": br"$^3$",
+              b"\xb5": br"$\mathrm{\mu}$",
+              b"\xb9": br"$^1$",
+              b"\xd7": br"$\times$",
+              b"\xf7": br"$\div$"
               }
 
     def __init__(self, input_encoding="utf8", output_encoding="latin-1",
@@ -60,7 +62,7 @@ class TexCodec:
             self.charmap = {}
             return
 
-        if not(tex_handler_installed.has_key(self._errors)):
+        if not(self._errors in tex_handler_installed):
             f = self.build_error_func(pre, post, errors)
             codecs.register_error(self._errors, f)
             tex_handler_installed[self._errors] = f
@@ -121,19 +123,20 @@ class LatexCodec(TexCodec):
             text = text.replace(c, v)
 
         # Things are done, complete with {}
-        text = text.replace(r"\textbackslash", r"\textbackslash{}")
+        text = text.replace(br"\textbackslash", br"\textbackslash{}")
         return text
 
 
 def main():
     import sys
     c = LatexCodec()
-    f = open(sys.argv[1])
-    text = ""
+    buf = getattr(sys.stdout, "buffer", sys.stdout)
+    f = open(sys.argv[1], "rb")
+    text = "" if buf == sys.stdout else b""
     for line in f:
         text += c.encode(c.decode(line))
         if text:
-            sys.stdout.write(text)
+            buf.write(text)
 
 
 if __name__ == "__main__":
diff --git a/lib/dbtexmf/dblatex/texcodec.py.enable-python3 b/lib/dbtexmf/dblatex/texcodec.py.enable-python3
new file mode 100644
index 0000000..3dfab6b
--- /dev/null
+++ b/lib/dbtexmf/dblatex/texcodec.py.enable-python3
@@ -0,0 +1,140 @@
+#
+# The Latex Codec handles the encoding from UTF-8 text to latin1
+# latex compatible text.
+#
+import re
+import codecs
+import unient
+
+# Dictionary of the handlers installed
+tex_handler_installed = {}
+tex_handler_counter = {}
+
+def latex_char_replace(exc, pre, post, name):
+    global tex_handler_counter
+    if not isinstance(exc, UnicodeEncodeError):
+        raise TypeError("don't know how to handle %r" % exc)
+    l = []
+    n = tex_handler_counter[name]
+    for c in exc.object[exc.start:exc.end]:
+        if pre: l.append(pre)
+        try:
+            l.append(unient.unicode_map[ord(c)])
+        except KeyError:
+            print "Missing character &#x%x;" % ord(c)
+            l.append(u"\&\#x%x;" % ord(c))
+        if post: l.append(post)
+        n = n + 1
+    tex_handler_counter[name] = n
+    return (u"".join(l), exc.end)
+
+
+class TexCodec:
+    # This mapping for characters < 256 seems enough for latin1 output
+    charmap = {
+              "\xa0": r"~",
+              # "\xa2": r"\textcent{}",
+              # "\xa4": r"\textcurrency{}",
+              "\xa5": r"$\yen$",
+              # "\xa6": r"\textbrokenbar{}",
+              "\xac": r"\ensuremath{\lnot}",
+              # "\xad": r"", # FIXME: bug around soft hyphen...
+              "\xb0": r"\textdegree{}",
+              "\xb1": r"\ensuremath{\pm}",
+              "\xb2": r"$^2$",
+              "\xb3": r"$^3$",
+              "\xb5": r"$\mathrm{\mu}$",
+              "\xb9": r"$^1$",
+              "\xd7": r"$\times$",
+              "\xf7": r"$\div$"
+              }
+
+    def __init__(self, input_encoding="utf8", output_encoding="latin-1",
+                 errors="latexcharreplace", pre="", post=""):
+        self._errors = errors
+        self._decode = codecs.getdecoder(input_encoding)
+        self._encode = codecs.getencoder(output_encoding)
+
+        # No different output encoding expected?
+        if input_encoding == output_encoding:
+            self.charmap = {}
+            return
+
+        if not(tex_handler_installed.has_key(self._errors)):
+            f = self.build_error_func(pre, post, errors)
+            codecs.register_error(self._errors, f)
+            tex_handler_installed[self._errors] = f
+            self.clear_errors()
+
+    def clear_errors(self):
+        tex_handler_counter[self._errors] = 0
+
+    def get_errors(self):
+        return tex_handler_counter[self._errors]
+
+    def build_error_func(self, pre="", post="", errors="charrep"):
+        return lambda exc: latex_char_replace(exc, pre, post, errors)
+
+    def decode(self, text):
+        return self._decode(text)[0]
+
+    def encode(self, text):
+        text = self._encode(text, self._errors)[0]
+        for c, v in self.charmap.items():
+            text = text.replace(c, v)
+        return text
+ 
+
+class LatexCodec(TexCodec):
+    def __init__(self, input_encoding="utf8", output_encoding="latin-1"):
+        TexCodec.__init__(self, input_encoding, output_encoding)
+
+        self.texres = (
+            # Kind of normalize
+            (re.compile("^[\s\n]*$"), r" "),
+            # TeX escapes (the order is important)
+            (re.compile(r"([{}%_^$&#])"), r"\\\1"),
+            # '<' and '>' in the list to avoid french quotation mark symptoms
+            (re.compile(r"([-^<>])"), r"\1{}"),
+            # backtick (`) must not be confused with U+2018 (left single quote)
+            (re.compile(r"`"), r"\\`{}"),
+            # tilde (~) must not be confused with U+00A0 (no-break space)
+            (re.compile(r"~"), r"\\textasciitilde{}"))
+
+    def _texescape(self, text):
+        for r, s in self.texres:
+            text = r.sub(s, text)
+        return text
+
+    def encode(self, text):
+        # Preliminary backslash substitution
+        text = text.replace("\\", r"\textbackslash")
+
+        # Basic TeX escape
+        text = self._texescape(text)
+
+        # Encode UTF-8 -> Latin-1 + latex specific
+        text = self._encode(text, self._errors)[0]
+
+        # Special Character Mapping
+        for c, v in self.charmap.items():
+            text = text.replace(c, v)
+
+        # Things are done, complete with {}
+        text = text.replace(r"\textbackslash", r"\textbackslash{}")
+        return text
+
+
+def main():
+    import sys
+    c = LatexCodec()
+    f = open(sys.argv[1])
+    text = ""
+    for line in f:
+        text += c.encode(c.decode(line))
+        if text:
+            sys.stdout.write(text)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lib/dbtexmf/dblatex/texhyphen.py b/lib/dbtexmf/dblatex/texhyphen.py
index ab3545b..b28e6f8 100644
--- a/lib/dbtexmf/dblatex/texhyphen.py
+++ b/lib/dbtexmf/dblatex/texhyphen.py
@@ -2,6 +2,8 @@
 # dblatex - Hyphenation classes to provide smart hyphenation of path like
 # strings
 #
+from __future__ import print_function
+
 import re
 
 class Hyphenator:
@@ -18,14 +20,14 @@ class BasicHyphenator(Hyphenator):
     """
     def __init__(self, codec=None):
         self.codec = codec
-        self.hyphenchar = "\-"
+        self.hyphenchar = br"\-"
 
     def hyphenate(self, text):
         if self.codec: text = self.codec.decode(text)
         ntext = "\1".join(list(text))
         if self.codec: ntext = self.codec.encode(ntext)
-        ntext = re.sub("\1? \1?", " ", ntext)
-        ntext = ntext.replace("\1", self.hyphenchar)
+        ntext = re.sub(b"\1? \1?", b" ", ntext)
+        ntext = ntext.replace(b"\1", self.hyphenchar)
         return ntext
 
 
@@ -49,7 +51,7 @@ class UrlHyphenator(Hyphenator):
     existing latex styles.
     """
     def __init__(self, codec=None,
-                 h_sep="\penalty0 ", h_char="\penalty5000 ",
+                 h_sep=br"\penalty0 ", h_char=br"\penalty5000 ",
                  h_start=3, h_stop=3):
         self.codec = codec
         self.seps = r":/\@=?#;-."
@@ -84,17 +86,17 @@ class UrlHyphenator(Hyphenator):
                     nw += "\1".join(list(hword))
                     nw += w[-self.h_stop:]
                     nw = self._translate(nw)
-                    nw = re.sub("\1? \1?", " ", nw)
-                    nw = nw.replace("\1", self.h_char)
+                    nw = re.sub(b"\1? \1?", b" ", nw)
+                    nw = nw.replace(b"\1", self.h_char)
                     vtext.append(nw)
 
-        ntext = "".join(vtext)
+        ntext = b"".join(vtext)
         return ntext
 
 
 if __name__ == "__main__":
-    url = "http://www.fg/foobar fun#fght/fkkkf.tz?id=123"
+    url = b"http://www.fg/foobar fun#fght/fkkkf.tz?id=123"
     h1 = BasicHyphenator()
     h2 = UrlHyphenator()
-    print h1.hyphenate(url)
-    print h2.hyphenate(url)
+    print(h1.hyphenate(url))
+    print(h2.hyphenate(url))
diff --git a/lib/dbtexmf/dblatex/texhyphen.py.enable-python3 b/lib/dbtexmf/dblatex/texhyphen.py.enable-python3
new file mode 100644
index 0000000..ab3545b
--- /dev/null
+++ b/lib/dbtexmf/dblatex/texhyphen.py.enable-python3
@@ -0,0 +1,100 @@
+#
+# dblatex - Hyphenation classes to provide smart hyphenation of path like
+# strings
+#
+import re
+
+class Hyphenator:
+    def __init__(self, codec=None):
+        pass
+    
+    def hyphenate(self, text):
+        return text
+
+
+class BasicHyphenator(Hyphenator):
+    """
+    Hyphenates basically by putting an hyphenation point between each character.
+    """
+    def __init__(self, codec=None):
+        self.codec = codec
+        self.hyphenchar = "\-"
+
+    def hyphenate(self, text):
+        if self.codec: text = self.codec.decode(text)
+        ntext = "\1".join(list(text))
+        if self.codec: ntext = self.codec.encode(ntext)
+        ntext = re.sub("\1? \1?", " ", ntext)
+        ntext = ntext.replace("\1", self.hyphenchar)
+        return ntext
+
+
+class UrlHyphenator(Hyphenator):
+    """
+    Hyphenates <text> so that cutting is easier on URL separators.
+    The hyphen chars are expected to be void to prevent from spurious
+    characters in displayed filenames or URLs.
+
+    The pathname words can be cut only after the <h_start> first characters
+    and before the <h_stop> last characters to avoid a cut just after one or two
+    chars.
+
+    Tip: the inter-chars can be defined with macros \HO and \HL, to be shorter
+    like:
+
+    \def\HL{\penalty9999} (h_char="\HL")
+    \def\HO{\penalty5000} (h_sep="\HO")
+
+    By default these shortcuts are not used to avoid some macro declaration in
+    existing latex styles.
+    """
+    def __init__(self, codec=None,
+                 h_sep="\penalty0 ", h_char="\penalty5000 ",
+                 h_start=3, h_stop=3):
+        self.codec = codec
+        self.seps = r":/\@=?#;-."
+        self.h_sep = h_sep
+        self.h_char = h_char
+        self.h_start = (h_start-1)
+        self.h_stop = (h_stop-1)
+
+    def _translate(self, text):
+        if self.codec:
+            return self.codec.encode(text)
+        else:
+            return text
+
+    def hyphenate(self, text):
+        if self.codec: text = self.codec.decode(text)
+
+        vtext = []
+        p = "([%s])" % re.escape(self.seps)
+        words = re.split(p, text)
+        for w in words:
+            if not(w):
+                continue
+            if w in self.seps:
+                vtext.append(self._translate(w) + self.h_sep)
+            else:
+                hword = w[self.h_start:-self.h_stop]
+                if len(hword) < 2:
+                    vtext.append(self._translate(w))
+                else:
+                    nw = w[:self.h_start]
+                    nw += "\1".join(list(hword))
+                    nw += w[-self.h_stop:]
+                    nw = self._translate(nw)
+                    nw = re.sub("\1? \1?", " ", nw)
+                    nw = nw.replace("\1", self.h_char)
+                    vtext.append(nw)
+
+        ntext = "".join(vtext)
+        return ntext
+
+
+if __name__ == "__main__":
+    url = "http://www.fg/foobar fun#fght/fkkkf.tz?id=123"
+    h1 = BasicHyphenator()
+    h2 = UrlHyphenator()
+    print h1.hyphenate(url)
+    print h2.hyphenate(url)
diff --git a/lib/dbtexmf/dblatex/xetex/codec.py b/lib/dbtexmf/dblatex/xetex/codec.py
index 9ef35b9..b4aad2b 100644
--- a/lib/dbtexmf/dblatex/xetex/codec.py
+++ b/lib/dbtexmf/dblatex/xetex/codec.py
@@ -3,7 +3,7 @@ import os
 import codecs
 
 from dbtexmf.dblatex.texcodec import LatexCodec
-from fsencoder import FontSpecEncoder
+from dbtexmf.dblatex.xetex.fsencoder import FontSpecEncoder
 
 
 class XetexCodec(LatexCodec):
diff --git a/lib/dbtexmf/dblatex/xetex/codec.py.enable-python3 b/lib/dbtexmf/dblatex/xetex/codec.py.enable-python3
new file mode 100644
index 0000000..9ef35b9
--- /dev/null
+++ b/lib/dbtexmf/dblatex/xetex/codec.py.enable-python3
@@ -0,0 +1,75 @@
+import sys
+import os
+import codecs
+
+from dbtexmf.dblatex.texcodec import LatexCodec
+from fsencoder import FontSpecEncoder
+
+
+class XetexCodec(LatexCodec):
+    def __init__(self, fontconfig="", pre="", post=""):
+        # Use the default TeX escapes, and encoding method
+        LatexCodec.__init__(self, input_encoding="utf8",
+                            output_encoding="utf8")
+
+        # XeTeX font manager
+        if not(fontconfig):
+            fontconfig = os.getenv("DBLATEX_FONTSPEC_FILE", "xefont.xml")
+
+        # If not proper fontconfig, fallback to default behaviour
+        try:
+            self._fontmgr = FontSpecEncoder(fontconfig)
+        except:
+            self._fontmgr = None
+            return
+
+        # Ignore the special characters \1 and \2 used as specific
+        # substitution characters
+        self._fontmgr.ignorechars("\1\2\r")
+
+    def clear_errors(self):
+        pass
+
+    def get_errors(self):
+        pass
+
+    def decode(self, text):
+        return self._decode(text)[0]
+
+    def encode(self, text):
+        # If no font manager, behaves as the default latex codec
+        if not(self._fontmgr):
+            return LatexCodec.encode(self, text)
+
+        # Preliminary backslash substitution
+        text = text.replace("\\", "\2")
+
+        # Consider that each text sequence is in his own tex group
+        self._fontmgr.reset()
+
+        # Handle fonts for this Unicode string. We build a list of
+        # strings, where each string is handled by a new font
+        switchfonts = []
+        for c in text:
+            font, char = self._fontmgr.encode(c)
+            # A new font, or empty switchfont list
+            if font or not(switchfonts):
+                sf = [font, char]
+                switchfonts.append(sf)
+            else:
+            # No new font, so extend the current string
+                sf[1] += char
+
+        # Merge each part, after escaping each string
+        text = ""
+        for sf in switchfonts:
+            sf[1] = self._texescape(sf[1])
+            text += "".join(sf)
+
+        # Encode for string output
+        text = self._encode(text)[0]
+
+        # Things are done, substitute the '\'
+        text = text.replace("\2", r"\textbackslash{}")
+        return "{" + text + "}"
+
diff --git a/lib/dbtexmf/dblatex/xetex/fcfallback.py b/lib/dbtexmf/dblatex/xetex/fcfallback.py
index dea9ea2..29c1a44 100644
--- a/lib/dbtexmf/dblatex/xetex/fcfallback.py
+++ b/lib/dbtexmf/dblatex/xetex/fcfallback.py
@@ -1,5 +1,7 @@
-from fontspec import FontSpec
-from fcmanager import FcManager
+from __future__ import print_function
+
+from dbtexmf.dblatex.xetex.fontspec import FontSpec
+from dbtexmf.dblatex.xetex.fcmanager import FcManager
 
 
 class DefaultFontSpec(FontSpec):
@@ -54,8 +56,8 @@ class FcFallbackFontSpec(DefaultFontSpec):
         for fontspec in self.fontspecs:
 
             if fontspec in self.fcmissed:
-                print "Specified font '%s' is missing in the system!" % \
-                      (fontspec.mainfont())
+                print("Specified font '%s' is missing in the system!" % \
+                      (fontspec.mainfont()))
                 continue
 
             fcfont = self.fccache.get(fontspec.mainfont()) or \
diff --git a/lib/dbtexmf/dblatex/xetex/fcfallback.py.enable-python3 b/lib/dbtexmf/dblatex/xetex/fcfallback.py.enable-python3
new file mode 100644
index 0000000..dea9ea2
--- /dev/null
+++ b/lib/dbtexmf/dblatex/xetex/fcfallback.py.enable-python3
@@ -0,0 +1,104 @@
+from fontspec import FontSpec
+from fcmanager import FcManager
+
+
+class DefaultFontSpec(FontSpec):
+    """
+    The default fontspec gives priority to its children, and 
+    contains any character.
+    """
+    def __init__(self):
+        FontSpec.__init__(self, subfont_first=True)
+    
+    def contains(self, char):
+        return True
+
+
+class FcFallbackFontSpec(DefaultFontSpec):
+    """
+    Default fontspec that finds fonts from fontconfig 
+    if the preexisting fontspecs don't match.
+
+    Currently this class is the only interface between the
+    two worlds (fontspec and fontconfig).
+    """
+    def __init__(self):
+        DefaultFontSpec.__init__(self)
+        self.fcmanager = FcManager()
+        self.fccache = {}
+        self.fcmissed = []
+        try:
+            self.fcmanager.build_fonts(partial=True)
+        except:
+            self.fcmanager = None
+    
+    def _loghas(self, id, char):
+        pass 
+
+    def _loghas2(self, id, char):
+        DefaultFontSpec._loghas(self, id, char)
+
+    def match(self, char, excluded=None):
+        fontspec = DefaultFontSpec.match(self, char, excluded)
+        if fontspec != self or not(self.fcmanager):
+            self._loghas2(fontspec.id, char)
+            return fontspec
+
+        if self.isignored(char):
+            self._loghas2(self.id, char)
+            return self
+
+        # Scan again the predefined fontspecs and check with fontconfig
+        # if their charset can be extended
+
+        for fontspec in self.fontspecs:
+
+            if fontspec in self.fcmissed:
+                print "Specified font '%s' is missing in the system!" % \
+                      (fontspec.mainfont())
+                continue
+
+            fcfont = self.fccache.get(fontspec.mainfont()) or \
+                     self.fcmanager.get_font(fontspec.mainfont())
+
+            if not(fcfont):
+                self.fcmissed.append(fontspec)
+                continue
+
+            if fcfont.has_char(char):
+                fontspec.add_char(char)
+                self._loghas2(fontspec.id + "[fc]", char)
+                return fontspec
+
+        # Find the first fcfont that has this char in its charset
+        fcfonts = {}
+        for font_type in ("serif", "sans-serif", "monospace"):
+            fcfonts[font_type] = self.fcmanager.get_font_handling(char,
+                                                         family_type=font_type)
+        # FIXME: assign the other fonts if no font exists for this type
+        if not(fcfont):
+            self._loghas2(self.id + "[?fc]", char)
+            return self
+
+        # Extend the fontspec group
+        fontspec = self.spawn_fontspec_from_fcfonts(fcfonts, char)
+        self._loghas2(fontspec.id + "[A fc]", char)
+        return fontspec
+
+    def spawn_fontspec_from_fcfonts(self, fcfonts, char):
+        self.log.info("New fontspec '%s' matching U%X from fontconfig"\
+              % (fcfonts["serif"].family, ord(char)))
+        # Create a new font
+        fontspec = FontSpec()
+        fontspec.id = fcfont.family
+        fontspec.transitions["enter"]["main"] = fcfonts["serif"].family
+        fontspec.transitions["enter"]["sans"] = fcfonts["sans-serif"].family
+        fontspec.transitions["enter"]["mono"] = fcfonts["monospace"].family
+        fontspec.add_char(char)
+        fontspec.add_ignored(self._ignored)
+        # Register the font and its related fontconfig object
+        for fcfont in fcfonts.values():
+            self.fccache[fcfont.name] = fcfont
+        self.add_subfont(fontspec)
+        return fontspec
+
diff --git a/lib/dbtexmf/dblatex/xetex/fcmanager.py b/lib/dbtexmf/dblatex/xetex/fcmanager.py
index b972270..91ed6da 100644
--- a/lib/dbtexmf/dblatex/xetex/fcmanager.py
+++ b/lib/dbtexmf/dblatex/xetex/fcmanager.py
@@ -5,12 +5,15 @@
 # An efficient solution should use some python bindings to directly call the
 # C fontconfig library.
 #
+import sys
 import logging
 from subprocess import Popen, PIPE
 
 def execute(cmd):
     p = Popen(cmd, stdout=PIPE)
     data = p.communicate()[0]
+    if isinstance(data, bytes):
+        data = data.decode(sys.getdefaultencoding())
     rc = p.wait()
     if rc != 0:
         raise OSError("'%s' failed (%d)" % (" ".join(cmd), rc))
diff --git a/lib/dbtexmf/dblatex/xetex/fcmanager.py.enable-python3 b/lib/dbtexmf/dblatex/xetex/fcmanager.py.enable-python3
new file mode 100644
index 0000000..b972270
--- /dev/null
+++ b/lib/dbtexmf/dblatex/xetex/fcmanager.py.enable-python3
@@ -0,0 +1,230 @@
+# 
+# Slow interface to fontconfig for Dblatex, that only parses some command line
+# output to store the fonts available on the system and their characteristics.
+#
+# An efficient solution should use some python bindings to directly call the
+# C fontconfig library.
+#
+import logging
+from subprocess import Popen, PIPE
+
+def execute(cmd):
+    p = Popen(cmd, stdout=PIPE)
+    data = p.communicate()[0]
+    rc = p.wait()
+    if rc != 0:
+        raise OSError("'%s' failed (%d)" % (" ".join(cmd), rc))
+    return data
+
+
+class FcFont:
+    """
+    Font Object with properties filled with the fc-match command output.
+    """
+    def __init__(self, fontnames, partial=False):
+        self.log = logging.getLogger("dblatex")
+        self.name = fontnames[0]
+        self.aliases = fontnames[1:]
+        self._completed = False
+        if not(partial):
+            self.complete()
+
+    def complete(self):
+        if not(self._completed):
+            d = execute(["fc-match", "--verbose", self.name])
+            d = d.strip()
+            self._build_attr_from(d)
+            self._completed = True
+
+    def _build_attr_from(self, data):
+        ninfos = self._splitinfos(data)
+
+        # Remove the first line
+        ninfos[0] = ninfos[0].split("\n")[1]
+        for i in ninfos:
+            if i: self._buildattr(i)
+        
+        # Check the consistency
+        if self.family != self.name.replace("\-", "-"):
+            raise ValueError("Unknown font '%s' vs '%s'" % (self.name,
+            self.family))
+
+    def _splitinfos(self, data):
+        ninfos = [data]
+        for sep in ("(s)", "(w)", "(=)"):
+            infos = ninfos
+            ninfos = []
+            for i in infos:
+                ni = i.split(sep)
+                ninfos += ni
+        return ninfos
+
+    def _buildattr(self, infos):
+        """
+        Parse things like:
+           'fullname: "Mukti"(s)
+            fullnamelang: "en"(s)
+            slant: 0(i)(s)
+            weight: 80(i)(s)
+            width: 100(i)(s)
+            size: 12(f)(s)'
+        """
+        try:
+            attrname, attrdata = infos.split(":", 1)
+        except:
+            # Skip this row
+            self.log.warning("Wrong data? '%s'" % infos)
+            return
+        
+        #print infos
+        attrname = attrname.strip() # Remove \t
+        attrdata = attrdata.strip() # Remove space
+
+        # Specific case
+        if attrname == "charset":
+            self._build_charset(attrdata)
+            return
+
+        # Get the data type
+        if (not(attrdata) or (attrdata[0] == '"' and attrdata[-1] == '"')):
+            setattr(self, attrname, attrdata.strip('"'))
+            return
+        
+        if (attrdata.endswith("(i)")):
+            setattr(self, attrname, int(attrdata.strip("(i)")))
+            return
+
+        if (attrdata.endswith("(f)")):
+            setattr(self, attrname, float(attrdata.strip("(f)")))
+            return
+
+        if (attrdata == "FcTrue"):
+            setattr(self, attrname, True)
+            return
+
+        if (attrdata == "FcFalse"):
+            setattr(self, attrname, False)
+            return
+
+    def _build_charset(self, charset):
+        """
+        Parse something like:
+           '0000: 00000000 ffffffff ffffffff 7fffffff 00000000 00002001 00800000 00800000
+            0009: 00000000 00000000 00000000 00000030 fff99fee f3c5fdff b080399f 07ffffcf
+            0020: 30003000 00000000 00000010 00000000 00000000 00001000 00000000 00000000
+            0025: 00000000 00000000 00000000 00000000 00000000 00000000 00001000 00000000'
+        """
+        self.charsetstr = charset
+        self.charset = []
+        lines = charset.split("\n")
+        for l in lines:
+            umajor, row = l.strip().split(":", 1)
+            int32s = row.split()
+            p = 0
+            for w in int32s:
+                #print "=> %s" % w
+                v = int(w, 16)
+                for i in range(0, 32):
+                    m = 1 << i
+                    #m = 0x80000000 >> i
+                    if (m & v):
+                        uchar = umajor + "%02X" % (p + i)
+                        #print uchar
+                        self.charset.append(int(uchar, 16))
+                p += 32
+
+    def remove_char(self, char):
+        try:
+            self.charset.remove(char)
+        except:
+            pass
+
+    def has_char(self, char):
+        #print self.family, char, self.charset
+        return (ord(char) in self.charset)
+
+
+class FcManager:
+    """
+    Collect all the fonts available in the system. The building can be partial,
+    i.e. the font objects can be partially created, and updated later (when
+    used).
+
+    The class tries to build three ordered list of fonts, one per standard
+    generic font family:
+    - Serif      : main / body font
+    - Sans-serif : used to render sans-serif forms
+    - Monospace  : used to render verbatim / monospace characters
+    """
+    def __init__(self):
+        self.log = logging.getLogger("dblatex")
+        self.fonts = {}
+        self.fonts_family = {}
+
+    def get_font(self, fontname):
+        font = self.fonts.get(fontname)
+        if font:
+            font.complete()
+        return font
+
+    def get_font_handling(self, char, all=False, family_type=""):
+        if not(family_type):
+            font_family = self.fonts.values()
+        else:
+            font_family = self.fonts_family.get(family_type, None)
+        
+        if not(font_family):
+            return []
+
+        fonts = self.get_font_handling_from(font_family, char, all=all)
+        return fonts
+
+    def get_font_handling_from(self, fontlist, char, all=False):
+        fonts = []
+        # Brutal method to get something...
+        for f in fontlist:
+            f.complete()
+            if f.has_char(char):
+                if all:
+                    fonts.append(f)
+                else:
+                    return f
+        return fonts
+
+    def build_fonts(self, partial=False):
+        self.build_fonts_all(partial=partial)
+        self.build_fonts_family("serif")
+        self.build_fonts_family("sans-serif")
+        self.build_fonts_family("monospace")
+
+    def build_fonts_all(self, partial=False):
+        # Grab all the fonts installed on the system
+        d = execute(["fc-list"])
+        fonts = d.strip().split("\n")
+        for f in fonts:
+            fontnames = f.split(":")[0].split(",")
+            mainname = fontnames[0]
+            if not(mainname):
+                continue
+            if self.fonts.get(mainname):
+                self.log.debug("'%s': duplicated" % mainname)
+                continue
+
+            #print fontnames
+            font = FcFont(fontnames, partial=partial)
+            self.fonts[mainname] = font
+
+    def build_fonts_family(self, family_type):
+        # Create a sorted list matching a generic family
+        # Use --sort to have only fonts completing unicode range
+        font_family = []
+        self.fonts_family[family_type] = font_family
+        d = execute(["fc-match", "--sort", family_type, "family"])
+        fonts = d.strip().split("\n")
+        for f in fonts:
+            font = self.fonts.get(f)
+            if not(font in font_family):
+                font_family.append(font)
+        #print family_type
+        #print font_family
+
diff --git a/lib/dbtexmf/dblatex/xetex/fontspec.py b/lib/dbtexmf/dblatex/xetex/fontspec.py
index cd93cde..767d003 100644
--- a/lib/dbtexmf/dblatex/xetex/fontspec.py
+++ b/lib/dbtexmf/dblatex/xetex/fontspec.py
@@ -49,7 +49,7 @@ class UnicodeInterval:
         if m:
             return int(m.group(1), 16)
         else:
-            raise RuntimeError, 'Not a unicode codepoint: ' + codepoint
+            raise RuntimeError('Not a unicode codepoint: ' + codepoint)
 
     def from_char(self, char):
         """Interval for a single character"""
@@ -167,7 +167,7 @@ class FontSpec:
                     intervals.append(
                         UnicodeInterval().from_codepoint(m.group(1)))
                 else:
-                    raise RuntimeError, 'Unable to parse range: "' + range + '"'
+                    raise RuntimeError('Unable to parse range: "' + range + '"')
         return intervals
 
     def _parse_transitions(self, node, transition_type):
diff --git a/lib/dbtexmf/dblatex/xetex/fontspec.py.enable-python3 b/lib/dbtexmf/dblatex/xetex/fontspec.py.enable-python3
new file mode 100644
index 0000000..cd93cde
--- /dev/null
+++ b/lib/dbtexmf/dblatex/xetex/fontspec.py.enable-python3
@@ -0,0 +1,296 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Time-stamp: <2008-06-23 22:21:26 ah>
+
+"""
+Provide an encoder for a font specification configuration: the encoder is fed
+with Unicode characters one by one and determines the needed font switches
+between the preceding and the current character.
+"""
+
+import re
+import xml.dom.minidom
+import logging
+
+
+def _indent(string, width=2):
+    """Indent the <string> lines by <width> blank characters."""
+    istr = ' ' * width
+    s = istr + istr.join(string.splitlines(1))
+    return s
+
+class UnicodeInterval:
+    """Unicode codepoint interval, including all codepoints between its minimum
+    and maximum boundary.
+    For any Unicode codepoint it can be queried if it belongs to the interval.
+    """
+
+    # Internal data attributes:
+    # _min_boundary: Minimum boundary of the codepoint interval (ordinal)
+    # _max_boundary: Maximum boundary of the codepoint interval (ordinal)
+
+    _re_codepoint = re.compile(r'^[Uu]\+?([0-9A-Fa-f]+)$')
+
+    def __init__(self):
+        self._min_boundary = 0
+        self._max_boundary = 0
+
+    def __str__(self):
+        """Dump the instance's data attributes."""
+        string = '[' + str(self._min_boundary)
+        if self._max_boundary != self._min_boundary:
+            string += ',' + str(self._max_boundary)
+        string += ']'
+        return string
+
+    def _unicode_to_ordinal(self, codepoint):
+        """Return the ordinal of the specified codepoint."""
+        m = self._re_codepoint.match(codepoint)
+        if m:
+            return int(m.group(1), 16)
+        else:
+            raise RuntimeError, 'Not a unicode codepoint: ' + codepoint
+
+    def from_char(self, char):
+        """Interval for a single character"""
+        self._min_boundary = ord(char)
+        self._max_boundary = self._min_boundary
+        return self
+
+    def from_codepoint(self, codepoint):
+        """Interval for a single character defined as unicode string."""
+        self._min_boundary = self._unicode_to_ordinal(codepoint)
+        self._max_boundary = self._min_boundary
+        return self
+
+    def from_interval(self, codepoint1, codepoint2):
+        """Interval from a unicode range."""
+        self._min_boundary = self._unicode_to_ordinal(codepoint1)
+        self._max_boundary = self._unicode_to_ordinal(codepoint2)
+        if self._min_boundary > self._max_boundary:
+            self._min_boundary, self._max_boundary = \
+                self._max_boundary, self._min_boundary
+        return self
+
+    def contains(self, char):
+        """
+        Determine whether the specified character is contained in this
+        instance's interval.
+        """
+        #print "%d in [%d - %d]?" % (ord(char), self._min_boundary,self._max_boundary)
+        return (ord(char) >= self._min_boundary
+                and ord(char) <= self._max_boundary)
+
+
+class FontSpec:
+    """
+    Font specification, consisting of one or several unicode character
+    intervals and of fonts to select for those characters. The object
+    fully defines the fonts to switch to.
+    """
+
+    # Internal data attributes:
+    # _intervals: UnicodeInterval list
+
+    transition_types = ['enter', 'inter', 'exit']
+    _re_interval = re.compile(r'^([Uu][0-9A-Fa-f]+)-([Uu][0-9A-Fa-f]+)$')
+    _re_codepoint = re.compile(r'^([Uu][0-9A-Fa-f]+)$')
+
+    def __init__(self, intervals=None, subfont_first=False):
+        """Create a font specification from the specified codepoint intervals.
+        The other data attributes will be set by the caller later.
+        """
+        self.type = ""
+        self.id = None
+        self.refmode = None
+        self.transitions = {}
+        self.fontspecs = [self]
+        self.subfont_first = subfont_first
+        self._ignored = []
+        self.log = logging.getLogger("dblatex")
+
+        for type in self.transition_types:
+            self.transitions[type] = {}
+
+        if not(intervals):
+            self._intervals = []
+            return
+
+        try:
+            self._intervals = list(intervals)
+        except TypeError:
+            self._intervals = [intervals]
+
+    def fromnode(self, node):
+        range = node.getAttribute('range')
+        charset = node.getAttribute('charset')
+        id = node.getAttribute('id')
+        refmode = node.getAttribute('refmode')
+        self.type = node.getAttribute('type')
+
+        if (range):
+            self._intervals = self._parse_range(range)
+        elif (charset):
+            for char in charset:
+                self.add_char(char)
+
+        # Unique identifier
+        if (id):
+            self.id = id
+        if (refmode):
+            self.refmode = refmode
+
+        for transition_type in self.transition_types:
+            self._parse_transitions(node, transition_type)
+
+    def mainfont(self):
+        # Try to return the most representative font of this spec
+        return (self.transitions["enter"].get("main") or 
+                self.transitions["enter"].get("sans"))
+
+    def _parse_range(self, range):
+        """Parse the specified /fonts/fontspec@range attribute to a
+        UnicodeInterval list.
+        """
+        #print range
+        intervals = []
+        chunks = range.split()
+        for chunk in chunks:
+            m = self._re_interval.match(chunk)
+            #print match
+            if m:
+                urange = UnicodeInterval().from_interval(m.group(1), m.group(2))
+                intervals.append(urange)
+            else:
+                m = self._re_codepoint.match(chunk)
+                if m:
+                    intervals.append(
+                        UnicodeInterval().from_codepoint(m.group(1)))
+                else:
+                    raise RuntimeError, 'Unable to parse range: "' + range + '"'
+        return intervals
+
+    def _parse_transitions(self, node, transition_type):
+        """Evaluate the font elements of the specified fontspec element for the
+        specified transition type (enter, inter or exit).
+        """
+        fontlist = self.transitions[transition_type]
+
+        for dom_transition in node.getElementsByTagName(transition_type):
+            for dom_font in dom_transition.getElementsByTagName('font'):
+                font = ''
+                types = dom_font.getAttribute("type")
+                types = types.split()
+                for dom_child in dom_font.childNodes:
+                    if dom_child.nodeType == dom_child.TEXT_NODE:
+                        font += dom_child.nodeValue
+                if (font):
+                    for type in types:
+                        fontlist[type] = font
+
+    def _switch_to(self, fonts):
+        """
+        Return a string with the XeTeX font switching commands for the
+        specified font types.
+        """
+        s = ''
+        for type, font in fonts.items():
+            s += '\switch%sfont{%s}' % (type, font)
+        if s:
+            s = r"\savefamily" + s + r"\loadfamily{}"
+        return s
+
+    def enter(self):
+        self.log.debug("enter in %s" % self.id)
+        s = self._switch_to(self.transitions["enter"])
+        return s
+
+    def exit(self):
+        self.log.debug("exit from %s" % self.id)
+        s = self._switch_to(self.transitions["exit"])
+        return s
+
+    def interchar(self):
+        s = self._switch_to(self.transitions["inter"])
+        return s
+
+    def __str__(self):
+        """Dump the instance's data attributes."""
+        string = 'FontSpec:'
+        string += '\n  Id: %s' % self.id
+        string += '\n  Refmode: %s' % self.refmode
+        string += '\n  subFirst: %s' % self.subfont_first
+        for interval in self._intervals:
+            string += '\n' + _indent(str(interval))
+        return string
+
+    def add_subfont(self, fontspec):
+        self.log.debug("%s -> %s" % (self.id, fontspec.id))
+        if self.subfont_first:
+            self.fontspecs.insert(-1, fontspec)
+        else:
+            self.fontspecs.append(fontspec)
+
+    def add_char(self, char):
+        self._intervals.append(UnicodeInterval().from_char(char))
+
+    def add_uranges(self, ranges, depth=1):
+        # Recursively extend the supported character range
+        if depth:
+            for f in self.fontspecs:
+                if f != self:
+                    f.add_uranges(ranges)
+        self._intervals.extend(ranges)
+
+    def add_ignored(self, ranges, depth=1):
+        if depth:
+            for f in self.fontspecs:
+                if f != self:
+                    f.add_ignored(ranges)
+        self._ignored.extend(ranges)
+
+    def get_uranges(self):
+        return self._intervals
+
+    def contains(self, char):
+        #print "%s: %s" % (self.id, self._intervals)
+        for interval in self._intervals:
+            if interval.contains(char):
+                return True
+        else:
+            return False
+
+    def isignored(self, char):
+        self.log.debug("%s: %s" % (self.id, [ str(a) for a in self._ignored ]))
+        for interval in self._ignored:
+            if interval.contains(char):
+                return True
+        else:
+            return False
+
+    def _loghas(self, id, char):
+        try:
+            self.log.debug("%s has '%s'" % (id, str(char)))
+        except:
+            self.log.debug("%s has '%s'" % (id, ord(char)))
+
+    def match(self, char, excluded=None):
+        """Determine whether the font specification matches the specified
+        object, thereby considering refmode.
+        """
+        fontspec = None
+        self.log.debug( "Lookup in %s" % self.id)
+        if self.isignored(char):
+            self._loghas(self.id, char)
+            return self
+
+        for fontspec in self.fontspecs:
+            # Don't waste time in scanning excluded nodes
+            if fontspec == excluded:
+                continue
+            #print " Look in %s" % fontspec.id
+            if fontspec.contains(char):
+                self._loghas(fontspec.id, char)
+                return fontspec
+        return None
+
diff --git a/lib/dbtexmf/dblatex/xetex/fsconfig.py b/lib/dbtexmf/dblatex/xetex/fsconfig.py
index f62e51e..06c9adf 100644
--- a/lib/dbtexmf/dblatex/xetex/fsconfig.py
+++ b/lib/dbtexmf/dblatex/xetex/fsconfig.py
@@ -12,8 +12,8 @@ import re
 import xml.dom.minidom
 import logging
 
-from fcfallback import FcFallbackFontSpec, DefaultFontSpec
-from fontspec import FontSpec, _indent
+from dbtexmf.dblatex.xetex.fcfallback import FcFallbackFontSpec, DefaultFontSpec
+from dbtexmf.dblatex.xetex.fontspec import FontSpec, _indent
 
 
 class FontSpecConfig:
diff --git a/lib/dbtexmf/dblatex/xetex/fsconfig.py.enable-python3 b/lib/dbtexmf/dblatex/xetex/fsconfig.py.enable-python3
new file mode 100644
index 0000000..f62e51e
--- /dev/null
+++ b/lib/dbtexmf/dblatex/xetex/fsconfig.py.enable-python3
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Time-stamp: <2008-06-23 22:21:26 ah>
+
+"""
+Provide the font specification configuration: an XML fontspec configuration
+file is parsed and turned into a tree of font specifications whose root is
+the default font to apply.
+"""
+
+import re
+import xml.dom.minidom
+import logging
+
+from fcfallback import FcFallbackFontSpec, DefaultFontSpec
+from fontspec import FontSpec, _indent
+
+
+class FontSpecConfig:
+    """
+    This object parses an XML fontspec configuration file and builds the
+    resulting fontspec tree, the root fontspec being the default font
+    to apply.
+    
+    The fontspec configuration file defines the fonts to apply in order
+    of precedence (and for some Unicode ranges) and the font levels (or
+    subfonts) thanks to the 'refmode' attribute that links a font to its
+    parent.
+    """
+
+    def __init__(self, conf_file):
+        """Create a font specification configuration from the specified file
+        (file name or file-like object).
+        """
+        self.log = logging.getLogger("dblatex")
+        self.fontspecs = []
+        self.fontnames = {}
+        self.default_fontspec = None  # until a 'default' fontspec is met
+        dom_document = xml.dom.minidom.parse(conf_file)
+        for dom_fontspec in dom_document.getElementsByTagName('fontspec'):
+            default = dom_fontspec.getAttribute('default')
+            if default:
+                self.log.debug("has default")
+                fallback = dom_fontspec.getAttribute('fallback')
+                if fallback == "fontconfig":
+                    self.default_fontspec = FcFallbackFontSpec()
+                else:
+                    self.default_fontspec = DefaultFontSpec()
+                fontspec = self.default_fontspec
+            else:
+                fontspec = FontSpec()
+
+            fontspec.fromnode(dom_fontspec)
+
+            if fontspec != self.default_fontspec:
+                self.fontspecs.append(fontspec)
+            if fontspec.id:
+                self.fontnames[fontspec.id] = fontspec
+
+        dom_document.unlink()
+
+        if not(self.default_fontspec):
+            self.default_fontspec = DefaultFontSpec()
+
+        self.build_tree()
+
+    def build_tree(self):
+        """
+        Build the fontspec tree, the root node being the default font
+        to apply. The fontspecs without a refmode (i.e. not being
+        explicitly a subfont) are direct children of the default font.
+        """
+        to_ignore = []
+        for fontspec in self.fontspecs:
+            if fontspec.type == "ignore":
+                to_ignore.append(fontspec)
+                continue
+
+            if not(fontspec.refmode):
+                f = self.default_fontspec
+            else:
+                f = self.fontnames.get(fontspec.refmode, None)
+
+            if (f):
+                f.add_subfont(fontspec)
+            else:
+                raise ValueError("wrong fontspec tree")  # unknown refmode id
+
+        # Insert the characters to ignore in fontspecs
+        for f in to_ignore:
+            self.default_fontspec.add_ignored(f.get_uranges())
+
+    def __str__(self):
+        """Dump the instance's data attributes."""
+        string = 'FontSpecConfig:'
+        string += '\n  Fontspec list:'
+        for fontspec in self.fontspecs:
+            string += '\n' + _indent(str(fontspec), 4)
+        return string
+
diff --git a/lib/dbtexmf/dblatex/xetex/fsencoder.py b/lib/dbtexmf/dblatex/xetex/fsencoder.py
index 9960bbe..6175adf 100644
--- a/lib/dbtexmf/dblatex/xetex/fsencoder.py
+++ b/lib/dbtexmf/dblatex/xetex/fsencoder.py
@@ -7,12 +7,14 @@ Provide an encoder for a font specification configuration: the encoder is fed
 with Unicode characters one by one and determines the needed font switches
 between the preceding and the current character.
 """
+from __future__ import print_function
+
 import sys
 import re
 import xml.dom.minidom
 
-from fontspec import UnicodeInterval
-from fsconfig import FontSpecConfig
+from dbtexmf.dblatex.xetex.fontspec import UnicodeInterval
+from dbtexmf.dblatex.xetex.fsconfig import FontSpecConfig
 
 
 class FontSpecEncoder:
@@ -68,7 +70,7 @@ class FontSpecEncoder:
         """
         fontspec = self._cur_fontspec or self._conf.default_fontspec
 
-        print >>sys.stderr, "Current:", fontspec.id
+        print("Current:", fontspec.id, file=sys.stderr)
         fontspec = fontspec.match(char)
         while not(fontspec):
             leaf = self._ref_stack.pop()
diff --git a/lib/dbtexmf/dblatex/xetex/fsencoder.py.enable-python3 b/lib/dbtexmf/dblatex/xetex/fsencoder.py.enable-python3
new file mode 100644
index 0000000..9960bbe
--- /dev/null
+++ b/lib/dbtexmf/dblatex/xetex/fsencoder.py.enable-python3
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Time-stamp: <2008-06-23 22:21:26 ah>
+
+"""
+Provide an encoder for a font specification configuration: the encoder is fed
+with Unicode characters one by one and determines the needed font switches
+between the preceding and the current character.
+"""
+import sys
+import re
+import xml.dom.minidom
+
+from fontspec import UnicodeInterval
+from fsconfig import FontSpecConfig
+
+
+class FontSpecEncoder:
+    """
+    Encoder working with font specifications: it is fed
+    with Unicode characters one by one and it inserts the needed font switches
+    between the preceding and the current character.
+    """
+
+    def __init__(self, configuration):
+        """
+        Create a font specification encoder from the specified configuration
+        file (file name or file-like object).
+        """
+        self._conf = FontSpecConfig(configuration)
+        self._cur_fontspec = None
+        self._ref_stack = [self._conf.default_fontspec]
+
+    def reset(self):
+        # Restart from the default fontspec to avoid a useless 'enter' from None
+        self._cur_fontspec = self._conf.default_fontspec
+        self._ref_stack = [self._conf.default_fontspec]
+
+    def _switch_to(self, fontspec):
+        """
+        Return the transition string, according to the newly selected
+        fontspec and the previously selected fontspec
+        """
+        s = ""
+        # If the font hasn't changed, just insert optional inter-char material
+        if fontspec == self._cur_fontspec:
+            return fontspec.interchar()
+
+        # A new font is selected, so exit from current font stream
+        if self._cur_fontspec:
+            s += self._cur_fontspec.exit()
+
+        # Enter into the current font stream
+        self._cur_fontspec = fontspec
+        s += fontspec.enter()
+        return s
+
+    def _encode(self, char):
+        """
+        Select the fontspec matching the specified character, and switch to
+        this font as current font.
+
+        The principle to find out the fontspec is:
+        - to find from the current font level a matching font
+          (the current font leaf or the direct font children)
+        - if no font is found try with the parent font, and so on,
+          up to the default root font (that must exist).
+        """
+        fontspec = self._cur_fontspec or self._conf.default_fontspec
+
+        print >>sys.stderr, "Current:", fontspec.id
+        fontspec = fontspec.match(char)
+        while not(fontspec):
+            leaf = self._ref_stack.pop()
+            fontspec = self._ref_stack[-1].match(char, excluded=leaf)
+
+        if fontspec != self._ref_stack[-1]:
+            self._ref_stack.append(fontspec)
+
+        return self._switch_to(fontspec)
+
+    def ignorechars(self, charset):
+        "Characters to ignore in font selection (maintain the current one)"
+        intervals = [ UnicodeInterval().from_char(c) for c in charset ]
+        self._conf.default_fontspec.add_ignored(intervals)
+
+    def encode(self, char):
+        """
+        Return a (switches, char) tuple: the font switching string given by
+        _encode() for the specified character, and the character itself.
+        """
+        return (self._encode(char), char)
+
+    def stop(self):
+        """
+        Cleanly exit from the current fontspec; return None when there is none
+        """
+        if self._cur_fontspec:
+            s = self._cur_fontspec.exit()
+            self._cur_fontspec = None
+            return s
+
diff --git a/lib/dbtexmf/xslt/4xslt.py b/lib/dbtexmf/xslt/4xslt.py
index 4af729f..3746fce 100644
--- a/lib/dbtexmf/xslt/4xslt.py
+++ b/lib/dbtexmf/xslt/4xslt.py
@@ -3,6 +3,7 @@
 #
 import sys
 import os
+from io import open
 
 from Ft.Xml.Xslt import Processor
 from Ft.Lib.Uri import OsPathToUri
@@ -49,7 +50,7 @@ class FourXslt:
         uri =  OsPathToUri(xslfile)
         xslt = factory.fromUri(uri,  processIncludes=False)
 
-        o = open(outfile, "w")
+        o = open(outfile, "wb")
         proc.appendStylesheet(xslt)
         if params:
             rc = proc.run(xml, outputStream=o, topLevelParams=params)
diff --git a/lib/dbtexmf/xslt/4xslt.py.enable-python3 b/lib/dbtexmf/xslt/4xslt.py.enable-python3
new file mode 100644
index 0000000..4af729f
--- /dev/null
+++ b/lib/dbtexmf/xslt/4xslt.py.enable-python3
@@ -0,0 +1,69 @@
+#
+# 4Suite plugin
+#
+import sys
+import os
+
+from Ft.Xml.Xslt import Processor
+from Ft.Lib.Uri import OsPathToUri
+from Ft.Xml import Catalog, InputSource
+from Ft.Xml.Domlette import NonvalidatingReader
+
+class FourXslt:
+    "XSLT engine built on the 4Suite (Ft) XSLT processor"
+    def __init__(self):
+        self.verbose = 0
+        self.use_catalogs = 1
+        # Do it only once
+        self.factory = self.get_factory()
+
+    def get_deplist(self):
+        return []  # empty dependency list
+
+    def get_factory(self):
+        system_catalogs = ['/etc/xml/catalog',
+                           '/usr/local/share/xml/catalog']
+        system_catalogs = [p for p in system_catalogs if os.path.exists(p)]
+
+        if system_catalogs:
+            xml_catalog_files = os.getenv("XML_CATALOG_FILES")
+            if xml_catalog_files:
+                xml_catalog_files += " " + " ".join(system_catalogs)
+            else:
+                xml_catalog_files = " ".join(system_catalogs)
+            os.environ["XML_CATALOG_FILES"] = xml_catalog_files
+
+        # The factory now takes into account the new XML_CATALOG_FILES
+        factory = InputSource.InputSourceFactory(catalog=Catalog.GetDefaultCatalog())
+        return factory
+
+    def run(self, xslfile, xmlfile, outfile, opts=None, params=None):
+        "Transform xmlfile with xslfile into outfile; opts is not used here"
+        proc = Processor.Processor()
+        proc.msgPrefix = ""
+        proc.msgSuffix = "\n"
+        factory = self.factory
+
+        uri = OsPathToUri(xmlfile)
+        xml = factory.fromUri(uri)
+        uri =  OsPathToUri(xslfile)
+        xslt = factory.fromUri(uri,  processIncludes=False)
+
+        o = open(outfile, "w")
+        try:
+            proc.appendStylesheet(xslt)
+            kw = {"topLevelParams": params} if params else {}
+            proc.run(xml, outputStream=o, **kw)
+        finally:
+            o.close()  # also done when the transformation raises
+
+
+class Xslt(FourXslt):
+    "Plugin Class to load: FourXslt unchanged, under the name Xslt"
+
+
+def main():
+    "Transform sys.argv[2] with the stylesheet sys.argv[1] into sys.argv[3]"
+    Xslt().run(sys.argv[1], sys.argv[2], sys.argv[3])
+
+if __name__ == "__main__": main()
diff --git a/lib/dbtexmf/xslt/xsltproc.py b/lib/dbtexmf/xslt/xsltproc.py
index 38f1d2b..db72a87 100644
--- a/lib/dbtexmf/xslt/xsltproc.py
+++ b/lib/dbtexmf/xslt/xsltproc.py
@@ -2,6 +2,7 @@
 # Basic wrapper for xsltproc. Maybe we should directly use the lixslt Python
 # API.
 #
+import sys
 import os
 import logging
 import re
@@ -43,6 +44,8 @@ class XsltProc:
         # check that with help output the option is there
         p = Popen(["xsltproc"], stdout=PIPE)
         data = p.communicate()[0]
+        if isinstance(data, bytes):
+            data = data.decode(sys.getdefaultencoding())
         m = re.search("--xincludestyle", data, re.M)
         if not(m):
             return False
diff --git a/lib/dbtexmf/xslt/xsltproc.py.enable-python3 b/lib/dbtexmf/xslt/xsltproc.py.enable-python3
new file mode 100644
index 0000000..38f1d2b
--- /dev/null
+++ b/lib/dbtexmf/xslt/xsltproc.py.enable-python3
@@ -0,0 +1,54 @@
+#
+# Basic wrapper for xsltproc. Maybe we should directly use the libxslt Python
+# API.
+#
+import os
+import logging
+import re
+from subprocess import call, Popen, PIPE
+
+class XsltProc:
+    "XSLT engine running the xsltproc command line tool"
+    def __init__(self):
+        self.catalogs = os.getenv("SGML_CATALOG_FILES")
+        self.use_catalogs = 1
+        self.log = logging.getLogger("dblatex")
+        # --xincludestyle *must* be used when supported, for the external
+        # listings (see mklistings.xsl and pals)
+        self.run_opts = ["--xinclude"]
+        if self._has_xincludestyle(): self.run_opts.append("--xincludestyle")
+
+    def get_deplist(self):
+        return ["xsltproc"]  # the only external program run by this class
+
+    def run(self, xslfile, xmlfile, outfile, opts=None, params=None):
+        "Build the xsltproc command line and run it through system()"
+        # Only the base name of the output file is given to xsltproc
+        cmd = ["xsltproc", "-o", os.path.basename(outfile)]
+        cmd.extend(self.run_opts)
+        if self.use_catalogs and self.catalogs: cmd.append("--catalogs")
+        for param, value in (params or {}).items():
+            cmd.extend(["--param", param, "'%s'" % value])
+        if opts: cmd.extend(opts)
+        cmd.extend([xslfile, xmlfile])
+        self.system(cmd)
+
+    def system(self, cmd):
+        "Log then execute the command; raise ValueError on non-zero exit"
+        self.log.debug(" ".join(cmd))
+        if call(cmd) != 0:
+            raise ValueError("xsltproc failed")
+
+    def _has_xincludestyle(self):
+        """
+        Tell whether the output of xsltproc called without argument mentions
+        the --xincludestyle option.
+        """
+        # check that with help output the option is there
+        proc = Popen(["xsltproc"], stdout=PIPE)
+        usage = proc.communicate()[0]
+        return bool(re.search("--xincludestyle", usage, re.M))
+
+
+class Xslt(XsltProc):
+    "Plugin Class to load: XsltProc unchanged, under the name Xslt"
diff --git a/setup.py b/setup.py
index 5af4cfb..379323b 100644
--- a/setup.py
+++ b/setup.py
@@ -3,11 +3,14 @@
 #
 # dblatex python setup script - See the COPYRIGHT
 #
+from __future__ import print_function
+
 import os
 import sys
 import re
 import glob
 import subprocess
+from io import open
 
 try:
     from setuptools import setup
@@ -83,7 +86,7 @@ os.environ["SGML_CATALOG_FILES"] = cat
         self._catalogs = install.catalogs
         self._style = install.style
         self._use_py_path = install.use_python_path
-        print self._package_base
+        print(self._package_base)
 
         # Build the command line script
         self.build_script()
@@ -162,8 +165,8 @@ os.environ["SGML_CATALOG_FILES"] = cat
         script = self.SHELL_SCRIPT % script_args
         script_name = os.path.basename(script_name)
         outfile = os.path.join(self.build_dir, script_name)
-        fd = os.open(outfile, os.O_WRONLY|os.O_CREAT|os.O_TRUNC, 0755)
-        os.write(fd, script)
+        fd = os.open(outfile, os.O_WRONLY|os.O_CREAT|os.O_TRUNC, 0o755)
+        os.write(fd, script.encode('ascii'))
         os.close(fd)
 
 
@@ -225,8 +228,10 @@ def kpsewhich(tex_file):
         close_fds = True
     p = Popen("kpsewhich %s" % tex_file, shell=True,
               stdin=PIPE, stdout=PIPE, close_fds=close_fds)
-    out = "".join(p.stdout.readlines()).strip()
-    return out
+    data = p.communicate()[0]
+    if isinstance(data, bytes):
+        data = data.decode(sys.getdefaultencoding())
+    return data.strip()
 
 
 class Sdist(sdist):
@@ -260,19 +265,19 @@ class Install(install):
         # First, check non critical graphic tools
         found, missed = find_programs(("epstopdf", "convert", "fig2dev"))
         for util in found:
-            print "+checking %s... yes" % util
+            print("+checking %s... yes" % util)
         for util in missed:
-            print "+checking %s... no" % util
+            print("+checking %s... no" % util)
         if missed:
-            print("warning: not found: %s" % ", ".join(missed))
+            print(("warning: not found: %s" % ", ".join(missed)))
 
         # Now, be serious
         found, missed = find_programs(("latex", "makeindex",
                                        "pdflatex", "kpsewhich"))
         for util in found:
-            print "+checking %s... yes" % util
+            print("+checking %s... yes" % util)
         for util in missed:
-            print "+checking %s... no" % util
+            print("+checking %s... no" % util)
         if missed:
             raise OSError("not found: %s" % ", ".join(missed))
 
@@ -292,21 +297,21 @@ class Install(install):
         for (mod, deplist) in deplists:
             if not(deplist):
                 xslt_found.append(mod)
-                print "+checking XSLT %s... yes" % mod
+                print("+checking XSLT %s... yes" % mod)
                 continue
             found, missed = find_programs(deplist)
             if missed:
                 xslt_missed.append(mod)
-                print "+checking XSLT %s... no (missing %s)" % \
-                      (mod, ", ".join(missed))
+                print("+checking XSLT %s... no (missing %s)" % \
+                      (mod, ", ".join(missed)))
             else:
                 xslt_found.append(mod)
-                print "+checking XSLT %s... yes" % mod
+                print("+checking XSLT %s... yes" % mod)
 
         if not(xslt_found):
             raise OSError("XSLT not installed: %s" % ", ".join(xslt_missed))
         elif xslt_missed:
-            print "warning: XSLT not found: %s" % ", ".join(xslt_missed)
+            print("warning: XSLT not found: %s" % ", ".join(xslt_missed))
 
     def check_latex_dependencies(self):
         # Find the Latex files from the package
@@ -322,7 +327,7 @@ class Install(install):
         used_stys = []
         re_sty = re.compile(r"\\usepackage\s*\[?.*\]?{(\w+)}")
         for sty in stys:
-            f = open(sty)
+            f = open(sty, "rt", encoding="latin-1")
             for line in f:
                 line = line.split("%")[0]
                 m = re_sty.search(line)
@@ -353,7 +358,7 @@ class Install(install):
             if sty in own_stys:
                 status += "found in package"
                 found_stys.append(sty)
-                print status
+                print(status)
                 continue
             stypath = kpsewhich("%s.sty" % sty)
             if stypath:
@@ -362,7 +367,7 @@ class Install(install):
             else:
                 status += "no"
                 mis_stys.append(sty)
-            print status
+            print(status)
             
         if mis_stys:
             raise OSError("not found: %s" % ", ".join(mis_stys))
@@ -378,8 +383,8 @@ class Install(install):
                 self.check_xslt_dependencies()
                 self.check_util_dependencies()
                 self.check_latex_dependencies()
-            except Exception, e:
-                print >>sys.stderr, "Error: %s" % e
+            except Exception as e:
+                print("Error: %s" % e, file=sys.stderr)
                 sys.exit(1)
 
         if db: db.adapt_paths()
@@ -450,17 +455,17 @@ class InstallData(install_data):
             return
 
         # Grab the value from package version
-        d = open(hyper_sty).read()
-        m = re.search("\\ProvidesPackage{hyperref}\s+\[(\d+)", d, re.M)
+        d = open(hyper_sty, "rt", encoding="latin-1").read()
+        m = re.search(r"\\ProvidesPackage{hyperref}\s+\[(\d+)", d, re.M)
         if not(m):
             return
         year = m.group(1)
 
         # Patch the parameter with the found value
-        p = open(param_file).read()
+        p = open(param_file, "rt", encoding="latin-1").read()
         p2 = re.sub('name="texlive.version">.*<',
                     'name="texlive.version">%s<' % year, p)
-        f = open(param_file, "w")
+        f = open(param_file, "wt", encoding="latin-1")
         f.write(p2)
         f.close()
 
diff --git a/setup.py.enable-python3 b/setup.py.enable-python3
new file mode 100644
index 0000000..5af4cfb
--- /dev/null
+++ b/setup.py.enable-python3
@@ -0,0 +1,521 @@
+#!/usr/bin/env python
+# -*- coding: ISO-8859-1 -*-
+#
+# dblatex python setup script - See the COPYRIGHT
+#
+import os
+import sys
+import re
+import glob
+import subprocess
+
+try:
+    from setuptools import setup
+    from setuptools.command.install import install
+except ImportError:
+    from distutils.core import setup
+    from distutils.command.install import install
+
+from distutils.command.build import build
+from distutils.command.build_scripts import build_scripts
+from distutils.command.install_data import install_data
+from distutils.command.sdist import sdist
+from distutils import log
+from subprocess import Popen, PIPE
+sys.path.append("lib")
+from contrib.debian.installer import DebianInstaller
+
+#
+# Build the command line script
+#
+class BuildScripts(build_scripts):
+    "Generate the command line script from the installation settings"
+    SHELL_SCRIPT = """#!%(env_executable)s%(env_args)s%(py_executable)s
+import sys
+import os
+
+package_base = %(package_base)s
+
+%(lib_path)s
+%(catalogs)s
+%(style_set)s
+from %(package_path)s import %(package)s
+%(package)s.main(base=package_base)
+"""
+
+    CATALOGS = """cat = os.environ.get("SGML_CATALOG_FILES")
+if cat:
+    cat += ":%s"
+else:
+    cat = "%s"
+os.environ["SGML_CATALOG_FILES"] = cat
+"""
+
+    def run(self):
+        """
+        Create the proper script for the current platform.
+        """
+        if not self.scripts:
+            return
+
+        # The script can only work with package data
+        self.data_files = self.distribution.data_files
+        if not(self.data_files):
+            return
+
+        if self.dry_run:
+            return
+
+        # Ensure the destination directory exists
+        self.mkpath(self.build_dir)
+
+        # Data useful for building the script
+        install = self.distribution.get_command_obj("install")
+        if not(install.install_data):
+            return
+
+        self._install_lib = os.path.normpath(install.install_lib)
+        self._root = install.root
+        if self._root:
+            self._root = os.path.normpath(self._root)
+        self._package_base = os.path.join(install.install_data,
+                                          self.data_files[0][0])
+        self._catalogs = install.catalogs
+        self._style = install.style
+        self._use_py_path = install.use_python_path
+        print self._package_base
+
+        # Build the command line script
+        self.build_script()
+
+    def _strip_root(self, *paths):  # drop the leading self._root of each path
+        if not(self._root):
+            return paths
+
+        newpaths = []
+        for path in paths:
+            if path.startswith(self._root):
+                newpaths.append(path[len(self._root):])
+            else:
+                newpaths.append(path)
+        return newpaths
+
+    def build_script(self):  # fill SHELL_SCRIPT and write it into build_dir
+        script_name = self.scripts[0]
+
+        # prepare args for the bang path at the top of the script
+        ENV_BIN = '/usr/bin/env'
+        env_args = ''
+        if self._use_py_path:
+            env_exec = ''
+            py_exec = sys.executable
+        elif os.name == 'posix':
+            # Some Solaris platforms may not have an 'env' binary.
+            # If /usr/bin/env exists, use '#!/usr/bin/env python'
+            # otherwise, use '#!' + sys.executable
+            env_exec = os.path.isfile(ENV_BIN) and \
+                os.access(ENV_BIN, os.X_OK) and ENV_BIN or ''
+            py_exec = env_exec and 'python' or sys.executable
+        else:
+            # shouldn't matter on non-POSIX; we'll just use defaults
+            env_exec = ENV_BIN
+            py_exec = 'python'
+
+        # Retrieve actual installation paths
+        lib_path, package_base = self._strip_root(self._install_lib,
+                                                  self._package_base)
+
+        # Just help for non standard installation paths
+        if lib_path in sys.path:
+            lib_path = ""
+        else:
+            lib_path = "sys.path.append(r\"%s\")" % lib_path
+
+        # Things to adapt when building an egg
+        if "/egg" in lib_path:
+            lib_path = ""
+            package_base = 'os.path.abspath(os.path.join(os.path.dirname('\
+                           '__file__), "..", "..", "share", "dblatex"))'
+        else:
+            package_base = 'r"%s"' % (package_base)
+
+        if self._catalogs:
+            catalogs = self.CATALOGS % (self._catalogs, self._catalogs)
+        else:
+            catalogs = ""
+
+        if self._style:
+            style_set = "sys.argv.insert(1, '-T%s')" % self._style
+        else:
+            style_set = ""
+
+        script_args = { 'env_executable': env_exec,
+                        'env_args': env_exec and (' %s' % env_args) or '',
+                        'py_executable': py_exec,
+                        'lib_path': lib_path,
+                        'style_set': style_set,
+                        'package': "dblatex",
+                        'package_path': "dbtexmf.dblatex",
+                        'catalogs': catalogs,
+                        'package_base': package_base }
+
+        script = self.SHELL_SCRIPT % script_args
+        script_name = os.path.basename(script_name)
+        outfile = os.path.join(self.build_dir, script_name)
+        fd = os.open(outfile, os.O_WRONLY|os.O_CREAT|os.O_TRUNC, 0755)
+        try: os.write(fd, script)
+        finally: os.close(fd)
+
+
+class Build(build):
+    """
+    Default build, then build of the documentation if missing or forced
+    """
+    user_options = build.user_options + [
+        ('docbook-xsl=', None,
+         'DocBook Project Stylesheet base directory (build_doc)')]
+
+    def initialize_options(self):
+        build.initialize_options(self)
+        self.docbook_xsl = None
+
+    def run(self):
+        # The documentation comes after the default tasks
+        build.run(self)
+        self.build_doc()
+
+    def build_doc(self):
+        "Call make in docs/ unless the documentation is found already built"
+        log.info("running build_doc")
+        html = os.path.join("docs", "xhtml")
+        pdfs = glob.glob(os.path.join("docs", "[mr]*.pdf"))
+        man = os.path.join("docs", "manpage", "dblatex.1.gz")
+
+        # Lazy check to avoid a rebuild for nothing
+        uptodate = (not(self.force) and os.path.exists(html)
+                    and len(pdfs) >= 2 and os.path.exists(man))
+        if uptodate:
+            return
+
+        # Assumes that make is the GNU make
+        make = ["make", "-C", "docs", "VERSION=%s" % (get_version())]
+        if self.docbook_xsl:
+            make.append("XSLDBK=%s" % os.path.abspath(self.docbook_xsl))
+        subprocess.call(make)
+
+
+def find_programs(utils):
+    "Return ({program: path} for the programs found, [programs not found])"
+    # "lib" is kept on sys.path until the lookups are done
+    sys.path.append("lib")
+    from contrib.which import which
+    located, absent = {}, []
+    for name in utils:
+        try:
+            located[name] = which.which(name)
+        except which.WhichError:
+            absent.append(name)
+    sys.path.remove("lib")
+    return (located, absent)
+
+def kpsewhich(tex_file):
+    "Return the stripped standard output of 'kpsewhich <tex_file>'"
+    close_fds = (os.name != "nt")
+    p = Popen("kpsewhich %s" % tex_file, shell=True,
+              stdin=PIPE, stdout=PIPE, close_fds=close_fds)
+    # communicate() closes both pipes and waits for the child, which the
+    # former p.stdout.readlines() never did
+    out = p.communicate()[0]
+    return out.strip()
+
+
+class Sdist(sdist):
+    """
+    Make the source package, and remove the .pyc files
+    """
+    def prune_file_list(self):
+        sdist.prune_file_list(self)  # default pruning first
+        self.filelist.exclude_pattern(r'.*\.pyc$', is_regex=1)
+
+
+class Install(install):
+
+    user_options = install.user_options + \
+                   [('catalogs=', None, 'default SGML catalogs'),
+                    ('nodeps', None, 'don\'t check the dependencies'),
+                    ('style=', None, 'default style to use'),
+                    ('use-python-path', None, 'don\'t use env to locate python')]
+
+    def initialize_options(self):
+        install.initialize_options(self)
+        self.catalogs = None
+        self.nodeps = None
+        self.style = None
+        self.use_python_path = None
+        # Prevents from undefined 'install_layout' attribute
+        if not(getattr(self, "install_layout", None)):
+            self.install_layout = None
+
+    def check_util_dependencies(self):
+        # First, check non critical graphic tools
+        found, missed = find_programs(("epstopdf", "convert", "fig2dev"))
+        for util in found:
+            print "+checking %s... yes" % util
+        for util in missed:
+            print "+checking %s... no" % util
+        if missed:
+            print("warning: not found: %s" % ", ".join(missed))
+
+        # Now, be serious
+        found, missed = find_programs(("latex", "makeindex",
+                                       "pdflatex", "kpsewhich"))
+        for util in found:
+            print "+checking %s... yes" % util
+        for util in missed:
+            print "+checking %s... no" % util
+        if missed:
+            raise OSError("not found: %s" % ", ".join(missed))
+
+    def check_xslt_dependencies(self):
+        sys.path.insert(0, "lib")
+        from dbtexmf.xslt import xslt
+        sys.path.remove("lib")
+
+        # At least one XSLT must be available
+        deplists = xslt.get_deplists()
+        if not(deplists):
+            raise OSError("no XSLT available")
+
+        # For each XSLT check the programs they depend on
+        xslt_found = []
+        xslt_missed = []
+        for (mod, deplist) in deplists:
+            if not(deplist):
+                xslt_found.append(mod)
+                print "+checking XSLT %s... yes" % mod
+                continue
+            found, missed = find_programs(deplist)
+            if missed:
+                xslt_missed.append(mod)
+                print "+checking XSLT %s... no (missing %s)" % \
+                      (mod, ", ".join(missed))
+            else:
+                xslt_found.append(mod)
+                print "+checking XSLT %s... yes" % mod
+
+        if not(xslt_found):
+            raise OSError("XSLT not installed: %s" % ", ".join(xslt_missed))
+        elif xslt_missed:
+            print "warning: XSLT not found: %s" % ", ".join(xslt_missed)
+
+    def check_latex_dependencies(self):
+        """Check that the LaTeX packages used by the shipped styles exist.
+
+        The .sty files found under 'latex/' are scanned for usepackage
+        lines; each package not shipped there is looked up with kpsewhich().
+
+        Raises OSError naming the packages that could not be found.
+        """
+        # Find the Latex files from the package
+        stys = []
+        for root, dirs, files in os.walk('latex/'):
+            stys += glob.glob(os.path.join(root, "*.sty"))
+        # Package names are the file names without the ".sty" extension
+        own_stys = [os.path.basename(s)[:-4] for s in stys]
+
+        # Find the used packages
+        used_stys = []
+        re_sty = re.compile(r"\\usepackage\s*\[?.*\]?{(\w+)}")
+        for sty in stys:
+            # 'with' closes the file even when reading it fails
+            with open(sty) as f:
+                for line in f:
+                    # Only look at what precedes a '%' comment marker
+                    m = re_sty.search(line.split("%")[0])
+                    if m and m.group(1) not in used_stys:
+                        used_stys.append(m.group(1))
+
+        # Now look if they are found
+        found_stys = []
+        mis_stys = []
+        used_stys.sort()
+
+        # Dirty: these packages are removed from the list, hence not checked
+        for name in ("truncate", "elfonts", "CJKutf8", "pinyin", "ifxetex"):
+            if name in used_stys:
+                used_stys.remove(name)
+
+        for sty in used_stys:
+            if sty in found_stys:
+                continue
+            status = "+checking %s... " % sty
+            if sty in own_stys:
+                # Shipped with the package itself: no kpsewhich lookup
+                status += "found in package"
+                found_stys.append(sty)
+                print(status)
+                continue
+            stypath = kpsewhich("%s.sty" % sty)
+            if stypath:
+                status += "yes"
+                found_stys.append(sty)
+            else:
+                status += "no"
+                mis_stys.append(sty)
+            print(status)
+
+        if mis_stys:
+            raise OSError("not found: %s" % ", ".join(mis_stys))
+
+    def run(self):
+        """Check the dependencies unless skipped, then run the install."""
+        db = DebianInstaller(self) if self.install_layout == "deb" else None
+
+        # The checks are skipped for the "deb" layout or when nodeps is set
+        if not(db) and not(self.nodeps):
+            try:
+                self.check_xslt_dependencies()
+                self.check_util_dependencies()
+                self.check_latex_dependencies()
+            except Exception as e:
+                # Report on stderr and exit with status 1, without traceback
+                sys.stderr.write("Error: %s\n" % e)
+                sys.exit(1)
+
+        if db: db.adapt_paths()
+
+        # If no build is required, at least build the script
+        if self.skip_build:
+            self.run_command('build_scripts')
+
+        install.run(self)
+
+        if db: db.finalize()
+
+
+class InstallData(install_data):
+    """install_data command that expands the directories of data_files."""
+    def run(self):
+        """Expand data_files, install them, then adapt unless "deb" layout."""
+        ignore_pattern = os.path.sep + r"(CVS|RCS)" + os.path.sep
+        # literal backslash must be doubled in regular expressions
+        ignore_pattern = ignore_pattern.replace('\\', r'\\')
+
+        # Walk the sub-dirs named in data_files to build the full file list
+        full_data_files = []
+        for install_base, paths in self.data_files:
+            base_files = []
+            for path in paths:
+                if os.path.isdir(path):
+                    pref = os.path.dirname(path)
+                    for root, dirs, files in os.walk(path):
+                        if re.search(ignore_pattern, root + os.sep):
+                            continue
+                        # Only the last directory is copied, not the full path
+                        if not(pref):
+                            iroot = root
+                        else:
+                            iroot = root.split(pref + os.path.sep, 1)[1]
+                        idir = os.path.join(install_base, iroot)
+                        files = [os.path.join(root, i) for i in files]
+                        if files:
+                            full_data_files += [(idir, files)]
+                else:
+                    base_files.append(path)
+
+            if base_files:
+                full_data_files += [(install_base, base_files)]
+
+        # Replace synthetic data_files by the full one, and do the actual job
+        self.data_files = full_data_files
+        rc = install_data.run(self)
+
+        if self.distribution.get_command_obj("install").install_layout != "deb":
+            self.adapt_installed_data()
+        return rc
+
+    def adapt_installed_data(self):
+        """Set the texlive version in the first installed param.xsl found."""
+        installed = self.get_outputs()
+        for data_file in installed:
+            if os.path.basename(data_file) == "param.xsl":
+                self._set_texlive_version(data_file)
+                break
+
+    def _set_texlive_version(self, param_file):
+        """Detect the installed Texlive version from hyperref.sty version, and
+        override the texlive.version param accordingly."""
+        hyper_sty = kpsewhich("hyperref.sty")
+        if not(hyper_sty):
+            return  # Cannot do anything, give up
+
+        # Grab the value from package version; r"\\" is the literal backslash
+        with open(hyper_sty) as f:
+            d = f.read()
+        m = re.search(r"\\ProvidesPackage\{hyperref\}\s+\[(\d+)", d, re.M)
+        if not(m):
+            return
+
+        # Patch the parameter with the found value
+        with open(param_file) as f:
+            p = f.read()
+        p2 = re.sub('name="texlive.version">.*<',
+                    'name="texlive.version">%s<' % m.group(1), p)
+        with open(param_file, "w") as f:
+            f.write(p2)
+
+
+def get_version():  # Return what DbLatex.get_version() reports for this tree
+    sys.path.insert(0, "lib")  # search ./lib first for the dbtexmf package
+    from dbtexmf.dblatex import dblatex
+    d = dblatex.DbLatex(base=os.getcwd())
+    sys.path.remove("lib")  # drop the temporary search path entry again
+    return d.get_version()
+
+
+if __name__ == "__main__":
+    # Free-form text handed to setup() as long_description
+    description = """
+       dblatex is a program that transforms your SGML/XML DocBook documents to
+       DVI, PostScript or PDF by translating them into pure LaTeX as a first
+       process.  MathML 2.0 markups are supported, too. It started as a clone
+       of DB2LaTeX.
+       """
+    classifiers = [
+       "Operating System :: OS Independent",
+       "Topic :: Text Processing :: Markup :: XML",
+       "License :: OSI Approved :: GNU General Public License (GPL)"
+    ]
+
+    # Entries may be directories; InstallData.run() expands them
+    doc_dir = 'share/doc/dblatex'
+    data_files = [
+        ('share/dblatex', ['xsl', 'latex', 'etc/schema']),
+        (doc_dir, glob.glob(os.path.join("docs", "[mr]*.pdf"))),
+        (doc_dir, [os.path.join("docs", "xhtml")]),
+        ('share/man/man1', ['docs/manpage/dblatex.1.gz']),
+    ]
+    # Custom command classes handed to setup() through cmdclass
+    cmdclass = {'build': Build, 'build_scripts': BuildScripts,
+                'install': Install, 'install_data': InstallData,
+                'sdist': Sdist}
+    packages = ['dbtexmf', 'dbtexmf.core', 'dbtexmf.xslt', 'dbtexmf.dblatex',
+                'dbtexmf.dblatex.xetex', 'dbtexmf.dblatex.grubber']
+
+    setup(name="dblatex",
+          version=get_version(),
+          description='DocBook to LaTeX/ConTeXt Publishing',
+          long_description=description,
+          author='Benoit Guillon',
+          author_email='marsgui@users.sourceforge.net',
+          url='http://dblatex.sf.net',
+          license='GPL Version 2 or later',
+          classifiers=classifiers,
+          packages=packages,
+          package_dir={'dbtexmf':'lib/dbtexmf'},
+          package_data={'dbtexmf.core':['sgmlent.txt'],
+                        'dbtexmf.dblatex.grubber':['xindylang.xml']},
+          data_files=data_files,
+          scripts=['scripts/dblatex'],
+          cmdclass=cmdclass)
+