# (Repository web-viewer residue "Blob Blame History Raw" commented out so the file parses.)
# This file is part of Rubber and thus covered by the GPL
# (c) Emmanuel Beffara, 2002--2006
"""
LaTeX document building system for Rubber.

This module defines the class that parses the input LaTeX to load the expected
modules.
"""
import re

class TexParser:
    """
    Line-oriented scanner that looks for a configurable set of LaTeX macros
    and calls a handler for each occurrence found.

    Handlers live in the `hooks' dictionary, keyed by the macro name without
    its leading backslash (for instance "usepackage" or "begin{btSect}").
    Each handler is called with one argument, a dictionary describing the
    match (see parse_line).
    """

    # Brace-less TeX form of the input macro: backslash, "input", spaces, then
    # the file name.
    re_input = re.compile(r"\\input +(?P<arg>[^{} \n\\]+)")

    # Split a line into its meaningful part (group "line") and a trailing
    # comment. A backslash escapes the character that follows it, so "\%" is
    # a literal percent sign whereas the "%" in "\\%" (a line break followed
    # by a comment) does start a comment.
    re_comment = re.compile(r"(?P<line>([^\\%]|\\.|\\)*)(%.*)?")

    def __init__(self, doc):
        """
        Create a parser reporting to `doc'. The only thing required from
        `doc' here is a `modules' attribute with a register(name, dict)
        method, which the default handlers call.
        """
        self.doc = doc
        self.comment_mark = "%"
        self.exclude_mods = []
        # Initialised here so that parse_line() can be used without going
        # through parse() first.
        self.lineno = 0
        self.hooks = {
            "usepackage"   : self.h_usepackage,
            "begin{btSect}": self.h_bibtopic,
        }
        self.update_rehooks()

    def update_rehooks(self):
        """
        Update the regular expression used to match macro calls using the keys
        in the `hook' dictionary. We don't match all control sequences for
        obvious efficiency reasons.
        """
        # Hook names are literal text: the matched name is used as the key in
        # self.hooks, so every regex metacharacter must be escaped, not only
        # the star.
        hooklist = [re.escape(name) for name in self.hooks]

        # \name, an optional star, an optional [opt] and then either a {arg}
        # or a position followed by a non-letter (so that a hook name that is
        # only the prefix of a longer macro name is not matched).
        pattern = (r"\\(?P<name>%s)\*?"
                   r" *(\[(?P<opt>[^\]]*)\])?"
                   r" *({(?P<arg>[^{}]*)}|(?=[^A-Za-z]))")

        self.rehooks = re.compile(pattern % "|".join(hooklist))

    def add_hook(self, name, fun):
        """
        Register a given function to be called when a given macro is found.
        `name' is the macro name without the leading backslash. The function
        is called with one argument, the dictionary built by parse_line.
        """
        self.hooks[name] = fun
        self.update_rehooks()

    def parse(self, fd, exclude_mods=None, dump=None):
        """
        Process a LaTeX source. `fd' is iterated line by line until it is
        exhausted, calling the handlers for the macro calls.

        `exclude_mods' is a list of module names the default handlers must
        not register (none when omitted).

        If the optional argument 'dump' is given, it is used as a stream on
        which all text not matched as a macro is written.
        """
        self.exclude_mods = exclude_mods or []
        self.lineno = 0
        for line in fd:
            self.parse_line(line, dump)

    def parse_line(self, line, dump=None):
        """
        Process one line of source: strip the comment, then call the handler
        of every hooked macro found, from left to right.

        Each handler receives a dictionary with the keys "name", "opt" and
        "arg" (groups of the match), "match" (the matched text), "line" (the
        text following the match), "pos" (file and line number) and "dump".
        Scanning resumes on whatever "line" contains once the handler
        returns, so a handler may replace it to consume or rewrite the rest
        of the line.
        """
        self.lineno += 1

        # Remove comments (this pattern matches any string, possibly empty)
        line = self.re_comment.match(line).group("line")

        match = self.rehooks.search(line)
        while match:
            fields = match.groupdict()
            name = fields["name"]

            # The case of \input is handled specifically, because of the
            # TeX syntax with no braces. The groups of the brace-less match
            # are merged in, so that "name" and "opt" stay available.
            if name == "input" and not fields["arg"]:
                bare = self.re_input.search(line)
                if bare:
                    match = bare
                    fields.update(bare.groupdict())

            if dump: dump.write(line[:match.start()])
            fields["match"] = line[match.start():match.end()]
            fields["line"] = line[match.end():]
            # The file name is not tracked by this class, a fixed placeholder
            # is reported.
            fields["pos"] = { 'file': "file", 'line': self.lineno }
            fields["dump"] = dump

            self.hooks[name](fields)
            line = fields["line"]
            match = self.rehooks.search(line)

        if dump: dump.write(line)

    def h_usepackage(self, dict):
        """
        Called when a \\usepackage macro is found. Every package named in the
        comma-separated argument is registered as a module of the document,
        except for empty entries and for names listed in `exclude_mods'.
        """
        if not dict["arg"]: return
        for name in dict["arg"].split(","):
            name = name.strip()
            # An empty entry (as in "a,,b" or "a,") names no package.
            if not name or name in self.exclude_mods:
                continue
            self.doc.modules.register(name, dict)

    def h_bibtopic(self, dict):
        """
        Called when a \\begin{btSect} is found: registers the "bibtopic"
        module unless it is listed in `exclude_mods'. The module can also be
        loaded by a usepackage of bibtopic.
        """
        if "bibtopic" in self.exclude_mods:
            return
        self.doc.modules.register("bibtopic", dict)