# This file is part of Rubber and thus covered by the GPL
# (c) Emmanuel Beffara, 2002--2006
"""
LaTeX document building system for Rubber.

This module defines the class that parses the LaTeX log files.
"""
from __future__ import generators
Packit 5164a5
Packit 5164a5
import re
Packit 5164a5
Packit 5164a5
from msg import _, msg
Packit 5164a5
Packit 5164a5
class LogParser:
    """
    Extract information (errors, warnings, bad boxes, ...) from a LaTeX log.

    For efficiency, the instances contain the whole file as a list of strings
    so that it can be read several times with no disk access. The first line
    of the log (the compiler banner) is checked by read() and is not kept in
    `self.lines'.
    """

    # Banner printed by the TeX engine on the first line of the log.
    re_loghead = re.compile("This is [0-9a-zA-Z-]*(TeX|Omega)")
    # Messages asking for another compilation.
    re_rerun = re.compile(
        "(LaTeX|Package longtable|Package bibtopic) Warning:.*Rerun")
    re_rerun2 = re.compile(r"\(Changebar\).*Rerun")
    # An opening parenthesis followed by a file name, or a closing one.
    re_file = re.compile(r"(\((?P<file>[^ \n\t(){}]*)|\))")
    re_badbox = re.compile(r"(Ov|Und)erfull \\[hv]box ")
    # End of an error report: "l.<line> <code>" or the "<*>" marker.
    re_line = re.compile(r"(l\.(?P<line>[0-9]+)( (?P<code>.*))?$|<\*>)")
    re_cseq = re.compile(r".*(?P<seq>\\[^ ]*) ?$")
    re_page = re.compile(r"\[(?P<num>[0-9]+)\]")
    re_atline = re.compile(
        "( detected| in paragraph)? at lines? "
        "(?P<line>[0-9]*)(--(?P<last>[0-9]*))?")
    # Built from adjacent literals so that source indentation can never leak
    # into the pattern (a backslash-newline inside the literal would do that).
    re_reference = re.compile(
        r"LaTeX Warning: Reference `(?P<ref>.*)' "
        r"on page (?P<page>[0-9]*) undefined on input line (?P<line>[0-9]*)\.$")
    re_label = re.compile("LaTeX Warning: (?P<text>Label .*)$")
    re_warning = re.compile(
        "(LaTeX|Package)( (?P<pkg>.*))? Warning: (?P<text>.*)$")
    re_online = re.compile("(; reported)? on input line (?P<line>[0-9]*)")
    re_ignored = re.compile("; all text was ignored after line (?P<line>[0-9]*).$")
    re_misschar = re.compile("Missing character: There is no (?P<uchar>[^ ]*) in font (?P<font>.*)!")

    #-- Initialization {{{2

    def __init__(self):
        self.lines = []

    def read(self, name):
        """
        Read the specified log file, checking that it was produced by the
        right compiler. The result is false (0) on success and true otherwise:
        2 if the file cannot be opened, 1 if it is empty or if its first line
        is not a recognized TeX banner. On failure `self.lines' is left empty.
        """
        self.lines = []
        try:
            log = open(name)
        except IOError:
            return 2
        try:
            head = log.readline()
            if not head or not self.re_loghead.match(head):
                return 1
            self.lines = log.readlines()
        finally:
            # Close the handle on every path, including read failures.
            log.close()
        return 0

    #-- Process information {{{2

    def errors(self):
        """
        Returns true if there was an error during the compilation.
        """
        skipping = 0
        for line in self.lines:
            if line.strip() == "":
                # An empty line ends the text attached to a bad box message.
                skipping = 0
                continue
            if skipping:
                continue
            if self.re_badbox.match(line):
                skipping = 1
                continue
            if line[0] == "!":
                # We check for the substring "pdfTeX warning" because pdfTeX
                # sometimes issues warnings (like undefined references) in the
                # form of errors...
                if line.find("pdfTeX warning") == -1:
                    return 1
        return 0

    def run_needed(self):
        """
        Returns true if LaTeX indicated that another compilation is needed.
        """
        for line in self.lines:
            if self.re_rerun.match(line):
                return 1
            if self.re_rerun2.match(line):
                return 1
        return 0

    #-- Information extraction {{{2

    def continued(self, line):
        """
        Check if a line in the log is continued on the next line. This is
        needed because TeX breaks messages at 79 characters per line. We make
        this into a method because the test is slightly different in Metapost.
        """
        return len(line) == 79

    def parse(self, errors=0, boxes=0, refs=0, warnings=0, misschars=0):
        """
        Parse the log file for relevant information. The named arguments are
        booleans that indicate which information should be extracted:
        - errors: all errors (including aborted compilations)
        - boxes: bad boxes
        - refs: warnings about references and labels
        - warnings: all other warnings (including pdfTeX warnings that are
          reported in the form of errors)
        - misschars: characters missing from a font
        The function returns a generator. Each generated item is a dictionary
        that contains (some of) the following entries:
        - kind: the kind of information ("error", "abort" or "warning")
        - text: the text of the error or warning
        - code: the piece of code that caused an error
        - why: for "abort" items, the reason given after the "***" marker
        - uchar, font: for missing characters
        - file, line, last, pkg, page: position information.
        """
        if not self.lines:
            return
        last_file = None
        pos = [last_file]
        page = 1
        parsing = 0    # 1 if we are parsing an error's text
        skipping = 0   # 1 if we are skipping text until an empty line
        prefix = None  # the prefix for warning messages from packages
        accu = ""      # accumulated text from the previous line
        error = ""     # text of the error being parsed
        text = []      # pieces of the multi-line warning being collected
        info = {}      # position information of that warning
        for line in self.lines:
            # Remove the line feed, but only if there is one: the last line
            # of a file may lack it and must not lose a real character.
            if line[-1:] == "\n":
                line = line[:-1]

            # TeX breaks messages at 79 characters, just to make parsing
            # trickier...

            if self.continued(line):
                accu += line
                continue
            line = accu + line
            accu = ""

            # Text that should be skipped (from bad box messages)

            if prefix is None and line == "":
                skipping = 0
                continue

            if skipping:
                continue

            # Errors (including aborted compilation)

            if parsing:
                if error == "Undefined control sequence.":
                    # This is a special case in order to report which control
                    # sequence is undefined.
                    m = self.re_cseq.match(line)
                    if m:
                        error = "Undefined control sequence %s." % m.group("seq")
                m = self.re_line.match(line)
                if m:
                    parsing = 0
                    skipping = 1
                    pdfTeX = error.find("pdfTeX warning") != -1
                    if (pdfTeX and warnings) or (errors and not pdfTeX):
                        if pdfTeX:
                            d = {
                                "kind": "warning",
                                "pkg": "pdfTeX",
                                "text": error[error.find(":")+2:]
                            }
                        else:
                            d = {
                                "kind": "error",
                                "text": error
                            }
                        d.update(m.groupdict())
                        m = self.re_ignored.search(error)
                        if m:
                            d["file"] = last_file
                            # `in' works on every Python version, unlike
                            # dict.has_key() which Python 3 removed.
                            if "code" in d:
                                del d["code"]
                            d.update(m.groupdict())
                        elif pos[-1] is None:
                            d["file"] = last_file
                        else:
                            d["file"] = pos[-1]
                        yield d
                # The line may be empty here (when a package warning is still
                # pending), so slice instead of indexing.
                elif line[:1] == "!":
                    error = line[2:]
                elif line[0:3] == "***":
                    parsing = 0
                    skipping = 1
                    if errors:
                        yield {
                            "kind": "abort",
                            "text": error,
                            "why" : line[4:],
                            "file": last_file
                            }
                elif line[0:15] == "Type X to quit ":
                    parsing = 0
                    skipping = 0
                    if errors:
                        yield {
                            "kind": "error",
                            "text": error,
                            "file": pos[-1]
                            }
                continue

            if len(line) > 0 and line[0] == "!":
                error = line[2:]
                parsing = 1
                continue

            if line == "Runaway argument?":
                error = line
                parsing = 1
                continue

            # Long warnings

            if prefix is not None:
                if line[:len(prefix)] == prefix:
                    text.append(line[len(prefix):].strip())
                else:
                    # First line that does not carry the prefix: the warning
                    # is complete, so it can be reported.
                    text = " ".join(text)
                    m = self.re_online.search(text)
                    if m:
                        info["line"] = m.group("line")
                        text = text[:m.start()] + text[m.end():]
                    if warnings:
                        info["text"] = text
                        d = { "kind": "warning" }
                        d.update(info)
                        yield d
                    prefix = None
                continue

            # Undefined references

            m = self.re_reference.match(line)
            if m:
                if refs:
                    d = {
                        "kind": "warning",
                        "text": _("Reference `%s' undefined.") % m.group("ref"),
                        "file": pos[-1]
                        }
                    d.update(m.groupdict())
                    yield d
                continue

            m = self.re_label.match(line)
            if m:
                if refs:
                    d = {
                        "kind": "warning",
                        "file": pos[-1]
                        }
                    d.update(m.groupdict())
                    yield d
                continue

            # Other warnings

            if line.find("Warning") != -1:
                m = self.re_warning.match(line)
                if m:
                    info = m.groupdict()
                    info["file"] = pos[-1]
                    info["page"] = page
                    if info["pkg"] is None:
                        del info["pkg"]
                        prefix = ""
                    else:
                        prefix = ("(%s)" % info["pkg"])
                    # Continuation lines are aligned with the message text.
                    prefix = prefix.ljust(m.start("text"))
                    text = [info["text"]]
                continue

            # Bad box messages

            m = self.re_badbox.match(line)
            if m:
                if boxes:
                    mpos = { "file": pos[-1], "page": page }
                    m = self.re_atline.search(line)
                    if m:
                        md = m.groupdict()
                        for key in "line", "last":
                            if md[key]:
                                mpos[key] = md[key]
                        line = line[:m.start()]
                    d = {
                        "kind": "warning",
                        "text": line
                        }
                    d.update(mpos)
                    yield d
                skipping = 1
                continue

            # Missing characters in a font

            if misschars:
                m = self.re_misschar.match(line)
                if m:
                    d = {
                        "kind": "warning",
                        "uchar": m.group("uchar"),
                        "font": m.group("font"),
                        }
                    yield d
                    continue

            # If there is no message, track source names and page numbers.

            last_file = self.update_file(line, pos, last_file)
            page = self.update_page(line, page)

    def get_errors(self):
        """Return a generator over the errors found in the log."""
        return self.parse(errors=1)

    def get_boxes(self):
        """Return a generator over the bad box messages found in the log."""
        return self.parse(boxes=1)

    def get_references(self):
        """Return a generator over the reference and label warnings."""
        return self.parse(refs=1)

    def get_warnings(self):
        """Return a generator over the other warnings found in the log."""
        return self.parse(warnings=1)

    def get_misschars(self):
        """Return a generator over the missing character messages."""
        return self.parse(misschars=1)

    def update_file(self, line, stack, last):
        """
        Parse the given line of log file for file openings and closings and
        update the list `stack'. Newly opened files are at the end, therefore
        stack[1] is the main source while stack[-1] is the current one. The
        first element, stack[0], contains the value None for errors that may
        happen outside the source. Return the last file from which text was
        read (the new stack top, or the one before the last closing
        parenthesis).

        A closing parenthesis that has no matching opening one (logs contain
        arbitrary text) is ignored, so that stack[0] is never removed and
        stack[-1] stays valid for the callers.
        """
        m = self.re_file.search(line)
        while m:
            if line[m.start()] == '(':
                last = m.group("file")
                stack.append(last)
            elif len(stack) > 1:
                last = stack[-1]
                del stack[-1]
            line = line[m.end():]
            m = self.re_file.search(line)
        return last

    def update_page(self, line, before):
        """
        Parse the given line and return the number of the page that is being
        built after that line, assuming the current page before the line was
        `before'.
        """
        ms = self.re_page.findall(line)
        if not ms:
            return before
        return int(ms[-1]) + 1