# lib/dbtexmf/dblatex/grubber/logparser.py
#
# This file is part of Rubber and thus covered by the GPL
# (c) Emmanuel Beffara, 2002--2006
"""
LaTeX document building system for Rubber.

This module defines the class that parses the LaTeX log files.
"""
from __future__ import generators

import re
from io import open

from dbtexmf.dblatex.grubber.msg import _, msg

class LogParser:
    """
    This class performs all the extraction of information from the log file.
    For efficiency, the instances contain the whole file as a list of strings
    so that it can be read several times with no disk access.
    """

    # All patterns are raw strings so that regex escapes such as `\(' and
    # `\[' are not (mis)interpreted as string-literal escapes.
    re_loghead = re.compile(r"This is [0-9a-zA-Z-]*(TeX|Omega)")
    re_rerun = re.compile(
        r"(LaTeX|Package longtable|Package bibtopic) Warning:.*Rerun")
    re_rerun2 = re.compile(r"\(Changebar\).*Rerun")
    re_file = re.compile(r"(\((?P<file>[^ \n\t(){}]*)|\))")
    re_badbox = re.compile(r"(Ov|Und)erfull \\[hv]box ")
    # The `code' group holds the source excerpt printed after "l.<num> ";
    # parse() copies it into the reported dictionary.
    re_line = re.compile(r"(l\.(?P<line>[0-9]+)( (?P<code>.*))?$|<\*>)")
    re_cseq = re.compile(r".*(?P<seq>\\[^ ]*) ?$")
    re_page = re.compile(r"\[(?P<num>[0-9]+)\]")
    re_atline = re.compile(
        r"( detected| in paragraph)? at lines? "
        r"(?P<line>[0-9]*)(--(?P<last>[0-9]*))?")
    # Built by implicit concatenation: a backslash continuation inside the
    # literal would embed the next line's indentation into the pattern.
    re_reference = re.compile(
        r"LaTeX Warning: Reference `(?P<ref>.*)' "
        r"on page (?P<page>[0-9]*) undefined on input line (?P<line>[0-9]*)\.$")
    re_label = re.compile(r"LaTeX Warning: (?P<text>Label .*)$")
    re_warning = re.compile(
        r"(LaTeX|Package)( (?P<pkg>.*))? Warning: (?P<text>.*)$")
    re_online = re.compile(r"(; reported)? on input line (?P<line>[0-9]*)")
    re_ignored = re.compile(
        r"; all text was ignored after line (?P<line>[0-9]*).$")
    re_misschar = re.compile(
        r"Missing character: There is no (?P<uchar>[^ ]*) in font (?P<font>.*)!")

    #-- Initialization {{{2

    def __init__ (self):
        # Log lines following the header line, as returned by readlines().
        self.lines = []

    def read (self, name):
        """
        Read the specified log file, checking that it was produced by the
        right compiler.

        The first line must match `re_loghead'; the remaining lines are
        stored in `self.lines'. Returns 0 on success, 1 if the file is empty
        or its first line does not match, and 2 if the file cannot be opened.
        Any non-zero (true) value leaves `self.lines' empty.
        """
        self.lines = []
        try:
            logfile = open(name, "rt")
        except IOError:
            return 2
        # The context manager closes the file on every path, including when
        # reading raises.
        with logfile:
            head = logfile.readline()
            if not head or not self.re_loghead.match(head):
                return 1
            self.lines = logfile.readlines()
        return 0

    #-- Process information {{{2

    def errors (self):
        """
        Returns true (1) if there was an error during the compilation, and
        false (0) otherwise.
        """
        skipping = 0
        for line in self.lines:
            if line.strip() == "":
                # A blank line ends the text attached to a bad box message.
                skipping = 0
                continue
            if skipping:
                continue
            if self.re_badbox.match(line):
                skipping = 1
                continue
            # We check for the substring "pdfTeX warning" because pdfTeX
            # sometimes issues warnings (like undefined references) in the
            # form of errors...
            if line.startswith("!") and "pdfTeX warning" not in line:
                return 1
        return 0

    def run_needed (self):
        """
        Returns true (1) if LaTeX indicated that another compilation is
        needed, and false (0) otherwise.
        """
        for line in self.lines:
            if self.re_rerun.match(line) or self.re_rerun2.match(line):
                return 1
        return 0

    #-- Information extraction {{{2

    def continued (self, line):
        """
        Check if a line in the log is continued on the next line. This is
        needed because TeX breaks messages at 79 characters per line. We make
        this into a method because the test is slightly different in Metapost.
        """
        return len(line) == 79

    def parse (self, errors=0, boxes=0, refs=0, warnings=0, misschars=0):
        """
        Parse the log file for relevant information. The named arguments are
        booleans that indicate which information should be extracted:
        - errors: all errors
        - boxes: bad boxes
        - refs: warnings about references
        - warnings: all other warnings
        - misschars: characters missing from a font
        The function returns a generator. Each generated item is a dictionary
        that contains (some of) the following entries:
        - kind: the kind of information ("error", "abort" or "warning")
        - text: the text of the error or warning
        - code: the piece of code that caused an error
        - why: for "abort" items, the text following the "***" marker
        - uchar, font: for missing character items
        - file, line, last, pkg, page: position information
        """
        if not self.lines:
            return
        last_file = None
        pos = [last_file]
        page = 1
        parsing = 0    # 1 if we are parsing an error's text
        skipping = 0   # 1 if we are skipping text until an empty line
        prefix = None  # the prefix for warning messages from packages
        accu = ""      # accumulated text from the previous line
        error = ""     # text of the error being parsed (valid when parsing)
        text = []      # pieces of the pending warning (valid with a prefix)
        info = {}      # position data of the pending warning
        for line in self.lines:
            # Remove the line feed, but only if there is one: the last line
            # of the file may lack it and must not lose a real character.
            if line.endswith("\n"):
                line = line[:-1]

            # TeX breaks messages at 79 characters, just to make parsing
            # trickier...

            if self.continued(line):
                accu += line
                continue
            line = accu + line
            accu = ""

            # Text that should be skipped (from bad box messages)

            if prefix is None and line == "":
                skipping = 0
                continue

            if skipping:
                continue

            # Errors (including aborted compilation)

            if parsing:
                if error == "Undefined control sequence.":
                    # This is a special case in order to report which control
                    # sequence is undefined.
                    m = self.re_cseq.match(line)
                    if m:
                        error = "Undefined control sequence %s." % m.group("seq")
                m = self.re_line.match(line)
                if m:
                    parsing = 0
                    skipping = 1
                    pdfTeX = error.find("pdfTeX warning") != -1
                    if (pdfTeX and warnings) or (errors and not pdfTeX):
                        if pdfTeX:
                            d = {
                                "kind": "warning",
                                "pkg": "pdfTeX",
                                "text": error[error.find(":")+2:]
                            }
                        else:
                            d = {
                                "kind": "error",
                                "text": error
                            }
                        d.update(m.groupdict())
                        ignored = self.re_ignored.search(error)
                        if ignored:
                            # The reported line comes from the error text
                            # itself, so the code excerpt is dropped.
                            d["file"] = last_file
                            if "code" in d:
                                del d["code"]
                            d.update(ignored.groupdict())
                        elif pos[-1] is None:
                            d["file"] = last_file
                        else:
                            d["file"] = pos[-1]
                        yield d
                # startswith() is used instead of indexing so that an empty
                # line (possible while a long warning is pending) is simply
                # ignored rather than raising IndexError.
                elif line.startswith("!"):
                    error = line[2:]
                elif line.startswith("***"):
                    parsing = 0
                    skipping = 1
                    if errors:
                        yield {
                            "kind": "abort",
                            "text": error,
                            "why" : line[4:],
                            "file": last_file
                            }
                elif line.startswith("Type X to quit "):
                    parsing = 0
                    skipping = 0
                    if errors:
                        yield {
                            "kind": "error",
                            "text": error,
                            "file": pos[-1]
                            }
                continue

            if line.startswith("!"):
                error = line[2:]
                parsing = 1
                continue

            if line == "Runaway argument?":
                error = line
                parsing = 1
                continue

            # Long warnings

            if prefix is not None:
                if line[:len(prefix)] == prefix:
                    # Continuation line of the pending warning.
                    text.append(line[len(prefix):].strip())
                else:
                    # The warning is complete: report it. The current line
                    # is not examined any further.
                    text = " ".join(text)
                    m = self.re_online.search(text)
                    if m:
                        info["line"] = m.group("line")
                        text = text[:m.start()] + text[m.end():]
                    if warnings:
                        info["text"] = text
                        d = { "kind": "warning" }
                        d.update(info)
                        yield d
                    prefix = None
                continue

            # Undefined references

            m = self.re_reference.match(line)
            if m:
                if refs:
                    d = {
                        "kind": "warning",
                        "text": _("Reference `%s' undefined.") % m.group("ref"),
                        "file": pos[-1]
                        }
                    d.update(m.groupdict())
                    yield d
                continue

            m = self.re_label.match(line)
            if m:
                if refs:
                    d = {
                        "kind": "warning",
                        "file": pos[-1]
                        }
                    d.update(m.groupdict())
                    yield d
                continue

            # Other warnings

            if line.find("Warning") != -1:
                m = self.re_warning.match(line)
                if m:
                    info = m.groupdict()
                    info["file"] = pos[-1]
                    info["page"] = page
                    if info["pkg"] is None:
                        del info["pkg"]
                        prefix = ""
                    else:
                        prefix = ("(%s)" % info["pkg"])
                    # Continuation lines are aligned on the message text.
                    prefix = prefix.ljust(m.start("text"))
                    text = [info["text"]]
                continue

            # Bad box messages

            m = self.re_badbox.match(line)
            if m:
                if boxes:
                    mpos = { "file": pos[-1], "page": page }
                    m = self.re_atline.search(line)
                    if m:
                        md = m.groupdict()
                        for key in "line", "last":
                            if md[key]:
                                mpos[key] = md[key]
                        line = line[:m.start()]
                    d = {
                        "kind": "warning",
                        "text": line
                        }
                    d.update(mpos)
                    yield d
                skipping = 1
                continue

            # Missing characters in a font
            if misschars:
                m = self.re_misschar.match(line)
                if m:
                    d = {
                        "kind": "warning",
                        "uchar": m.group("uchar"),
                        "font": m.group("font"),
                        }
                    yield d
                    continue

            # If there is no message, track source names and page numbers.

            last_file = self.update_file(line, pos, last_file)
            page = self.update_page(line, page)

    def get_errors (self):
        return self.parse(errors=1)
    def get_boxes (self):
        return self.parse(boxes=1)
    def get_references (self):
        return self.parse(refs=1)
    def get_warnings (self):
        return self.parse(warnings=1)
    def get_misschars (self):
        return self.parse(misschars=1)

    def update_file (self, line, stack, last):
        """
        Parse the given line of log file for file openings and closings and
        update the list `stack'. Newly opened files are at the end, therefore
        stack[1] is the main source while stack[-1] is the current one. The
        first element, stack[0], contains the value None for errors that may
        happen outside the source. Return the last file from which text was
        read (the new stack top, or the one before the last closing
        parenthesis).

        The bottom element of the stack is never removed, so unbalanced
        closing parentheses in the log text cannot empty the stack.
        """
        for m in self.re_file.finditer(line):
            opened = m.group("file")
            if opened is not None:
                # Opening parenthesis followed by a (possibly empty) name.
                last = opened
                stack.append(last)
            elif len(stack) > 1:
                last = stack.pop()
            elif stack:
                # Only the bottom entry is left: keep it in place.
                last = stack[-1]
        return last

    def update_page (self, line, before):
        """
        Parse the given line and return the number of the page that is being
        built after that line, assuming the current page before the line was
        `before'.
        """
        numbers = self.re_page.findall(line)
        if not numbers:
            return before
        # "[n]" marks page n as shipped out, so page n+1 is now being built.
        return int(numbers[-1]) + 1