# This file is part of Rubber and thus covered by the GPL
# (c) Emmanuel Beffara, 2002--2006
"""
LaTeX document building system for Rubber.

This module defines the class that parses the LaTeX log files.
"""
|
|
Packit |
5164a5 |
from __future__ import generators
|
|
Packit |
5164a5 |
|
|
Packit |
5164a5 |
import re
|
|
Packit |
5164a5 |
|
|
Packit |
5164a5 |
from msg import _, msg
|
|
Packit |
5164a5 |
|
|
Packit |
5164a5 |
class LogParser:
    """
    This class performs all the extraction of information from the log file.
    For efficiency, the instances contain the whole file as a list of strings
    so that it can be read several times with no disk access.
    """

    # First line of a log file written by a TeX-family engine.
    re_loghead = re.compile(r"This is [0-9a-zA-Z-]*(TeX|Omega)")
    # Messages asking for another compilation pass.
    re_rerun = re.compile(
        r"(LaTeX|Package longtable|Package bibtopic) Warning:.*Rerun")
    re_rerun2 = re.compile(r"\(Changebar\).*Rerun")
    # A file being opened, "(name", or the closing ")" of a file.
    re_file = re.compile(r"(\((?P<file>[^ \n\t(){}]*)|\))")
    re_badbox = re.compile(r"(Ov|Und)erfull \\[hv]box ")
    # Line that ends an error report: "l.<num> <code>" or "<*>".
    re_line = re.compile(r"(l\.(?P<line>[0-9]+)( (?P<code>.*))?$|<\*>)")
    # Trailing control sequence, used to name an undefined one.
    re_cseq = re.compile(r".*(?P<seq>\\[^ ]*) ?$")
    re_page = re.compile(r"\[(?P<num>[0-9]+)\]")
    re_atline = re.compile(
        r"( detected| in paragraph)? at lines? "
        r"(?P<line>[0-9]*)(--(?P<last>[0-9]*))?")
    re_reference = re.compile(
        r"LaTeX Warning: Reference `(?P<ref>.*)' "
        r"on page (?P<page>[0-9]*) undefined on input line "
        r"(?P<line>[0-9]*)\.$")
    re_label = re.compile(r"LaTeX Warning: (?P<text>Label .*)$")
    re_warning = re.compile(
        r"(LaTeX|Package)( (?P<pkg>.*))? Warning: (?P<text>.*)$")
    re_online = re.compile(r"(; reported)? on input line (?P<line>[0-9]*)")
    re_ignored = re.compile(
        r"; all text was ignored after line (?P<line>[0-9]*).$")
    re_misschar = re.compile(
        r"Missing character: There is no (?P<uchar>[^ ]*) "
        r"in font (?P<font>.*)!")

    #-- Initialization {{{2

    def __init__(self):
        """Create a parser with no log loaded."""
        self.lines = []

    def read(self, name):
        """
        Read the specified log file, checking that it was produced by the
        right compiler.

        Returns 0 on success, 1 if the file is empty or its first line is
        not a recognised TeX banner, and 2 if the file cannot be opened.
        On success, self.lines holds every line after the banner; otherwise
        it is left empty.
        """
        self.lines = []
        try:
            log = open(name)
        except IOError:
            return 2
        # try/finally guarantees the handle is released on every path.
        try:
            header = log.readline()
            if not header or not self.re_loghead.match(header):
                return 1
            self.lines = log.readlines()
        finally:
            log.close()
        return 0

    #-- Process information {{{2

    def errors(self):
        """
        Returns true (1) if there was an error during the compilation,
        0 otherwise.
        """
        skipping = 0
        for line in self.lines:
            if line.strip() == "":
                # A blank line ends the text that follows a bad box message.
                skipping = 0
                continue
            if skipping:
                continue
            if self.re_badbox.match(line):
                skipping = 1
                continue
            # We check for the substring "pdfTeX warning" because pdfTeX
            # sometimes issues warnings (like undefined references) in the
            # form of errors...
            if line[0] == "!" and "pdfTeX warning" not in line:
                return 1
        return 0

    def run_needed(self):
        """
        Returns true (1) if LaTeX indicated that another compilation is
        needed, 0 otherwise.
        """
        for line in self.lines:
            if self.re_rerun.match(line) or self.re_rerun2.match(line):
                return 1
        return 0

    #-- Information extraction {{{2

    def continued(self, line):
        """
        Check if a line in the log is continued on the next line. This is
        needed because TeX breaks messages at 79 characters per line. We make
        this into a method because the test is slightly different in Metapost.
        """
        return len(line) == 79

    def parse(self, errors=0, boxes=0, refs=0, warnings=0, misschars=0):
        """
        Parse the log file for relevant information. The named arguments are
        booleans that indicate which information should be extracted:
        - errors: all errors
        - boxes: bad boxes
        - refs: warnings about references
        - warnings: all other warnings
        - misschars: characters missing from a font
        The function returns a generator. Each generated item is a dictionary
        that contains (some of) the following entries:
        - kind: the kind of information ("error", "abort" or "warning")
        - text: the text of the error or warning
        - code: the piece of code that caused an error
        - why: for "abort" items, the text following the "***" marker
        - uchar, font: for missing character reports
        - file, line, last, page, pkg: position information
        """
        if not self.lines:
            return
        last_file = None
        pos = [last_file]   # stack of open files, pos[0] is a None sentinel
        page = 1
        parsing = 0         # 1 if we are parsing an error's text
        skipping = 0        # 1 if we are skipping text until an empty line
        prefix = None       # the prefix for warning messages from packages
        accu = ""           # accumulated text from the previous line
        error = ""          # text of the error currently being parsed
        text = []           # lines of the warning currently being collected
        info = {}           # fields of the warning currently being collected
        for line in self.lines:
            # Remove the line feed only: a final line without a newline must
            # not lose its last character.
            line = line.rstrip("\n")

            # TeX breaks messages at 79 characters, just to make parsing
            # trickier...

            if self.continued(line):
                accu += line
                continue
            line = accu + line
            accu = ""

            # Text that should be skipped (from bad box messages)

            if prefix is None and line == "":
                skipping = 0
                continue

            if skipping:
                continue

            # Errors (including aborted compilation)

            if parsing:
                if error == "Undefined control sequence.":
                    # This is a special case in order to report which control
                    # sequence is undefined.
                    m = self.re_cseq.match(line)
                    if m:
                        error = ("Undefined control sequence %s."
                                 % m.group("seq"))
                m = self.re_line.match(line)
                if m:
                    parsing = 0
                    skipping = 1
                    pdfTeX = "pdfTeX warning" in error
                    if (pdfTeX and warnings) or (errors and not pdfTeX):
                        if pdfTeX:
                            d = {
                                "kind": "warning",
                                "pkg": "pdfTeX",
                                "text": error[error.find(":") + 2:],
                            }
                        else:
                            d = {
                                "kind": "error",
                                "text": error,
                            }
                        d.update(m.groupdict())
                        m = self.re_ignored.search(error)
                        if m:
                            d["file"] = last_file
                            # The line comes from the message itself, so the
                            # code excerpt does not apply.
                            if "code" in d:
                                del d["code"]
                            d.update(m.groupdict())
                        elif pos[-1] is None:
                            d["file"] = last_file
                        else:
                            d["file"] = pos[-1]
                        yield d
                # startswith() instead of line[0]: the line can be empty here
                # when a warning prefix is still pending.
                elif line.startswith("!"):
                    error = line[2:]
                elif line[0:3] == "***":
                    parsing = 0
                    skipping = 1
                    if errors:
                        yield {
                            "kind": "abort",
                            "text": error,
                            "why": line[4:],
                            "file": last_file,
                        }
                elif line[0:15] == "Type X to quit ":
                    parsing = 0
                    skipping = 0
                    if errors:
                        yield {
                            "kind": "error",
                            "text": error,
                            "file": pos[-1],
                        }
                continue

            if line.startswith("!"):
                error = line[2:]
                parsing = 1
                continue

            if line == "Runaway argument?":
                error = line
                parsing = 1
                continue

            # Long warnings

            if prefix is not None:
                if line[:len(prefix)] == prefix:
                    text.append(line[len(prefix):].strip())
                else:
                    # First line without the prefix: the warning is complete.
                    message = " ".join(text)
                    m = self.re_online.search(message)
                    if m:
                        info["line"] = m.group("line")
                        message = message[:m.start()] + message[m.end():]
                    if warnings:
                        info["text"] = message
                        d = {"kind": "warning"}
                        d.update(info)
                        yield d
                    prefix = None
                continue

            # Undefined references

            m = self.re_reference.match(line)
            if m:
                if refs:
                    d = {
                        "kind": "warning",
                        "text": (_("Reference `%s' undefined.")
                                 % m.group("ref")),
                        "file": pos[-1],
                    }
                    d.update(m.groupdict())
                    yield d
                continue

            m = self.re_label.match(line)
            if m:
                if refs:
                    d = {
                        "kind": "warning",
                        "file": pos[-1],
                    }
                    d.update(m.groupdict())
                    yield d
                continue

            # Other warnings

            if line.find("Warning") != -1:
                m = self.re_warning.match(line)
                if m:
                    info = m.groupdict()
                    info["file"] = pos[-1]
                    info["page"] = page
                    if info["pkg"] is None:
                        del info["pkg"]
                        prefix = ""
                    else:
                        prefix = "(%s)" % info["pkg"]
                    # Continuation lines are padded up to the column where
                    # the message text started.
                    prefix = prefix.ljust(m.start("text"))
                    text = [info["text"]]
                continue

            # Bad box messages

            m = self.re_badbox.match(line)
            if m:
                if boxes:
                    mpos = {"file": pos[-1], "page": page}
                    m = self.re_atline.search(line)
                    if m:
                        md = m.groupdict()
                        for key in ("line", "last"):
                            if md[key]:
                                mpos[key] = md[key]
                        line = line[:m.start()]
                    d = {
                        "kind": "warning",
                        "text": line,
                    }
                    d.update(mpos)
                    yield d
                skipping = 1
                continue

            # Missing characters in a font

            if misschars:
                m = self.re_misschar.match(line)
                if m:
                    yield {
                        "kind": "warning",
                        "uchar": m.group("uchar"),
                        "font": m.group("font"),
                    }
                    continue

            # If there is no message, track source names and page numbers.

            last_file = self.update_file(line, pos, last_file)
            page = self.update_page(line, page)

    def get_errors(self):
        """Return a generator over the errors found in the log."""
        return self.parse(errors=1)

    def get_boxes(self):
        """Return a generator over the bad box messages found in the log."""
        return self.parse(boxes=1)

    def get_references(self):
        """Return a generator over the reference warnings found in the log."""
        return self.parse(refs=1)

    def get_warnings(self):
        """Return a generator over the other warnings found in the log."""
        return self.parse(warnings=1)

    def get_misschars(self):
        """Return a generator over the missing character reports."""
        return self.parse(misschars=1)

    def update_file(self, line, stack, last):
        """
        Parse the given line of log file for file openings and closings and
        update the list `stack'. Newly opened files are at the end, therefore
        stack[1] is the main source while stack[-1] is the current one. The
        first element, stack[0], contains the value None for errors that may
        happen outside the source. Return the last file from which text was
        read (the new stack top, or the one before the last closing
        parenthesis).

        A closing parenthesis that has no matching opened file is ignored, so
        that the stack[0] sentinel is never removed.
        """
        m = self.re_file.search(line)
        while m:
            if line[m.start()] == "(":
                last = m.group("file")
                stack.append(last)
            elif len(stack) > 1:
                last = stack.pop()
            line = line[m.end():]
            m = self.re_file.search(line)
        return last

    def update_page(self, line, before):
        """
        Parse the given line and return the number of the page that is being
        built after that line, assuming the current page before the line was
        `before'.
        """
        numbers = self.re_page.findall(line)
        if not numbers:
            return before
        # The last "[n]" on the line is the page that was just shipped out.
        return int(numbers[-1]) + 1
|
|
Packit |
5164a5 |
|