# This file is part of Rubber and thus covered by the GPL
# (c) Emmanuel Beffara, 2002--2006
"""
LaTeX document building system for Rubber.
This module defines the class that parses the LaTeX log files.
"""
from __future__ import generators
import re
from io import open
from dbtexmf.dblatex.grubber.msg import _, msg
class LogParser:
    """
    This class performs all the extraction of information from the log file.
    For efficiency, the instances contain the whole file as a list of strings
    so that it can be read several times with no disk access.

    The list `lines' holds every line of the log except the first one (the
    banner checked by `read'), each still carrying its line feed.
    """

    # All patterns are raw strings so that regex escapes such as `\(' are
    # not interpreted (and warned about) as string escapes.
    re_loghead = re.compile(r"This is [0-9a-zA-Z-]*(TeX|Omega)")
    re_rerun = re.compile(
        r"(LaTeX|Package longtable|Package bibtopic) Warning:.*Rerun")
    re_rerun2 = re.compile(r"\(Changebar\).*Rerun")
    re_file = re.compile(r"(\((?P<file>[^ \n\t(){}]*)|\))")
    re_badbox = re.compile(r"(Ov|Und)erfull \\[hv]box ")
    re_line = re.compile(r"(l\.(?P<line>[0-9]+)( (?P<code>.*))?$|<\*>)")
    re_cseq = re.compile(r".*(?P<seq>\\[^ ]*) ?$")
    re_page = re.compile(r"\[(?P<num>[0-9]+)\]")
    re_atline = re.compile(
        r"( detected| in paragraph)? at lines? "
        r"(?P<line>[0-9]*)(--(?P<last>[0-9]*))?")
    re_reference = re.compile(
        r"LaTeX Warning: Reference `(?P<ref>.*)' "
        r"on page (?P<page>[0-9]*) undefined on input line (?P<line>[0-9]*)\.$")
    re_label = re.compile(r"LaTeX Warning: (?P<text>Label .*)$")
    re_warning = re.compile(
        r"(LaTeX|Package)( (?P<pkg>.*))? Warning: (?P<text>.*)$")
    re_online = re.compile(r"(; reported)? on input line (?P<line>[0-9]*)")
    re_ignored = re.compile(
        r"; all text was ignored after line (?P<line>[0-9]*).$")
    re_misschar = re.compile(
        r"Missing character: There is no (?P<uchar>[^ ]*) in font (?P<font>.*)!")

    #-- Initialization {{{2

    def __init__ (self):
        self.lines = []

    def read (self, name):
        """
        Read the specified log file, checking that it was produced by the
        right compiler. The return value is false (0) on success and true
        otherwise: 1 if the file is empty or its first line is not a TeX
        banner, 2 if the file cannot be opened. On failure `lines' is left
        empty.
        """
        self.lines = []
        try:
            # Logs may contain bytes that are invalid in the locale encoding
            # (e.g. excerpts of 8-bit sources); replace them instead of
            # failing with a decoding error while reading.
            logfile = open(name, "rt", errors="replace")
        except IOError:
            return 2
        # The context manager closes the file even if reading fails.
        with logfile:
            banner = logfile.readline()
            if not banner or not self.re_loghead.match(banner):
                return 1
            self.lines = logfile.readlines()
        return 0

    #-- Process information {{{2

    def errors (self):
        """
        Returns true if there was an error during the compilation.
        """
        skipping = 0
        for line in self.lines:
            # A blank line ends the text that follows a bad box message.
            if line.strip() == "":
                skipping = 0
                continue
            if skipping:
                continue
            if self.re_badbox.match(line):
                skipping = 1
                continue
            if line.startswith("!"):
                # We check for the substring "pdfTeX warning" because pdfTeX
                # sometimes issues warnings (like undefined references) in the
                # form of errors...
                if "pdfTeX warning" not in line:
                    return 1
        return 0

    def run_needed (self):
        """
        Returns true if LaTeX indicated that another compilation is needed.
        """
        for line in self.lines:
            if self.re_rerun.match(line):
                return 1
            if self.re_rerun2.match(line):
                return 1
        return 0

    #-- Information extraction {{{2

    def continued (self, line):
        """
        Check if a line in the log is continued on the next line. This is
        needed because TeX breaks messages at 79 characters per line. We make
        this into a method because the test is slightly different in Metapost.
        """
        return len(line) == 79

    def _finish_warning (self, info, text):
        """
        Build the dictionary reported for a (possibly multi-line) warning.
        `info' holds the fields collected when the warning started and `text'
        is the list of its text fragments. A trailing "on input line N" is
        moved out of the text into the "line" entry.
        """
        text = " ".join(text)
        m = self.re_online.search(text)
        if m:
            info["line"] = m.group("line")
            text = text[:m.start()] + text[m.end():]
        info["text"] = text
        d = { "kind": "warning" }
        d.update( info )
        return d

    def parse (self, errors=0, boxes=0, refs=0, warnings=0, misschars=0):
        """
        Parse the log file for relevant information. The named arguments are
        booleans that indicate which information should be extracted:
        - errors: all errors
        - boxes: bad boxes
        - refs: warnings about references
        - warnings: all other warnings
        - misschars: characters missing from a font
        The function returns a generator. Each generated item is a dictionary
        that contains (some of) the following entries:
        - kind: the kind of information ("error", "abort", "warning")
        - text: the text of the error or warning
        - code: the piece of code that caused an error
        - why: the reason given for an aborted compilation
        - uchar, font: the character and font of a missing character
        - file, line, last, pkg, page: position information.
        """
        if not self.lines:
            return
        last_file = None
        pos = [last_file]
        page = 1
        parsing = 0    # 1 if we are parsing an error's text
        skipping = 0   # 1 if we are skipping text until an empty line
        prefix = None  # the prefix for warning messages from packages
        accu = ""      # accumulated text from the previous line

        for line in self.lines:
            # Remove the line feed, if any: the last line of a log may lack
            # one and must not lose its final character.
            if line.endswith("\n"):
                line = line[:-1]

            # TeX breaks messages at 79 characters, just to make parsing
            # trickier...
            if self.continued(line):
                accu += line
                continue
            line = accu + line
            accu = ""

            # Text that should be skipped (from bad box messages)
            if prefix is None and line == "":
                skipping = 0
                continue
            if skipping:
                continue

            # Errors (including aborted compilation)
            if parsing:
                if error == "Undefined control sequence.":
                    # This is a special case in order to report which control
                    # sequence is undefined.
                    m = self.re_cseq.match(line)
                    if m:
                        error = "Undefined control sequence %s." % m.group("seq")
                m = self.re_line.match(line)
                if m:
                    parsing = 0
                    skipping = 1
                    pdfTeX = "pdfTeX warning" in error
                    if (pdfTeX and warnings) or (errors and not pdfTeX):
                        if pdfTeX:
                            d = {
                                "kind": "warning",
                                "pkg": "pdfTeX",
                                "text": error[error.find(":")+2:]
                            }
                        else:
                            d = {
                                "kind": "error",
                                "text": error
                            }
                        d.update( m.groupdict() )
                        m = self.re_ignored.search(error)
                        if m:
                            d["file"] = last_file
                            if "code" in d:
                                del d["code"]
                            d.update( m.groupdict() )
                        elif pos[-1] is None:
                            d["file"] = last_file
                        else:
                            d["file"] = pos[-1]
                        yield d
                # startswith() rather than line[0]: the line can be empty
                # here when an error started while a warning was pending.
                elif line.startswith("!"):
                    error = line[2:]
                elif line[0:3] == "***":
                    parsing = 0
                    skipping = 1
                    if errors:
                        yield {
                            "kind": "abort",
                            "text": error,
                            "why" : line[4:],
                            "file": last_file
                        }
                elif line[0:15] == "Type X to quit ":
                    parsing = 0
                    skipping = 0
                    if errors:
                        yield {
                            "kind": "error",
                            "text": error,
                            "file": pos[-1]
                        }
                continue

            if line.startswith("!"):
                error = line[2:]
                parsing = 1
                continue

            if line == "Runaway argument?":
                error = line
                parsing = 1
                continue

            # Long warnings: continuation lines start with the prefix, any
            # other line ends the warning (and is itself consumed).
            if prefix is not None:
                if line[:len(prefix)] == prefix:
                    text.append(line[len(prefix):].strip())
                else:
                    if warnings:
                        yield self._finish_warning(info, text)
                    prefix = None
                continue

            # Undefined references
            m = self.re_reference.match(line)
            if m:
                if refs:
                    d = {
                        "kind": "warning",
                        "text": _("Reference `%s' undefined.") % m.group("ref"),
                        "file": pos[-1]
                    }
                    d.update( m.groupdict() )
                    yield d
                continue

            m = self.re_label.match(line)
            if m:
                if refs:
                    d = {
                        "kind": "warning",
                        "file": pos[-1]
                    }
                    d.update( m.groupdict() )
                    yield d
                continue

            # Other warnings
            if "Warning" in line:
                m = self.re_warning.match(line)
                if m:
                    info = m.groupdict()
                    info["file"] = pos[-1]
                    info["page"] = page
                    if info["pkg"] is None:
                        del info["pkg"]
                        prefix = ""
                    else:
                        prefix = ("(%s)" % info["pkg"])
                    # Continuation lines are aligned with the start of the
                    # message text, so pad the prefix up to that column.
                    prefix = prefix.ljust(m.start("text"))
                    text = [info["text"]]
                continue

            # Bad box messages
            m = self.re_badbox.match(line)
            if m:
                if boxes:
                    mpos = { "file": pos[-1], "page": page }
                    m = self.re_atline.search(line)
                    if m:
                        md = m.groupdict()
                        for key in "line", "last":
                            if md[key]:
                                mpos[key] = md[key]
                        line = line[:m.start()]
                    d = {
                        "kind": "warning",
                        "text": line
                    }
                    d.update( mpos )
                    yield d
                skipping = 1
                continue

            # Missing characters in a font
            if misschars:
                m = self.re_misschar.match(line)
                if m:
                    d = {
                        "kind": "warning",
                        "uchar": m.group("uchar"),
                        "font": m.group("font"),
                    }
                    yield d
                    continue

            # If there is no message, track source names and page numbers.
            last_file = self.update_file(line, pos, last_file)
            page = self.update_page(line, page)

        # A warning still pending when the log ends would otherwise be lost.
        if prefix is not None and warnings:
            yield self._finish_warning(info, text)

    def get_errors (self):
        """Return a generator of the errors found in the log."""
        return self.parse(errors=1)

    def get_boxes (self):
        """Return a generator of the bad box warnings found in the log."""
        return self.parse(boxes=1)

    def get_references (self):
        """Return a generator of the reference warnings found in the log."""
        return self.parse(refs=1)

    def get_warnings (self):
        """Return a generator of the other warnings found in the log."""
        return self.parse(warnings=1)

    def get_misschars (self):
        """Return a generator of the missing character warnings."""
        return self.parse(misschars=1)

    def update_file (self, line, stack, last):
        """
        Parse the given line of log file for file openings and closings and
        update the list `stack'. Newly opened files are at the end, therefore
        stack[1] is the main source while stack[-1] is the current one. The
        first element, stack[0], contains the value None for errors that may
        happen outside the source. Return the last file from which text was
        read (the new stack top, or the one before the last closing
        parenthesis).

        Closing parentheses that do not match an opened file (ordinary
        parentheses in messages) never remove stack[0], so the stack cannot
        become empty.
        """
        m = self.re_file.search(line)
        while m:
            if line[m.start()] == '(':
                last = m.group("file")
                stack.append(last)
            else:
                if stack:
                    last = stack[-1]
                # Keep the bottom sentinel in place on unbalanced input.
                if len(stack) > 1:
                    del stack[-1]
            line = line[m.end():]
            m = self.re_file.search(line)
        return last

    def update_page (self, line, before):
        """
        Parse the given line and return the number of the page that is being
        built after that line, assuming the current page before the line was
        `before'.
        """
        ms = self.re_page.findall(line)
        if not ms:
            return before
        return int(ms[-1]) + 1