Blame lib/dbtexmf/dblatex/grubber/bibtex.py

# This file is part of Rubber and thus covered by the GPL
# (c) Emmanuel Beffara, 2002--2006
"""
BibTeX support for Rubber

This module is a special one: it is triggered by the macros \\bibliography and
\\bibliographystyle and not as a package, so the main system knows about it.
The module provides the following commands:

  path <dir> = adds <dir> to the search path for databases
  stylepath <dir> = adds <dir> to the search path for styles
"""
Packit 0f19cf
Packit 0f19cf
# Stop python 2.2 from calling "yield" statements syntax errors.
Packit 0f19cf
from __future__ import generators
Packit 0f19cf
Packit 0f19cf
import os, sys
Packit 0f19cf
from os.path import *
Packit 0f19cf
import re, string
Packit 0f19cf
import subprocess
Packit Service f3de8e
from io import open
Packit 0f19cf
Packit 0f19cf
#from grubber import _
Packit 0f19cf
#from grubber import *
Packit Service f3de8e
from dbtexmf.dblatex.grubber.msg import _, msg
Packit Service f3de8e
from dbtexmf.dblatex.grubber.plugins import TexModule
Packit 0f19cf
Packit 0f19cf
# Patterns used on .aux files: the databases named by \bibdata and the keys
# named by \citation.
re_bibdata = re.compile(r"\\bibdata{(?P<data>.*)}")
re_citation = re.compile(r"\\citation{(?P<cite>.*)}")

# Pattern used on the LaTeX log to spot citations that are still undefined.
re_undef = re.compile(r"LaTeX Warning: Citation `(?P<cite>.*)' .*undefined.*")

# The regular expression that identifies errors in BibTeX log files is heavily
# heuristic. The remark is that all error messages end with a text of the form
# "---line xxx of file yyy" or "---while reading file zzz". The actual error
# is either the text before the dashes or the text on the previous line.

re_error = re.compile(
    r"---(line (?P<line>[0-9]+) of|while reading) file (?P<file>.*)")
Packit 0f19cf
Packit 0f19cf
class BibTex(TexModule):
    """
    This class is the module that handles BibTeX in Rubber. It provides the
    functionality required when compiling documents as well as material to
    parse blg files for diagnostics.
    """

    def __init__(self, doc, dict, base=None):
        """
        Initialize the state of the module.

        doc  -- the document being built; its env, src_base, auxfile,
                logfile, log, sources, not_included and must_compile
                attributes are used by this class
        dict -- not used by this class; kept so the constructor signature
                stays unchanged
        base -- base name of the aux file BibTeX is run on; it defaults to
                the document base name (doc.src_base)
        """
        self.doc = doc
        self.env = doc.env
        self.base = base if base else doc.src_base

        self.bblfile = self.base + ".bbl"
        self.blgfile = self.base + ".blg"
        self.auxfile = self.base + ".aux"

        # Both search paths start empty; they are only filled through
        # do_path() and do_stylepath().
        self.bib_path = []
        self.bst_path = []

        self.undef_cites = None
        self.used_cites = None
        # Defined up front so that bibtex_needed() and set_style() can be
        # called even if pre_compile() has not run yet.
        self.prev_dbs = None
        self.bst_file = None
        self.style = None
        self.set_style("plain")
        self.db = {}
        self.sorted = 1
        self.run_needed = 0

    #
    # The following methods are used to specify the various datafiles that
    # BibTeX uses.
    #

    def do_path(self, path):
        """
        Add a directory to the search path for bibliography databases.
        """
        self.bib_path.append(self.doc.abspath(path))

    def do_stylepath(self, path):
        """
        Add a directory to the search path for bibliography styles.
        """
        self.bst_path.append(self.doc.abspath(path))

    def do_sorted(self, mode):
        """
        Choose the order of the citation list built by parse_aux(): sorted
        by key when mode is "true", "yes" or "1", otherwise in order of
        first appearance in the aux files.
        """
        self.sorted = mode in ("true", "yes", "1")

    def add_db(self, name):
        """
        Register a bibliography database file.

        The first "<name>.bib" found in the database search path is recorded
        in self.db and declared as a source of the document. Nothing happens
        when no such file is found.
        """
        for directory in self.bib_path:
            bib = os.path.join(directory, name + ".bib")
            if os.path.exists(bib):
                self.db[name] = bib
                # NOTE(review): DependLeaf is not imported in the visible
                # part of this file (the "from grubber import *" line is
                # commented out) -- confirm where it should come from.
                self.doc.sources[bib] = DependLeaf(self.env, bib)
                self.doc.not_included.append(bib)
                return

    def set_style(self, style):
        """
        Define the bibliography style used. This method is called when
        \\bibliographystyle is found. If the style file is found in the
        style search path, it is considered a dependency; the dependency on
        the previous style file, if any, is dropped.
        """
        if self.style:
            old_bst = self.style + ".bst"
            if os.path.exists(old_bst) and old_bst in self.doc.sources:
                del self.doc.sources[old_bst]
            # The previous style is registered under its location in the
            # search path, which generally differs from the bare
            # "<style>.bst" name checked above: drop that entry as well.
            previous = getattr(self, "bst_file", None)
            if previous and previous in self.doc.sources:
                del self.doc.sources[previous]

        self.style = style
        for directory in self.bst_path:
            new_bst = os.path.join(directory, style + ".bst")
            if os.path.exists(new_bst):
                self.bst_file = new_bst
                # NOTE(review): DependLeaf is not imported in the visible
                # part of this file -- confirm where it should come from.
                self.doc.sources[new_bst] = DependLeaf(self.env, new_bst)
                return
        self.bst_file = None

    #
    # The following methods are responsible for detecting when running BibTeX
    # is needed and actually running it.
    #

    def pre_compile(self):
        """
        Run BibTeX if needed before the first compilation. This function also
        checks if BibTeX has been run by someone else, and in this case it
        tells the system that it should recompile the document.

        Returns 0, or the result of run() when BibTeX is actually run.
        """
        if os.path.exists(self.doc.auxfile):
            self.used_cites, self.prev_dbs = self.parse_aux()
        else:
            self.prev_dbs = None
        if self.doc.log.lines:
            self.undef_cites = self.list_undefs()

        self.run_needed = self.first_run_needed()
        if self.doc.must_compile:
            # If a LaTeX compilation is going to happen, it is not necessary
            # to bother with BibTeX yet.
            return 0
        if self.run_needed:
            return self.run()

        # A .bbl newer than the LaTeX log means that the bibliography was
        # rebuilt after the last compilation.
        if (os.path.exists(self.bblfile) and
                os.path.getmtime(self.bblfile) >
                os.path.getmtime(self.doc.logfile)):
            self.doc.must_compile = 1
        return 0

    def first_run_needed(self):
        """
        The condition is only on the database files' modification dates, but
        it would be more clever to check if the results have changed.
        BibTeXing is also needed when the last run of BibTeX failed, and in
        the very particular case when the style has changed since last
        compilation.

        Returns 1 when BibTeX must be run, 0 otherwise.
        """
        if not os.path.exists(self.auxfile):
            return 0
        if not os.path.exists(self.blgfile):
            return 1

        dtime = os.path.getmtime(self.blgfile)
        for db in self.db.values():
            if os.path.getmtime(db) > dtime:
                msg.log(_("bibliography database %s was modified") % db,
                        pkg="bibtex")
                return 1

        with open(self.blgfile, "rt", encoding="latin-1") as blg:
            failed = any(re_error.search(line) for line in blg)
        if failed:
            msg.log(_("last BibTeXing failed"), pkg="bibtex")
            return 1

        if self.style_changed():
            return 1
        if self.bst_file and os.path.getmtime(self.bst_file) > dtime:
            msg.log(_("the bibliography style file was modified"), pkg="bibtex")
            return 1
        return 0

    def parse_aux(self):
        """
        Parse the aux files and return the list of all defined citations and
        the list of databases used.

        The aux file of the document is read first, then the aux file of
        this module when it is a different one. The citations are sorted by
        key when self.sorted is true, otherwise they are given in order of
        first appearance. The database list is always sorted.
        """
        rank = 0
        cites = {}
        dbs = []
        auxfiles = [self.doc.auxfile]
        if self.auxfile != self.doc.auxfile:
            auxfiles.append(self.auxfile)

        for auxname in auxfiles:
            with open(auxname, "rt", encoding="latin-1") as aux:
                for line in aux:
                    m = re_citation.match(line)
                    if m:
                        cite = m.group("cite")
                        if cite not in cites:
                            rank = rank + 1
                            cites[cite] = rank
                        continue
                    m = re_bibdata.match(line)
                    if m:
                        dbs.extend(m.group("data").split(","))
        dbs.sort()

        if self.sorted:
            return sorted(cites), dbs
        # Ranks are unique, so sorting on them restores the order in which
        # the citations first appeared.
        return sorted(cites, key=cites.get), dbs

    def list_undefs(self):
        """
        Return the sorted list of all undefined citations reported in the
        lines of the document log.
        """
        undefined = set()
        for line in self.doc.log.lines:
            match = re_undef.match(line)
            if match:
                undefined.add(match.group("cite"))
        return sorted(undefined)

    def post_compile(self):
        """
        This method runs BibTeX if needed to solve undefined citations. If it
        was run, then force a new LaTeX compilation.

        Returns 0 when nothing had to be done, otherwise the result of run().
        """
        if not self.bibtex_needed():
            msg.log(_("no BibTeXing needed"), pkg="bibtex")
            return 0
        return self.run()

    def run(self):
        """
        This method actually runs BibTeX with the appropriate environment
        variables set.

        Returns 1 when BibTeX exits with a non-zero status. Otherwise it
        requests a new LaTeX compilation and returns 0.
        """
        msg.progress(_("running BibTeX on %s") % self.base)
        # NOTE(review): the search paths start empty in this class, so the
        # "!= 1" tests below skip the export when exactly one directory was
        # added -- confirm that this is intended.
        if len(self.bib_path) != 1:
            os.environ["BIBINPUTS"] = ":".join(
                self.bib_path + [os.getenv("BIBINPUTS", "")])
        if len(self.bst_path) != 1:
            os.environ["BSTINPUTS"] = ":".join(
                self.bst_path + [os.getenv("BSTINPUTS", "")])
        rc = subprocess.call(["bibtex", self.base], stdout=msg.stdout)
        if rc != 0:
            msg.error(_("There were errors making the bibliography."))
            return 1
        self.run_needed = 0
        self.doc.must_compile = 1
        return 0

    def bibtex_needed(self):
        """
        Return true if BibTeX must be run.

        The lists of databases, of used citations and of undefined citations
        kept on the instance are refreshed as a side effect.
        """
        if self.run_needed:
            return 1
        msg.log(_("checking if BibTeX must be run..."), pkg="bibtex")

        newcites, dbs = self.parse_aux()

        # If there was a list of databases, we check if it has changed. If it
        # has, we have to rerun.

        if self.prev_dbs is not None and self.prev_dbs != dbs:
            msg.log(_("the set of databases changed"), pkg="bibtex")
            self.prev_dbs = dbs
            self.used_cites = newcites
            self.undef_cites = self.list_undefs()
            return 1
        self.prev_dbs = dbs

        # If there was a list of used citations, we check if it has
        # changed. If it has, we have to rerun.

        if self.used_cites and newcites != self.used_cites:
            msg.log(_("the list of citations changed"), pkg="bibtex")
            self.used_cites = newcites
            self.undef_cites = self.list_undefs()
            return 1
        self.used_cites = newcites

        # If there was a list of undefined citations, we check if it has
        # changed. If it has and it is not empty, we have to rerun.

        if self.undef_cites:
            new = self.list_undefs()
            if new == []:
                msg.log(_("no more undefined citations"), pkg="bibtex")
                self.undef_cites = new
            else:
                known = self.undef_cites
                has_new = any(cite not in known for cite in new)
                self.undef_cites = new
                if has_new:
                    msg.log(_("there are new undefined citations"),
                            pkg="bibtex")
                    return 1
                msg.log(_("there is no new undefined citation"), pkg="bibtex")
                return 0
        else:
            self.undef_cites = self.list_undefs()

        # At this point we don't know if undefined citations changed. If
        # BibTeX has not been run before (i.e. there is no log file) we know
        # that it has to be run now.

        if not os.path.exists(self.blgfile):
            msg.log(_("no BibTeX log file"), pkg="bibtex")
            return 1

        # Here, BibTeX has been run before but we don't know if undefined
        # citations changed.

        if self.undef_cites == []:
            msg.log(_("no undefined citations"), pkg="bibtex")
            return 0

        if os.path.getmtime(self.blgfile) < os.path.getmtime(self.doc.logfile):
            msg.log(_("BibTeX's log is older than the main log"), pkg="bibtex")
            return 1

        return 0

    def clean(self):
        """
        Ask the document to remove the files with suffixes .bbl and .blg.
        """
        self.doc.remove_suffixes([".bbl", ".blg"])

    #
    # The following methods extract information from BibTeX log files.
    #

    def style_changed(self):
        """
        Read the log file if it exists and check if the style used is the one
        specified in the source. This supposes that the style is mentioned on
        a line with the form 'The style file: foo.bst'.

        Returns 1 when a different style is found in the log, 0 otherwise.
        """
        if not os.path.exists(self.blgfile):
            return 0
        prefix = "The style file: "
        with open(self.blgfile, "rt", encoding="latin-1") as log:
            for line in log:
                if not line.startswith(prefix):
                    continue
                # Keep what lies between the prefix and the last four
                # characters (the ".bst" extension).
                if line.rstrip()[len(prefix):-4] != self.style:
                    msg.log(_("the bibliography style was changed"),
                            pkg="bibtex")
                    return 1
        return 0

    def get_errors(self):
        """
        Read the log file, identify error messages and report them.

        This is a generator. Each error is a dictionary with the keys "pkg",
        "kind", "text", "line" and "file", where "line" is the line number
        as found in the log (a string of digits) or None, and "file" is
        replaced by the registered database path when the name is known.
        """
        if not os.path.exists(self.blgfile):
            return
        with open(self.blgfile, "rt", encoding="latin-1") as log:
            last_line = ""
            for line in log:
                m = re_error.search(line)
                if m:
                    # TODO: it would be possible to report the offending code.
                    # The message is the text before the dashes or, when the
                    # dashes start the line, the previous line.
                    if m.start() == 0:
                        text = last_line.strip()
                    else:
                        text = line[:m.start()].strip()
                    d = {
                        "pkg": "bibtex",
                        "kind": "error",
                        "text": text,
                    }
                    d.update(m.groupdict())

                    # BibTeX does not report the path of the database in its
                    # log.

                    name = d["file"]
                    if name[-4:] == ".bib":
                        name = name[:-4]
                    if name in self.db:
                        d["file"] = self.db[name]
                    elif name + ".bib" in self.db:
                        d["file"] = self.db[name + ".bib"]
                    yield d
                # The loop variable is left untouched above, so this is
                # always the text of the line that was just read.
                last_line = line
Packit 0f19cf
Packit 0f19cf
Packit 0f19cf
class Module(BibTex):
    """
    Module to load to handle a bibtex.

    It inherits everything from BibTex and adds nothing of its own.
    """