Tree - source-git/dblatex - CentOS Git server

source-git / dblatex

Files

Commit: 5164a56ebc6388fa3dd4b8a59a84a886ee63eb8b
Blob Blame History Raw
#
# Dblatex parser. Its role is to:
# - encode the raw tex file to the expected output encoding, taking care of
#   the special characters that must be escaped,
# - convert the document images to the appropriate format if needed
#
import sys
import os
import re
from io import open

from dbtexmf.dblatex.rawparse import RawLatexParser, RawUtfParser
from dbtexmf.dblatex.rawverb import VerbParser
from dbtexmf.dblatex.xetex.codec import XetexCodec
from dbtexmf.core.imagedata import *


class RawLatex:
    """Main latex file parser.

    Processes a raw TeX file line by line: when an output format is set,
    the image file names referenced on the line are handed to the image
    converter; the line is then run through the configured chain of
    parsers and the result is written to the output file.
    """

    def __init__(self):
        # Matches the commands that reference an image file. The second
        # group captures the text between the first pair of braces that
        # follows the command, i.e. the image file name.
        self.figre = re.compile(br"(\\includegraphics[\[]?|"
                                br"\\begin{overpic}|"
                                br"\\imgexits)[^{]*{([^}]*)}")
        self.image = Imagedata()
        self.parsers = []
        self.format = None
        self.backend = None

    def set_fig_paths(self, paths):
        """Store `paths` on the image converter (its `paths` attribute)."""
        self.image.paths = paths

    def _read_params(self, input):
        """Return the entries of the %%<params> block of the `input` file.

        Lines before the "%%<params>" marker are ignored and reading stops
        at the "%%</params>" marker. On each line in between, the first
        token is ignored, the second is the parameter name and the third
        its value.
        """
        params = {}
        started = False
        # The context manager guarantees the file is closed, even when the
        # loop is left early by the closing marker.
        with open(input, "rt", encoding="latin-1") as f:
            for line in f:
                if not started:
                    started = line.startswith("%%<params>")
                    continue
                if line.startswith("%%</params>"):
                    break
                p = line.split()
                # Skip blank or incomplete lines instead of failing with
                # an IndexError.
                if len(p) >= 3:
                    params[p[1]] = p[2]
        return params

    def set_parsers(self, input, output_encoding=""):
        """Build the parser chain used by parse().

        With the "xetex" backend the encoding is forced to "utf8" and a
        XetexCodec is given to the latex parser. Otherwise, when no
        `output_encoding` is passed, it is taken from the "latex.encoding"
        entry of the params block of the `input` file, defaulting to
        "latin-1".
        """
        codec = None
        if self.backend == "xetex":
            output_encoding = "utf8"
            codec = XetexCodec()
        elif not output_encoding:
            params = self._read_params(input)
            output_encoding = params.get("latex.encoding", "latin-1")

        self.parsers = [VerbParser(output_encoding=output_encoding),
                        RawLatexParser(codec=codec,
                                       output_encoding=output_encoding),
                        RawUtfParser(output_encoding=output_encoding)]
        self.image.set_encoding(output_encoding or "latin-1")

    def set_format(self, format, backend=None):
        """Record the output format and backend, and pass them to the
        image converter."""
        # Adjust the actual format from backend
        if format == "pdf" and backend == "dvips":
            format = "ps"
        self.format = format
        self.backend = backend
        self.image.set_format(format, backend)

    def fig_format(self, format):
        """Set the `input_format` attribute of the image converter."""
        # TODO: consistency check?
        self.image.input_format = format

    def parse(self, input, output):
        """Convert the `input` file into the `output` file.

        Both files are handled as bytes. A line is dropped from the output
        as soon as one of the parsers returns an empty result for it.
        """
        self.set_parsers(input)
        # Both files are closed even if a parser raises in the middle of
        # the conversion.
        with open(input, "rb") as f, open(output, "wb") as o:
            for line in f:
                if self.format:
                    line = self.figconvert(line)
                for p in self.parsers:
                    line = p.parse(line)
                    if not line:
                        break
                if line:
                    o.write(line)

    def figconvert(self, line):
        """Return `line` with each image file name replaced by the name
        returned by the image converter, when it differs."""
        # Is there one or more images included here
        mlist = self.figre.findall(line)
        if not mlist:
            return line

        # Try to convert each found image
        for m in mlist:
            fig = m[1]
            newfig = self.image.convert(fig)

            # If something done, replace the figure in the tex file.
            # A literal replacement is used on purpose: file names may
            # contain regex metacharacters ('+', '(', '.', ...) and the new
            # name may contain backslashes, which re.sub() would interpret.
            if newfig != fig:
                line = line.replace(b"{%s}" % fig, b"{%s}" % newfig)

        return line
            

def main():
    """Command-line entry point.

    Converts the file named by the first argument into the file named by
    the second one, with the current working directory registered as the
    only figure path.
    """
    converter = RawLatex()
    cwd = os.getcwd()
    converter.set_fig_paths([cwd])
    converter.parse(sys.argv[1], sys.argv[2])

# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
source-git / dblatex

Source Code

Files