Blame lib/dbtexmf/dblatex/rawverb.py

Packit Service 76cb02
#
Packit Service 76cb02
# The role of the verbatim parser is to encode properly from UTF-8 verbatim
Packit Service 76cb02
# text to valid latin-1 text. Three goals must be met:
Packit Service 76cb02
#
Packit Service 76cb02
# - Just encode the characters, but don't escape latex characters like in normal
Packit Service 76cb02
#   text. This is why a dedicated latex encoder is used.
Packit Service 76cb02
# - When the characters are translated to macros, escape the whole sequence
Packit Service 76cb02
#   to allow TeX to execute the macro embedded in verbatim text.
Packit Service 76cb02
# - When the escape sequence is required, update the listing environment options
Packit Service 76cb02
#   if necessary.
Packit Service 76cb02
#
Packit Service 76cb02
import re
Packit Service cd7d79
from io import open
Packit Service 76cb02
Packit Service cd7d79
from dbtexmf.dblatex.texcodec import TexCodec
Packit Service cd7d79
from dbtexmf.dblatex.texcodec import tex_handler_counter
Packit Service cd7d79
from dbtexmf.dblatex.rawparse import RawUtfParser
Packit Service 76cb02
Packit Service 76cb02
Packit Service 76cb02
class VerbCodec(TexCodec):
    """TexCodec specialisation used for verbatim (listing) text.

    The codec remembers the escape delimiters ``pre`` and ``post`` and the
    output encoding, and forwards everything to ``TexCodec``.  When the
    output encoding is ``"utf8"``, ``decode()`` additionally wraps every
    character whose code point is above 255 between ``pre`` and ``post``.
    """

    def __init__(self, pre, post, errors="verbtex",
                 input_encoding="utf8", output_encoding="latin-1"):
        """Store the escape delimiters and initialise the base codec.

        pre, post       -- delimiters put around escaped sequences
        errors          -- name of the error handler given to TexCodec
        input_encoding  -- encoding of the text to decode
        output_encoding -- encoding of the produced text
        """
        # Attributes are set before the base initialiser runs, as the
        # original ordering did.
        self.pre = pre
        self.post = post
        self.output_encoding = output_encoding
        TexCodec.__init__(self, input_encoding, output_encoding,
                          errors=errors, pre=pre, post=post)

    def decode(self, text):
        """Decode ``text`` and, for utf8 output, escape wide characters.

        Returns the result of ``TexCodec.decode()`` unchanged unless the
        output encoding is ``"utf8"``.
        """
        decoded = TexCodec.decode(self, text)
        if self.output_encoding != "utf8":
            return decoded

        # Funnily listings cannot handle unicode characters greater than 255.
        # The loop just escapes them by wrapping with <pre> and <post> and
        # emulates the corresponding encoding exception by bumping the
        # error counter of this codec's handler.
        # NOTE(review): this concatenation assumes self.pre/self.post have
        # the same string type as the decoded characters; VerbParser hands
        # in bytes delimiters -- confirm against TexCodec.
        already_counted = tex_handler_counter[self._errors]
        pieces = []
        escaped = 0
        for char in decoded:
            if ord(char) > 255:
                pieces.append(self.pre + char + self.post)
                escaped += 1
            else:
                pieces.append(char)
        tex_handler_counter[self._errors] = already_counted + escaped
        return "".join(pieces)
Packit Service 76cb02
Packit Service 76cb02
Packit Service 76cb02
class VerbParser:
    """Line-by-line filter that re-encodes ``lst*`` listing environments.

    The parser is fed one line of bytes at a time through ``parse()``.
    Lines outside a ``\\begin{lst...}`` / ``\\end{lst...}`` pair are returned
    untouched.  Lines inside are buffered; once the end of the environment
    is seen, the buffered text is run through ``RawUtfParser`` with a
    ``VerbCodec`` and the rebuilt environment is returned, with an
    ``escapeinside`` option added when the codec reported errors and the
    listing did not define one itself.
    """

    def __init__(self, output_encoding="latin-1"):
        """Compile the patterns and build the default codec.

        output_encoding -- encoding handed to every VerbCodec created here
        """
        # The listing environment can be different from 'lstlisting'
        # but the rule is that it must begin with 'lst'
        self.start_re = re.compile(br"\\begin{lst[^}]*}")
        self.stop_re = re.compile(br"\\end{lst[^}]*}")
        self.esc_re = re.compile(br"escapeinside={([^}]*)}{([^}]*)}")
        # Text buffered for the listing being read; empty means that no
        # listing is currently open.
        self.block = b""
        self.encoding = output_encoding
        self.default_esc_start = b"<:"
        self.default_esc_stop = b":>"
        # Per-listing state, (re)set by parse_begin(); initialised here so
        # the attributes always exist.
        self.command = b""
        self.options = b""
        self.esc_start = b""
        self.esc_stop = b""
        self.default_codec = VerbCodec(self.default_esc_start,
                                       self.default_esc_stop,
                                       output_encoding=output_encoding)

    def parse(self, line):
        """Consume one line and return the bytes that are ready for output.

        Returns ``line`` itself outside of a listing, the text preceding
        ``\\begin{lst...}`` when a listing starts, ``b""`` while a listing
        is being buffered, and the whole rebuilt environment (plus whatever
        follows it on the line) when the listing ends.
        """
        if not self.block:
            m = self.start_re.search(line)
            if not m:
                return line
            return self.parse_begin(line, m)

        m = self.stop_re.search(line)
        if not m:
            return self.block_grow(line)
        return self.parse_end(line, m)

    def parse_begin(self, line, m):
        """Open a listing: record its command and options, start buffering.

        line -- the line containing the ``\\begin{lst...}`` command
        m    -- the ``start_re`` match object found in ``line``

        Returns the part of ``line`` located before the command.
        """
        preblock = line[:m.start()]
        self.command = m.group(0)
        rest = line[m.end():]
        # By default, no escape sequence defined yet
        self.esc_start = b""
        self.esc_stop = b""
        self.options = b""

        # If there are some options, look for escape specs.
        # startswith() is used instead of rest[0] == b"[": indexing bytes
        # gives an int on Python 3 (so the comparison could never be true)
        # and raises IndexError when nothing follows the command.
        if rest.startswith(b"["):
            end = rest.find(b"]") + 1
            self.options = rest[:end]
            rest = rest[end:]
            esc = self.esc_re.search(self.options)
            if esc:
                self.esc_start = esc.group(1)
                self.esc_stop = esc.group(2)

        self.block_grow(rest)
        return preblock

    def parse_end(self, line, m):
        """Close the listing, encode the buffered text and rebuild it.

        line -- the line containing the ``\\end{lst...}`` command
        m    -- the ``stop_re`` match object found in ``line``

        Returns the begin command, its (possibly extended) options, the
        encoded text, and ``line`` from the end command onwards.
        """
        self.block_grow(line[:m.start()])

        # The block is complete, find out the codec with escape sequence
        c = self.get_codec()
        c.clear_errors()

        # Now, parse/encode the block
        p = RawUtfParser(codec=c)
        text = p.parse(self.block)

        # Add the escape option if necessary, i.e. when the listing did not
        # define its own escape sequence and the codec reported errors.
        if not self.esc_start and c.get_errors() != 0:
            escopt = b"escapeinside={%s}{%s}" % (c.pre, c.post)
            if self.options:
                # Slices keep the comparison bytes-to-bytes; options[-2]
                # would be an int on Python 3.
                if self.options[-2:-1] != b",":
                    escopt = b"," + escopt
                # Insert before the closing bracket of the existing options
                self.options = self.options[:-1] + escopt + b"]"
            else:
                self.options = b"[" + escopt + b"]"

        block = self.command + self.options + text + line[m.start():]
        self.block = b""
        return block

    def block_grow(self, line):
        """Append ``line`` to the buffered listing text and return ``b""``."""
        self.block += line
        return b""

    def get_codec(self):
        """Return the VerbCodec to use for the buffered listing.

        When the listing defines its own ``escapeinside`` delimiters they
        are used.  Otherwise a start delimiter that does not occur in the
        buffered text is looked for, trying the default one first and then
        ``<0:``, ``<1:``, ...  The shared default codec is returned whenever
        the default delimiters apply.
        """
        # NOTE(review): the error-handler name given to VerbCodec below is
        # built as bytes; whether TexCodec accepts a bytes name cannot be
        # told from this file -- confirm.

        # Something already specified
        if self.esc_start:
            if self.esc_start == self.default_esc_start:
                return self.default_codec
            return VerbCodec(self.esc_start, self.esc_stop,
                             b"verbtex" + self.esc_start,
                             output_encoding=self.encoding)

        # Find the starting escape sequence that does not occur in verbatim
        # text.  The counter is formatted as decimal digits: bytes(n) would
        # produce n NUL bytes on Python 3.
        start = self.default_esc_start
        attempt = 0
        while start in self.block:
            start = b"<%d:" % attempt
            attempt += 1

        # By luck the default is enough
        if start == self.default_esc_start:
            return self.default_codec

        return VerbCodec(start, self.default_esc_stop, b"verbtex" + start,
                         output_encoding=self.encoding)
Packit Service 76cb02
Packit Service 76cb02
Packit Service 76cb02
if __name__ == "__main__":
    # Command-line filter: read the file named by the first argument as
    # bytes, pass each line through a VerbParser and write whatever the
    # parser returns to standard output.
    import sys
    v = VerbParser()
    # Use the binary stream behind stdout when it exposes one, since the
    # parser returns bytes; otherwise write to stdout directly.
    buf = getattr(sys.stdout, "buffer", sys.stdout)
    # The context manager closes the input file even if parsing fails.
    with open(sys.argv[1], "rb") as f:
        for line in f:
            text = v.parse(line)
            if text:
                buf.write(text)
Packit Service 76cb02