|
Packit Service |
76cb02 |
#
|
|
Packit Service |
76cb02 |
# The role of the verbatim parser is to encode properly from UTF-8 verbatim
|
|
Packit Service |
76cb02 |
# text to valid latin-1 text. Three goals must be met:
|
|
Packit Service |
76cb02 |
#
|
|
Packit Service |
76cb02 |
# - Just encode the characters, but don't escape latex characters like in normal
|
|
Packit Service |
76cb02 |
# text. This is why a dedicated latex encoder is used.
|
|
Packit Service |
76cb02 |
# - When the characters are translated to macros, escape the whole sequence
|
|
Packit Service |
76cb02 |
# to allow TeX to execute the macro embedded in verbatim text.
|
|
Packit Service |
76cb02 |
# - When the escape sequence is required, update the listing environment options
|
|
Packit Service |
76cb02 |
# if necessary.
|
|
Packit Service |
76cb02 |
#
|
|
Packit Service |
76cb02 |
import re
|
|
Packit Service |
cd7d79 |
from io import open
|
|
Packit Service |
76cb02 |
|
|
Packit Service |
cd7d79 |
from dbtexmf.dblatex.texcodec import TexCodec
|
|
Packit Service |
cd7d79 |
from dbtexmf.dblatex.texcodec import tex_handler_counter
|
|
Packit Service |
cd7d79 |
from dbtexmf.dblatex.rawparse import RawUtfParser
|
|
Packit Service |
76cb02 |
|
|
Packit Service |
76cb02 |
|
|
Packit Service |
76cb02 |
class VerbCodec(TexCodec):
    """TexCodec variant used for verbatim (listing) text.

    The codec remembers the escape sequences ``pre`` and ``post`` and
    forwards them, with the error-handler name, to ``TexCodec``. When the
    output encoding is "utf8", ``decode()`` additionally wraps every
    character whose code point is above 255 with ``pre``/``post``.
    """

    def __init__(self, pre, post, errors="verbtex",
                 input_encoding="utf8", output_encoding="latin-1"):
        """Set up the codec.

        pre, post       -- escape sequences put around escaped characters
        errors          -- error-handler name handed to TexCodec
        input_encoding  -- encoding of the text to decode
        output_encoding -- encoding of the produced text
        """
        self.pre = pre
        self.post = post
        # Kept so that bytes escape sequences can be decoded like the text
        self.input_encoding = input_encoding
        self.output_encoding = output_encoding
        TexCodec.__init__(self, input_encoding, output_encoding,
                          errors=errors, pre=pre, post=post)

    @staticmethod
    def _as_text(seq, encoding):
        """Return <seq> as text, decoding it first when it is bytes."""
        if isinstance(seq, bytes):
            return seq.decode(encoding)
        return seq

    def decode(self, text):
        """Decode <text> with TexCodec, escaping wide characters for utf8.

        For any output encoding other than "utf8" the TexCodec result is
        returned untouched. Otherwise each character above 255 is wrapped
        with the escape sequences and counted in tex_handler_counter.
        """
        ntext = TexCodec.decode(self, text)
        if self.output_encoding != "utf8":
            return ntext

        # Funnily listings cannot handle unicode characters greater than 255.
        # The loop just escapes them by wrapping with <pre> and <post> and
        # emulates the corresponding encoding exception.
        #
        # The escape sequences may have been given as bytes (VerbParser does
        # so) while the decoded characters are text: convert them once here,
        # since bytes and text cannot be concatenated.
        pre = self._as_text(self.pre, self.input_encoding)
        post = self._as_text(self.post, self.input_encoding)

        # self._errors is presumably set by TexCodec.__init__ -- TODO confirm
        n = tex_handler_counter[self._errors]
        chunks = []
        for c in ntext:
            if ord(c) > 255:
                c = pre + c + post
                n += 1
            chunks.append(c)
        tex_handler_counter[self._errors] = n
        return "".join(chunks)
|
|
Packit Service |
76cb02 |
|
|
Packit Service |
76cb02 |
|
|
Packit Service |
76cb02 |
class VerbParser:
    """Line-by-line filter that re-encodes the content of listing blocks.

    Lines are fed one at a time to parse(), as bytes. Outside of a
    ``\\begin{lst...}`` / ``\\end{lst...}`` pair the lines are returned
    unchanged. Inside, they are accumulated (parse() returns b"") until the
    end command is met; the whole block is then encoded through
    RawUtfParser and returned at once, with an ``escapeinside`` option
    added to the environment options when the codec reported errors and no
    escape sequence was specified.
    """

    def __init__(self, output_encoding="latin-1"):
        """Build a parser producing text in <output_encoding>."""
        # The listing environment can be different from 'lstlisting'
        # but the rule is that it must begin with 'lst'
        self.start_re = re.compile(br"\\begin{lst[^}]*}")
        self.stop_re = re.compile(br"\\end{lst[^}]*}")
        self.esc_re = re.compile(br"escapeinside={([^}]*)}{([^}]*)}")
        # Verbatim text accumulated so far; non-empty means "inside a block"
        self.block = b""
        self.encoding = output_encoding
        self.default_esc_start = b"<:"
        self.default_esc_stop = b":>"
        self.default_codec = VerbCodec(self.default_esc_start,
                                       self.default_esc_stop,
                                       output_encoding=output_encoding)

    def parse(self, line):
        """Process one line and return the bytes that can be output now.

        Returns the line itself outside of a listing, the text preceding
        the begin command when a listing starts, b"" while the listing is
        being accumulated, and the complete encoded listing (followed by
        the end command and the rest of the line) when it ends.
        """
        if not self.block:
            m = self.start_re.search(line)
            if not m:
                return line
            return self.parse_begin(line, m)

        m = self.stop_re.search(line)
        if not m:
            return self.block_grow(line)
        return self.parse_end(line, m)

    def parse_begin(self, line, m):
        """Start a new block from the begin command matched by <m>.

        Records the command, its optional ``[...]`` options and the escape
        sequences found in them, stores the remaining text as the start of
        the block, and returns the text preceding the command.
        """
        preblock = line[:m.start()]
        self.command = line[m.start():m.end()]
        line = line[m.end():]

        # By default, no escape sequence defined yet
        self.esc_start = b""
        self.esc_stop = b""
        self.options = b""

        # If there are some options, look for escape specs.
        # startswith() is used because indexing bytes gives an integer, which
        # never equals b"["; it also copes with an empty remainder.
        if line.startswith(b"["):
            e = line.find(b"]") + 1
            self.options = line[:e]
            line = line[e:]
            m = self.esc_re.search(self.options)
            if m:
                self.esc_start = m.group(1)
                self.esc_stop = m.group(2)

        self.block_grow(line)
        return preblock

    def parse_end(self, line, m):
        """Close the block at the end command matched by <m>.

        Returns the begin command, the (possibly updated) options, the
        encoded block and the rest of <line> starting at the end command.
        """
        self.block_grow(line[:m.start()])

        # The block is complete, find out the codec with escape sequence
        c = self.get_codec()
        c.clear_errors()

        # Now, parse/encode the block
        p = RawUtfParser(codec=c)
        text = p.parse(self.block)

        # Add the escape option if necessary
        if not self.esc_start and c.get_errors() != 0:
            self.options = self._options_with_escape(c.pre, c.post)

        block = self.command + self.options + text + line[m.start():]
        self.block = b""
        return block

    def _options_with_escape(self, pre, post):
        """Return the current options extended with escapeinside={pre}{post}."""
        escopt = b"escapeinside={%s}{%s}" % (pre, post)
        if not self.options:
            return b"[" + escopt + b"]"
        # Slice instead of index: a bytes item is an integer, not b","
        if self.options[-2:-1] != b",":
            escopt = b"," + escopt
        return self.options[:-1] + escopt + b"]"

    def block_grow(self, line):
        """Append <line> to the current block; nothing to output yet."""
        self.block += line
        return b""

    def get_codec(self):
        """Return the codec whose escape sequences suit the current block."""
        # NOTE(review): the error-handler name is passed as bytes
        # (b"verbtex" + ...); confirm that TexCodec accepts a bytes name.

        # Something already specified
        if self.esc_start:
            if self.esc_start != self.default_esc_start:
                return VerbCodec(self.esc_start, self.esc_stop,
                                 b"verbtex" + self.esc_start,
                                 output_encoding=self.encoding)
            return self.default_codec

        s = self._unused_esc_start()

        # By luck the default is enough
        if s == self.default_esc_start:
            return self.default_codec

        return VerbCodec(s, self.default_esc_stop, b"verbtex" + s,
                         output_encoding=self.encoding)

    def _unused_esc_start(self):
        """Find a starting escape sequence that does not occur in the block.

        The default sequence is tried first, then b"<0:", b"<1:", ...
        """
        s = self.default_esc_start
        n = 0
        while s in self.block:
            # Format the counter as decimal digits; bytes(n) would instead
            # produce n NUL bytes.
            s = b"<%d:" % n
            n += 1
        return s
|
|
Packit Service |
76cb02 |
|
|
Packit Service |
76cb02 |
|
|
Packit Service |
76cb02 |
if __name__ == "__main__":
    import sys

    # Command-line use: filter the file named by the first argument through
    # a VerbParser and write the result to the standard output.
    v = VerbParser()
    # The parser handles bytes: write to the underlying binary stream when
    # sys.stdout has one, else to sys.stdout itself.
    buf = getattr(sys.stdout, "buffer", sys.stdout)
    # The context manager closes the input file even if parsing fails
    with open(sys.argv[1], "rb") as f:
        for line in f:
            text = v.parse(line)
            if text:
                buf.write(text)
|
|
Packit Service |
76cb02 |
|