|
Packit Service |
76cb02 |
#
|
|
Packit Service |
76cb02 |
# dblatex - Hyphenation classes to provide smart hyphenation of path-like
|
|
Packit Service |
76cb02 |
# strings
|
|
Packit Service |
76cb02 |
#
|
|
Packit Service |
cd7d79 |
from __future__ import print_function
|
|
Packit Service |
cd7d79 |
|
|
Packit Service |
76cb02 |
import re
|
|
Packit Service |
76cb02 |
|
|
Packit Service |
76cb02 |
class Hyphenator:
    """
    Base hyphenator that leaves the text untouched.

    BasicHyphenator and UrlHyphenator derive from this class and override
    hyphenate() to insert break points.
    """

    def __init__(self, codec=None):
        """
        Accept <codec> so that the signature matches the subclasses.
        The base class does not store or use it.
        """

    def hyphenate(self, text):
        """
        Return <text> exactly as it was given.
        """
        return text
|
|
Packit Service |
76cb02 |
|
|
Packit Service |
76cb02 |
|
|
Packit Service |
76cb02 |
class BasicHyphenator(Hyphenator):
    """
    Hyphenates basically by putting a hyphenation point between each character.

    No hyphenation point is put right before or right after a space.

    <codec> is optional. When given, it must provide decode() and encode();
    encode() is expected to return bytes, since its result is post-processed
    with bytes patterns. When no codec is given, bytes input is hyphenated
    between each byte and str input between each character (the result is
    then encoded as UTF-8).

    hyphenate() always returns bytes.
    """
    def __init__(self, codec=None):
        self.codec = codec
        # Raw literal: a backslash followed by a dash (the hyphenation macro),
        # not a Python escape sequence
        self.hyphenchar = br"\-"

    def hyphenate(self, text):
        """
        Return <text> as bytes with self.hyphenchar between each character.
        """
        # "\1" (byte 0x01) is a temporary marker for the hyphenation points,
        # replaced by the real hyphenation string once the text is encoded
        if self.codec:
            text = self.codec.decode(text)
            ntext = self.codec.encode("\1".join(text))
        elif isinstance(text, bytes):
            # Iterating over bytes yields integers, so build one-byte slices
            ntext = b"\1".join(text[i:i + 1] for i in range(len(text)))
        else:
            ntext = "\1".join(text).encode("utf-8")

        # Drop the markers surrounding a space: a space is already a break
        ntext = re.sub(b"\1? \1?", b" ", ntext)
        return ntext.replace(b"\1", self.hyphenchar)
|
|
Packit Service |
76cb02 |
|
|
Packit Service |
76cb02 |
|
|
Packit Service |
76cb02 |
class UrlHyphenator(Hyphenator):
    r"""
    Hyphenates <text> so that cutting is easier on URL separators.
    The hyphen chars are expected to be void to prevent from spurious
    characters in displayed filenames or URLs.

    The pathname words can be cut only after the <h_start> first characters
    and before the <h_stop> last characters to avoid a cut just after one or
    two chars.

    Tip: the inter-chars can be defined with macros \HO and \HL, to be shorter
    like:

    \def\HL{\penalty9999} (h_char=b"\HL")
    \def\HO{\penalty5000} (h_sep=b"\HO")

    By default these shortcuts are not used to avoid some macro declaration in
    existing latex styles.

    <h_sep> and <h_char> must be bytes: they are concatenated with the encoded
    text. <codec> is optional. When given, it must provide decode() and
    encode(); encode() is expected to return bytes. When no codec is given,
    bytes input is processed byte by byte and str input is encoded as UTF-8.

    hyphenate() always returns bytes.
    """
    def __init__(self, codec=None,
                 h_sep=br"\penalty0 ", h_char=br"\penalty5000 ",
                 h_start=3, h_stop=3):
        self.codec = codec
        self.seps = r":/\@=?#;-."
        self.h_sep = h_sep
        self.h_char = h_char
        # Stored as the number of chars kept unbreakable at each word end
        self.h_start = (h_start-1)
        self.h_stop = (h_stop-1)

    def _translate(self, text):
        """
        Encode <text> with the codec, or return it unchanged without codec.
        """
        if self.codec:
            return self.codec.encode(text)
        else:
            return text

    def hyphenate(self, text):
        """
        Return <text> as bytes, with self.h_sep after each separator and
        self.h_char between the inner characters of each word.
        """
        if self.codec:
            text = self.codec.decode(text)
            encode = self._translate
        else:
            if isinstance(text, bytes):
                # latin-1 maps each byte to one char and back, unchanged
                charset = "latin-1"
                text = text.decode(charset)
            else:
                charset = "utf-8"

            def encode(piece, charset=charset):
                return piece.encode(charset)

        # Negative values would turn into from-the-end slice indexes
        head = max(self.h_start, 0)
        tail = max(self.h_stop, 0)

        vtext = []
        p = "([%s])" % re.escape(self.seps)
        for w in re.split(p, text):
            if not w:
                continue
            if w in self.seps:
                vtext.append(encode(w) + self.h_sep)
                continue

            # Explicit end index: a "-tail" slice bound is wrong when tail
            # is 0, since w[head:-0] is always empty
            cut = max(len(w) - tail, 0)
            hword = w[head:cut]
            if len(hword) < 2:
                # Word too short to get an inner break point
                vtext.append(encode(w))
                continue

            # "\1" (byte 0x01) temporarily marks the break points until the
            # word is encoded
            nw = encode(w[:head] + "\1".join(hword) + w[cut:])
            # A space is already a break: drop the markers around it
            nw = re.sub(b"\1? \1?", b" ", nw)
            vtext.append(nw.replace(b"\1", self.h_char))

        return b"".join(vtext)
|
|
Packit Service |
76cb02 |
|
|
Packit Service |
76cb02 |
|
|
Packit Service |
76cb02 |
if __name__ == "__main__":
    # Small demo: hyphenate a sample URL with each hyphenator and print it
    url = b"http://www.fg/foobar fun#fght/fkkkf.tz?id=123"
    hyphenators = (BasicHyphenator(), UrlHyphenator())
    for hyphenator in hyphenators:
        print(hyphenator.hyphenate(url))
|