#
# dblatex - Hyphenation classes to provide smart hyphenation of path-like
# strings
#
from __future__ import print_function
import re
class Hyphenator:
    """
    Do-nothing hyphenator: defines the hyphenator interface and hands the
    text back untouched.
    """

    def __init__(self, codec=None):
        """Accept <codec> for signature compatibility; it is not stored."""

    def hyphenate(self, text):
        """Return <text> exactly as it was given."""
        return text
class BasicHyphenator(Hyphenator):
"""
Hyphenates basically by putting an hyphenation point between each character.
"""
def __init__(self, codec=None):
self.codec = codec
self.hyphenchar = b"\-"
def hyphenate(self, text):
if self.codec: text = self.codec.decode(text)
ntext = "\1".join(list(text))
if self.codec: ntext = self.codec.encode(ntext)
ntext = re.sub(b"\1? \1?", b" ", ntext)
ntext = ntext.replace(b"\1", self.hyphenchar)
return ntext
class UrlHyphenator(Hyphenator):
"""
Hyphenates <text> so that cutting is easier on URL separators.
The hyphen chars are expected to be void to prevent from spurious
characters in displayed filenames or URLs.
The pathname words can be cut only after the <h_start> first characters
and before the <h_stop> characters to avoid a cut just after one or two
chars.
Tip: the inter-chars can be defined with macros \HO and \HL, to be shorter
like:
\def\HL{\penalty9999} (h_char="\HL")
\def\HO{\penalty5000} (h_sep="\HO")
By default these shortcuts are not used to avoid some macro declaration in
existing latex styles.
"""
def __init__(self, codec=None,
h_sep=b"\penalty0 ", h_char=b"\penalty5000 ",
h_start=3, h_stop=3):
self.codec = codec
self.seps = r":/\@=?#;-."
self.h_sep = h_sep
self.h_char = h_char
self.h_start = (h_start-1)
self.h_stop = (h_stop-1)
def _translate(self, text):
if self.codec:
return self.codec.encode(text)
else:
return text
def hyphenate(self, text):
if self.codec: text = self.codec.decode(text)
vtext = []
p = "([%s])" % re.escape(self.seps)
words = re.split(p, text)
for w in words:
if not(w):
continue
if w in self.seps:
vtext.append(self._translate(w) + self.h_sep)
else:
hword = w[self.h_start:-self.h_stop]
if len(hword) < 2:
vtext.append(self._translate(w))
else:
nw = w[:self.h_start]
nw += "\1".join(list(hword))
nw += w[-self.h_stop:]
nw = self._translate(nw)
nw = re.sub(b"\1? \1?", b" ", nw)
nw = nw.replace(b"\1", self.h_char)
vtext.append(nw)
ntext = b"".join(vtext)
return ntext
if __name__ == "__main__":
    # Manual demo: run a sample URL through each hyphenation strategy and
    # print what comes out.
    url = b"http://www.fg/foobar fun#fght/fkkkf.tz?id=123"
    # Both hyphenators are built before anything is printed
    for hyphenator in (BasicHyphenator(), UrlHyphenator()):
        print(hyphenator.hyphenate(url))