Blame lib/dbtexmf/dblatex/texhyphen.py

Packit Service 76cb02
#
Packit Service 76cb02
# dblatex - Hyphenation classes to provide smart hyphenation of path-like
Packit Service 76cb02
# strings
Packit Service 76cb02
#
Packit Service cd7d79
from __future__ import print_function
Packit Service cd7d79
Packit Service 76cb02
import re
Packit Service 76cb02
Packit Service 76cb02
class Hyphenator:
    """
    Base hyphenator: defines the common interface and leaves the text
    untouched.

    Subclasses override hyphenate() to insert hyphenation points.
    """
    def __init__(self, codec=None):
        # Keep the codec so that every hyphenator exposes the same 'codec'
        # attribute as the subclasses do, even though this base class does
        # not use it.
        self.codec = codec

    def hyphenate(self, text):
        """Return <text> unchanged (no hyphenation point is inserted)."""
        return text
Packit Service 76cb02
Packit Service 76cb02
Packit Service 76cb02
class BasicHyphenator(Hyphenator):
    """
    Hyphenates basically by putting a hyphenation point between each
    character.

    No hyphenation point is put around a space. The result is a byte string:
    when a codec is given it is used to decode the input and to encode the
    output; without a codec, bytes are handled byte per byte and text strings
    are encoded in UTF-8.
    """
    def __init__(self, codec=None):
        self.codec = codec
        # Backslash explicitly escaped: same two bytes as the former b"\-"
        # literal, without relying on an invalid escape sequence.
        self.hyphenchar = b"\\-"

    def hyphenate(self, text):
        """
        Return <text> as bytes with self.hyphenchar inserted between each
        character, except around spaces.
        """
        if self.codec:
            text = self.codec.decode(text)

        # \1 is a temporary marker for the hyphenation points
        if isinstance(text, bytes):
            # Iterating over bytes gives integers with Python 3, so work on
            # one-byte slices to keep joinable byte strings.
            ntext = b"\1".join([text[i:i + 1] for i in range(len(text))])
        else:
            ntext = "\1".join(text)

        if self.codec:
            ntext = self.codec.encode(ntext)
        elif not isinstance(ntext, bytes):
            # The substitutions below work on bytes only
            ntext = ntext.encode("utf-8")

        # Never hyphenate just before or just after a space
        ntext = re.sub(b"\1? \1?", b" ", ntext)
        ntext = ntext.replace(b"\1", self.hyphenchar)
        return ntext
Packit Service 76cb02
Packit Service 76cb02
Packit Service 76cb02
class UrlHyphenator(Hyphenator):
    r"""
    Hyphenates <text> so that cutting is easier on URL separators.
    The hyphen chars are expected to be void to prevent spurious
    characters in displayed filenames or URLs.

    The pathname words can be cut only after the <h_start> first characters
    and before the <h_stop> last characters to avoid a cut just after one or
    two chars.

    Tip: the inter-chars can be defined with macros \HO and \HL, to be shorter
    like:

    \def\HL{\penalty9999} (h_char="\HL")
    \def\HO{\penalty5000} (h_sep="\HO")

    By default these shortcuts are not used to avoid some macro declaration in
    existing latex styles.

    The result is a byte string: when a codec is given it is used to decode
    the input and to encode the output; without a codec, bytes are handled
    byte per byte and text strings are encoded in UTF-8.
    """
    def __init__(self, codec=None,
                 h_sep=b"\\penalty0 ", h_char=b"\\penalty5000 ",
                 h_start=3, h_stop=3):
        self.codec = codec
        # Characters after which a line break is cheap (h_sep is appended)
        self.seps = r":/\@=?#;-."
        self.h_sep = h_sep
        self.h_char = h_char
        # Number of leading/trailing characters of a word kept unbreakable
        self.h_start = (h_start-1)
        self.h_stop = (h_stop-1)

    def _translate(self, text, encoding=None):
        """
        Encode <text> with the codec if any. Without codec, <text> is
        encoded with <encoding> when one is given and <text> is not already
        bytes, else it is returned as is.
        """
        if self.codec:
            return self.codec.encode(text)
        if encoding and not isinstance(text, bytes):
            return text.encode(encoding)
        return text

    def hyphenate(self, text):
        """
        Return <text> as bytes, with self.h_sep appended to each separator
        and self.h_char inserted between the inner characters of the words.
        """
        encoding = None
        if self.codec:
            text = self.codec.decode(text)
        elif isinstance(text, bytes):
            # latin-1 maps each byte to one character and back, so the bytes
            # are processed one by one and restored unchanged.
            encoding = "latin-1"
            text = text.decode(encoding)
        else:
            encoding = "utf-8"

        # Clamp so that h_start/h_stop lower than 1 behave like 1
        start = max(self.h_start, 0)
        stop = max(self.h_stop, 0)

        vtext = []
        p = "([%s])" % re.escape(self.seps)
        # The capturing group keeps the separators in the split result
        words = re.split(p, text)
        for w in words:
            if not(w):
                continue
            if w in self.seps:
                vtext.append(self._translate(w, encoding) + self.h_sep)
            else:
                # Explicit end index: a negative slice bound would be wrong
                # when no trailing character is protected (-0 is 0).
                end = max(len(w) - stop, 0)
                hword = w[start:end]
                if len(hword) < 2:
                    # Too short to have any allowed cut
                    vtext.append(self._translate(w, encoding))
                else:
                    # \1 is a temporary marker for the hyphenation points
                    nw = w[:start] + "\1".join(hword) + w[end:]
                    nw = self._translate(nw, encoding)
                    # Never hyphenate just before or just after a space
                    nw = re.sub(b"\1? \1?", b" ", nw)
                    nw = nw.replace(b"\1", self.h_char)
                    vtext.append(nw)

        ntext = b"".join(vtext)
        return ntext
Packit Service 76cb02
Packit Service 76cb02
Packit Service 76cb02
if __name__ == "__main__":
    # Small demo: run the same sample URL through each hyphenator flavour
    # and print what comes out.
    url = b"http://www.fg/foobar fun#fght/fkkkf.tz?id=123"
    for hyphenator in (BasicHyphenator(), UrlHyphenator()):
        print(hyphenator.hyphenate(url))