Blame lib/dbtexmf/dblatex/xetex/fontspec.py

Packit Service 5e066f
#!/usr/bin/python3
Packit 0f19cf
# -*- coding: utf-8 -*-
Packit 0f19cf
# Time-stamp: <2008-06-23 22:21:26 ah>
Packit 0f19cf
Packit 0f19cf
"""
Packit 0f19cf
Provide an encoder for a font specification configuration: the encoder is fed
Packit 0f19cf
with Unicode characters one by one and determines the needed font switches
Packit 0f19cf
between the preceding and the current character.
Packit 0f19cf
"""
Packit 0f19cf
Packit 0f19cf
import re
Packit 0f19cf
import xml.dom.minidom
Packit 0f19cf
import logging
Packit 0f19cf
Packit 0f19cf
Packit 0f19cf
def _indent(string, width=2):
Packit 0f19cf
    """Indent the <string> lines by <width> blank characters."""
Packit 0f19cf
    istr = ' ' * width
Packit 0f19cf
    s = istr + istr.join(string.splitlines(1))
Packit 0f19cf
    return s
Packit 0f19cf
Packit 0f19cf
class UnicodeInterval:
    """Closed interval of Unicode codepoints.

    The interval covers every codepoint from its minimum to its maximum
    boundary (both included) and can be asked whether a given character
    falls inside it. The from_*() methods (re)initialise the boundaries and
    return the instance itself so that they can be chained on construction.
    """

    # Internal data attributes:
    # _min_boundary: Minimum boundary of the codepoint interval (ordinal)
    # _max_boundary: Maximum boundary of the codepoint interval (ordinal)

    # Accepts 'U', 'u', 'U+' or 'u+' followed by hexadecimal digits.
    _re_codepoint = re.compile(r'^[Uu]\+?([0-9A-Fa-f]+)$')

    def __init__(self):
        # A fresh instance covers the single codepoint 0 until one of the
        # from_*() methods is called.
        self._min_boundary = 0
        self._max_boundary = 0

    def __str__(self):
        """Render as '[min]' for a single codepoint, else '[min,max]'."""
        bounds = [str(self._min_boundary)]
        if self._max_boundary != self._min_boundary:
            bounds.append(str(self._max_boundary))
        return '[' + ','.join(bounds) + ']'

    def _unicode_to_ordinal(self, codepoint):
        """Convert a 'Uxxxx' / 'U+xxxx' string to its integer ordinal.

        Raises RuntimeError when <codepoint> does not have that form.
        """
        found = self._re_codepoint.match(codepoint)
        if not found:
            raise RuntimeError('Not a unicode codepoint: ' + codepoint)
        return int(found.group(1), 16)

    def from_char(self, char):
        """Make the interval cover exactly the character <char>."""
        ordinal = ord(char)
        self._min_boundary = ordinal
        self._max_boundary = ordinal
        return self

    def from_codepoint(self, codepoint):
        """Make the interval cover exactly the codepoint given as a
        'Uxxxx' / 'U+xxxx' string."""
        ordinal = self._unicode_to_ordinal(codepoint)
        self._min_boundary = ordinal
        self._max_boundary = ordinal
        return self

    def from_interval(self, codepoint1, codepoint2):
        """Make the interval span two codepoint strings, in either order."""
        first = self._unicode_to_ordinal(codepoint1)
        self._min_boundary = first
        second = self._unicode_to_ordinal(codepoint2)
        # The boundaries may be given in reverse order: normalise them.
        self._min_boundary = min(first, second)
        self._max_boundary = max(first, second)
        return self

    def contains(self, char):
        """Tell whether the character <char> lies within the interval,
        boundaries included."""
        return self._min_boundary <= ord(char) <= self._max_boundary
Packit 0f19cf
Packit 0f19cf
Packit 0f19cf
class FontSpec:
Packit 0f19cf
    """
Packit 0f19cf
    Font specification, consisting of one or several unicode character
Packit 0f19cf
    intervals and of fonts to select for those characters. The object
Packit 0f19cf
    fully defines the fonts to switch to.
Packit 0f19cf
    """
Packit 0f19cf
Packit 0f19cf
    # Internal data attributes:
Packit 0f19cf
    # _intervals: UnicodeInterval list
Packit 0f19cf
Packit 0f19cf
    transition_types = ['enter', 'inter', 'exit']
Packit 0f19cf
    _re_interval = re.compile(r'^([Uu][0-9A-Fa-f]+)-([Uu][0-9A-Fa-f]+)$')
Packit 0f19cf
    _re_codepoint = re.compile(r'^([Uu][0-9A-Fa-f]+)$')
Packit 0f19cf
Packit 0f19cf
    def __init__(self, intervals=None, subfont_first=False):
Packit 0f19cf
        """Create a font specification from the specified codepoint intervals.
Packit 0f19cf
        The other data attributes will be set by the caller later.
Packit 0f19cf
        """
Packit 0f19cf
        self.type = ""
Packit 0f19cf
        self.id = None
Packit 0f19cf
        self.refmode = None
Packit 0f19cf
        self.transitions = {}
Packit 0f19cf
        self.fontspecs = [self]
Packit 0f19cf
        self.subfont_first = subfont_first
Packit 0f19cf
        self._ignored = []
Packit 0f19cf
        self.log = logging.getLogger("dblatex")
Packit 0f19cf
Packit 0f19cf
        for type in self.transition_types:
Packit 0f19cf
            self.transitions[type] = {}
Packit 0f19cf
Packit 0f19cf
        if not(intervals):
Packit 0f19cf
            self._intervals = []
Packit 0f19cf
            return
Packit 0f19cf
Packit 0f19cf
        try:
Packit 0f19cf
            self._intervals = list(intervals)
Packit 0f19cf
        except TypeError:
Packit 0f19cf
            self._intervals = [intervals]
Packit 0f19cf
Packit 0f19cf
    def fromnode(self, node):
Packit 0f19cf
        range = node.getAttribute('range')
Packit 0f19cf
        charset = node.getAttribute('charset')
Packit 0f19cf
        id = node.getAttribute('id')
Packit 0f19cf
        refmode = node.getAttribute('refmode')
Packit 0f19cf
        self.type = node.getAttribute('type')
Packit 0f19cf
Packit 0f19cf
        if (range):
Packit 0f19cf
            self._intervals = self._parse_range(range)
Packit 0f19cf
        elif (charset):
Packit 0f19cf
            for char in charset:
Packit 0f19cf
                self.add_char(char)
Packit 0f19cf
Packit 0f19cf
        # Unique identifier
Packit 0f19cf
        if (id):
Packit 0f19cf
            self.id = id
Packit 0f19cf
        if (refmode):
Packit 0f19cf
            self.refmode = refmode
Packit 0f19cf
Packit 0f19cf
        for transition_type in self.transition_types:
Packit 0f19cf
            self._parse_transitions(node, transition_type)
Packit 0f19cf
Packit 0f19cf
    def mainfont(self):
Packit 0f19cf
        # Try to return the most representative font of this spec
Packit 0f19cf
        return (self.transitions["enter"].get("main") or 
Packit 0f19cf
                self.transitions["enter"].get("sans"))
Packit 0f19cf
Packit 0f19cf
    def _parse_range(self, range):
Packit 0f19cf
        """Parse the specified /fonts/fontspec@range attribute to a
Packit 0f19cf
        UnicodeInterval list.
Packit 0f19cf
        """
Packit 0f19cf
        #print range
Packit 0f19cf
        intervals = []
Packit 0f19cf
        chunks = range.split()
Packit 0f19cf
        for chunk in chunks:
Packit 0f19cf
            m = self._re_interval.match(chunk)
Packit 0f19cf
            #print match
Packit 0f19cf
            if m:
Packit 0f19cf
                urange = UnicodeInterval().from_interval(m.group(1), m.group(2))
Packit 0f19cf
                intervals.append(urange)
Packit 0f19cf
            else:
Packit 0f19cf
                m = self._re_codepoint.match(chunk)
Packit 0f19cf
                if m:
Packit 0f19cf
                    intervals.append(
Packit 0f19cf
                        UnicodeInterval().from_codepoint(m.group(1)))
Packit 0f19cf
                else:
Packit Service f3de8e
                    raise RuntimeError('Unable to parse range: "' + range + '"')
Packit 0f19cf
        return intervals
Packit 0f19cf
Packit 0f19cf
    def _parse_transitions(self, node, transition_type):
Packit 0f19cf
        """Evaluate the font elements of the specified fontspec element for the
Packit 0f19cf
        specified transition type (enter, inter or exit).
Packit 0f19cf
        """
Packit 0f19cf
        fontlist = self.transitions[transition_type]
Packit 0f19cf
Packit 0f19cf
        for dom_transition in node.getElementsByTagName(transition_type):
Packit 0f19cf
            for dom_font in dom_transition.getElementsByTagName('font'):
Packit 0f19cf
                font = ''
Packit 0f19cf
                types = dom_font.getAttribute("type")
Packit 0f19cf
                types = types.split()
Packit 0f19cf
                for dom_child in dom_font.childNodes:
Packit 0f19cf
                    if dom_child.nodeType == dom_child.TEXT_NODE:
Packit 0f19cf
                        font += dom_child.nodeValue
Packit 0f19cf
                if (font):
Packit 0f19cf
                    for type in types:
Packit 0f19cf
                        fontlist[type] = font
Packit 0f19cf
Packit 0f19cf
    def _switch_to(self, fonts):
Packit 0f19cf
        """
Packit 0f19cf
        Return a string with the XeTeX font switching commands for the
Packit 0f19cf
        specified font types.
Packit 0f19cf
        """
Packit 0f19cf
        s = ''
Packit 0f19cf
        for type, font in fonts.items():
Packit 0f19cf
            s += '\switch%sfont{%s}' % (type, font)
Packit 0f19cf
        if s:
Packit 0f19cf
            s = r"\savefamily" + s + r"\loadfamily{}"
Packit 0f19cf
        return s
Packit 0f19cf
Packit 0f19cf
    def enter(self):
Packit 0f19cf
        self.log.debug("enter in %s" % self.id)
Packit 0f19cf
        s = self._switch_to(self.transitions["enter"])
Packit 0f19cf
        return s
Packit 0f19cf
Packit 0f19cf
    def exit(self):
Packit 0f19cf
        self.log.debug("exit from %s" % self.id)
Packit 0f19cf
        s = self._switch_to(self.transitions["exit"])
Packit 0f19cf
        return s
Packit 0f19cf
Packit 0f19cf
    def interchar(self):
Packit 0f19cf
        s = self._switch_to(self.transitions["inter"])
Packit 0f19cf
        return s
Packit 0f19cf
Packit 0f19cf
    def __str__(self):
Packit 0f19cf
        """Dump the instance's data attributes."""
Packit 0f19cf
        string = 'FontSpec:'
Packit 0f19cf
        string += '\n  Id: %s' % self.id
Packit 0f19cf
        string += '\n  Refmode: %s' % self.refmode
Packit 0f19cf
        string += '\n  subFirst: %s' % self.subfont_first
Packit 0f19cf
        for interval in self._intervals:
Packit 0f19cf
            string += '\n' + _indent(str(interval))
Packit 0f19cf
        return string
Packit 0f19cf
Packit 0f19cf
    def add_subfont(self, fontspec):
Packit 0f19cf
        self.log.debug("%s -> %s" % (self.id, fontspec.id))
Packit 0f19cf
        if self.subfont_first:
Packit 0f19cf
            self.fontspecs.insert(-1, fontspec)
Packit 0f19cf
        else:
Packit 0f19cf
            self.fontspecs.append(fontspec)
Packit 0f19cf
Packit 0f19cf
    def add_char(self, char):
Packit 0f19cf
        self._intervals.append(UnicodeInterval().from_char(char))
Packit 0f19cf
Packit 0f19cf
    def add_uranges(self, ranges, depth=1):
Packit 0f19cf
        # Recursively extend the supported character range
Packit 0f19cf
        if depth:
Packit 0f19cf
            for f in self.fontspecs:
Packit 0f19cf
                if f != self:
Packit 0f19cf
                    f.add_uranges(ranges)
Packit 0f19cf
        self._intervals.extend(ranges)
Packit 0f19cf
Packit 0f19cf
    def add_ignored(self, ranges, depth=1):
Packit 0f19cf
        if depth:
Packit 0f19cf
            for f in self.fontspecs:
Packit 0f19cf
                if f != self:
Packit 0f19cf
                    f.add_ignored(ranges)
Packit 0f19cf
        self._ignored.extend(ranges)
Packit 0f19cf
Packit 0f19cf
    def get_uranges(self):
Packit 0f19cf
        return self._intervals
Packit 0f19cf
Packit 0f19cf
    def contains(self, char):
Packit 0f19cf
        #print "%s: %s" % (self.id, self._intervals)
Packit 0f19cf
        for interval in self._intervals:
Packit 0f19cf
            if interval.contains(char):
Packit 0f19cf
                return True
Packit 0f19cf
        else:
Packit 0f19cf
            return False
Packit 0f19cf
Packit 0f19cf
    def isignored(self, char):
Packit 0f19cf
        self.log.debug("%s: %s" % (self.id, [ str(a) for a in self._ignored ]))
Packit 0f19cf
        for interval in self._ignored:
Packit 0f19cf
            if interval.contains(char):
Packit 0f19cf
                return True
Packit 0f19cf
        else:
Packit 0f19cf
            return False
Packit 0f19cf
Packit 0f19cf
    def _loghas(self, id, char):
Packit 0f19cf
        try:
Packit 0f19cf
            self.log.debug("%s has '%s'" % (id, str(char)))
Packit 0f19cf
        except:
Packit 0f19cf
            self.log.debug("%s has '%s'" % (id, ord(char)))
Packit 0f19cf
Packit 0f19cf
    def match(self, char, excluded=None):
Packit 0f19cf
        """Determine whether the font specification matches the specified
Packit 0f19cf
        object, thereby considering refmode.
Packit 0f19cf
        """
Packit 0f19cf
        fontspec = None
Packit 0f19cf
        self.log.debug( "Lookup in %s" % self.id)
Packit 0f19cf
        if self.isignored(char):
Packit 0f19cf
            self._loghas(self.id, char)
Packit 0f19cf
            return self
Packit 0f19cf
Packit 0f19cf
        for fontspec in self.fontspecs:
Packit 0f19cf
            # Don't waste time in scanning excluded nodes
Packit 0f19cf
            if fontspec == excluded:
Packit 0f19cf
                continue
Packit 0f19cf
            #print " Look in %s" % fontspec.id
Packit 0f19cf
            if fontspec.contains(char):
Packit 0f19cf
                self._loghas(fontspec.id, char)
Packit 0f19cf
                return fontspec
Packit 0f19cf
        return None
Packit 0f19cf