#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Time-stamp: <2008-06-23 22:21:26 ah>

"""
Provide an encoder for a font specification configuration: the encoder is fed
with Unicode characters one by one and determines the needed font switches
between the preceding and the current character.
"""

import re
import xml.dom.minidom
import logging


def _indent(string, width=2):
    """Indent the <string> lines by <width> blank characters."""
    istr = ' ' * width
    s = istr + istr.join(string.splitlines(1))
    return s

class UnicodeInterval:
    """Closed range of Unicode codepoints, stored as two ordinals.

    A fresh instance covers [0, 0]. It is configured through one of the
    from_*() methods, which all return the instance itself, and can then be
    asked with contains() whether a character falls into the range.
    """

    # Internal data attributes:
    # _min_boundary: lowest ordinal of the interval
    # _max_boundary: highest ordinal of the interval

    _re_codepoint = re.compile(r'^[Uu]\+?([0-9A-Fa-f]+)$')

    def __init__(self):
        self._min_boundary = self._max_boundary = 0

    def __str__(self):
        """Return '[min]' for a single codepoint, '[min,max]' otherwise."""
        bounds = [str(self._min_boundary)]
        if self._min_boundary != self._max_boundary:
            bounds.append(str(self._max_boundary))
        return '[' + ','.join(bounds) + ']'

    def _unicode_to_ordinal(self, codepoint):
        """Convert a 'U<hex>' or 'U+<hex>' string to its ordinal.

        Raise RuntimeError when <codepoint> does not have this form.
        """
        found = self._re_codepoint.match(codepoint)
        if not found:
            raise RuntimeError('Not a unicode codepoint: ' + codepoint)
        return int(found.group(1), 16)

    def from_char(self, char):
        """Restrict the interval to the single character <char>."""
        self._min_boundary = self._max_boundary = ord(char)
        return self

    def from_codepoint(self, codepoint):
        """Restrict the interval to one codepoint given as unicode string."""
        ordinal = self._unicode_to_ordinal(codepoint)
        self._min_boundary = ordinal
        self._max_boundary = ordinal
        return self

    def from_interval(self, codepoint1, codepoint2):
        """Set the interval from two codepoint strings, given in any order."""
        self._min_boundary = self._unicode_to_ordinal(codepoint1)
        upper = self._unicode_to_ordinal(codepoint2)
        if upper < self._min_boundary:
            # Boundaries were passed in descending order: swap them
            self._min_boundary, upper = upper, self._min_boundary
        self._max_boundary = upper
        return self

    def contains(self, char):
        """Tell whether the ordinal of <char> lies within the interval,
        boundaries included.
        """
        return self._min_boundary <= ord(char) <= self._max_boundary


class FontSpec:
    """
    Font specification, consisting of one or several unicode character
    intervals and of fonts to select for those characters. The object
    fully defines the fonts to switch to.

    A specification can register sub-specifications (see add_subfont());
    match() looks a character up in the whole <fontspecs> list.
    """

    # Data attributes:
    # type: 'type' attribute read by fromnode() ("" by default)
    # id: identifier read by fromnode(), or None
    # refmode: 'refmode' attribute read by fromnode(), or None
    # transitions: for each transition type, a {font type: font text} dict
    # fontspecs: specifications scanned by match(); initially [self]
    # subfont_first: if true, add_subfont() inserts instead of appending
    # log: the "dblatex" logger
    # _intervals: UnicodeInterval list
    # _ignored: intervals for which match() directly returns self

    transition_types = ['enter', 'inter', 'exit']
    # The optional '+' keeps these patterns in line with the codepoint syntax
    # accepted by UnicodeInterval ('U0041' as well as 'U+0041').
    _re_interval = re.compile(
        r'^([Uu]\+?[0-9A-Fa-f]+)-([Uu]\+?[0-9A-Fa-f]+)$')
    _re_codepoint = re.compile(r'^([Uu]\+?[0-9A-Fa-f]+)$')

    def __init__(self, intervals=None, subfont_first=False):
        """Create a font specification from the specified codepoint intervals.
        The other data attributes will be set by the caller later.

        <intervals> is either an iterable of interval objects or a single
        interval object. <subfont_first> is stored for add_subfont().
        """
        self.type = ""
        self.id = None
        self.refmode = None
        self.transitions = {}
        self.fontspecs = [self]
        self.subfont_first = subfont_first
        self._ignored = []
        self.log = logging.getLogger("dblatex")

        for transition_type in self.transition_types:
            self.transitions[transition_type] = {}

        if not intervals:
            self._intervals = []
            return

        try:
            self._intervals = list(intervals)
        except TypeError:
            # Not iterable: a single interval object was passed
            self._intervals = [intervals]

    def fromnode(self, node):
        """Fill this specification from the attributes ('range', 'charset',
        'id', 'refmode', 'type') and the transition elements of the DOM
        element <node>.
        """
        char_range = node.getAttribute('range')
        charset = node.getAttribute('charset')
        spec_id = node.getAttribute('id')
        refmode = node.getAttribute('refmode')
        self.type = node.getAttribute('type')

        # 'range' wins over 'charset' when both are given. A range replaces
        # the current intervals, a charset is added to them.
        if char_range:
            self._intervals = self._parse_range(char_range)
        elif charset:
            for char in charset:
                self.add_char(char)

        # Unique identifier
        if spec_id:
            self.id = spec_id
        if refmode:
            self.refmode = refmode

        for transition_type in self.transition_types:
            self._parse_transitions(node, transition_type)

    def mainfont(self):
        """Return the "main" font of the enter transition, or its "sans"
        font when there is no main one (None if neither is defined).
        """
        # Try to return the most representative font of this spec
        enter_fonts = self.transitions["enter"]
        return enter_fonts.get("main") or enter_fonts.get("sans")

    def _parse_range(self, range):
        """Parse the specified /fonts/fontspec@range attribute to a
        UnicodeInterval list.

        <range> is a blank separated list of chunks, each one being a single
        codepoint (U0041) or two codepoints joined by '-' (U0041-U005A).
        Raise RuntimeError if a chunk has neither form.
        """
        intervals = []
        for chunk in range.split():
            m = self._re_interval.match(chunk)
            if m:
                intervals.append(
                    UnicodeInterval().from_interval(m.group(1), m.group(2)))
                continue
            m = self._re_codepoint.match(chunk)
            if not m:
                raise RuntimeError('Unable to parse range: "' + range + '"')
            intervals.append(UnicodeInterval().from_codepoint(m.group(1)))
        return intervals

    def _parse_transitions(self, node, transition_type):
        """Evaluate the font elements of the specified fontspec element for the
        specified transition type (enter, inter or exit).

        The text of each <font> element is recorded in the transition
        dictionary under every type listed in its 'type' attribute.
        """
        fontlist = self.transitions[transition_type]

        for dom_transition in node.getElementsByTagName(transition_type):
            for dom_font in dom_transition.getElementsByTagName('font'):
                font_types = dom_font.getAttribute("type").split()
                # Only the direct text children make up the font text
                font = ''.join([child.nodeValue
                                for child in dom_font.childNodes
                                if child.nodeType == child.TEXT_NODE])
                if font:
                    for font_type in font_types:
                        fontlist[font_type] = font

    def _switch_to(self, fonts):
        """
        Return a string with the XeTeX font switching commands for the
        specified font types.

        <fonts> maps font types to fonts. An empty mapping gives an empty
        string, without the \\savefamily / \\loadfamily wrapper.
        """
        # Raw string: '\s' is not a valid escape sequence in a plain literal
        switches = ''.join([r'\switch%sfont{%s}' % (font_type, font)
                            for font_type, font in fonts.items()])
        if not switches:
            return ''
        return r"\savefamily" + switches + r"\loadfamily{}"

    def enter(self):
        """Return the font switching commands of the enter transition."""
        self.log.debug("enter in %s", self.id)
        return self._switch_to(self.transitions["enter"])

    def exit(self):
        """Return the font switching commands of the exit transition."""
        self.log.debug("exit from %s", self.id)
        return self._switch_to(self.transitions["exit"])

    def interchar(self):
        """Return the font switching commands of the inter transition."""
        return self._switch_to(self.transitions["inter"])

    def __str__(self):
        """Dump the instance's data attributes."""
        lines = ['FontSpec:',
                 '  Id: %s' % self.id,
                 '  Refmode: %s' % self.refmode,
                 '  subFirst: %s' % self.subfont_first]
        lines.extend([_indent(str(interval)) for interval in self._intervals])
        return '\n'.join(lines)

    def add_subfont(self, fontspec):
        """Register <fontspec> as a sub-specification scanned by match()."""
        self.log.debug("%s -> %s", self.id, fontspec.id)
        if self.subfont_first:
            # Insert just before the last entry. The list starts as [self],
            # so sub-specifications end up ahead of self in the scan order.
            self.fontspecs.insert(-1, fontspec)
        else:
            self.fontspecs.append(fontspec)

    def add_char(self, char):
        """Add the single character <char> to the supported intervals."""
        self._intervals.append(UnicodeInterval().from_char(char))

    def add_uranges(self, ranges, depth=1):
        """Add the intervals <ranges> to this specification and, unless
        <depth> is false, to its sub-specifications as well.
        """
        # Recursively extend the supported character range
        if depth:
            for f in self.fontspecs:
                if f != self:
                    f.add_uranges(ranges)
        self._intervals.extend(ranges)

    def add_ignored(self, ranges, depth=1):
        """Add the intervals <ranges> to the ignored intervals of this
        specification and, unless <depth> is false, of its
        sub-specifications as well.
        """
        if depth:
            for f in self.fontspecs:
                if f != self:
                    f.add_ignored(ranges)
        self._ignored.extend(ranges)

    def get_uranges(self):
        """Return the list of supported intervals (the list itself, not a
        copy).
        """
        return self._intervals

    def contains(self, char):
        """Tell whether <char> belongs to one of the supported intervals."""
        return any(interval.contains(char) for interval in self._intervals)

    def isignored(self, char):
        """Tell whether <char> belongs to one of the ignored intervals."""
        # This runs for every looked up character: only build the dump of
        # the ignored intervals when it is actually going to be logged.
        if self.log.isEnabledFor(logging.DEBUG):
            self.log.debug("%s: %s", self.id,
                           [str(a) for a in self._ignored])
        return any(interval.contains(char) for interval in self._ignored)

    def _loghas(self, id, char):
        """Log that the specification <id> provides <char>, falling back to
        the character ordinal when the character itself cannot be logged.
        """
        try:
            self.log.debug("%s has '%s'", id, str(char))
        except Exception:
            # Best effort only, but do not swallow KeyboardInterrupt or
            # SystemExit as a bare except would
            self.log.debug("%s has '%s'", id, ord(char))

    def match(self, char, excluded=None):
        """Return the font specification to use for the character <char>.

        If <char> is in the ignored intervals, this specification itself is
        returned. Otherwise the result is the first entry of <fontspecs>
        whose intervals contain <char>, the <excluded> entry being skipped.
        Return None when nothing matches.
        """
        self.log.debug("Lookup in %s", self.id)
        if self.isignored(char):
            self._loghas(self.id, char)
            return self

        for fontspec in self.fontspecs:
            # Don't waste time in scanning excluded nodes
            if fontspec == excluded:
                continue
            if fontspec.contains(char):
                self._loghas(fontspec.id, char)
                return fontspec
        return None