#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Time-stamp: <2008-06-23 22:21:26 ah>

"""
Provide an encoder for a font specification configuration: the encoder is fed
with Unicode characters one by one and determines the needed font switches
between the preceding and the current character.
"""
|
|
Packit |
0f19cf |
|
|
Packit |
0f19cf |
import re
|
|
Packit |
0f19cf |
import xml.dom.minidom
|
|
Packit |
0f19cf |
import logging
|
|
Packit |
0f19cf |
|
|
Packit |
0f19cf |
|
|
Packit |
0f19cf |
def _indent(string, width=2):
    """Indent the <string> lines by <width> blank characters.

    Line terminators are preserved.  An empty <string> yields just the
    indentation prefix, because the prefix is emitted before the (absent)
    first line.
    """
    prefix = ' ' * width
    # keepends=True keeps each line's terminator, so joining the pieces with
    # the prefix places it at the start of every line after the first one.
    return prefix + prefix.join(string.splitlines(keepends=True))
|
|
Packit |
0f19cf |
|
|
Packit |
0f19cf |
class UnicodeInterval:
    """Unicode codepoint interval, including all codepoints between its minimum
    and maximum boundary.

    For any Unicode codepoint it can be queried if it belongs to the interval.
    A fresh instance covers only codepoint 0; the from_*() methods set the
    boundaries and return the instance itself, so construction can be chained:
    UnicodeInterval().from_interval('U0041', 'U005A').
    """

    # Internal data attributes:
    # _min_boundary: Minimum boundary of the codepoint interval (ordinal)
    # _max_boundary: Maximum boundary of the codepoint interval (ordinal)

    # 'U' or 'u', an optional '+', then the hexadecimal codepoint value.
    _re_codepoint = re.compile(r'^[Uu]\+?([0-9A-Fa-f]+)$')

    def __init__(self):
        self._min_boundary = 0
        self._max_boundary = 0

    def __str__(self):
        """Dump the instance's data attributes.

        A single-codepoint interval is shown as '[min]', any other one as
        '[min,max]' (decimal ordinals).
        """
        if self._max_boundary == self._min_boundary:
            return '[{}]'.format(self._min_boundary)
        return '[{},{}]'.format(self._min_boundary, self._max_boundary)

    def _unicode_to_ordinal(self, codepoint):
        """Return the ordinal of the specified codepoint.

        <codepoint> is a string such as 'U0041' or 'u+e9'.  Raises
        RuntimeError when the string does not have that form.
        """
        m = self._re_codepoint.match(codepoint)
        if not m:
            raise RuntimeError('Not a unicode codepoint: ' + codepoint)
        return int(m.group(1), 16)

    def from_char(self, char):
        """Interval for a single character"""
        self._min_boundary = self._max_boundary = ord(char)
        return self

    def from_codepoint(self, codepoint):
        """Interval for a single character defined as unicode string."""
        ordinal = self._unicode_to_ordinal(codepoint)
        self._min_boundary = self._max_boundary = ordinal
        return self

    def from_interval(self, codepoint1, codepoint2):
        """Interval from a unicode range.

        The two codepoint strings may be given in any order.  Both are parsed
        before the instance is modified, so a RuntimeError raised for a
        malformed codepoint leaves the previous boundaries untouched.
        """
        first = self._unicode_to_ordinal(codepoint1)
        second = self._unicode_to_ordinal(codepoint2)
        self._min_boundary = min(first, second)
        self._max_boundary = max(first, second)
        return self

    def contains(self, char):
        """
        Determine whether the specified character is contained in this
        instance's interval (both boundaries included).
        """
        return self._min_boundary <= ord(char) <= self._max_boundary
|
|
Packit |
0f19cf |
|
|
Packit |
0f19cf |
|
|
Packit |
0f19cf |
class FontSpec:
    """
    Font specification, consisting of one or several unicode character
    intervals and of fonts to select for those characters. The object
    fully defines the fonts to switch to.
    """

    # Internal data attributes:
    # _intervals: UnicodeInterval list
    # _ignored: list of intervals tested by isignored() and match()

    transition_types = ['enter', 'inter', 'exit']
    # The optional '+' keeps the accepted notation in line with
    # UnicodeInterval, which parses both 'U0041' and 'U+0041'.
    _re_interval = re.compile(
        r'^([Uu]\+?[0-9A-Fa-f]+)-([Uu]\+?[0-9A-Fa-f]+)$')
    _re_codepoint = re.compile(r'^([Uu]\+?[0-9A-Fa-f]+)$')

    def __init__(self, intervals=None, subfont_first=False):
        """Create a font specification from the specified codepoint intervals.

        <intervals> is either an iterable of interval objects or a single,
        non-iterable interval object; a false value means "no interval".
        <subfont_first> makes add_subfont() place the sub-specifications
        ahead of this one in the lookup list.
        The other data attributes will be set by the caller later.
        """
        self.type = ""
        self.id = None
        self.refmode = None
        # One {font type: font name} mapping per transition type.
        self.transitions = {name: {} for name in self.transition_types}
        self.fontspecs = [self]
        self.subfont_first = subfont_first
        self._ignored = []
        self.log = logging.getLogger("dblatex")

        if not intervals:
            self._intervals = []
            return

        try:
            self._intervals = list(intervals)
        except TypeError:
            # Not iterable: a single interval object was passed.
            self._intervals = [intervals]

    def fromnode(self, node):
        """Fill this specification from a fontspec DOM element.

        Reads the 'range', 'charset', 'id', 'refmode' and 'type' attributes
        and the font definitions of every transition type.  A non-empty
        'range' replaces the current intervals; otherwise each character of
        'charset' is appended as a single-character interval.
        """
        urange = node.getAttribute('range')
        charset = node.getAttribute('charset')
        ident = node.getAttribute('id')
        refmode = node.getAttribute('refmode')
        self.type = node.getAttribute('type')

        if urange:
            self._intervals = self._parse_range(urange)
        elif charset:
            for char in charset:
                self.add_char(char)

        # Unique identifier
        if ident:
            self.id = ident
        if refmode:
            self.refmode = refmode

        for transition_type in self.transition_types:
            self._parse_transitions(node, transition_type)

    def mainfont(self):
        """Return the 'main' font of the enter transition, or its 'sans'
        font when there is no (non-empty) 'main' one; None if neither is set.
        """
        # Try to return the most representative font of this spec
        entering = self.transitions["enter"]
        return entering.get("main") or entering.get("sans")

    def _parse_range(self, range):
        """Parse the specified /fonts/fontspec@range attribute to a
        UnicodeInterval list.

        The attribute is a whitespace separated list of chunks, each one
        being a single codepoint ('U0041') or an interval ('U0041-U005A').
        Raises RuntimeError when a chunk has neither form.
        """
        intervals = []
        for chunk in range.split():
            m = self._re_interval.match(chunk)
            if m:
                intervals.append(
                    UnicodeInterval().from_interval(m.group(1), m.group(2)))
                continue
            m = self._re_codepoint.match(chunk)
            if not m:
                raise RuntimeError('Unable to parse range: "' + range + '"')
            intervals.append(UnicodeInterval().from_codepoint(m.group(1)))
        return intervals

    def _parse_transitions(self, node, transition_type):
        """Evaluate the font elements of the specified fontspec element for the
        specified transition type (enter, inter or exit).

        The text content of each <font> element is stored under every type
        listed in its whitespace separated 'type' attribute.  <font> elements
        without text are skipped.
        """
        fontlist = self.transitions[transition_type]

        for dom_transition in node.getElementsByTagName(transition_type):
            for dom_font in dom_transition.getElementsByTagName('font'):
                font_types = dom_font.getAttribute("type").split()
                font = ''.join(child.nodeValue
                               for child in dom_font.childNodes
                               if child.nodeType == child.TEXT_NODE)
                if not font:
                    continue
                for font_type in font_types:
                    fontlist[font_type] = font

    def _switch_to(self, fonts):
        """
        Return a string with the XeTeX font switching commands for the
        specified font types.

        <fonts> maps a font type to a font name.  The result is empty when
        <fonts> is empty; otherwise the switches are wrapped between
        \\savefamily and \\loadfamily{}.
        """
        # Raw string: '\s' is not a valid escape sequence in a plain literal.
        switches = ''.join(r'\switch%sfont{%s}' % (font_type, font)
                           for font_type, font in fonts.items())
        if not switches:
            return switches
        return r"\savefamily" + switches + r"\loadfamily{}"

    def enter(self):
        """Return the font switches of the 'enter' transition."""
        self.log.debug("enter in %s", self.id)
        return self._switch_to(self.transitions["enter"])

    def exit(self):
        """Return the font switches of the 'exit' transition."""
        self.log.debug("exit from %s", self.id)
        return self._switch_to(self.transitions["exit"])

    def interchar(self):
        """Return the font switches of the 'inter' transition."""
        return self._switch_to(self.transitions["inter"])

    def __str__(self):
        """Dump the instance's data attributes."""
        lines = ['FontSpec:',
                 '\n Id: %s' % self.id,
                 '\n Refmode: %s' % self.refmode,
                 '\n subFirst: %s' % self.subfont_first]
        lines.extend('\n' + _indent(str(interval))
                     for interval in self._intervals)
        return ''.join(lines)

    def add_subfont(self, fontspec):
        """Register <fontspec> as a sub-specification looked up by match().

        With subfont_first set, the sub-specification is inserted just before
        the last entry of the lookup list; otherwise it is appended.
        """
        self.log.debug("%s -> %s", self.id, fontspec.id)
        if self.subfont_first:
            # The list starts as [self], so inserting before the last entry
            # keeps the sub-specifications ahead of this specification.
            self.fontspecs.insert(-1, fontspec)
        else:
            self.fontspecs.append(fontspec)

    def add_char(self, char):
        """Append a single-character interval for <char>."""
        self._intervals.append(UnicodeInterval().from_char(char))

    def add_uranges(self, ranges, depth=1):
        """Extend the supported intervals with <ranges>.

        A true <depth> also extends the sub-specifications.  <depth> acts as
        a flag only: the recursive calls use its default value.
        """
        # Recursively extend the supported character range
        if depth:
            for sub in self.fontspecs:
                if sub is not self:
                    sub.add_uranges(ranges)
        self._intervals.extend(ranges)

    def add_ignored(self, ranges, depth=1):
        """Extend the ignored intervals with <ranges>.

        A true <depth> also extends the sub-specifications.  <depth> acts as
        a flag only: the recursive calls use its default value.
        """
        if depth:
            for sub in self.fontspecs:
                if sub is not self:
                    sub.add_ignored(ranges)
        self._ignored.extend(ranges)

    def get_uranges(self):
        """Return the interval list (the internal list, not a copy)."""
        return self._intervals

    def contains(self, char):
        """Tell whether one of this specification's intervals has <char>."""
        return any(interval.contains(char) for interval in self._intervals)

    def isignored(self, char):
        """Tell whether one of the ignored intervals has <char>."""
        # Called for every looked up character: only build the dump of the
        # ignored intervals when it is actually going to be logged.
        if self.log.isEnabledFor(logging.DEBUG):
            self.log.debug("%s: %s", self.id,
                           [str(a) for a in self._ignored])
        return any(interval.contains(char) for interval in self._ignored)

    def _loghas(self, id, char):
        """Log (debug level) that the specification <id> has <char>."""
        try:
            self.log.debug("%s has '%s'" % (id, str(char)))
        except UnicodeError:
            # The character cannot be rendered: log its ordinal instead.
            self.log.debug("%s has '%s'" % (id, ord(char)))

    def match(self, char, excluded=None):
        """Return the font specification to use for the specified character.

        This specification is returned when <char> is in its ignored
        intervals.  Otherwise the entries of the lookup list (this
        specification and its sub-specifications) are tried in order,
        skipping <excluded>, and the first one containing <char> is returned.
        None is returned when no entry contains <char>.
        """
        self.log.debug("Lookup in %s", self.id)
        if self.isignored(char):
            self._loghas(self.id, char)
            return self

        for fontspec in self.fontspecs:
            # Don't waste time in scanning excluded nodes
            if fontspec is excluded:
                continue
            if fontspec.contains(char):
                self._loghas(fontspec.id, char)
                return fontspec
        return None
|
|
Packit |
0f19cf |
|