Tree - source-git/fonttools - CentOS Git server

source-git / fonttools

Files

Commit: 6a2e4c6b54dec229a7caa6d49f40af58beaae888
Blob Blame History Raw
# -*- coding: utf-8 -*-
from __future__ import print_function, division, absolute_import
from __future__ import unicode_literals
from fontTools.misc.py23 import *
from fontTools.misc import sstruct
from fontTools.misc.textTools import safeEval
from fontTools.misc.encodingTools import getEncoding
from fontTools.ttLib import newTable
from . import DefaultTable
import struct
import logging


log = logging.getLogger(__name__)

nameRecordFormat = """
		>	# big endian
		platformID:	H
		platEncID:	H
		langID:		H
		nameID:		H
		length:		H
		offset:		H
"""

nameRecordSize = sstruct.calcsize(nameRecordFormat)


class table__n_a_m_e(DefaultTable.DefaultTable):
	dependencies = ["ltag"]

	def decompile(self, data, ttFont):
		format, n, stringOffset = struct.unpack(b">HHH", data[:6])
		expectedStringOffset = 6 + n * nameRecordSize
		if stringOffset != expectedStringOffset:
			log.error(
				"'name' table stringOffset incorrect. Expected: %s; Actual: %s",
				expectedStringOffset, stringOffset)
		stringData = data[stringOffset:]
		data = data[6:]
		self.names = []
		for i in range(n):
			if len(data) < 12:
				log.error('skipping malformed name record #%d', i)
				continue
			name, data = sstruct.unpack2(nameRecordFormat, data, NameRecord())
			name.string = stringData[name.offset:name.offset+name.length]
			if name.offset + name.length > len(stringData):
				log.error('skipping malformed name record #%d', i)
				continue
			assert len(name.string) == name.length
			#if (name.platEncID, name.platformID) in ((0, 0), (1, 3)):
			#	if len(name.string) % 2:
			#		print "2-byte string doesn't have even length!"
			#		print name.__dict__
			del name.offset, name.length
			self.names.append(name)

	def compile(self, ttFont):
		if not hasattr(self, "names"):
			# only happens when there are NO name table entries read
			# from the TTX file
			self.names = []
		names = self.names
		names.sort() # sort according to the spec; see NameRecord.__lt__()
		stringData = b""
		format = 0
		n = len(names)
		stringOffset = 6 + n * sstruct.calcsize(nameRecordFormat)
		data = struct.pack(b">HHH", format, n, stringOffset)
		lastoffset = 0
		done = {}  # remember the data so we can reuse the "pointers"
		for name in names:
			string = name.toBytes()
			if string in done:
				name.offset, name.length = done[string]
			else:
				name.offset, name.length = done[string] = len(stringData), len(string)
				stringData = bytesjoin([stringData, string])
			data = data + sstruct.pack(nameRecordFormat, name)
		return data + stringData

	def toXML(self, writer, ttFont):
		for name in self.names:
			name.toXML(writer, ttFont)

	def fromXML(self, name, attrs, content, ttFont):
		if name != "namerecord":
			return # ignore unknown tags
		if not hasattr(self, "names"):
			self.names = []
		name = NameRecord()
		self.names.append(name)
		name.fromXML(name, attrs, content, ttFont)

	def getName(self, nameID, platformID, platEncID, langID=None):
		for namerecord in self.names:
			if (	namerecord.nameID == nameID and
					namerecord.platformID == platformID and
					namerecord.platEncID == platEncID):
				if langID is None or namerecord.langID == langID:
					return namerecord
		return None # not found

	def getDebugName(self, nameID):
		englishName = someName = None
		for name in self.names:
			if name.nameID != nameID:
				continue
			try:
				unistr = name.toUnicode()
			except UnicodeDecodeError:
				continue

			someName = unistr
			if (name.platformID, name.langID) in ((1, 0), (3, 0x409)):
				englishName = unistr
				break
		if englishName:
			return englishName
		elif someName:
			return someName
		else:
			return None

	def setName(self, string, nameID, platformID, platEncID, langID):
		""" Set the 'string' for the name record identified by 'nameID', 'platformID',
		'platEncID' and 'langID'. If a record with that nameID doesn't exist, create it
		and append to the name table.

		'string' can be of type `str` (`unicode` in PY2) or `bytes`. In the latter case,
		it is assumed to be already encoded with the correct plaform-specific encoding
		identified by the (platformID, platEncID, langID) triplet. A warning is issued
		to prevent unexpected results.
		"""
		if not hasattr(self, 'names'):
			self.names = []
		if not isinstance(string, unicode):
			if isinstance(string, bytes):
				log.warning(
					"name string is bytes, ensure it's correctly encoded: %r", string)
			else:
				raise TypeError(
					"expected unicode or bytes, found %s: %r" % (
						type(string).__name__, string))
		namerecord = self.getName(nameID, platformID, platEncID, langID)
		if namerecord:
			namerecord.string = string
		else:
			self.names.append(makeName(string, nameID, platformID, platEncID, langID))

	def _findUnusedNameID(self, minNameID=256):
		"""Finds an unused name id.

		The nameID is assigned in the range between 'minNameID' and 32767 (inclusive),
		following the last nameID in the name table.
		"""
		names = getattr(self, 'names', [])
		nameID = 1 + max([n.nameID for n in names] + [minNameID - 1])
		if nameID > 32767:
			raise ValueError("nameID must be less than 32768")
		return nameID

	def addMultilingualName(self, names, ttFont=None, nameID=None):
		"""Add a multilingual name, returning its name ID

		'names' is a dictionary with the name in multiple languages,
		such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
		The keys can be arbitrary IETF BCP 47 language codes;
		the values are Unicode strings.

		'ttFont' is the TTFont to which the names are added, or None.
		If present, the font's 'ltag' table can get populated
		to store exotic language codes, which allows encoding
		names that otherwise cannot get encoded at all.

		'nameID' is the name ID to be used, or None to let the library
		pick an unused name ID.
		"""
		if not hasattr(self, 'names'):
			self.names = []
		if nameID is None:
			nameID = self._findUnusedNameID()
		# TODO: Should minimize BCP 47 language codes.
		# https://github.com/fonttools/fonttools/issues/930
		for lang, name in sorted(names.items()):
			# Apple platforms have been recognizing Windows names
			# since early OSX (~2001), so we only add names
			# for the Macintosh platform when we cannot not make
			# a Windows name. This can happen for exotic BCP47
			# language tags that have no Windows language code.
			windowsName = _makeWindowsName(name, nameID, lang)
			if windowsName is not None:
				self.names.append(windowsName)
			else:
				macName = _makeMacName(name, nameID, lang, ttFont)
				if macName is not None:
					self.names.append(macName)
		return nameID

	def addName(self, string, platforms=((1, 0, 0), (3, 1, 0x409)), minNameID=255):
		""" Add a new name record containing 'string' for each (platformID, platEncID,
		langID) tuple specified in the 'platforms' list.

		The nameID is assigned in the range between 'minNameID'+1 and 32767 (inclusive),
		following the last nameID in the name table.
		If no 'platforms' are specified, two English name records are added, one for the
		Macintosh (platformID=0), and one for the Windows platform (3).

		The 'string' must be a Unicode string, so it can be encoded with different,
		platform-specific encodings.

		Return the new nameID.
		"""
		assert len(platforms) > 0, \
			"'platforms' must contain at least one (platformID, platEncID, langID) tuple"
		if not hasattr(self, 'names'):
			self.names = []
		if not isinstance(string, unicode):
			raise TypeError(
				"expected %s, found %s: %r" % (
					unicode.__name__, type(string).__name__,string ))
		nameID = self._findUnusedNameID(minNameID + 1)
		for platformID, platEncID, langID in platforms:
			self.names.append(makeName(string, nameID, platformID, platEncID, langID))
		return nameID


def makeName(string, nameID, platformID, platEncID, langID):
	name = NameRecord()
	name.string, name.nameID, name.platformID, name.platEncID, name.langID = (
		string, nameID, platformID, platEncID, langID)
	return name


def _makeWindowsName(name, nameID, language):
	"""Create a NameRecord for the Microsoft Windows platform

	'language' is an arbitrary IETF BCP 47 language identifier such
	as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. If Microsoft Windows
	does not support the desired language, the result will be None.
	Future versions of fonttools might return a NameRecord for the
	OpenType 'name' table format 1, but this is not implemented yet.
	"""
	langID = _WINDOWS_LANGUAGE_CODES.get(language.lower())
	if langID is not None:
		return makeName(name, nameID, 3, 1, langID)
	else:
		log.warning("cannot add Windows name in language %s "
		            "because fonttools does not yet support "
		            "name table format 1" % language)
		return None


def _makeMacName(name, nameID, language, font=None):
	"""Create a NameRecord for Apple platforms

	'language' is an arbitrary IETF BCP 47 language identifier such
	as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. When possible, we
	create a Macintosh NameRecord that is understood by old applications
	(platform ID 1 and an old-style Macintosh language enum). If this
	is not possible, we create a Unicode NameRecord (platform ID 0)
	whose language points to the font’s 'ltag' table. The latter
	can encode any string in any language, but legacy applications
	might not recognize the format (in which case they will ignore
	those names).

	'font' should be the TTFont for which you want to create a name.
	If 'font' is None, we only return NameRecords for legacy Macintosh;
	in that case, the result will be None for names that need to
	be encoded with an 'ltag' table.

	See the section “The language identifier” in Apple’s specification:
	https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
	"""
	macLang = _MAC_LANGUAGE_CODES.get(language.lower())
	macScript = _MAC_LANGUAGE_TO_SCRIPT.get(macLang)
	if macLang is not None and macScript is not None:
		encoding = getEncoding(1, macScript, macLang, default="ascii")
		# Check if we can actually encode this name. If we can't,
		# for example because we have no support for the legacy
		# encoding, or because the name string contains Unicode
		# characters that the legacy encoding cannot represent,
		# we fall back to encoding the name in Unicode and put
		# the language tag into the ltag table.
		try:
			_ = tobytes(name, encoding, errors="strict")
			return makeName(name, nameID, 1, macScript, macLang)
		except UnicodeEncodeError:
			pass
	if font is not None:
		ltag = font.tables.get("ltag")
		if ltag is None:
			ltag = font["ltag"] = newTable("ltag")
		# 0 = Unicode; 4 = “Unicode 2.0 or later semantics (non-BMP characters allowed)”
		# “The preferred platform-specific code for Unicode would be 3 or 4.”
		# https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
		return makeName(name, nameID, 0, 4, ltag.addTag(language))
	else:
		log.warning("cannot store language %s into 'ltag' table "
		            "without having access to the TTFont object" %
		            language)
		return None


class NameRecord(object):

	def getEncoding(self, default='ascii'):
		"""Returns the Python encoding name for this name entry based on its platformID,
		platEncID, and langID.  If encoding for these values is not known, by default
		'ascii' is returned.  That can be overriden by passing a value to the default
		argument.
		"""
		return getEncoding(self.platformID, self.platEncID, self.langID, default)

	def encodingIsUnicodeCompatible(self):
		return self.getEncoding(None) in ['utf_16_be', 'ucs2be', 'ascii', 'latin1']

	def __str__(self):
		return self.toStr(errors='backslashreplace')

	def isUnicode(self):
		return (self.platformID == 0 or
			(self.platformID == 3 and self.platEncID in [0, 1, 10]))

	def toUnicode(self, errors='strict'):
		"""
		If self.string is a Unicode string, return it; otherwise try decoding the
		bytes in self.string to a Unicode string using the encoding of this
		entry as returned by self.getEncoding(); Note that  self.getEncoding()
		returns 'ascii' if the encoding is unknown to the library.

		Certain heuristics are performed to recover data from bytes that are
		ill-formed in the chosen encoding, or that otherwise look misencoded
		(mostly around bad UTF-16BE encoded bytes, or bytes that look like UTF-16BE
		but marked otherwise).  If the bytes are ill-formed and the heuristics fail,
		the error is handled according to the errors parameter to this function, which is
		passed to the underlying decode() function; by default it throws a
		UnicodeDecodeError exception.

		Note: The mentioned heuristics mean that roundtripping a font to XML and back
		to binary might recover some misencoded data whereas just loading the font
		and saving it back will not change them.
		"""
		def isascii(b):
			return (b >= 0x20 and b <= 0x7E) or b in [0x09, 0x0A, 0x0D]
		encoding = self.getEncoding()
		string = self.string

		if encoding == 'utf_16_be' and len(string) % 2 == 1:
			# Recover badly encoded UTF-16 strings that have an odd number of bytes:
			# - If the last byte is zero, drop it.  Otherwise,
			# - If all the odd bytes are zero and all the even bytes are ASCII,
			#   prepend one zero byte.  Otherwise,
			# - If first byte is zero and all other bytes are ASCII, insert zero
			#   bytes between consecutive ASCII bytes.
			#
			# (Yes, I've seen all of these in the wild... sigh)
			if byteord(string[-1]) == 0:
				string = string[:-1]
			elif all(byteord(b) == 0 if i % 2 else isascii(byteord(b)) for i,b in enumerate(string)):
				string = b'\0' + string
			elif byteord(string[0]) == 0 and all(isascii(byteord(b)) for b in string[1:]):
				string = bytesjoin(b'\0'+bytechr(byteord(b)) for b in string[1:])

		string = tounicode(string, encoding=encoding, errors=errors)

		# If decoded strings still looks like UTF-16BE, it suggests a double-encoding.
		# Fix it up.
		if all(ord(c) == 0 if i % 2 == 0 else isascii(ord(c)) for i,c in enumerate(string)):
			# If string claims to be Mac encoding, but looks like UTF-16BE with ASCII text,
			# narrow it down.
			string = ''.join(c for c in string[1::2])

		return string

	def toBytes(self, errors='strict'):
		""" If self.string is a bytes object, return it; otherwise try encoding
		the Unicode string in self.string to bytes using the encoding of this
		entry as returned by self.getEncoding(); Note that self.getEncoding()
		returns 'ascii' if the encoding is unknown to the library.

		If the Unicode string cannot be encoded to bytes in the chosen encoding,
		the error is handled according to the errors parameter to this function,
		which is passed to the underlying encode() function; by default it throws a
		UnicodeEncodeError exception.
		"""
		return tobytes(self.string, encoding=self.getEncoding(), errors=errors)

	def toStr(self, errors='strict'):
		if str == bytes:
			# python 2
			return self.toBytes(errors)
		else:
			# python 3
			return self.toUnicode(errors)

	def toXML(self, writer, ttFont):
		try:
			unistr = self.toUnicode()
		except UnicodeDecodeError:
			unistr = None
		attrs = [
				("nameID", self.nameID),
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("langID", hex(self.langID)),
			]

		if unistr is None or not self.encodingIsUnicodeCompatible():
			attrs.append(("unicode", unistr is not None))

		writer.begintag("namerecord", attrs)
		writer.newline()
		if unistr is not None:
			writer.write(unistr)
		else:
			writer.write8bit(self.string)
		writer.newline()
		writer.endtag("namerecord")
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		self.nameID = safeEval(attrs["nameID"])
		self.platformID = safeEval(attrs["platformID"])
		self.platEncID = safeEval(attrs["platEncID"])
		self.langID =  safeEval(attrs["langID"])
		s = strjoin(content).strip()
		encoding = self.getEncoding()
		if self.encodingIsUnicodeCompatible() or safeEval(attrs.get("unicode", "False")):
			self.string = s.encode(encoding)
		else:
			# This is the inverse of write8bit...
			self.string = s.encode("latin1")

	def __lt__(self, other):
		if type(self) != type(other):
			return NotImplemented

		# implemented so that list.sort() sorts according to the spec.
		selfTuple = (
			getattr(self, "platformID", None),
			getattr(self, "platEncID", None),
			getattr(self, "langID", None),
			getattr(self, "nameID", None),
			getattr(self, "string", None),
		)
		otherTuple = (
			getattr(other, "platformID", None),
			getattr(other, "platEncID", None),
			getattr(other, "langID", None),
			getattr(other, "nameID", None),
			getattr(other, "string", None),
		)
		return selfTuple < otherTuple

	def __repr__(self):
		return "<NameRecord NameID=%d; PlatformID=%d; LanguageID=%d>" % (
				self.nameID, self.platformID, self.langID)


# Windows language ID → IETF BCP-47 language tag
#
# While Microsoft indicates a region/country for all its language
# IDs, we follow Unicode practice by omitting “most likely subtags”
# as per Unicode CLDR. For example, English is simply “en” and not
# “en-Latn” because according to Unicode, the default script
# for English is Latin.
#
# http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html
# http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
_WINDOWS_LANGUAGES = {
    0x0436: 'af',
    0x041C: 'sq',
    0x0484: 'gsw',
    0x045E: 'am',
    0x1401: 'ar-DZ',
    0x3C01: 'ar-BH',
    0x0C01: 'ar',
    0x0801: 'ar-IQ',
    0x2C01: 'ar-JO',
    0x3401: 'ar-KW',
    0x3001: 'ar-LB',
    0x1001: 'ar-LY',
    0x1801: 'ary',
    0x2001: 'ar-OM',
    0x4001: 'ar-QA',
    0x0401: 'ar-SA',
    0x2801: 'ar-SY',
    0x1C01: 'aeb',
    0x3801: 'ar-AE',
    0x2401: 'ar-YE',
    0x042B: 'hy',
    0x044D: 'as',
    0x082C: 'az-Cyrl',
    0x042C: 'az',
    0x046D: 'ba',
    0x042D: 'eu',
    0x0423: 'be',
    0x0845: 'bn',
    0x0445: 'bn-IN',
    0x201A: 'bs-Cyrl',
    0x141A: 'bs',
    0x047E: 'br',
    0x0402: 'bg',
    0x0403: 'ca',
    0x0C04: 'zh-HK',
    0x1404: 'zh-MO',
    0x0804: 'zh',
    0x1004: 'zh-SG',
    0x0404: 'zh-TW',
    0x0483: 'co',
    0x041A: 'hr',
    0x101A: 'hr-BA',
    0x0405: 'cs',
    0x0406: 'da',
    0x048C: 'prs',
    0x0465: 'dv',
    0x0813: 'nl-BE',
    0x0413: 'nl',
    0x0C09: 'en-AU',
    0x2809: 'en-BZ',
    0x1009: 'en-CA',
    0x2409: 'en-029',
    0x4009: 'en-IN',
    0x1809: 'en-IE',
    0x2009: 'en-JM',
    0x4409: 'en-MY',
    0x1409: 'en-NZ',
    0x3409: 'en-PH',
    0x4809: 'en-SG',
    0x1C09: 'en-ZA',
    0x2C09: 'en-TT',
    0x0809: 'en-GB',
    0x0409: 'en',
    0x3009: 'en-ZW',
    0x0425: 'et',
    0x0438: 'fo',
    0x0464: 'fil',
    0x040B: 'fi',
    0x080C: 'fr-BE',
    0x0C0C: 'fr-CA',
    0x040C: 'fr',
    0x140C: 'fr-LU',
    0x180C: 'fr-MC',
    0x100C: 'fr-CH',
    0x0462: 'fy',
    0x0456: 'gl',
    0x0437: 'ka',
    0x0C07: 'de-AT',
    0x0407: 'de',
    0x1407: 'de-LI',
    0x1007: 'de-LU',
    0x0807: 'de-CH',
    0x0408: 'el',
    0x046F: 'kl',
    0x0447: 'gu',
    0x0468: 'ha',
    0x040D: 'he',
    0x0439: 'hi',
    0x040E: 'hu',
    0x040F: 'is',
    0x0470: 'ig',
    0x0421: 'id',
    0x045D: 'iu',
    0x085D: 'iu-Latn',
    0x083C: 'ga',
    0x0434: 'xh',
    0x0435: 'zu',
    0x0410: 'it',
    0x0810: 'it-CH',
    0x0411: 'ja',
    0x044B: 'kn',
    0x043F: 'kk',
    0x0453: 'km',
    0x0486: 'quc',
    0x0487: 'rw',
    0x0441: 'sw',
    0x0457: 'kok',
    0x0412: 'ko',
    0x0440: 'ky',
    0x0454: 'lo',
    0x0426: 'lv',
    0x0427: 'lt',
    0x082E: 'dsb',
    0x046E: 'lb',
    0x042F: 'mk',
    0x083E: 'ms-BN',
    0x043E: 'ms',
    0x044C: 'ml',
    0x043A: 'mt',
    0x0481: 'mi',
    0x047A: 'arn',
    0x044E: 'mr',
    0x047C: 'moh',
    0x0450: 'mn',
    0x0850: 'mn-CN',
    0x0461: 'ne',
    0x0414: 'nb',
    0x0814: 'nn',
    0x0482: 'oc',
    0x0448: 'or',
    0x0463: 'ps',
    0x0415: 'pl',
    0x0416: 'pt',
    0x0816: 'pt-PT',
    0x0446: 'pa',
    0x046B: 'qu-BO',
    0x086B: 'qu-EC',
    0x0C6B: 'qu',
    0x0418: 'ro',
    0x0417: 'rm',
    0x0419: 'ru',
    0x243B: 'smn',
    0x103B: 'smj-NO',
    0x143B: 'smj',
    0x0C3B: 'se-FI',
    0x043B: 'se',
    0x083B: 'se-SE',
    0x203B: 'sms',
    0x183B: 'sma-NO',
    0x1C3B: 'sms',
    0x044F: 'sa',
    0x1C1A: 'sr-Cyrl-BA',
    0x0C1A: 'sr',
    0x181A: 'sr-Latn-BA',
    0x081A: 'sr-Latn',
    0x046C: 'nso',
    0x0432: 'tn',
    0x045B: 'si',
    0x041B: 'sk',
    0x0424: 'sl',
    0x2C0A: 'es-AR',
    0x400A: 'es-BO',
    0x340A: 'es-CL',
    0x240A: 'es-CO',
    0x140A: 'es-CR',
    0x1C0A: 'es-DO',
    0x300A: 'es-EC',
    0x440A: 'es-SV',
    0x100A: 'es-GT',
    0x480A: 'es-HN',
    0x080A: 'es-MX',
    0x4C0A: 'es-NI',
    0x180A: 'es-PA',
    0x3C0A: 'es-PY',
    0x280A: 'es-PE',
    0x500A: 'es-PR',

    # Microsoft has defined two different language codes for
    # “Spanish with modern sorting” and “Spanish with traditional
    # sorting”. This makes sense for collation APIs, and it would be
    # possible to express this in BCP 47 language tags via Unicode
    # extensions (eg., “es-u-co-trad” is “Spanish with traditional
    # sorting”). However, for storing names in fonts, this distinction
    # does not make sense, so we use “es” in both cases.
    0x0C0A: 'es',
    0x040A: 'es',

    0x540A: 'es-US',
    0x380A: 'es-UY',
    0x200A: 'es-VE',
    0x081D: 'sv-FI',
    0x041D: 'sv',
    0x045A: 'syr',
    0x0428: 'tg',
    0x085F: 'tzm',
    0x0449: 'ta',
    0x0444: 'tt',
    0x044A: 'te',
    0x041E: 'th',
    0x0451: 'bo',
    0x041F: 'tr',
    0x0442: 'tk',
    0x0480: 'ug',
    0x0422: 'uk',
    0x042E: 'hsb',
    0x0420: 'ur',
    0x0843: 'uz-Cyrl',
    0x0443: 'uz',
    0x042A: 'vi',
    0x0452: 'cy',
    0x0488: 'wo',
    0x0485: 'sah',
    0x0478: 'ii',
    0x046A: 'yo',
}


_MAC_LANGUAGES = {
    0: 'en',
    1: 'fr',
    2: 'de',
    3: 'it',
    4: 'nl',
    5: 'sv',
    6: 'es',
    7: 'da',
    8: 'pt',
    9: 'no',
    10: 'he',
    11: 'ja',
    12: 'ar',
    13: 'fi',
    14: 'el',
    15: 'is',
    16: 'mt',
    17: 'tr',
    18: 'hr',
    19: 'zh-Hant',
    20: 'ur',
    21: 'hi',
    22: 'th',
    23: 'ko',
    24: 'lt',
    25: 'pl',
    26: 'hu',
    27: 'es',
    28: 'lv',
    29: 'se',
    30: 'fo',
    31: 'fa',
    32: 'ru',
    33: 'zh',
    34: 'nl-BE',
    35: 'ga',
    36: 'sq',
    37: 'ro',
    38: 'cz',
    39: 'sk',
    40: 'sl',
    41: 'yi',
    42: 'sr',
    43: 'mk',
    44: 'bg',
    45: 'uk',
    46: 'be',
    47: 'uz',
    48: 'kk',
    49: 'az-Cyrl',
    50: 'az-Arab',
    51: 'hy',
    52: 'ka',
    53: 'mo',
    54: 'ky',
    55: 'tg',
    56: 'tk',
    57: 'mn-CN',
    58: 'mn',
    59: 'ps',
    60: 'ks',
    61: 'ku',
    62: 'sd',
    63: 'bo',
    64: 'ne',
    65: 'sa',
    66: 'mr',
    67: 'bn',
    68: 'as',
    69: 'gu',
    70: 'pa',
    71: 'or',
    72: 'ml',
    73: 'kn',
    74: 'ta',
    75: 'te',
    76: 'si',
    77: 'my',
    78: 'km',
    79: 'lo',
    80: 'vi',
    81: 'id',
    82: 'tl',
    83: 'ms',
    84: 'ms-Arab',
    85: 'am',
    86: 'ti',
    87: 'om',
    88: 'so',
    89: 'sw',
    90: 'rw',
    91: 'rn',
    92: 'ny',
    93: 'mg',
    94: 'eo',
    128: 'cy',
    129: 'eu',
    130: 'ca',
    131: 'la',
    132: 'qu',
    133: 'gn',
    134: 'ay',
    135: 'tt',
    136: 'ug',
    137: 'dz',
    138: 'jv',
    139: 'su',
    140: 'gl',
    141: 'af',
    142: 'br',
    143: 'iu',
    144: 'gd',
    145: 'gv',
    146: 'ga',
    147: 'to',
    148: 'el-polyton',
    149: 'kl',
    150: 'az',
    151: 'nn',
}


_WINDOWS_LANGUAGE_CODES = {lang.lower(): code for code, lang in _WINDOWS_LANGUAGES.items()}
_MAC_LANGUAGE_CODES = {lang.lower(): code for code, lang in _MAC_LANGUAGES.items()}


# MacOS language ID → MacOS script ID
#
# Note that the script ID is not sufficient to determine what encoding
# to use in TrueType files. For some languages, MacOS used a modification
# of a mainstream script. For example, an Icelandic name would be stored
# with smRoman in the TrueType naming table, but the actual encoding
# is a special Icelandic version of the normal Macintosh Roman encoding.
# As another example, Inuktitut uses an 8-bit encoding for Canadian Aboriginal
# Syllables but MacOS had run out of available script codes, so this was
# done as a (pretty radical) “modification” of Ethiopic.
#
# http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/Readme.txt
_MAC_LANGUAGE_TO_SCRIPT = {
    0: 0,  # langEnglish → smRoman
    1: 0,  # langFrench → smRoman
    2: 0,  # langGerman → smRoman
    3: 0,  # langItalian → smRoman
    4: 0,  # langDutch → smRoman
    5: 0,  # langSwedish → smRoman
    6: 0,  # langSpanish → smRoman
    7: 0,  # langDanish → smRoman
    8: 0,  # langPortuguese → smRoman
    9: 0,  # langNorwegian → smRoman
    10: 5,  # langHebrew → smHebrew
    11: 1,  # langJapanese → smJapanese
    12: 4,  # langArabic → smArabic
    13: 0,  # langFinnish → smRoman
    14: 6,  # langGreek → smGreek
    15: 0,  # langIcelandic → smRoman (modified)
    16: 0,  # langMaltese → smRoman
    17: 0,  # langTurkish → smRoman (modified)
    18: 0,  # langCroatian → smRoman (modified)
    19: 2,  # langTradChinese → smTradChinese
    20: 4,  # langUrdu → smArabic
    21: 9,  # langHindi → smDevanagari
    22: 21,  # langThai → smThai
    23: 3,  # langKorean → smKorean
    24: 29,  # langLithuanian → smCentralEuroRoman
    25: 29,  # langPolish → smCentralEuroRoman
    26: 29,  # langHungarian → smCentralEuroRoman
    27: 29,  # langEstonian → smCentralEuroRoman
    28: 29,  # langLatvian → smCentralEuroRoman
    29: 0,  # langSami → smRoman
    30: 0,  # langFaroese → smRoman (modified)
    31: 4,  # langFarsi → smArabic (modified)
    32: 7,  # langRussian → smCyrillic
    33: 25,  # langSimpChinese → smSimpChinese
    34: 0,  # langFlemish → smRoman
    35: 0,  # langIrishGaelic → smRoman (modified)
    36: 0,  # langAlbanian → smRoman
    37: 0,  # langRomanian → smRoman (modified)
    38: 29,  # langCzech → smCentralEuroRoman
    39: 29,  # langSlovak → smCentralEuroRoman
    40: 0,  # langSlovenian → smRoman (modified)
    41: 5,  # langYiddish → smHebrew
    42: 7,  # langSerbian → smCyrillic
    43: 7,  # langMacedonian → smCyrillic
    44: 7,  # langBulgarian → smCyrillic
    45: 7,  # langUkrainian → smCyrillic (modified)
    46: 7,  # langByelorussian → smCyrillic
    47: 7,  # langUzbek → smCyrillic
    48: 7,  # langKazakh → smCyrillic
    49: 7,  # langAzerbaijani → smCyrillic
    50: 4,  # langAzerbaijanAr → smArabic
    51: 24,  # langArmenian → smArmenian
    52: 23,  # langGeorgian → smGeorgian
    53: 7,  # langMoldavian → smCyrillic
    54: 7,  # langKirghiz → smCyrillic
    55: 7,  # langTajiki → smCyrillic
    56: 7,  # langTurkmen → smCyrillic
    57: 27,  # langMongolian → smMongolian
    58: 7,  # langMongolianCyr → smCyrillic
    59: 4,  # langPashto → smArabic
    60: 4,  # langKurdish → smArabic
    61: 4,  # langKashmiri → smArabic
    62: 4,  # langSindhi → smArabic
    63: 26,  # langTibetan → smTibetan
    64: 9,  # langNepali → smDevanagari
    65: 9,  # langSanskrit → smDevanagari
    66: 9,  # langMarathi → smDevanagari
    67: 13,  # langBengali → smBengali
    68: 13,  # langAssamese → smBengali
    69: 11,  # langGujarati → smGujarati
    70: 10,  # langPunjabi → smGurmukhi
    71: 12,  # langOriya → smOriya
    72: 17,  # langMalayalam → smMalayalam
    73: 16,  # langKannada → smKannada
    74: 14,  # langTamil → smTamil
    75: 15,  # langTelugu → smTelugu
    76: 18,  # langSinhalese → smSinhalese
    77: 19,  # langBurmese → smBurmese
    78: 20,  # langKhmer → smKhmer
    79: 22,  # langLao → smLao
    80: 30,  # langVietnamese → smVietnamese
    81: 0,  # langIndonesian → smRoman
    82: 0,  # langTagalog → smRoman
    83: 0,  # langMalayRoman → smRoman
    84: 4,  # langMalayArabic → smArabic
    85: 28,  # langAmharic → smEthiopic
    86: 28,  # langTigrinya → smEthiopic
    87: 28,  # langOromo → smEthiopic
    88: 0,  # langSomali → smRoman
    89: 0,  # langSwahili → smRoman
    90: 0,  # langKinyarwanda → smRoman
    91: 0,  # langRundi → smRoman
    92: 0,  # langNyanja → smRoman
    93: 0,  # langMalagasy → smRoman
    94: 0,  # langEsperanto → smRoman
    128: 0,  # langWelsh → smRoman (modified)
    129: 0,  # langBasque → smRoman
    130: 0,  # langCatalan → smRoman
    131: 0,  # langLatin → smRoman
    132: 0,  # langQuechua → smRoman
    133: 0,  # langGuarani → smRoman
    134: 0,  # langAymara → smRoman
    135: 7,  # langTatar → smCyrillic
    136: 4,  # langUighur → smArabic
    137: 26,  # langDzongkha → smTibetan
    138: 0,  # langJavaneseRom → smRoman
    139: 0,  # langSundaneseRom → smRoman
    140: 0,  # langGalician → smRoman
    141: 0,  # langAfrikaans → smRoman
    142: 0,  # langBreton → smRoman (modified)
    143: 28,  # langInuktitut → smEthiopic (modified)
    144: 0,  # langScottishGaelic → smRoman (modified)
    145: 0,  # langManxGaelic → smRoman (modified)
    146: 0,  # langIrishGaelicScript → smRoman (modified)
    147: 0,  # langTongan → smRoman
    148: 6,  # langGreekAncient → smRoman
    149: 0,  # langGreenlandic → smRoman
    150: 0,  # langAzerbaijanRoman → smRoman
    151: 0,   # langNynorsk → smRoman
}
source-git / fonttools

Source Code

Files