Blame src/gen-use-table.py

Packit Service 5bcba8
#!/usr/bin/python
Packit Service 5bcba8
Packit Service 5bcba8
import sys
Packit Service 5bcba8
Packit Service 5bcba8
# The script needs exactly four input files (see the usage text below).
if len (sys.argv) != 5:
	# Written with sys.stderr.write instead of the Python 2-only
	# "print >>sys.stderr" statement; the emitted text is unchanged.
	sys.stderr.write ("usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt\n")
	sys.exit (1)
Packit Service 5bcba8
Packit Service 5bcba8
# Blocks whose code points are dropped from the merged data further down.
BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"]

# open() instead of the Python 2-only file() builtin.
files = [open (x) for x in sys.argv[1:]]

# Keep the first two lines of every input except the third one
# (UnicodeData.txt), which gets a placeholder entry instead.
headers = [[f.readline () for i in range (2)] for j,f in enumerate(files) if j != 2]
headers.append (["UnicodeData.txt does not have a header."])

# data[i] maps code point -> property value read from input file i;
# values[i] maps property value -> number of code points carrying it.
data = [{} for f in files]
values = [{} for f in files]
for i, f in enumerate (files):
	# The third input keeps the value of interest in field 2, the others in field 1.
	value_field = 2 if i == 2 else 1
	for line in f:

		# Drop a trailing '#' comment, if any.
		line = line.split ('#', 1)[0]

		fields = [x.strip () for x in line.split (';')]
		if len (fields) == 1:
			# No ';' separator: blank or comment-only line.
			continue

		# First field is either "XXXX" or an inclusive range "XXXX..YYYY".
		uu = fields[0].split ('..')
		start = int (uu[0], 16)
		end = start if len (uu) == 1 else int (uu[1], 16)

		t = fields[value_field]

		for u in range (start, end + 1):
			data[i][u] = t
		values[i][t] = values[i].get (t, 0) + end - start + 1

# Everything has been read; release the file handles.
for f in files:
	f.close ()
Packit Service 5bcba8
Packit Service 5bcba8
# Fallback value for each of the four inputs, in input order.
defaults = ('Other', 'Not_Applicable', 'Cn', 'No_Block')

# TODO Characters that are not in Unicode Indic files, but used in USE
for u in [0x034F, 0x2060] + list (range (0xFE00, 0xFE0F + 1)):
	data[0][u] = defaults[0]

# Merge data into one dict:
for i, v in enumerate (defaults):
	values[i][v] = values[i].get (v, 0) + 1

# merged[u] is a 4-element list, one slot per input file.  Only the first
# two inputs may introduce a code point; the last two merely fill in
# slots of code points that are already present.
merged = {}
for i, d in enumerate (data):
	for u, v in d.items ():
		if u in merged:
			merged[u][i] = v
		elif i < 2:
			row = list (defaults)
			row[i] = v
			merged[u] = row

# Drop every code point whose block (slot 3) is blacklisted.
data = dict ((u, row) for u, row in merged.items () if row[3] not in BLACKLISTED_BLOCKS)
del merged
num = len (data)
Packit Service 5bcba8
Packit Service 5bcba8
Packit Service 5bcba8
# Every property value name the script refers to by identifier.
property_names = [
	# General_Category
	'Cc', 'Cf', 'Cn', 'Co', 'Cs',
	'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
	'Mc', 'Me', 'Mn',
	'Nd', 'Nl', 'No',
	'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps',
	'Sc', 'Sk', 'Sm', 'So',
	'Zl', 'Zp', 'Zs',
	# Indic_Syllabic_Category
	'Other', 'Bindu', 'Visarga', 'Avagraha', 'Nukta', 'Virama',
	'Pure_Killer', 'Invisible_Stacker',
	'Vowel_Independent', 'Vowel_Dependent', 'Vowel',
	'Consonant_Placeholder', 'Consonant', 'Consonant_Dead',
	'Consonant_With_Stacker', 'Consonant_Prefixed',
	'Consonant_Preceding_Repha', 'Consonant_Succeeding_Repha',
	'Consonant_Subjoined', 'Consonant_Medial', 'Consonant_Final',
	'Consonant_Head_Letter',
	'Modifying_Letter', 'Tone_Letter', 'Tone_Mark',
	'Gemination_Mark', 'Cantillation_Mark', 'Register_Shifter',
	'Syllable_Modifier', 'Consonant_Killer',
	'Non_Joiner', 'Joiner', 'Number_Joiner', 'Number', 'Brahmi_Joining_Number',
	# Indic_Positional_Category
	'Not_Applicable', 'Right', 'Left', 'Visual_Order_Left', 'Left_And_Right',
	'Top', 'Bottom',
	'Top_And_Bottom', 'Top_And_Right', 'Top_And_Left', 'Top_And_Left_And_Right',
	'Bottom_And_Right', 'Top_And_Bottom_And_Right',
	'Overstruck',
]
Packit Service 5bcba8
Packit Service 5bcba8
class PropertyValue(object):
	"""A named property value that compares equal to its own name.

	Instances compare equal both to other PropertyValue objects with the
	same name and to the plain string of that name.  The hash matches the
	hash of the name so that equality and hashing stay consistent, which
	makes dict/set membership tests against string keys work.
	"""
	def __init__(self, name_):
		self.name = name_
	def __str__(self):
		return self.name
	def __repr__(self):
		return "PropertyValue(%r)" % (self.name,)
	def __eq__(self, other):
		# Objects carrying a .name are compared by it; anything else
		# (in particular a plain string) is compared directly.  This avoids
		# the Python 2-only `basestring` check.
		return self.name == getattr(other, 'name', other)
	def __ne__(self, other):
		return not (self == other)
	def __hash__(self):
		# Must agree with __eq__: equal to the string => same hash as it.
		return hash(self.name)
Packit Service 5bcba8
Packit Service 5bcba8
# Map each property name to a PropertyValue and expose every one of them
# as a module-level identifier of the same name.
property_values = {}

for name in property_names:
	value = PropertyValue(name)
	# Both containers are keyed by plain strings, so test the name itself:
	# testing the PropertyValue object only works if its hash happens to
	# match the string's hash.
	assert name not in property_values
	assert name not in globals()
	property_values[name] = value
globals().update(property_values)
Packit Service 5bcba8
Packit Service 5bcba8
Packit Service 5bcba8
def is_BASE(U, UISC, UGC):
	"""Predicate for the 'B' entry of use_mapping.

	True when UISC is one of the unconditional base categories, or when
	UGC is Lo and UISC is one of the categories listed second.
	"""
	if UISC in (Number, Consonant, Consonant_Head_Letter,
			#SPEC-DRAFT Consonant_Placeholder,
			Tone_Letter,
			Vowel_Independent #SPEC-DRAFT
			):
		return True
	return (UGC == Lo and
		UISC in (Avagraha, Bindu, Consonant_Final, Consonant_Medial,
			Consonant_Subjoined, Vowel, Vowel_Dependent))
Packit Service 5bcba8
def is_BASE_IND(U, UISC, UGC):
	"""Predicate for the 'IND' entry of use_mapping.

	True for Consonant_Dead / Modifying_Letter, and for Po characters
	other than U+104E and U+2022.
	"""
	#SPEC-DRAFT return (UISC in [Consonant_Dead, Modifying_Letter] or UGC == Po)
	if UISC in (Consonant_Dead, Modifying_Letter):
		return True
	# SPEC-DRAFT-OUTDATED! U == 0x002D is deliberately not accepted here.
	return UGC == Po and U not in (0x104E, 0x2022)
Packit Service 5bcba8
def is_BASE_NUM(U, UISC, UGC):
	"""Predicate for the 'N' entry of use_mapping: UISC is Brahmi_Joining_Number."""
	matches = (UISC == Brahmi_Joining_Number)
	return matches
Packit Service 5bcba8
def is_BASE_OTHER(U, UISC, UGC):
	"""Predicate for the 'GB' entry of use_mapping.

	True for Consonant_Placeholder (SPEC-DRAFT) and for a fixed list of
	code points.
	"""
	#SPEC-DRAFT return U in [0x00A0, 0x00D7, 0x2015, 0x2022, 0x25CC, 0x25FB, 0x25FC, 0x25FD, 0x25FE]
	return (UISC == Consonant_Placeholder #SPEC-DRAFT
		or U in (0x2015, 0x2022, 0x25FB, 0x25FC, 0x25FD, 0x25FE))
Packit Service 5bcba8
def is_CGJ(U, UISC, UGC):
	"""Predicate for the 'CGJ' entry of use_mapping: U is U+034F."""
	is_cgj = (U == 0x034F)
	return is_cgj
Packit Service 5bcba8
def is_CONS_FINAL(U, UISC, UGC):
	"""Predicate for the 'F' entry of use_mapping.

	True for Consonant_Succeeding_Repha, and for Consonant_Final whose
	UGC is not Lo.
	"""
	if UISC == Consonant_Succeeding_Repha:
		return True
	return UISC == Consonant_Final and UGC != Lo
Packit Service 5bcba8
def is_CONS_FINAL_MOD(U, UISC, UGC):
	"""Predicate for the 'FM' entry of use_mapping: UISC is Syllable_Modifier."""
	#SPEC-DRAFT return  UISC in [Consonant_Final_Modifier, Syllable_Modifier]
	matches = (UISC == Syllable_Modifier)
	return matches
Packit Service 5bcba8
def is_CONS_MED(U, UISC, UGC):
	"""Predicate for the 'M' entry of use_mapping: Consonant_Medial whose UGC is not Lo."""
	if UISC == Consonant_Medial:
		return UGC != Lo
	return False
Packit Service 5bcba8
def is_CONS_MOD(U, UISC, UGC):
	"""Predicate for the 'CM' entry of use_mapping: Nukta, Gemination_Mark or Consonant_Killer."""
	return UISC in (Nukta, Gemination_Mark, Consonant_Killer)
Packit Service 5bcba8
def is_CONS_SUB(U, UISC, UGC):
	"""Predicate for the 'SUB' entry of use_mapping: Consonant_Subjoined whose UGC is not Lo."""
	#SPEC-DRAFT return UISC == Consonant_Subjoined
	if UISC == Consonant_Subjoined:
		return UGC != Lo
	return False
Packit Service 5bcba8
def is_HALANT(U, UISC, UGC):
	"""Predicate for the 'H' entry of use_mapping: Virama or Invisible_Stacker."""
	return UISC in (Virama, Invisible_Stacker)
Packit Service 5bcba8
def is_HALANT_NUM(U, UISC, UGC):
	"""Predicate for the 'HN' entry of use_mapping: UISC is Number_Joiner."""
	matches = (UISC == Number_Joiner)
	return matches
Packit Service 5bcba8
def is_ZWNJ(U, UISC, UGC):
	"""Predicate for the 'ZWNJ' entry of use_mapping: UISC is Non_Joiner."""
	matches = (UISC == Non_Joiner)
	return matches
Packit Service 5bcba8
def is_ZWJ(U, UISC, UGC):
	"""Predicate for the 'ZWJ' entry of use_mapping: UISC is Joiner."""
	matches = (UISC == Joiner)
	return matches
Packit Service 5bcba8
def is_Word_Joiner(U, UISC, UGC):
	"""Predicate for the 'WJ' entry of use_mapping: U is U+2060."""
	is_wj = (U == 0x2060)
	return is_wj
Packit Service 5bcba8
def is_OTHER(U, UISC, UGC):
	"""Predicate for the 'O' entry of use_mapping.

	True when UISC is Other and the code point is not claimed by the
	SYM_MOD, CGJ, Word_Joiner or VARIATION_SELECTOR predicates.
	"""
	#SPEC-OUTDATED return UGC == Zs # or any other SCRIPT_COMMON characters
	if not UISC == Other:
		return False
	claimed_elsewhere = (is_SYM_MOD, is_CGJ, is_Word_Joiner, is_VARIATION_SELECTOR)
	return not any (pred (U, UISC, UGC) for pred in claimed_elsewhere)
Packit Service 5bcba8
def is_Reserved(U, UISC, UGC):
	"""Predicate for the 'Rsv' entry of use_mapping: UGC equals 'Cn'."""
	reserved = (UGC == 'Cn')
	return reserved
Packit Service 5bcba8
def is_REPHA(U, UISC, UGC):
	"""Predicate for the 'R' entry of use_mapping.

	Besides Consonant_Preceding_Repha this also accepts
	Consonant_With_Stacker and Consonant_Prefixed (see the hack note below).
	"""
	#return UISC == Consonant_Preceding_Repha
	#SPEC-OUTDATED hack to categorize Consonant_With_Stacker and Consonant_Prefixed
	return UISC in (Consonant_Preceding_Repha, Consonant_With_Stacker, Consonant_Prefixed)
Packit Service 5bcba8
def is_SYM(U, UISC, UGC):
	"""Predicate for the 'S' entry of use_mapping: UGC is So or Sc, except U+25CC."""
	#SPEC-DRAFT return UGC in [So, Sc] or UISC == Symbol_Letter
	return (U != 0x25CC #SPEC-DRAFT
		and UGC in (So, Sc))
Packit Service 5bcba8
def is_SYM_MOD(U, UISC, UGC):
	"""Predicate for the 'SM' entry of use_mapping: U is one of U+1B6B..U+1B73."""
	return U in (0x1B6B, 0x1B6C, 0x1B6D,
		     0x1B6E, 0x1B6F, 0x1B70,
		     0x1B71, 0x1B72, 0x1B73)
Packit Service 5bcba8
def is_VARIATION_SELECTOR(U, UISC, UGC):
	"""Predicate for the 'VS' entry of use_mapping: U lies in U+FE00..U+FE0F inclusive."""
	in_range = (0xFE00 <= U <= 0xFE0F)
	return in_range
Packit Service 5bcba8
def is_VOWEL(U, UISC, UGC):
	"""Predicate for the 'V' entry of use_mapping.

	True for Pure_Killer, and for Vowel / Vowel_Dependent whose UGC is not
	Lo, with U+AA29 excluded.
	"""
	# https://github.com/roozbehp/unicode-data/issues/6
	if UISC == Pure_Killer:
		return True
	return UGC != Lo and UISC in (Vowel, Vowel_Dependent) and U != 0xAA29
Packit Service 5bcba8
def is_VOWEL_MOD(U, UISC, UGC):
	"""Predicate for the 'VM' entry of use_mapping.

	True for Tone_Mark, Cantillation_Mark, Register_Shifter and Visarga,
	and, when UGC is not Lo, for Bindu and for U+AA29.
	"""
	# https://github.com/roozbehp/unicode-data/issues/6
	if UISC in (Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga):
		return True
	return UGC != Lo and (UISC == Bindu or U == 0xAA29)
Packit Service 5bcba8
Packit Service 5bcba8
# USE category tag -> predicate (U, UISC, UGC) deciding membership.
# map_to_use() expects exactly one predicate to accept each code point.
use_mapping = {
	# Bases
	'B':    is_BASE,
	'IND':  is_BASE_IND,
	'N':    is_BASE_NUM,
	'GB':   is_BASE_OTHER,
	'CGJ':  is_CGJ,
	# Consonant parts
	'F':    is_CONS_FINAL,
	'FM':   is_CONS_FINAL_MOD,
	'M':    is_CONS_MED,
	'CM':   is_CONS_MOD,
	'SUB':  is_CONS_SUB,
	# Halants and joiners
	'H':    is_HALANT,
	'HN':   is_HALANT_NUM,
	'ZWNJ': is_ZWNJ,
	'ZWJ':  is_ZWJ,
	'WJ':   is_Word_Joiner,
	# Everything else
	'O':    is_OTHER,
	'Rsv':  is_Reserved,
	'R':    is_REPHA,
	'S':    is_SYM,
	'SM':   is_SYM_MOD,
	'VS':   is_VARIATION_SELECTOR,
	'V':    is_VOWEL,
	'VM':   is_VOWEL_MOD,
}
Packit Service 5bcba8
Packit Service 5bcba8
# USE category tag -> {suffix: [Indic_Positional_Category values]}.
# A category mapped to None accepts a positional category without
# getting a suffix appended (see map_to_use).
use_positions = {
	'F': {'Abv': [Top], 'Blw': [Bottom], 'Pst': [Right]},
	'M': {'Abv': [Top], 'Blw': [Bottom], 'Pst': [Right], 'Pre': [Left]},
	'CM': {'Abv': [Top], 'Blw': [Bottom]},
	'V': {
		'Abv': [Top, Top_And_Bottom, Top_And_Bottom_And_Right, Top_And_Right],
		'Blw': [Bottom, Overstruck, Bottom_And_Right],
		'Pst': [Right],
		'Pre': [Left, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
	},
	'VM': {'Abv': [Top], 'Blw': [Bottom, Overstruck], 'Pst': [Right], 'Pre': [Left]},
	'SM': {'Abv': [Top], 'Blw': [Bottom]},
	'H': None,
	'B': None,
	'FM': None,
	'SUB': None,
}
Packit Service 5bcba8
Packit Service 5bcba8
def map_to_use(data):
	"""Translate merged Unicode properties into USE categories.

	data maps a code point to a (UISC, UIPC, UGC, UBlock) sequence.  The
	result maps each code point to (USE, UBlock), where USE is the tag of
	the single use_mapping predicate that accepts the code point, with a
	positional suffix from use_positions appended when that category has
	a suffix table.

	Raises AssertionError when a code point matches zero or several
	categories, or when its positional category cannot be resolved.
	"""
	# Indic_Syllabic_Category overrides.

	# TODO: These don't have UISC assigned in Unicode 8.0, but
	# have UIPC
	uisc_overrides = {0x17DD: Vowel_Dependent}
	uisc_overrides.update ((cp, Cantillation_Mark) for cp in range (0x1CE2, 0x1CE8 + 1))
	# TODO: U+1CED should only be allowed after some of
	# the nasalization marks, maybe only for U+1CE9..U+1CF1.
	uisc_overrides[0x1CED] = Tone_Mark

	# Indic_Positional_Category overrides.
	uipc_overrides = {
		# TODO: Not in Unicode 8.0 yet, but in spec.
		0x1B6C: Bottom,
		# TODO: These should die, but have UIPC in Unicode 8.0
		0x953: Not_Applicable,
		0x954: Not_Applicable,
		# TODO: In USE's override list but not in Unicode 8.0
		0x103C: Left,
		# TODO: These are not in USE's override list that we have, nor are they in Unicode 8.0
		0x111CA: Bottom,
		0x11300: Top,
		0x1133C: Bottom,
		0x1171E: Left, # Correct?!
		0x1CF2: Right,
		0x1CF3: Right,
		0x1CF8: Top,
		0x1CF9: Top,
	}
	uipc_overrides.update ((cp, Top) for cp in range (0xA926, 0xA92A + 1))

	predicates = use_mapping.items()
	result = {}
	for U,(UISC,UIPC,UGC,UBlock) in data.items():

		# Resolve Indic_Syllabic_Category
		UISC = uisc_overrides.get (U, UISC)

		# Every predicate is evaluated; exactly one must accept.
		matches = [tag for tag,pred in predicates if pred (U,UISC,UGC)]
		assert len(matches) == 1, "%s %s %s %s" % (hex(U), UISC, UGC, matches)
		USE = matches[0]

		# Resolve Indic_Positional_Category
		UIPC = uipc_overrides.get (U, UIPC)

		assert (UIPC in [Not_Applicable, Visual_Order_Left] or
			USE in use_positions), "%s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC)

		# Categories with a suffix table get exactly one suffix appended.
		suffix_table = use_positions.get(USE, None)
		if suffix_table:
			suffixes = [suf for suf,positions in suffix_table.items() if positions and UIPC in positions]
			assert len(suffixes) == 1, "%s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC, suffixes)
			USE = USE + suffixes[0]

		result[U] = (USE, UBlock)
	return result
Packit Service 5bcba8
Packit Service 5bcba8
# From here on each data entry is a (USE category, block) pair, and
# this is the pair used for code points that have no entry.
defaults = ('O', 'No_Block')
data = map_to_use(data)

# Remove the outliers: they are taken out of the table data and kept in
# `singles` instead.
singles = {}
for outlier in (0x034F, 0x25CC, 0x1107F):
	singles[outlier] = data.pop (outlier)
Packit Service 5bcba8
Packit Service 5bcba8
# Emit the banner comment of the generated file followed by its #include.
# The lines are collected and written with sys.stdout.write rather than
# Python 2-only print statements; the bytes written are unchanged.
banner = [
	"/* == Start of generated table == */",
	"/*",
	" * The following table is generated by running:",
	" *",
	" *   ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt",
	" *",
	" * on files with these headers:",
	" *",
]
for h in headers:
	for l in h:
		banner.append (" * %s" % (l.strip()))
banner += [
	" */",
	"",
	'#include "hb-ot-shape-complex-use-private.hh"',
	"",
]
sys.stdout.write ("\n".join (banner) + "\n")
del banner
Packit Service 5bcba8
Packit Service 5bcba8
# Running statistics updated by print_block().
total = 0		# number of table slots emitted so far
used = 0		# number of those slots that had an entry in the data
last_block = None	# most recent block name passed to print_block()
def print_block (block, start, end, data):
	"""Write the table entries for code points start..end (inclusive).

	block -- block name used for the heading comment, or None/empty for
	         filler rows that get no heading.
	start -- first code point; must be a multiple of 8.
	end   -- last code point; end + 1 must be a multiple of 8.
	data  -- dict mapping code point to a sequence whose first item is
	         the category text; missing code points use `defaults`.

	Updates the module-level total / used / last_block.  All output goes
	through sys.stdout.write so the function parses on Python 2 and 3;
	the bytes written match the former print statements.
	"""
	global total, used, last_block
	out = sys.stdout.write
	if block and block != last_block:
		# Two blank lines, then the block heading.
		out ("\n\n  /* %s */\n" % block)
		if start % 16:
			# The block starts mid-row: pad up to its column.
			out (' ' * (20 + (start % 16 * 6)))
	num = 0
	assert start % 8 == 0
	assert (end+1) % 8 == 0
	for u in range (start, end+1):
		if u % 16 == 0:
			# Start a new row with its code point label.
			out ("\n  /* %04X */" % u)
		if u in data:
			num += 1
		d = data.get (u, defaults)
		out ("%6s," % d[0])

	total += end - start + 1
	used += num
	if block:
		last_block = block
Packit Service 5bcba8
Packit Service 5bcba8
# Sorted list of the code points that have data.  sorted() replaces the
# Python 2-only "keys() then .sort()" idiom.
uu = sorted (data)

# State for the table-emission loop that follows.
last = -100000
num = 0
offset = 0
starts = []
ends = []

# Short aliases for the generated table: one per category without a
# suffix table ...
for k,v in sorted(use_mapping.items()):
	if use_positions.get (k): continue
	# v.__name__[3:] drops the "is_" prefix of the predicate name.
	sys.stdout.write ("#define %s\tUSE_%s\t/* %s */\n" % (k, k, v.__name__[3:]))
# ... and one per (category, suffix) pair otherwise.
for k,v in sorted(use_positions.items()):
	if not v: continue
	for suf in v.keys():
		tag = k + suf
		sys.stdout.write ("#define %s\tUSE_%s\n" % (tag, tag))
sys.stdout.write ("\n")
Packit Service 5bcba8
# Emit the table body as a sequence of ranges aligned to 8 code points.
# Gaps of at most 48 slots are filled with default entries; larger gaps
# start a new range with its own use_offset_0x... macro.
sys.stdout.write ("static const USE_TABLE_ELEMENT_TYPE use_table[] = {\n")
for u in uu:
	if u <= last:
		continue
	block = data[u][1]

	start = u//8*8
	end = start+1
	# uu holds exactly the keys of data, so test membership on the dict
	# instead of scanning the list on every step.
	while end in data and block == data[end][1]:
		end += 1
	end = (end-1)//8*8 + 7

	if start != last + 1:
		if start - last <= 1+16*3:
			# Small gap: pad with filler rows and stay in the same range.
			print_block (None, last+1, start-1, data)
			last = start-1
		else:
			# Large gap: close the previous range (if any) and open a new one.
			if last >= 0:
				ends.append (last + 1)
				offset += ends[-1] - starts[-1]
			sys.stdout.write ("\n\n#define use_offset_0x%04xu %d\n" % (start, offset))
			starts.append (start)

	print_block (block, start, end, data)
	last = end
# Close the final range; each entry of `ends` is one past the last code point.
ends.append (last + 1)
offset += ends[-1] - starts[-1]
occupancy = used * 100. / total
page_bits = 12
sys.stdout.write ("\n\n}; /* Table items: %d; occupancy: %d%% */\n" % (offset, occupancy))
Packit Service 5bcba8
# Emit hb_use_get_categories(): a switch on the page (u >> page_bits)
# with one range test per table range touching that page, plus the
# outliers kept in `singles`.
sys.stdout.write ("\n"
		  "USE_TABLE_ELEMENT_TYPE\n"
		  "hb_use_get_categories (hb_codepoint_t u)\n"
		  "{\n"
		  "  switch (u >> %d)\n"
		  "  {\n" % page_bits)
# list(singles) instead of singles.keys(): the latter cannot be
# concatenated to a list on Python 3.
pages = set (u >> page_bits for u in starts + ends + list (singles))
for p in sorted(pages):
	sys.stdout.write ("    case 0x%0Xu:\n" % p)
	for (start,end) in zip (starts, ends):
		# NOTE(review): only the first and last page of a range are
		# matched; a range spanning three or more pages would get no
		# case for its middle pages -- confirm that cannot happen.
		if p not in [start>>page_bits, end>>page_bits]: continue
		offset = "use_offset_0x%04xu" % start
		sys.stdout.write ("      if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return use_table[u - 0x%04Xu + %s];\n" % (start, end-1, start, offset))
	for u,d in singles.items ():
		if p != u>>page_bits: continue
		sys.stdout.write ("      if (unlikely (u == 0x%04Xu)) return %s;\n" % (u, d[0]))
	sys.stdout.write ("      break;\n\n")
sys.stdout.write ("    default:\n"
		  "      break;\n"
		  "  }\n"
		  "  return USE_O;\n"
		  "}\n"
		  "\n")

# Undo the short aliases defined before the table.
for k in sorted(use_mapping.keys()):
	if use_positions.get (k): continue
	sys.stdout.write ("#undef %s\n" % k)
for k,v in sorted(use_positions.items()):
	if not v: continue
	for suf in v.keys():
		tag = k + suf
		sys.stdout.write ("#undef %s\n" % tag)
sys.stdout.write ("\n/* == End of generated table == */\n")

# Maintain at least 50% occupancy in the table
if occupancy < 50:
	raise Exception ("Table too sparse, please investigate: ", occupancy)