|
Packit |
b89d10 |
#!/usr/bin/python
|
|
Packit |
b89d10 |
# -*- coding: utf-8 -*-
|
|
Packit |
b89d10 |
# make_unicode_egcb_data.py
|
|
Packit |
b89d10 |
# Copyright (c) 2017 K.Kosako
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
import sys
|
|
Packit |
b89d10 |
import re
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
# Highest Unicode code point; upper bound used when inverting range lists.
MAX_CODE_POINT = 0x10ffff

# All patterns are raw strings so that \s, \w, \d and \. reach the regex
# engine verbatim instead of depending on Python passing unknown string
# escapes through unchanged.

# Comment line "# Total code points: N" / "# Total elements: N".
PR_TOTAL_REG = re.compile(r"#\s*Total\s+(?:code\s+points|elements):")
# Data line "XXXX ; Name" or "XXXX..YYYY ; Name"; group 2 is None for a
# single code point.  The separator is matched as two literal dots (as in
# BL_LINE_REG) rather than as any two characters.
PR_LINE_REG = re.compile(r"([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)")
# Two word tokens separated by ';'.
PA_LINE_REG = re.compile(r"(\w+)\s*;\s*(\w+)")
# "sc" or "gc" followed by two or three ';'-separated word tokens.
PVA_LINE_REG = re.compile(r"(sc|gc)\s*;\s*(\w+)\s*;\s*(\w+)(?:\s*;\s*(\w+))?")
# "XXXX..YYYY ; text" range line; group 3 captures the rest of the line.
BL_LINE_REG = re.compile(r"([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.*)")
# Comment line containing "-X.Y.Z.txt"; group 1 is "X.Y.Z".
VERSION_REG = re.compile(r"#\s*.*-(\d+\.\d+\.\d+)\.txt")

# Version string captured by check_version_info(); None until one is seen.
VERSION_INFO = None
# Property name -> list of (start, end) ranges, filled by the main script.
DIC = {}
# Property names, filled by the main script.
PROPS = []
# Property name -> index, filled by print_prop_and_index().
PropIndex = {}
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def check_version_info(s):
    """Record the data-file version found in line *s*, if any.

    When *s* matches VERSION_REG, capture group 1 is stored in the
    module-level VERSION_INFO; otherwise VERSION_INFO is left untouched.
    """
    global VERSION_INFO
    found = VERSION_REG.match(s)
    if found is None:
        return
    VERSION_INFO = found.group(1)
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def print_ranges(ranges):
    """Print every (start, end) pair as two zero-padded hex literals.

    ranges: iterable of (start, end) integer pairs.  Each pair is written
    to standard output on its own line as "0xSSSSSS, 0xEEEEEE".
    """
    for start, end in ranges:
        # A single parenthesized argument is valid both as a Python 2
        # print statement and as a Python 3 print() call.
        print("0x%06x, 0x%06x" % (start, end))
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def print_prop_and_index(prop, i):
    """Print "<prop>," with its index and remember the mapping.

    The name (with a trailing comma) is left-justified in a 35-column
    field, followed by *i* right-justified in 3 columns.  The pair is also
    stored in the module-level PropIndex dictionary.
    """
    # A single parenthesized argument works as a Python 2 print statement
    # and as a Python 3 print() call alike.
    print("%-35s %3d" % (prop + ',', i))
    PropIndex[prop] = i
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def dic_find_by_value(dic, v):
    """Return the first key of *dic* whose value equals *v*, else None."""
    return next((key for key, val in dic.items() if val == v), None)
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def normalize_ranges(in_ranges, sort=False):
    """Merge overlapping or adjacent (start, end) ranges.

    in_ranges: sequence of (start, end) integer pairs with inclusive ends.
        The merge is one left-to-right pass that only compares each range
        with the previously emitted one, so the input has to be ordered by
        start unless *sort* is true.
    sort: when true, a sorted copy of *in_ranges* is processed.

    Returns a new list of merged pairs; *in_ranges* itself is not modified.
    """
    ranges = sorted(in_ranges) if sort else in_ranges

    merged = []
    prev = None
    for start, end in ranges:
        # The first range has no predecessor.  Test for None explicitly:
        # ordering None against an int only works on Python 2 and raises
        # TypeError on Python 3.
        if prev is not None and prev >= start - 1:
            # Touches or overlaps the last emitted range: fold into it.
            pstart, pend = merged.pop()
            start = pstart
            end = max(pend, end)

        merged.append((start, end))
        prev = end

    return merged
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def inverse_ranges(in_ranges):
    """Return the gaps of *in_ranges* within 0..MAX_CODE_POINT.

    in_ranges: (start, end) pairs with inclusive ends, processed in the
        order given; each gap is measured from the end of the previous
        pair, so the pairs are expected in ascending order.

    Returns a list of (start, end) pairs covering the code points that lie
    before, between and after the input ranges.
    """
    r = []
    prev = 0x000000  # first code point not yet accounted for
    for start, end in in_ranges:
        if prev < start:
            r.append((prev, start - 1))

        prev = end + 1

    # "<=" so that a lone trailing MAX_CODE_POINT is still reported when
    # the last input range ends at MAX_CODE_POINT - 1.
    if prev <= MAX_CODE_POINT:
        r.append((prev, MAX_CODE_POINT))

    return r
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def add_ranges(r1, r2):
    """Return the sorted, merged union of the range lists *r1* and *r2*."""
    return normalize_ranges(r1 + r2, True)
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def sub_one_range(one_range, rs):
    """Subtract every range in *rs* from the single range *one_range*.

    one_range: (start, end) pair with inclusive ends.
    rs: sequence of (start, end) pairs, visited in the order given.

    Returns the list of sub-ranges of *one_range* left over after the cuts.
    """
    lo, hi = one_range
    remaining = []
    for cut_lo, cut_hi in rs:
        if lo <= cut_lo <= hi:
            # The cut begins inside what is left: keep the part before it.
            if lo < cut_lo:
                remaining.append((lo, cut_lo - 1))
        elif not (cut_lo < lo <= cut_hi):
            # The cut does not reach into what is left.
            continue

        # The cut overlaps [lo, hi]; nothing survives once it reaches hi.
        if cut_hi >= hi:
            return remaining
        lo = cut_hi + 1

    remaining.append((lo, hi))
    return remaining
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def sub_ranges(r1, r2):
    """Return the ranges of *r1* with every range in *r2* removed.

    Each range of *r1* is reduced independently by sub_one_range() and the
    leftovers are concatenated in the order of *r1*.
    """
    leftover = []
    for piece in r1:
        leftover += sub_one_range(piece, r2)
    return leftover
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def add_ranges_in_dic(dic):
    """Return the sorted, merged union of all range lists stored in *dic*."""
    combined = []
    for ranges in dic.values():
        combined = combined + ranges
    return normalize_ranges(combined, True)
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def normalize_ranges_in_dic(dic, sort=False):
    """Replace each range list in *dic* by its normalize_ranges() result.

    *sort* is forwarded unchanged to normalize_ranges().  The dictionary is
    updated in place; nothing is returned.
    """
    # Walk a snapshot of the items so the values can be reassigned freely.
    for name, ranges in list(dic.items()):
        dic[name] = normalize_ranges(ranges, sort)
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def merge_dic(to_dic, from_dic):
    """Merge *from_dic* into *to_dic*, warning about shared keys.

    Keys present in both dictionaries are reported on standard error as a
    sorted list; the merge still proceeds and the values from *from_dic*
    replace those in *to_dic*.
    """
    common = set(to_dic) & set(from_dic)
    if common:
        # sys.stderr.write() behaves the same on Python 2 and 3, whereas
        # the "print >> stream" statement form is Python 2 only.
        sys.stderr.write("merge_dic: collision: %s\n" % sorted(common))

    to_dic.update(from_dic)
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def merge_props(to_props, from_props):
    """Append *from_props* to *to_props*, warning about shared names.

    Names occurring in both lists are reported on standard error as a
    sorted list; the names are appended regardless, so duplicates are kept.
    """
    common = set(to_props) & set(from_props)
    if common:
        # sys.stderr.write() behaves the same on Python 2 and 3, whereas
        # the "print >> stream" statement form is Python 2 only.
        sys.stderr.write("merge_props: collision: %s\n" % sorted(common))

    to_props.extend(from_props)
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def add_range_into_dic(dic, name, start, end):
    """Append the pair (start, end) to the list stored under *name*.

    A new one-element list is created when *dic* has no list for *name*
    yet.  The dictionary is modified in place; nothing is returned.
    """
    entry = (start, end)
    existing = dic.get(name)
    if existing is not None:
        existing.append(entry)
        return
    dic[name] = [entry]
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def list_sub(a, b):
    """Return a list of the distinct items of *a* that do not occur in *b*.

    The result comes from a set difference, so its order is unspecified.
    """
    return list(set(a).difference(set(b)))
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
def parse_properties(path):
    """Parse a property data file into ranges and a property-name list.

    Every line matching PR_LINE_REG adds a (start, end) range (a single
    code point gives start == end) under its property name.  Every line
    matching PR_TOTAL_REG appends the most recently seen property name to
    the name list.  While VERSION_INFO is still None, comment lines are
    also handed to check_version_info().

    Returns (dic, props): *dic* maps property name to its range list after
    normalize_ranges_in_dic(), *props* is the list of names.
    """
    dic = {}
    props = []
    prop = None
    with open(path, 'r') as f:
        for line in f:
            s = line.strip()
            if not s:
                continue

            if s.startswith('#') and VERSION_INFO is None:
                check_version_info(s)

            m = PR_LINE_REG.match(s)
            if m is None:
                # Not a data line; a "Total" line closes the current property.
                if PR_TOTAL_REG.match(s) is not None:
                    props.append(prop)
                continue

            prop = m.group(3)
            first = int(m.group(1), 16)
            last = int(m.group(2), 16) if m.group(2) else first
            add_range_into_dic(dic, prop, first, last)

    normalize_ranges_in_dic(dic)
    return (dic, props)
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
### main ###
|
|
Packit |
b89d10 |
# Command-line arguments are captured here; nothing below consults them.
argv = sys.argv
argc = len(argv)

# Load the grapheme break data from the current directory and fold it into
# the module-level tables.
dic, props = parse_properties('GraphemeBreakProperty.txt')
merge_dic(DIC, dic)
merge_props(PROPS, props)

PROPS = sorted(PROPS)

# First line of the generated C source.  The parenthesized single argument
# is valid as a Python 2 print statement and as a Python 3 print() call.
print('/* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */')
|
|
Packit |
b89d10 |
COPYRIGHT = '''
|
|
Packit |
b89d10 |
/*-
|
|
Packit |
b89d10 |
* Copyright (c) 2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
|
Packit |
b89d10 |
* All rights reserved.
|
|
Packit |
b89d10 |
*
|
|
Packit |
b89d10 |
* Redistribution and use in source and binary forms, with or without
|
|
Packit |
b89d10 |
* modification, are permitted provided that the following conditions
|
|
Packit |
b89d10 |
* are met:
|
|
Packit |
b89d10 |
* 1. Redistributions of source code must retain the above copyright
|
|
Packit |
b89d10 |
* notice, this list of conditions and the following disclaimer.
|
|
Packit |
b89d10 |
* 2. Redistributions in binary form must reproduce the above copyright
|
|
Packit |
b89d10 |
* notice, this list of conditions and the following disclaimer in the
|
|
Packit |
b89d10 |
* documentation and/or other materials provided with the distribution.
|
|
Packit |
b89d10 |
*
|
|
Packit |
b89d10 |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
Packit |
b89d10 |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
Packit |
b89d10 |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
Packit |
b89d10 |
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
Packit |
b89d10 |
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
Packit |
b89d10 |
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
Packit |
b89d10 |
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
Packit |
b89d10 |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
Packit |
b89d10 |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
Packit |
b89d10 |
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
Packit |
b89d10 |
* SUCH DAMAGE.
|
|
Packit |
b89d10 |
*/
|
|
Packit |
b89d10 |
'''.strip()
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
# Emit the licence text, then (when a version was found in the data file)
# a version macro.  Parenthesized single-argument prints work on both
# Python 2 and Python 3.
print(COPYRIGHT)
print('')
if VERSION_INFO is not None:
    # '.' and '-' in the version string become '_' in the macro value.
    print("#define GRAPHEME_BREAK_PROPERTY_VERSION %s" % re.sub(r'[\.-]', '_', VERSION_INFO))
    print('')
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
# Flatten the per-property range lists into (start, end, prop) triples
# ordered by start code point.
ranges = []
for prop in PROPS:
    rs = DIC[prop]
    for (start, end) in rs:
        ranges.append((start, end, prop))

ranges = sorted(ranges, key=lambda x: x[0])

# Sanity check: no two ranges may overlap.  prev holds the end of the
# previously visited range and must advance on every iteration, otherwise
# the comparison below can never become true.
prev = -1
for (start, end, prop) in ranges:
    if prev >= start:
        raise ValueError("{2}:{0} - {1} range overlap prev value {3}".format(start, end, prop, prev))

    prev = end
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
|
|
Packit |
b89d10 |
# List the property names inside a C comment.  All prints use a single
# parenthesized argument so they run on both Python 2 and Python 3.
print('/*')
for prop in PROPS:
    print("%s" % prop)
print('*/')
print('')

num_ranges = len(ranges)
print("static int EGCB_RANGE_NUM = %d;" % num_ranges)

# One initializer per range; the last entry carries no trailing comma.
print('static EGCB_RANGE_TYPE EGCB_RANGES[] = {')
for i, (start, end, prop) in enumerate(ranges):
    comma = '' if i == num_ranges - 1 else ','
    type_name = 'EGCB_' + prop
    print(" {0x%06x, 0x%06x, %s }%s" % (start, end, type_name, comma))

print('};')

sys.exit(0)
|