Tree - source-git/glibc - CentOS Git server

source-git / glibc

Blame localedata/unicode-gen/ctype_compatibility.py

Blob History Raw

#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2014-2018 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <http://www.gnu.org/licenses/>.

'''
This script is useful for checking the differences between
an old LC_CTYPE file /usr/share/i18n/locales/i18n and a
new one generated by gen_unicode_ctype.py.

To see how it is used, call it with the “-h” option:

    $ ./ctype_compatibility.py -h
    … prints usage message …
'''

import argparse
import collections
import re
import sys
import unicodedata

from ctype_compatibility_test_cases import TEST_CASES

def get_lines_from_file(filename):
    '''Yield the logical, comment-free lines of an i18n locale source file.

    Everything from the first “%” on a physical line is a comment and is
    dropped.  A physical line ending in “/” is continued on the next
    physical line; continued lines are merged into one logical line and
    the “/” itself is removed.  A comment does not cancel a continuation:
    when the physical line ends in “/”, the line is still treated as
    continued after its comment has been cut off.

    Blank and comment-only lines are yielded as empty strings.
    '''
    # Decode explicitly so the result does not depend on the locale the
    # script happens to run in.
    with open(filename, encoding='utf-8') as i18n_file:
        pending = ''
        for line in i18n_file:
            line = line.strip('\n')
            comment_start = line.find('%')
            if comment_start != -1:
                continued = line.endswith('/')
                line = line[:comment_start]
                if continued:
                    # Keep the continuation marker that was part of the
                    # text removed together with the comment.
                    line += '/'
            line = line.strip()
            if line.endswith('/'):
                pending += line[:-1]
            else:
                yield pending + line
                pending = ''
        if pending:  # file ends with a continuation line
            yield pending

def extract_character_classes(filename):
    '''Get all Unicode code points for each character class from a file

    Returns a dictionary which uses the character class names as keys
    and the list of code points in that character class as values.
    Only classes that actually occur in the file get a key.

    In case of the character classes “toupper”, “tolower”, and “totitle”,
    the list entries are actually pairs of code points.

    A line is recognised when it starts with the class name followed by
    whitespace, or with `class "NAME";` / `map "NAME";` followed by
    whitespace.  The rest of the line is handed to process_chars().
    '''
    class_names = (
        'upper',
        'lower',
        'alpha',
        'digit',
        'outdigit',
        'space',
        'cntrl',
        'punct',
        'graph',
        'print',
        'xdigit',
        'blank',
        'combining',
        'combining_level3',
        'toupper',
        'tolower',
        'totitle',
    )
    # Build the line-prefix patterns once instead of once per input line.
    prefix_patterns = [
        (name,
         re.compile(
             r'^(?:(?:class|map)\s+"' + re.escape(name) + r'";\s+'
             + r'|' + re.escape(name) + r'\s+)'))
        for name in class_names]

    ctype_dict = {}
    for line in get_lines_from_file(filename):
        for char_class, pattern in prefix_patterns:
            match = pattern.match(line)
            if match:
                process_chars(
                    ctype_dict.setdefault(char_class, []),
                    line[match.end():])
                # The name must be followed by whitespace or by '";', so
                # no other class name can match the same line.
                break
    return ctype_dict

# Building blocks for the four notations understood by process_chars().
_CODE_POINT = r'<U([0-9A-F]{4,8})>'
_SINGLE_RE = re.compile(r'^' + _CODE_POINT + r'$')
_RANGE_RE = re.compile(r'^' + _CODE_POINT + r'\.\.' + _CODE_POINT + r'$')
_STEP2_RANGE_RE = re.compile(
    r'^' + _CODE_POINT + r'\.\.\(2\)\.\.' + _CODE_POINT + r'$')
_PAIR_RE = re.compile(r'^\(' + _CODE_POINT + r',' + _CODE_POINT + r'\)$')


def process_chars(char_class_list, code_point_line):
    '''
    Extract Unicode values from code_point_line
    and add to the list of code points in a character class

    code_point_line is a “;”-separated list whose entries have one of
    these forms (surrounding whitespace is ignored):

    <Uxxxx>                 a single code point, appended as an integer
    <Uxxxx>..<Uyyyy>        every code point in the inclusive range
    <Uxxxx>..(2)..<Uyyyy>   every second code point, starting at xxxx
    (<Uxxxx>,<Uyyyy>)       a mapping, appended as a tuple of two integers

    char_class_list is modified in place.  If an entry matches none of
    the forms, a message is written to stderr and the program exits with
    status 1.
    '''
    for entry in code_point_line.split(';'):
        entry = entry.strip()
        match = _SINGLE_RE.match(entry)
        if match:  # <Uxxxx>
            char_class_list.append(int(match.group(1), 16))
            continue
        match = _RANGE_RE.match(entry)
        if match:  # <Uxxxx>..<Uxxxx>
            first, last = (int(digits, 16) for digits in match.groups())
            char_class_list.extend(range(first, last + 1))
            continue
        match = _STEP2_RANGE_RE.match(entry)
        if match:  # <Uxxxx>..(2)..<Uxxxx>
            first, last = (int(digits, 16) for digits in match.groups())
            char_class_list.extend(range(first, last + 1, 2))
            continue
        match = _PAIR_RE.match(entry)
        if match:  # (<Uxxxx>,<Uxxxx>)
            char_class_list.append(
                tuple(int(digits, 16) for digits in match.groups()))
            continue
        sys.stderr.write(
            ('None of the regexps matched '
             + 'code_points=%(cp)s in code_point_line=%(cpl)s\n') % {
                 'cp': entry,
                 'cpl': code_point_line})
        sys.exit(1)

def compare_lists(old_ctype_dict, new_ctype_dict):
    '''Compare character classes in the old and the new LC_CTYPE

    First lists the character classes which exist in only one of the two
    dictionaries.  Then, for every class present in both, prints the
    number of entries on each side and calls report() for the details.

    Classes which exist on one side only are not compared in detail;
    they have already been listed and there is nothing to compare them
    with.
    '''
    print('****************************************************')
    print('Character classes which are only in the new '
          + 'or only in the old file:')
    for char_class in sorted(old_ctype_dict):
        if char_class not in new_ctype_dict:
            print('Character class %s is in old ctype but not in new ctype'
                  % char_class)
    for char_class in sorted(new_ctype_dict):
        if char_class not in old_ctype_dict:
            print('Character class %s is in new ctype but not in old ctype'
                  % char_class)
    shared_classes = [char_class for char_class in sorted(old_ctype_dict)
                      if char_class in new_ctype_dict]
    for char_class in shared_classes:
        old_list = old_ctype_dict[char_class]
        new_list = new_ctype_dict[char_class]
        print("****************************************************")
        print("%s: %d chars in old ctype and %d chars in new ctype" % (
            char_class, len(old_list), len(new_list)))
        print("----------------------------------------------------")
        report(char_class, old_list, new_list)

def report_code_points(char_class, code_point_list, text=''):
    '''Report all code points which have been added to or removed from a
    character class.

    code_point_list holds either integers (plain character classes) or
    pairs of integers (mappings such as “toupper”).  The entries are
    printed in sorted order, one line each, with the character itself,
    its hexadecimal value and its Unicode name (“name unknown” when
    unicodedata has no name for it).  text is a label such as “Added” or
    “Missing” which is included in every line.
    '''
    def unicode_name(code_point):
        return unicodedata.name(chr(code_point), 'name unknown')

    for code_point in sorted(code_point_list):
        if isinstance(code_point, int):
            print('%(char_class)s: %(text)s: %(char)s %(code_point)s %(name)s'
                  % {'text': text,
                     'char': chr(code_point),
                     'char_class': char_class,
                     'code_point': hex(code_point),
                     'name': unicode_name(code_point)})
        else:
            source, target = code_point[0], code_point[1]
            print(('%(char_class)s: %(text)s: '
                   + '%(char0)s → %(char1)s '
                   + '%(code_point0)s → %(code_point1)s '
                   + '%(name0)s → %(name1)s') % {
                       'text': text,
                       'char_class': char_class,
                       'char0': chr(source),
                       'code_point0': hex(source),
                       'name0': unicode_name(source),
                       'char1': chr(target),
                       'code_point1': hex(target),
                       'name1': unicode_name(target)})

def report(char_class, old_list, new_list):
    '''Report the differences for a certain LC_CTYPE character class
    between the old and the newly generated state

    Always prints how many entries of old_list are missing from new_list
    and how many entries of new_list are not in old_list.  The individual
    entries are printed as well when the command line options
    show_missing_characters / show_added_characters are set.

    The options are read from the module-level ARGS, which only exists
    when this file is run as a script.  When ARGS is not defined (for
    example when the module is imported), only the counts are printed.
    '''
    cli_args = globals().get('ARGS')

    missing_chars = list(set(old_list) - set(new_list))
    print(('%(char_class)s: Missing %(number)d characters '
           + 'of old ctype in new ctype ')
          % {'char_class': char_class, 'number': len(missing_chars)})
    if getattr(cli_args, 'show_missing_characters', False):
        report_code_points(char_class, missing_chars, 'Missing')

    added_chars = list(set(new_list) - set(old_list))
    print(('%(char_class)s: Added %(number)d characters '
           + 'in new ctype which were not in old ctype')
          % {'char_class': char_class, 'number': len(added_chars)})
    if getattr(cli_args, 'show_added_characters', False):
        report_code_points(char_class, added_chars, 'Added')

def cperror(error_message, errorcounter=0):
    '''Print error_message and return errorcounter increased by one

    The counter is not stored anywhere; callers keep the running total
    by assigning the return value back to their own counter.
    '''
    print(error_message)
    return errorcounter + 1

def cpcheck(ctype_dict, code_point_list_with_ranges, char_classes, reason='',
            errorcounter=0):
    '''Check the class membership of a set of code points

    The parameter “code_point_list_with_ranges” is a list of
    integers or pairs of integers, for example:

    [0x0E31, (0x0E34, 0x0E3A), (0x0E47, 0x0E4E)]

    where the pairs of integers stand for all the code points in the range
    of the two integers given, including the two integers of the pair.

    “char_classes” is a sequence of (class name, expected) entries where
    “expected” says whether each code point should be a member of that
    class.  “ctype_dict” maps class names to containers which support the
    “in” operator.  A class name which is not a key of “ctype_dict” is
    treated as an empty class.

    For every code point whose membership differs from the expectation
    an error containing “reason” is reported through cperror().  Returns
    “errorcounter” increased by the number of errors found.
    '''
    for item in code_point_list_with_ranges:
        if isinstance(item, int):
            code_points = (item,)
        else:
            code_points = range(item[0], item[1] + 1)
        for code_point in code_points:
            for char_class_tuple in char_classes:
                char_class = char_class_tuple[0]
                in_char_class = char_class_tuple[1]
                is_member = code_point in ctype_dict.get(char_class, ())
                if is_member == in_char_class:
                    continue
                errorcounter = cperror(
                    ('error: %(code_point)s %(char)s '
                     + '%(char_class)s %(in)s: %(reason)s') % {
                         'code_point': hex(code_point),
                         'char': chr(code_point),
                         'char_class': char_class,
                         'in': not in_char_class,
                         'reason': reason},
                    errorcounter)
    return errorcounter

# Pairs of character classes which must not share a code point, listed in
# the order in which violations are reported for each code point.
#
# alpha: "No character specified for the keywords cntrl, digit, punct or
#        space shall be specified."
# space: "No character specified for the keywords upper, lower, alpha,
#        digit, graph or xdigit shall be specified."
# cntrl: "No character specified for the keywords upper, lower, alpha,
#        digit, punct, graph, print or xdigit shall be specified."
# punct: "No character specified for the keywords upper, lower, alpha,
#        digit, cntrl, xdigit or as the <space> character shall be
#        specified."
# graph, print: "No character specified for the keyword cntrl shall be
#        specified."
#
# The upper and lower parts of these rules are covered by the separate
# “upper|lower but not alpha” check: once upper and lower are known to be
# subsets of alpha, the alpha pairs imply the upper and lower pairs.
_EXCLUSIVE_CLASS_PAIRS = (
    ('alpha', 'cntrl'),
    ('alpha', 'digit'),
    ('alpha', 'punct'),
    ('alpha', 'space'),
    ('space', 'digit'),
    ('space', 'graph'),
    ('space', 'xdigit'),
    ('cntrl', 'digit'),
    ('cntrl', 'punct'),
    ('cntrl', 'graph'),
    ('cntrl', 'print'),
    ('cntrl', 'xdigit'),
    ('punct', 'digit'),
    ('punct', 'xdigit'),
)


def _ctype_error_text(code_point, finding):
    '''Format the message for one consistency error found by tests()'''
    return 'error: %(char)s %(cp)s %(finding)s' % {
        'char': chr(code_point),
        'cp': hex(code_point),
        'finding': finding}


def tests(ctype_dict, errorcounter=0):
    '''Test a LC_CTYPE character class dictionary for known errors

    ctype_dict maps character class names to lists of code points (or,
    for “toupper”, “tolower” and “totitle”, lists of code point pairs).

    First every entry of TEST_CASES is checked with cpcheck(), then each
    code point from 0 to 0x10FFFF is checked against the restrictions
    the locale specification puts on the relations between the classes.
    Every violation is reported through cperror().

    A class which is missing from ctype_dict is treated as empty.

    Returns errorcounter increased by the number of errors found.
    '''
    # Copy the information from ctype_dict (which contains lists) into
    # a new dictionary ctype_dict2 (which contains dictionaries), so
    # that the membership tests below are dictionary lookups.  Plain
    # classes map each code point to 1, mappings map source to target.
    # The defaultdict makes a missing class look like an empty one.
    ctype_dict2 = collections.defaultdict(dict)
    for key, values in ctype_dict.items():
        if values and not isinstance(values[0], int):
            # key is 'toupper', 'tolower', or 'totitle'
            ctype_dict2[key] = {pair[0]: pair[1] for pair in values}
        else:
            ctype_dict2[key] = dict.fromkeys(values, 1)

    for test_case in TEST_CASES:
        errorcounter = cpcheck(ctype_dict2,
                               test_case[0],
                               test_case[1],
                               test_case[2],
                               errorcounter=errorcounter)

    # Look the classes up once instead of once per code point.
    upper = ctype_dict2['upper']
    lower = ctype_dict2['lower']
    alpha = ctype_dict2['alpha']
    punct = ctype_dict2['punct']
    graph = ctype_dict2['graph']
    space = ctype_dict2['space']
    printable = ctype_dict2['print']
    case_mappings = (('toupper', ctype_dict2['toupper']),
                     ('tolower', ctype_dict2['tolower']))
    exclusive_pairs = [
        (first, ctype_dict2[first], second, ctype_dict2[second])
        for first, second in _EXCLUSIVE_CLASS_PAIRS]

    for code_point in range(0, 0x110000):
        is_cased = code_point in lower or code_point in upper

        # toupper/tolower restriction: "Only characters specified for
        # the keywords lower and upper shall be specified."
        for mapping_name, mapping in case_mappings:
            if (code_point in mapping
                    and code_point != mapping[code_point]
                    and not is_cased):
                errorcounter = cperror(
                    ('error: %(char1)s is not upper|lower '
                     + 'but %(mapping)s(%(cp1)s)=%(cp2)s (%(char2)s)') % {
                         'char1': chr(code_point),
                         'mapping': mapping_name,
                         'cp1': hex(code_point),
                         'cp2': hex(mapping[code_point]),
                         'char2': chr(mapping[code_point])},
                    errorcounter)

        # alpha restriction: "Characters classified as either upper or
        # lower shall automatically belong to this class."
        if is_cased and code_point not in alpha:
            errorcounter = cperror(
                _ctype_error_text(code_point,
                                  'is upper|lower but not alpha'),
                errorcounter)

        for first_name, first, second_name, second in exclusive_pairs:
            if code_point in first and code_point in second:
                errorcounter = cperror(
                    _ctype_error_text(
                        code_point,
                        'is %s and %s' % (first_name, second_name)),
                    errorcounter)

        # punct restriction: the <space> character must not be punct.
        if code_point in punct and code_point == 0x0020:
            errorcounter = cperror(
                _ctype_error_text(code_point, 'is punct.'),
                errorcounter)

        # graph - print relation: differ only in the <space> character.
        # How is this possible if there are more than one space character?!
        # I think susv2/xbd/locale.html should speak of "space characters",
        # not "space character".
        if (code_point in printable
                and not (code_point in graph or code_point in space)):
            errorcounter = cperror(
                _ctype_error_text(code_point,
                                  'is print but not graph|space'),
                errorcounter)
        if (code_point not in printable
                and (code_point in graph or code_point == 0x0020)):
            errorcounter = cperror(
                _ctype_error_text(code_point,
                                  'graph|space but not print'),
                errorcounter)
    return errorcounter

if __name__ == "__main__":
    # The names below are deliberately module-level globals: report()
    # reads the parsed command line options from ARGS.
    PARSER = argparse.ArgumentParser(
        description='''
        Compare the contents of LC_CTYPE in two files and check for errors.
        ''')
    # The file options take exactly one value.  With nargs='?' a bare
    # “-o” or “-n” would be accepted and yield None, which then fails
    # when the file is opened.
    PARSER.add_argument(
        '-o', '--old_ctype_file',
        type=str,
        default='i18n',
        help='The old ctype file, default: %(default)s')
    PARSER.add_argument(
        '-n', '--new_ctype_file',
        type=str,
        default='unicode-ctype',
        help='The new ctype file, default: %(default)s')
    PARSER.add_argument(
        '-a', '--show_added_characters',
        action='store_true',
        help=('Show characters which were added to each '
              + 'character class in detail.'))
    PARSER.add_argument(
        '-m', '--show_missing_characters',
        action='store_true',
        help=('Show characters which were removed from each '
              + 'character class in detail.'))
    ARGS = PARSER.parse_args()

    OLD_CTYPE_DICT = extract_character_classes(
        ARGS.old_ctype_file)
    NEW_CTYPE_DICT = extract_character_classes(
        ARGS.new_ctype_file)
    compare_lists(OLD_CTYPE_DICT, NEW_CTYPE_DICT)
    print('============================================================')
    print('Checking for errors in old ctype file: %s' % ARGS.old_ctype_file)
    print('------------------------------------------------------------')
    NUMBER_OF_ERRORS_IN_OLD_FILE = tests(OLD_CTYPE_DICT, errorcounter=0)
    print('------------------------------------------------------------')
    print('Old file = %s' % ARGS.old_ctype_file)
    print('Number of errors in old file = %s' % NUMBER_OF_ERRORS_IN_OLD_FILE)
    print('------------------------------------------------------------')
    print('============================================================')
    print('Checking for errors in new ctype file: %s' % ARGS.new_ctype_file)
    print('------------------------------------------------------------')
    NUMBER_OF_ERRORS_IN_NEW_FILE = tests(NEW_CTYPE_DICT, errorcounter=0)
    print('------------------------------------------------------------')
    print('New file = %s' % ARGS.new_ctype_file)
    print('Number of errors in new file = %s' % NUMBER_OF_ERRORS_IN_NEW_FILE)
    print('------------------------------------------------------------')
    # Only errors in the newly generated file make the script fail;
    # errors in the old file are reported above for comparison.
    # sys.exit() is used because the bare exit() helper is only
    # installed by the site module.
    sys.exit(1 if NUMBER_OF_ERRORS_IN_NEW_FILE > 0 else 0)

source-git / glibc

Source Code

Blame localedata/unicode-gen/ctype_compatibility.py