Blame localedata/unicode-gen/ctype_compatibility.py

#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2014-2018 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <http://www.gnu.org/licenses/>.
Packit 6c4009
Packit 6c4009
'''
This script is useful for checking the differences between
an old LC_CTYPE file /usr/share/i18n/locale/i18n and a
new one generated by gen_unicode_ctype.py

To see how it is used, call it with the “-h” option:

    $ ./ctype_compatibility.py -h
    … prints usage message …
'''
Packit 6c4009
Packit 6c4009
import sys
Packit 6c4009
import re
Packit 6c4009
import unicodedata
Packit 6c4009
import argparse
Packit 6c4009
Packit 6c4009
from ctype_compatibility_test_cases import TEST_CASES
Packit 6c4009
Packit 6c4009
def get_lines_from_file(filename):
    '''Get all non-comment lines from a i18n file

    Everything from the first “%” of a physical line to its end is
    treated as a comment and dropped.  Lines which are continued on the
    next line because they end in “/” are merged into a single line.

    The file is always decoded as UTF-8, so the result does not depend
    on the locale the script happens to be run in.

    This is a generator; it yields one merged line (a string without
    leading or trailing whitespace) at a time.  Lines which are empty or
    consist only of a comment are yielded as empty strings.
    '''
    with open(filename, encoding='utf-8') as i18n_file:
        current_line = ''
        for raw_line in i18n_file:
            line = raw_line.strip('\n')
            if '%' in line:
                # Remember the continuation marker before cutting off the
                # comment: a “/” at the very end of the physical line
                # continues the line even when it is inside the comment.
                continued = line.endswith('/')
                line = line.partition('%')[0]
                if continued:
                    line += '/'
            line = line.strip()
            if line.endswith('/'):
                current_line += line[:-1]
            else:
                yield current_line + line
                current_line = ''
    if current_line: # file ends with a continuation line
        yield current_line
Packit 6c4009
Packit 6c4009
def extract_character_classes(filename):
    '''Get all Unicode code points for each character class from a file

    Store these code points in a dictionary using the character classes
    as keys and the list of code points in this character class as values.

    In case of the character classes “toupper”, “tolower”, and “totitle”,
    these are actually pairs of code points.

    A line belongs to a character class if it starts either with the
    bare class name followed by whitespace, or with “class "name";” or
    “map "name";” followed by whitespace.  The rest of such a line is
    handed to process_chars(), which appends to the list of that class.
    Character classes which do not occur in the file get no key in the
    returned dictionary.
    '''
    char_classes = (
        'upper',
        'lower',
        'alpha',
        'digit',
        'outdigit',
        'space',
        'cntrl',
        'punct',
        'graph',
        'print',
        'xdigit',
        'blank',
        'combining',
        'combining_level3',
        'toupper',
        'tolower',
        'totitle')
    # Build the line-prefix patterns once instead of once per line and
    # class.  Raw strings keep “\s” a regexp escape rather than an
    # (invalid) string escape.
    prefix_patterns = []
    for char_class in char_classes:
        escaped = re.escape(char_class)
        prefix_patterns.append((
            char_class,
            re.compile(
                r'^((?:(?:class|map)\s+")' + escaped + r'(?:";)\s+'
                + r'|' + escaped + r'\s+)')))

    ctype_dict = {}
    for line in get_lines_from_file(filename):
        for char_class, prefix_re in prefix_patterns:
            match = prefix_re.match(line)
            if not match:
                continue
            process_chars(
                ctype_dict.setdefault(char_class, []),
                line[match.end():])
            # The patterns are anchored and each name must be followed
            # by whitespace or “";”, so no other class can match.
            break
    return ctype_dict
Packit 6c4009
Packit 6c4009
def process_chars(char_class_list, code_point_line):
    '''
    Extract Unicode values from code_point_line
    and add to the list of code points in a character class

    “code_point_line” is a “;” separated list.  After stripping
    surrounding whitespace, each element must have one of these forms:

        <Uxxxx>                 a single code point
        <Uxxxx>..<Uxxxx>        an inclusive range of code points
        <Uxxxx>..(2)..<Uxxxx>   an inclusive range, every second code point
        (<Uxxxx>,<Uxxxx>)       a pair of code points

    where “xxxx” stands for 4 to 8 upper case hexadecimal digits.

    Single code points and the members of ranges are appended to
    “char_class_list” as integers, pairs are appended as tuples of two
    integers.  The list is modified in place, nothing is returned.

    If an element has none of the forms above, a message is written to
    stderr and the script exits with status 1.
    '''
    single_re = re.compile(r'^<U(?P<codepoint>[0-9A-F]{4,8})>$')
    # Both range forms differ only in the optional “(2)..” part.
    range_re = re.compile(
        r'^<U(?P<codepoint1>[0-9A-F]{4,8})>'
        r'\.\.(?P<stepped>\(2\)\.\.)?'
        r'<U(?P<codepoint2>[0-9A-F]{4,8})>$')
    pair_re = re.compile(
        r'^\('
        r'<U(?P<codepoint1>[0-9A-F]{4,8})>'
        r','
        r'<U(?P<codepoint2>[0-9A-F]{4,8})>'
        r'\)$')
    for code_points in code_point_line.split(';'):
        code_points = code_points.strip()
        match = single_re.match(code_points)
        if match: # <Uxxxx>
            char_class_list.append(
                int(match.group('codepoint'), 16))
            continue
        match = range_re.match(code_points)
        if match: # <Uxxxx>..<Uxxxx> or <Uxxxx>..(2)..<Uxxxx>
            step = 2 if match.group('stepped') else 1
            char_class_list.extend(range(
                int(match.group('codepoint1'), 16),
                int(match.group('codepoint2'), 16) + 1,
                step))
            continue
        match = pair_re.match(code_points)
        if match: # (<Uxxxx>,<Uxxxx>)
            char_class_list.append((
                int(match.group('codepoint1'), 16),
                int(match.group('codepoint2'), 16)))
            continue
        sys.stderr.write(
            ('None of the regexps matched '
             + 'code_points=%(cp)s in code_point_line=%(cpl)s\n') %{
            'cp': code_points,
            'cpl': code_point_line
        })
        # sys.exit() instead of the exit() helper, which is only
        # installed by the “site” module for interactive use.
        sys.exit(1)
Packit 6c4009
Packit 6c4009
def compare_lists(old_ctype_dict, new_ctype_dict):
    '''Compare character classes in the old and the new LC_CTYPE

    Both parameters are dictionaries which map a character class name
    to the list of its code points (or code point pairs).

    First the character classes which exist in only one of the two
    dictionaries are printed.  Then, for each character class of the old
    dictionary, the sizes of the class in both dictionaries are printed
    and report() is called to print the differences.  A character class
    which is missing from the new dictionary is compared against an
    empty list, i.e. all its code points are reported as missing.
    '''
    print('****************************************************')
    print('Character classes which are only in the new '
          + 'or only in the old file:')
    for char_class in sorted(old_ctype_dict):
        if char_class not in new_ctype_dict:
            print('Character class %s is in old ctype but not in new ctype'
                  %char_class)
    for char_class in sorted(new_ctype_dict):
        if char_class not in old_ctype_dict:
            print('Character class %s is in new ctype but not in old ctype'
                  %char_class)
    for char_class in sorted(old_ctype_dict):
        old_list = old_ctype_dict[char_class]
        # A class which exists only in the old file has just been
        # reported above; do not die with a KeyError on it here.
        new_list = new_ctype_dict.get(char_class, [])
        print("****************************************************")
        print("%s: %d chars in old ctype and %d chars in new ctype" %(
            char_class,
            len(old_list),
            len(new_list)))
        print("----------------------------------------------------")
        report(char_class, old_list, new_list)
Packit 6c4009
Packit 6c4009
def report_code_points(char_class, code_point_list, text=''):
    '''Report all code points which have been added to or removed from a
    character class.

    “code_point_list” contains either integers (code points) or pairs
    of integers (mappings like those in “toupper”).  One line is printed
    for each entry, in sorted order, starting with “char_class” and
    “text” and followed by the character, its code point in hexadecimal
    and its Unicode name (“name unknown” if unicodedata has no name for
    it).  For pairs, these three items are printed as “first → second”.
    '''
    def describe(code_point):
        '''Return character, hex value and Unicode name of a code point'''
        char = chr(code_point)
        return (char,
                hex(code_point),
                unicodedata.name(char, 'name unknown'))

    for code_point in sorted(code_point_list):
        if isinstance(code_point, int):
            char, hex_value, name = describe(code_point)
            print('%(char_class)s: %(text)s: %(char)s %(code_point)s %(name)s'
                  %{'text': text,
                    'char': char,
                    'char_class': char_class,
                    'code_point': hex_value,
                    'name': name})
        else:
            char0, hex_value0, name0 = describe(code_point[0])
            char1, hex_value1, name1 = describe(code_point[1])
            print(('%(char_class)s: %(text)s: '
                   + '%(char0)s → %(char1)s '
                   + '%(code_point0)s → %(code_point1)s '
                   + '%(name0)s → %(name1)s') %{
                'text': text,
                'char_class': char_class,
                'char0': char0,
                'code_point0': hex_value0,
                'name0': name0,
                'char1': char1,
                'code_point1': hex_value1,
                'name1': name1
            })
Packit 6c4009
Packit 6c4009
def report(char_class, old_list, new_list):
    '''Print how one LC_CTYPE character class changed

    “old_list” and “new_list” are the members of “char_class” in the
    old and in the newly generated ctype.  The number of members which
    were dropped and the number of members which were added are always
    printed.  The members themselves are listed with
    report_code_points() only if the global command line options
    ARGS.show_missing_characters or ARGS.show_added_characters ask for it.
    '''
    old_members = set(old_list)
    new_members = set(new_list)

    # In the old ctype but not in the new one any more.
    missing_chars = list(old_members - new_members)
    missing_summary = ('%(char_class)s: Missing %(number)d characters '
                       + 'of old ctype in new ctype ')
    print(missing_summary
          %{'char_class': char_class, 'number': len(missing_chars)})
    if ARGS.show_missing_characters:
        report_code_points(char_class, missing_chars, 'Missing')

    # In the new ctype but not in the old one.
    added_chars = list(new_members - old_members)
    added_summary = ('%(char_class)s: Added %(number)d characters '
                     + 'in new ctype which were not in old ctype')
    print(added_summary
          %{'char_class': char_class, 'number': len(added_chars)})
    if ARGS.show_added_characters:
        report_code_points(char_class, added_chars, 'Added')
Packit 6c4009
Packit 6c4009
Packit 6c4009
def cperror(error_message, errorcounter=0):
    '''Print “error_message” and return “errorcounter” increased by one

    The counter which is passed in is not modified; the caller has to
    store the returned value.
    '''
    print(error_message)
    errorcounter += 1
    return errorcounter
Packit 6c4009
Packit 6c4009
def cpcheck(ctype_dict, code_point_list_with_ranges, char_classes, reason='',
            errorcounter=0):
    '''Check the character class membership of a list of code points

    The parameter “code_point_list_with_ranges” is a list of
    integers or pairs of integers, for example:

    [0x0E31, (0x0E34, 0x0E3A), (0x0E47, 0x0E4E)]

    where the pairs of integers stand for all the code points in the range
    of the two integers given, including the two integers of the pair.

    “char_classes” is a list of pairs (character class name, boolean).
    Each code point is expected to be in “ctype_dict[name]” if the
    boolean is true and expected not to be in it if the boolean is
    false.  A character class which is missing from “ctype_dict” is
    treated as empty.

    For each expectation which is not met, an error message containing
    “reason” is printed with cperror().  Returns “errorcounter”
    increased by the number of errors found.
    '''
    for code_point_range in code_point_list_with_ranges:
        if isinstance(code_point_range, int):
            code_points = (code_point_range,)
        else:
            code_points = range(code_point_range[0],
                                code_point_range[1]+1)
        for code_point in code_points:
            for char_class_tuple in char_classes:
                char_class = char_class_tuple[0]
                in_char_class = char_class_tuple[1]
                # .get(): a class which does not occur in the ctype file
                # has no members; that is no reason for a KeyError.
                members = ctype_dict.get(char_class, ())
                if (code_point in members) != in_char_class:
                    errorcounter = cperror(
                        ('error: %(code_point)s %(char)s '
                         + '%(char_class)s %(in)s: %(reason)s') %{
                             'code_point': hex(code_point),
                             'char': chr(code_point),
                             'char_class': char_class,
                             'in': not in_char_class,
                             'reason': reason},
                        errorcounter)
    return errorcounter
Packit 6c4009
Packit 6c4009
def tests(ctype_dict, errorcounter = 0):
    '''Test a LC_CTYPE character class dictionary for known errors

    “ctype_dict” maps character class names to lists of code points
    (or, for the mappings like “toupper”, to lists of code point pairs).

    First all entries of TEST_CASES are checked with cpcheck().  Then
    every code point from 0 to 0x10FFFF is checked against the
    restrictions on the character classes quoted in the comments below.
    A character class which does not occur in “ctype_dict” is treated
    as empty.

    Every violation is printed with cperror().  Returns “errorcounter”
    increased by the number of errors found.
    '''
    # Imported here because only this function needs it.
    from collections import defaultdict

    # copy the information from ctype_dict (which contains lists) in
    # a new dictionary ctype_dict2 (which contains dictionaries).
    # The checks below are easier with that type of data structure.
    # A defaultdict is used so that looking up a character class which
    # is not in the file gives an empty class instead of a KeyError.
    ctype_dict2 = defaultdict(dict)
    for key, values in ctype_dict.items():
        members = ctype_dict2[key]
        if not values:
            continue
        if isinstance(values[0], int):
            for value in values:
                members[value] = 1
        else: # key is 'toupper', 'tolower', or 'totitle'
            for value in values:
                members[value[0]] = value[1]

    for test_case in TEST_CASES:
        errorcounter = cpcheck(ctype_dict2,
                               test_case[0],
                               test_case[1],
                               test_case[2],
                               errorcounter = errorcounter)

    # Look up everything which is needed for each of the 0x110000 code
    # points only once.
    upper = ctype_dict2['upper']
    lower = ctype_dict2['lower']
    alpha = ctype_dict2['alpha']
    punct = ctype_dict2['punct']
    graph = ctype_dict2['graph']
    space = ctype_dict2['space']
    printable = ctype_dict2['print']
    case_mappings = (
        ('toupper', ctype_dict2['toupper']),
        ('tolower', ctype_dict2['tolower']))
    # Pairs of character classes which must not have a code point in
    # common.  The order of the pairs is the order in which the errors
    # for one code point are printed.
    exclusive_classes = [
        (name1, ctype_dict2[name1], name2, ctype_dict2[name2])
        for name1, name2 in (
            # alpha restriction: "No character specified for the
            # keywords cntrl, digit, punct or space shall be specified."
            ('alpha', 'cntrl'),
            ('alpha', 'digit'),
            ('alpha', 'punct'),
            ('alpha', 'space'),
            # space restriction: "No character specified for the
            # keywords upper, lower, alpha, digit, graph or xdigit shall
            # be specified."  upper, lower, alpha are covered by the
            # alpha checks.
            ('space', 'digit'),
            ('space', 'graph'),
            ('space', 'xdigit'),
            # cntrl restriction: "No character specified for the
            # keywords upper, lower, alpha, digit, punct, graph, print
            # or xdigit shall be specified."  upper, lower, alpha are
            # covered by the alpha checks.
            ('cntrl', 'digit'),
            ('cntrl', 'punct'),
            ('cntrl', 'graph'),
            ('cntrl', 'print'),
            ('cntrl', 'xdigit'),
            # punct restriction: "No character specified for the
            # keywords upper, lower, alpha, digit, cntrl, xdigit or as
            # the <space> character shall be specified."  upper, lower,
            # alpha, cntrl are covered by the pairs above; the <space>
            # character is checked separately in the loop.
            ('punct', 'digit'),
            ('punct', 'xdigit'))]
    # graph restriction and print restriction: "No character specified
    # for the keyword cntrl shall be specified."  Both are covered by
    # the cntrl pairs above.

    for code_point in range(0, 0x110000):
        upper_or_lower = code_point in lower or code_point in upper
        # toupper and tolower restriction: "Only characters specified
        # for the keywords lower and upper shall be specified."
        for mapping_name, mapping in case_mappings:
            if (code_point in mapping
                    and code_point != mapping[code_point]
                    and not upper_or_lower):
                errorcounter = cperror(
                    ('error: %(char1)s is not upper|lower '
                     + 'but %(mapping)s(%(cp1)s)=%(cp2)s (%(char2)s)') %{
                         'char1': chr(code_point),
                         'mapping': mapping_name,
                         'cp1': hex(code_point),
                         'cp2': hex(mapping[code_point]),
                         'char2': chr(mapping[code_point])
                     },
                    errorcounter)
        # alpha restriction: "Characters classified as either upper or
        # lower shall automatically belong to this class."
        if upper_or_lower and code_point not in alpha:
            errorcounter = cperror(
                'error: %(char)s %(cp)s is upper|lower but not alpha' %{
                    'char': chr(code_point),
                    'cp': hex(code_point)
                },
                errorcounter)
        for name1, class1, name2, class2 in exclusive_classes:
            if code_point in class1 and code_point in class2:
                errorcounter = cperror(
                    'error: %(char)s %(cp)s is %(class1)s and %(class2)s' %{
                        'char': chr(code_point),
                        'cp': hex(code_point),
                        'class1': name1,
                        'class2': name2
                    },
                    errorcounter)
        # punct restriction: the <space> character must not be punct.
        if code_point == 0x0020 and code_point in punct:
            errorcounter = cperror(
                'error: %(char)s %(cp)s is punct.' %{
                    'char': chr(code_point),
                    'cp': hex(code_point)
                },
                errorcounter)
        # graph - print relation: differ only in the <space> character.
        # How is this possible if there are more than one space character?!
        # I think susv2/xbd/locale.html should speak of "space characters",
        # not "space character".
        if (code_point in printable
                and not (code_point in graph or code_point in space)):
            errorcounter = cperror(
                'error: %(char)s %(cp)s is print but not graph|space' %{
                    'char': chr(code_point),
                    'cp': hex(code_point)
                },
                errorcounter)
        if (code_point not in printable
                and (code_point in graph or code_point == 0x0020)):
            errorcounter = cperror(
                'error: %(char)s %(cp)s graph|space but not print' %{
                    'char': chr(code_point),
                    'cp': hex(code_point)
                },
                errorcounter)
    return errorcounter
Packit 6c4009
Packit 6c4009
# Script entry point: parse the command line, compare the character
# classes of the old and the new ctype file, then check both files for
# known errors.  The exit status is 1 if the new file has errors and 0
# otherwise; errors in the old file are only reported.
if __name__ == "__main__":
    PARSER = argparse.ArgumentParser(
        description='''
        Compare the contents of LC_CTYPE in two files and check for errors.
        ''')
    PARSER.add_argument(
        '-o', '--old_ctype_file',
        nargs='?',
        type=str,
        default='i18n',
        help='The old ctype file, default: %(default)s')
    PARSER.add_argument(
        '-n', '--new_ctype_file',
        nargs='?',
        type=str,
        default='unicode-ctype',
        help='The new ctype file, default: %(default)s')
    PARSER.add_argument(
        '-a', '--show_added_characters',
        action='store_true',
        help=('Show characters which were added to each '
              + 'character class in detail.'))
    PARSER.add_argument(
        '-m', '--show_missing_characters',
        action='store_true',
        help=('Show characters which were removed from each '
              + 'character class in detail.'))
    # ARGS is a module level name on purpose: report() reads it.
    ARGS = PARSER.parse_args()

    OLD_CTYPE_DICT = extract_character_classes(
        ARGS.old_ctype_file)
    NEW_CTYPE_DICT = extract_character_classes(
        ARGS.new_ctype_file)
    compare_lists(OLD_CTYPE_DICT, NEW_CTYPE_DICT)
    print('============================================================')
    print('Checking for errors in old ctype file: %s' %ARGS.old_ctype_file)
    print('------------------------------------------------------------')
    NUMBER_OF_ERRORS_IN_OLD_FILE = tests(OLD_CTYPE_DICT, errorcounter = 0)
    print('------------------------------------------------------------')
    print('Old file = %s' %ARGS.old_ctype_file)
    print('Number of errors in old file = %s' %NUMBER_OF_ERRORS_IN_OLD_FILE)
    print('------------------------------------------------------------')
    print('============================================================')
    print('Checking for errors in new ctype file: %s' %ARGS.new_ctype_file)
    print('------------------------------------------------------------')
    NUMBER_OF_ERRORS_IN_NEW_FILE = tests(NEW_CTYPE_DICT, errorcounter = 0)
    print('------------------------------------------------------------')
    print('New file = %s' %ARGS.new_ctype_file)
    print('Number of errors in new file = %s' %NUMBER_OF_ERRORS_IN_NEW_FILE)
    print('------------------------------------------------------------')
    # sys.exit() instead of the exit() helper, which is only installed
    # by the “site” module for interactive use.
    sys.exit(1 if NUMBER_OF_ERRORS_IN_NEW_FILE > 0 else 0)