Blame localedata/unicode-gen/utf8_gen.py

Packit Service 82fcde
#!/usr/bin/python3
Packit Service 82fcde
# -*- coding: utf-8 -*-
Packit Service 82fcde
# Copyright (C) 2014-2018 Free Software Foundation, Inc.
Packit Service 82fcde
# This file is part of the GNU C Library.
Packit Service 82fcde
#
Packit Service 82fcde
# The GNU C Library is free software; you can redistribute it and/or
Packit Service 82fcde
# modify it under the terms of the GNU Lesser General Public
Packit Service 82fcde
# License as published by the Free Software Foundation; either
Packit Service 82fcde
# version 2.1 of the License, or (at your option) any later version.
Packit Service 82fcde
#
Packit Service 82fcde
# The GNU C Library is distributed in the hope that it will be useful,
Packit Service 82fcde
# but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 82fcde
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Packit Service 82fcde
# Lesser General Public License for more details.
Packit Service 82fcde
#
Packit Service 82fcde
# You should have received a copy of the GNU Lesser General Public
Packit Service 82fcde
# License along with the GNU C Library; if not, see
Packit Service 82fcde
# <http://www.gnu.org/licenses/>.
Packit Service 82fcde
Packit Service 82fcde
'''glibc/localedata/charmaps/UTF-8 file generator script
Packit Service 82fcde
Packit Service 82fcde
This script generates a glibc/localedata/charmaps/UTF-8 file
Packit Service 82fcde
from Unicode data.
Packit Service 82fcde
Packit Service 82fcde
Usage: python3 utf8_gen.py -u UnicodeData.txt -e EastAsianWidth.txt -p PropList.txt --unicode_version VERSION
Packit Service 82fcde
Packit Service 82fcde
It writes the result to a file named UTF-8 in the current directory.
Packit Service 82fcde
'''
Packit Service 82fcde
Packit Service 82fcde
import argparse
Packit Service 82fcde
import sys
Packit Service 82fcde
import re
Packit Service 82fcde
import unicode_utils
Packit Service 82fcde
Packit Service 82fcde
# Auxiliary tables for Hangul syllable names, see the Unicode 3.0 book,
Packit Service 82fcde
# sections 3.11 and 4.4.
Packit Service 82fcde
Packit Service 82fcde
# Short names of the 19 initial consonants, indexed by index1 in
# process_range().
JAMO_INITIAL_SHORT_NAME = (
    'G', 'GG', 'N', 'D', 'DD', 'R', 'M', 'B', 'BB', 'S', 'SS', '', 'J', 'JJ',
    'C', 'K', 'T', 'P', 'H'
)

# Short names of the 21 medial vowels, indexed by index2 in
# process_range().
JAMO_MEDIAL_SHORT_NAME = (
    'A', 'AE', 'YA', 'YAE', 'EO', 'E', 'YEO', 'YE', 'O', 'WA', 'WAE', 'OE',
    'YO', 'U', 'WEO', 'WE', 'WI', 'YU', 'EU', 'YI', 'I'
)

# Short names of the 28 final consonants (the first entry stands for
# "no final consonant"), indexed by index3 in process_range().
# Entry 5 is 'NJ' (NIEUN-CIEUC), so that e.g. U+AC05 is named
# HANGUL SYLLABLE GANJ as in the Unicode character database.
JAMO_FINAL_SHORT_NAME = (
    '', 'G', 'GG', 'GS', 'N', 'NJ', 'NH', 'D', 'L', 'LG', 'LM', 'LB', 'LS',
    'LT', 'LP', 'LH', 'M', 'B', 'BS', 'S', 'SS', 'NG', 'J', 'C', 'K', 'T',
    'P', 'H'
)
Packit Service 82fcde
Packit Service 82fcde
def process_range(start, end, outfile, name):
    '''Writes a range of code points into the CHARMAP section of the
    output file

    start and end are the first and the last code point of the range
    as hexadecimal strings (they are parsed with int(..., 16)),
    outfile is the writable output file and name is the name written
    into the last column, e.g. "<CJK Ideograph Extension A>".

    Ranges whose name contains "Hangul Syllable" are expanded into one
    line per code point with the computed syllable name.  All other
    ranges are split into sub-ranges of at most 64 code points.

    '''
    first = int(start, 16)
    last = int(end, 16)
    if 'Hangul Syllable' in name:
        # from glibc/localedata/ChangeLog:
        #
        #  2000-09-24  Bruno Haible  <haible@clisp.cons.org>
        #  * charmaps/UTF-8: Expand <Hangul Syllable> and <Private Use> ranges,
        #  so they become printable and carry a width. Comment out surrogate
        #  ranges. Add a WIDTH table
        #
        # So we expand the Hangul Syllables here:
        for i in range(first, last + 1):
            # Decompose the syllable index into the indices of the
            # initial consonant, the medial vowel and the final
            # consonant (21 medials and 28 finals per initial).
            index2, index3 = divmod(i - 0xAC00, 28)
            index1, index2 = divmod(index2, 21)
            hangul_syllable_name = 'HANGUL SYLLABLE ' \
                                   + JAMO_INITIAL_SHORT_NAME[index1] \
                                   + JAMO_MEDIAL_SHORT_NAME[index2] \
                                   + JAMO_FINAL_SHORT_NAME[index3]
            outfile.write('{:<11s} {:<12s} {:s}\n'.format(
                unicode_utils.ucs_symbol(i), convert_to_hex(i),
                hangul_syllable_name))
        return
    # The UnicodeData.txt file contains code point ranges like this:
    #
    # 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
    # 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
    #
    # The glibc UTF-8 file splits ranges like these into shorter
    # ranges of 64 code points each:
    #
    # <U3400>..<U343F>     /xe3/x90/x80         <CJK Ideograph Extension A>
    # …
    # <U4D80>..<U4DB5>     /xe4/xb6/x80         <CJK Ideograph Extension A>
    #
    # The upper bound of range() is last + 1 so that the last code
    # point is not lost when the length of the range is a multiple of
    # 64 plus one (including a range consisting of one code point).
    for i in range(first, last + 1, 64):
        outfile.write('{:s}..{:s} {:<12s} {:s}\n'.format(
            unicode_utils.ucs_symbol(i),
            # The final sub-range may be shorter than 64 code points.
            unicode_utils.ucs_symbol(min(i + 63, last)),
            convert_to_hex(i),
            name))
Packit Service 82fcde
Packit Service 82fcde
def process_charmap(flines, outfile):
    '''Writes the body of the CHARMAP section of the UTF-8 file in
    glibc/localedata/charmaps/UTF-8 to outfile.

    flines is a sequence containing *all* lines of UnicodeData.txt.

    Samples for input lines:

    0010;<control>;Cc;0;BN;;;;;N;DATA LINK ESCAPE;;;;
    3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
    4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
    D800;<Non Private Use High Surrogate, First>;Cs;0;L;;;;;N;;;;;
    DB7F;<Non Private Use High Surrogate, Last>;Cs;0;L;;;;;N;;;;;
    100000;<Plane 16 Private Use, First>;Co;0;L;;;;;N;;;;;
    10FFFD;<Plane 16 Private Use, Last>;Co;0;L;;;;;N;;;;;

    Samples for output lines (Unicode-Value UTF-8-HEX Unicode-Char-Name):

    <U0010>     /x10 DATA LINK ESCAPE
    <U3400>..<U343F>     /xe3/x90/x80 <CJK Ideograph Extension A>
    %<UD800>     /xed/xa0/x80 <Non Private Use High Surrogate, First>
    %<UDB7F>     /xed/xad/xbf <Non Private Use High Surrogate, Last>
    <U0010FFC0>..<U0010FFFD>     /xf4/x8f/xbf/x80 <Plane 16 Private Use>

    '''
    # Fields of the most recent "..., First>" line which is still
    # waiting for its "..., Last>" line; empty if there is none.
    range_start_fields = []
    for line in flines:
        fields = line.split(';')
        char_name = fields[1]
        # Some characters have "<control>" as their name.  Use the
        # "Unicode 1.0 Name" (field 10 of UnicodeData.txt) for them
        # if there is one.
        #
        # The characters U+0080, U+0081, U+0084 and U+0099 have
        # "<control>" as their name but do not even have a
        # "Unicode 1.0 Name".  We could write code to take their
        # alternate names from NameAliases.txt.
        if char_name == '<control>' and fields[10]:
            char_name = fields[10]
        is_surrogate = 'Surrogate,' in char_name
        if not is_surrogate:
            # Code point ranges are given as a pair of lines:
            #
            # 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
            # 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
            if char_name.endswith(', First>'):
                range_start_fields = fields
                continue
            if char_name.endswith(', Last>'):
                # Strip the trailing ", Last>" and close the bracket again.
                process_range(range_start_fields[0], fields[0],
                              outfile, char_name[:-7] + '>')
                range_start_fields = []
                continue
        range_start_fields = []
        if is_surrogate:
            # Surrogates are written as comment lines.  They could be
            # skipped completely, but the original UTF-8 file in glibc
            # had them as comments, so these lines are kept.
            outfile.write('%')
        code_point = int(fields[0], 16)
        outfile.write('{:<11s} {:<12s} {:s}\n'.format(
            unicode_utils.ucs_symbol(code_point),
            convert_to_hex(code_point),
            char_name))
Packit Service 82fcde
Packit Service 82fcde
def convert_to_hex(code_point):
    '''Converts a code point to a hexadecimal UTF-8 representation
    like /x**/x**/x**.

    code_point is an integer; the return value is a string with one
    "/x" followed by two lowercase hexadecimal digits for each byte of
    the UTF-8 encoding of the code point.

    '''
    # In Python3, a plain .encode('UTF-8') raises UnicodeEncodeError
    # for surrogates.  The 'surrogatepass' error handler encodes every
    # surrogate (U+D800..U+DFFF) like any other code point, which
    # yields e.g. /xed/xa0/x80 for U+D800.
    utf8_bytes = chr(code_point).encode('UTF-8', 'surrogatepass')
    return ''.join('/x{:02x}'.format(byte) for byte in utf8_bytes)
Packit Service 82fcde
Packit Service 82fcde
def write_header_charmap(outfile):
    '''Writes the lines which precede the CHARMAP entries, up to and
    including the "CHARMAP" keyword, to the output file'''
    header_lines = (
        "<code_set_name> UTF-8\n",
        "<comment_char> %\n",
        "<escape_char> /\n",
        "<mb_cur_min> 1\n",
        "<mb_cur_max> 6\n\n",
        "% CHARMAP generated using utf8_gen.py\n",
        "% alias ISO-10646/UTF-8\n",
        "CHARMAP\n",
    )
    for header_line in header_lines:
        outfile.write(header_line)
Packit Service 82fcde
Packit Service 82fcde
def write_header_width(outfile, unicode_version):
    '''Writes the comment lines which explain the WIDTH section,
    followed by the "WIDTH" keyword, to the output file.

    unicode_version is the Unicode version string mentioned in the
    first comment line.

    '''
    # There is no separate comment about "ZERO WIDTH" characters: the
    # original author noted that they are already covered by the Cf
    # rule.
    header_lines = (
        '% Character width according to Unicode '
        + '{:s}.\n'.format(unicode_version),
        '% - Default width is 1.\n',
        '% - Double-width characters have width 2; generated from\n',
        '%        "grep \'^[^;]*;[WF]\' EastAsianWidth.txt"\n',
        '% - Non-spacing characters have width 0; '
        + 'generated from PropList.txt or\n',
        '%   "grep \'^[^;]*;[^;]*;[^;]*;[^;]*;NSM;\' '
        + 'UnicodeData.txt"\n',
        '% - Format control characters have width 0; '
        + 'generated from\n',
        "%   \"grep '^[^;]*;[^;]*;Cf;' UnicodeData.txt\"\n",
        "WIDTH\n",
    )
    for header_line in header_lines:
        outfile.write(header_line)
Packit Service 82fcde
Packit Service 82fcde
def process_width(outfile, ulines, elines, plines):
    '''Writes the entries of the WIDTH section to outfile.

    ulines are lines from UnicodeData.txt, elines are lines from
    EastAsianWidth.txt containing characters with width "W" or "F",
    plines are lines from PropList.txt which contain characters
    with the property "Prepended_Concatenation_Mark".

    Only code points whose width differs from the default width 1 are
    written.  Consecutive code points with the same width are written
    as one "<first>...<last>" range.

    '''
    width_dict = {}
    # Wide and fullwidth characters have width 2.
    for line in elines:
        for key in _code_points_of_line(line):
            width_dict[key] = 2

    # Non-spacing marks, enclosing marks and format control characters
    # have width 0.
    for line in ulines:
        fields = line.split(";")
        if fields[4] == "NSM" or fields[2] in ("Cf", "Me", "Mn"):
            width_dict[int(fields[0], 16)] = 0

    # Characters with the property "Prepended_Concatenation_Mark"
    # should have the width 1, which is the default, so any entry for
    # them is dropped.  A code point without an entry already has the
    # default width and is not an error.
    for line in plines:
        for key in _code_points_of_line(line):
            width_dict.pop(key, None)

    # Handle special cases for compatibility:
    #
    # U+00AD SOFT HYPHEN gets the default width 1, see
    # https://www.cs.tut.fi/~jkorpela/shy.html
    width_dict.pop(0x00AD, None)
    for key in range(0x1160, 0x1200):
        width_dict[key] = 0
    for key in range(0x3248, 0x3250):
        # These are "A" which means we can decide whether to treat them
        # as "W" or "N" based on context:
        # http://www.unicode.org/mail-arch/unicode-ml/y2017-m08/0023.html
        # For us, "W" seems better.
        width_dict[key] = 2
    for key in range(0x4DC0, 0x4E00):
        width_dict[key] = 2

    # Collect runs of consecutive code points which have the same
    # width as [first, last] pairs.
    runs = []
    for key in sorted(width_dict):
        if (runs
                and key == runs[-1][1] + 1
                and width_dict[key] == width_dict[runs[-1][0]]):
            runs[-1][1] = key
        else:
            runs.append([key, key])

    for first, last in runs:
        if first == last:
            outfile.write('{:s}\t{:d}\n'.format(
                unicode_utils.ucs_symbol(first),
                width_dict[first]))
        else:
            outfile.write('{:s}...{:s}\t{:d}\n'.format(
                unicode_utils.ucs_symbol(first),
                unicode_utils.ucs_symbol(last),
                width_dict[first]))


def _code_points_of_line(line):
    '''Returns the range of code points given in the first
    ";"-separated field of line, which is either a single hexadecimal
    code point or two of them joined by "..".'''
    first, _, last = line.split(";")[0].partition("..")
    # For a single code point there is no "..", so last is empty.
    return range(int(first, 16), int(last or first, 16) + 1)
Packit Service 82fcde
Packit Service 82fcde
if __name__ == "__main__":
    # The options take exactly one value each.  (With nargs='?' an
    # option given without a value would silently become None and
    # cause a TypeError later on.)
    PARSER = argparse.ArgumentParser(
        description='''
        Generate a UTF-8 file from UnicodeData.txt, EastAsianWidth.txt, and PropList.txt.
        ''')
    PARSER.add_argument(
        '-u', '--unicode_data_file',
        type=str,
        default='UnicodeData.txt',
        help=('The UnicodeData.txt file to read, '
              + 'default: %(default)s'))
    PARSER.add_argument(
        '-e', '--east_asian_with_file',
        type=str,
        default='EastAsianWidth.txt',
        help=('The EastAsianWidth.txt file to read, '
              + 'default: %(default)s'))
    PARSER.add_argument(
        '-p', '--prop_list_file',
        type=str,
        default='PropList.txt',
        help=('The PropList.txt file to read, '
              + 'default: %(default)s'))
    PARSER.add_argument(
        '--unicode_version',
        required=True,
        type=str,
        help='The Unicode version of the input files used.')
    ARGS = PARSER.parse_args()

    # All files are opened with an explicit encoding so that the
    # result does not depend on the locale the script is run in.
    with open(ARGS.unicode_data_file, mode='r',
              encoding='utf-8') as UNIDATA_FILE:
        UNICODE_DATA_LINES = UNIDATA_FILE.readlines()
    with open(ARGS.east_asian_with_file, mode='r',
              encoding='utf-8') as EAST_ASIAN_WIDTH_FILE:
        EAST_ASIAN_WIDTH_LINES = []
        for LINE in EAST_ASIAN_WIDTH_FILE:
            # If characters from EastAsianWidth.txt which are from
            # reserved ranges (i.e. not yet assigned code points)
            # are added to the WIDTH section of the UTF-8 file, then
            # "make check" produces "Unknown Character" errors for
            # these code points because such unassigned code points
            # are not in the CHARMAP section of the UTF-8 file.
            #
            # Therefore, we skip all reserved code points when reading
            # the EastAsianWidth.txt file.
            if re.match(r'.*<reserved-.+>\.\.<reserved-.+>.*', LINE):
                continue
            # Keep only the lines for wide ("W") and fullwidth ("F")
            # characters.
            if re.match(r'^[^;]*;[WF]', LINE):
                EAST_ASIAN_WIDTH_LINES.append(LINE.strip())
    with open(ARGS.prop_list_file, mode='r',
              encoding='utf-8') as PROP_LIST_FILE:
        PROP_LIST_LINES = []
        for LINE in PROP_LIST_FILE:
            if re.match(r'^[^;]*;[\s]*Prepended_Concatenation_Mark', LINE):
                PROP_LIST_LINES.append(LINE.strip())
    with open('UTF-8', mode='w', encoding='utf-8') as OUTFILE:
        # Process UnicodeData.txt and write CHARMAP to the UTF-8 file
        write_header_charmap(OUTFILE)
        process_charmap(UNICODE_DATA_LINES, OUTFILE)
        OUTFILE.write("END CHARMAP\n\n")
        # Process EastAsianWidth.txt and write WIDTH to the UTF-8 file
        write_header_width(OUTFILE, ARGS.unicode_version)
        process_width(OUTFILE,
                      UNICODE_DATA_LINES,
                      EAST_ASIAN_WIDTH_LINES,
                      PROP_LIST_LINES)
        OUTFILE.write("END WIDTH\n")