Blame scripts/fetch_language_identifiers.py

Packit Service 5a9772
#!/bin/env python3
Packit Service 5a9772
#
Packit Service 5a9772
# This is a helper script that fetches the current language and keyboard tables
Packit Service 5a9772
# and writes the result to a C compatible struct.
Packit Service 5a9772
#
Packit Service 5a9772
import os
Packit Service 5a9772
import sys
Packit Service 5a9772
import requests
Packit Service 5a9772
import numpy as np
Packit Service 5a9772
import traceback
Packit Service 5a9772
from bs4 import BeautifulSoup
Packit Service 5a9772
from bs4 import element
Packit Service 5a9772
Packit Service 5a9772
# Banner written at the very top of the generated C file: a C block comment
# followed by one empty line. The text is emitted verbatim, so it must not
# contain anything but the comment itself.
intro = '''/* This file is auto generated from
 *
 * https://docs.microsoft.com/en-us/windows/win32/intl/language-identifier-constants-and-strings
 *
 * please do not edit but use ./scripts/fetch_language_identifiers.py to regenerate!
 */

'''
Packit Service 5a9772
Packit Service 5a9772
def parse_html(text):
    """Extract the first table of an HTML document.

    text -- HTML markup as a string

    Returns a two element list ``[headers, rows]``:
    headers is a list with the text of every cell in the first row of the
    table's <thead>; rows is a list with one entry per tag found directly
    inside the table's <tbody>, each entry being the list of that row's cell
    strings.  A cell without a single string child (empty, or containing
    nested markup) is stored as ''.

    Raises ValueError when the document has no table, or the table lacks a
    <thead> row or a <tbody>.
    """
    soup = BeautifulSoup(text, 'html.parser')
    table = soup.find("table")
    if table is None:
        raise ValueError('no <table> element found')

    thead = table.find('thead')
    head = thead.find('tr') if thead is not None else None
    body = table.find('tbody')
    if head is None or body is None:
        raise ValueError('table has no <thead> row or no <tbody>')

    # Iterating a tag yields its children, including the whitespace strings
    # between elements, hence the isinstance() filters below.
    headers = []
    for th in head:
        if isinstance(th, element.Tag):
            # One header per cell: take the cell text rather than splicing the
            # cell's child nodes into the list.
            headers.append(th.get_text())

    languages = []
    for tr in body:
        if not isinstance(tr, element.Tag):
            continue
        entry = [td.string or '' for td in tr if isinstance(td, element.Tag)]
        languages.append(entry)
    return [headers, languages]
Packit Service 5a9772
Packit Service 5a9772
def is_base(num, base):
    """Return True if int(num, base) succeeds, False otherwise.

    num  -- candidate text
    base -- base passed on to int()

    A value int() rejects with TypeError (e.g. None, or a number combined with
    an explicit base) is reported as False, just like malformed text.
    """
    try:
        int(num, base)
    except (TypeError, ValueError):
        return False
    return True
Packit Service 5a9772
Packit Service 5a9772
def padhexa(v):
    """Format the integer *v* as '0x' followed by at least 8 hex digits.

    Digits are lower case and zero padded on the left; values needing more
    than 8 digits are not truncated.  A negative value keeps its sign in front
    of the prefix (e.g. '-0x00000001').
    """
    # Format the magnitude only: hex() of a negative number starts with '-0x',
    # which a plain [2:] slice would not strip correctly.
    sign = '-' if v < 0 else ''
    return sign + '0x' + format(abs(v), 'x').zfill(8)
Packit Service 5a9772
Packit Service 5a9772
def write_struct(fp, struct, name, url, base, inv = False, typemap = None):
    """Download the table at *url* and write it to *fp* as a C array.

    fp      -- writable text stream receiving the generated code
    struct  -- C type name used for the array elements
    name    -- C identifier of the array
    url     -- page to fetch; its table is extracted with parse_html()
    base    -- base given to int() when trying to read a cell as a number; it
               also selects the output format of numeric cells (0 and 16:
               padhexa(), 2: bin(), 8: oct(), 10: decimal)
    inv     -- if True, numeric cells are put in front of what has been
               collected for the row so far instead of being appended
    typemap -- optional sequence of column types; a non numeric cell in a
               column whose type is not str is replaced by the last value
               remembered for that column and written without quotes

    Non numeric cells are written as quoted strings.  An empty cell repeats
    the last value remembered for its column.

    Terminates the process with exit status 1 if the response status is not
    requests.codes.ok.
    """
    # Without a timeout a stalled connection would block the script forever.
    li = requests.get(url, timeout=60)
    if li.status_code != requests.codes.ok:
        print('Could not fetch ' + str(url) + ', reponse code ' + str(li.status_code))
        sys.exit(1)
    headers, languages = parse_html(li.text)

    fp.write('const ' + str(struct) + ' ' + str(name) + '[] =\n')
    fp.write('{\n')
    # The column titles are emitted as one C comment above the rows.
    fp.write('/* ')
    for h in headers:
        fp.write('\t[')
        fp.write(h)
        fp.write(']\t')
    fp.write('*/\n')

    # Pick the number formatter once instead of formatting every value in
    # every base for each cell.
    formatter = {0: padhexa, 2: bin, 8: oct, 10: str, 16: padhexa}.get(base)
    # Last value seen per column index; a dict so that rows of any width work.
    last = {}
    for language in languages:
        fp.write('\t{ ')
        line = ''
        for pos, e in enumerate(language):
            try:
                v = int(e, base=base)
            except ValueError:
                # Columns beyond the end of typemap are treated as strings.
                if typemap and pos < len(typemap) and typemap[pos] != str:
                    line += str(last.get(pos)) + ',\t'
                elif e == "":
                    line += '"' + str(last.get(pos)) + '",\t'
                else:
                    line += '"' + e + '",\t'
                    if e != "None":
                        last[pos] = str(e)
            else:
                if formatter is None:
                    # Base without a dedicated format: the text 'None' is
                    # written and nothing is remembered (unchanged legacy
                    # behaviour).
                    h = 'None'
                else:
                    h = str(formatter(v))
                    last[pos] = h
                if inv:
                    line = h + ', ' + line
                else:
                    line += h + ', '
        # Every cell ends in a two character separator; drop the final one.
        fp.write(line[:-2] + '},\n')
    fp.write('};\n')
    fp.write('\n')
Packit Service 5a9772
Packit Service 5a9772
def update_lang_identifiers(fp):
    """Write the `language_identifiers` array (type LanguageIdentifier) to *fp*."""
    # Columns of the source table, in order:
    #   [Language identifier] [Primary language] [Prim. lang. identifier]
    #   [Prim. lang. symbol] [Sublanguage] [Sublang. identifier]
    #   [Sublang. symbol]
    column_types = [int, str, int, str, str, int, str]
    page = 'https://docs.microsoft.com/en-us/windows/win32/intl/language-identifier-constants-and-strings'
    write_struct(fp, 'LanguageIdentifier', 'language_identifiers', page, 16,
                 inv=False, typemap=column_types)
Packit Service 5a9772
Packit Service 5a9772
def update_code_pages(fp):
    """Write the `code_pages` array (type CodePage) to *fp*; numbers are decimal."""
    page = 'https://docs.microsoft.com/en-us/windows/win32/intl/code-page-identifiers'
    write_struct(fp, 'CodePage', 'code_pages', page, base=10)
Packit Service 5a9772
Packit Service 5a9772
def update_input_locales(fp):
    """Write the keyboard identifier and keyboard layout arrays to *fp*."""
    # (C type, C array name, source page, base, inv) for each table, in the
    # order they are written.
    tables = (
        ('KeyboardIdentifier', 'keyboard_identifiers',
         'https://docs.microsoft.com/en-us/previous-versions/windows/it-pro/windows-vista/cc766503(v=ws.10)',
         0, False),
        ('RDP_KEYBOARD_LAYOUT', 'RDP_KEYBOARD_LAYOUT_TABLE',
         'https://docs.microsoft.com/en-us/windows-hardware/manufacture/desktop/windows-language-pack-default-values',
         16, True),
    )
    for c_type, c_name, page, base, inv in tables:
        write_struct(fp, c_type, c_name, page, base, inv)
Packit Service 5a9772
Packit Service 5a9772
# Script body: regenerate language_identifiers.c in the current directory.
try:
    # The scraped tables may contain non-ASCII text, so do not depend on the
    # locale's default encoding for the output file.
    with open('language_identifiers.c', 'w', encoding='utf-8') as fp:
        fp.write(intro)
        update_lang_identifiers(fp)
        update_code_pages(fp)
        update_input_locales(fp)
except Exception:
    # Only real errors are handled here: SystemExit (raised by write_struct on
    # a failed download) and KeyboardInterrupt are not subclasses of Exception
    # and propagate unchanged.
    print('exception cought')
    traceback.print_exc()
    # Report the failure to the caller instead of exiting with status 0.
    sys.exit(1)