Blame scripts/fetch_language_identifiers.py

Packit Service b1ea74
#!/bin/env python3
Packit Service b1ea74
#
Packit Service b1ea74
# This is a helper script that fetches the current language and keyboard tables
Packit Service b1ea74
# and writes the result to a C compatible struct.
Packit Service b1ea74
#
Packit Service b1ea74
import os
Packit Service b1ea74
import sys
Packit Service b1ea74
import requests
Packit Service b1ea74
import numpy as np
Packit Service b1ea74
import traceback
Packit Service b1ea74
from bs4 import BeautifulSoup
Packit Service b1ea74
from bs4 import element
Packit Service b1ea74
Packit Service b1ea74
# Banner comment written verbatim at the top of the generated C file. The
# trailing blank line separates it from the first generated table.
intro = '''/* This file is auto generated from
 *
 * https://docs.microsoft.com/en-us/windows/win32/intl/language-identifier-constants-and-strings
 *
 * please do not edit but use ./scripts/fetch_language_identifiers.py to regenerate!
 */

'''
Packit Service b1ea74
Packit Service b1ea74
def parse_html(text):
    """Extract the first HTML table found in *text*.

    Args:
        text: HTML document as a string.

    Returns:
        A two-element list ``[headers, languages]``. ``headers`` holds one
        text entry per header cell of the first ``<thead>`` row;
        ``languages`` holds one list per ``<tbody>`` row with one string per
        cell. A cell whose content is not a single string is stored as ''.

    Raises:
        ValueError: if the document has no table, or the table lacks a
            header row or a body.
    """
    soup = BeautifulSoup(text, 'html.parser')
    table = soup.find('table')
    if table is None:
        raise ValueError('no <table> element found in document')

    thead = table.find('thead')
    head = thead.find('tr') if thead is not None else None
    if head is None:
        raise ValueError('table has no <thead> header row')

    body = table.find('tbody')
    if body is None:
        raise ValueError('table has no <tbody> element')

    # Iterating a tag yields its children, including the whitespace strings
    # between cells; only real tags are cells. get_text() gives exactly one
    # plain-text entry per header cell, even when the cell contains markup.
    headers = [th.get_text() for th in head if isinstance(th, element.Tag)]

    languages = []
    for tr in body:
        if not isinstance(tr, element.Tag):
            continue
        # .string is None for empty or multi-child cells; store '' so the
        # writer can recognise the cell as "no value".
        languages.append(
            [td.string or '' for td in tr if isinstance(td, element.Tag)]
        )
    return [headers, languages]
Packit Service b1ea74
Packit Service b1ea74
def is_base(num, base):
    """Return True if *num* can be parsed by ``int(num, base)``.

    Args:
        num: candidate literal, normally a string.
        base: numeric base handed to ``int``.

    Returns:
        True when the conversion succeeds, False when ``int`` rejects the
        value (bad digits, invalid base, or a non-string such as None).
    """
    try:
        int(num, base)
    except (TypeError, ValueError):
        return False
    return True
Packit Service b1ea74
Packit Service b1ea74
def padhexa(v):
    """Format integer *v* as a hexadecimal literal with at least 8 digits.

    Args:
        v: integer to format.

    Returns:
        A string such as ``'0x00000409'``. Values wider than 8 hex digits
        are not truncated; negative values get a leading ``'-'``.
    """
    # hex() of a negative number starts with '-0x', so slicing off two
    # characters would leave the 'x' behind; format the magnitude and
    # put the sign back in front.
    sign = '-' if v < 0 else ''
    digits = hex(abs(v))[2:]
    return sign + '0x' + digits.zfill(8)
Packit Service b1ea74
Packit Service b1ea74
def write_struct(fp, struct, name, url, base, inv = False, typemap = None):
    """Fetch the HTML table at *url* and write it to *fp* as a C array.

    The output has the form ``const <struct> <name>[] = { ... };`` with one
    initializer per table row, preceded by a comment listing the column
    headers.

    Args:
        fp: writable text file object receiving the generated C code.
        struct: C type name of the array elements.
        name: identifier of the generated array.
        url: address of the page holding the table.
        base: base passed to ``int()`` when parsing cells. Parsed numbers
            are rendered as padded hex for 0 and 16, via ``bin``/``oct`` for
            2 and 8, and as decimal otherwise.
        inv: when true, numeric cells are prepended to the row instead of
            appended.
        typemap: optional sequence of types, one per column. A non-numeric
            cell in a column whose type is not ``str`` is replaced by the
            last value seen in that column, written unquoted.

    The function terminates the process via ``sys.exit(1)`` when the server
    does not answer with an OK status.
    """
    # Renderers for parsed integers, keyed by base. Unlisted bases fall back
    # to decimal so that no placeholder text ends up in the C output.
    # NOTE: bin()/oct() emit Python-style '0b'/'0o' prefixes; no caller in
    # this file uses base 2 or 8.
    renderers = {
        0: padhexa,
        2: bin,
        8: oct,
        10: str,
        16: padhexa,
    }
    render = renderers.get(base, str)

    def quoted(text):
        """Return *text* as a double-quoted, escaped C string literal."""
        escaped = str(text).replace('\\', '\\\\')
        escaped = escaped.replace('"', '\\"').replace('\n', '\\n')
        return '"' + escaped + '"'

    # A timeout keeps the script from hanging forever on a stalled server.
    li = requests.get(url, timeout=30)
    if li.status_code != requests.codes.ok:
        print('Could not fetch ' + str(url) + ', response code ' + str(li.status_code),
              file=sys.stderr)
        sys.exit(1)
    headers, languages = parse_html(li.text)

    fp.write('const ' + str(struct) + ' ' + str(name) + '[] =\n')
    fp.write('{\n')
    fp.write('/* ')
    for h in headers:
        # Keep a stray '*/' in a header from terminating the C comment.
        fp.write('\t[' + str(h).replace('*/', '* /') + ']\t')
    fp.write('*/\n')

    # Last value seen per column; used to fill cells that are empty or not
    # numeric where a number is expected.
    last = {}
    for language in languages:
        line = ''
        for pos, e in enumerate(language):
            try:
                v = int(e, base=base)
            except ValueError:
                wants_number = bool(
                    typemap and pos < len(typemap) and typemap[pos] != str
                )
                if wants_number:
                    line += str(last.get(pos)) + ',\t'
                elif e == "":
                    line += quoted(last.get(pos)) + ',\t'
                else:
                    line += quoted(e) + ',\t'
                    if e != "None":
                        last[pos] = str(e)
            else:
                h = render(v)
                last[pos] = h
                if inv:
                    line = h + ', ' + line
                else:
                    line += h + ', '
        # Every separator is two characters long, so dropping two removes
        # the trailing one.
        fp.write('\t{ ' + line[:-2] + '},\n')
    fp.write('};\n')
    fp.write('\n')
Packit Service b1ea74
Packit Service b1ea74
def update_lang_identifiers(fp):
    """Write the ``language_identifiers`` array of ``LanguageIdentifier`` to *fp*."""
    source_url = 'https://docs.microsoft.com/en-us/windows/win32/intl/language-identifier-constants-and-strings'
    # Expected column layout of the source table, one type per column:
    #   [Language identifier]   [Primary language]    [Prim. lang. identifier]    [Prim. lang. symbol]    [Sublanguage]   [Sublang. identifier]   [Sublang. symbol]
    column_types = [int, str, int, str, str, int, str]
    write_struct(
        fp,
        'LanguageIdentifier',
        'language_identifiers',
        source_url,
        16,
        False,
        column_types,
    )
Packit Service b1ea74
Packit Service b1ea74
def update_code_pages(fp):
    """Write the ``code_pages`` array of ``CodePage`` to *fp*, parsing cells as decimal."""
    source_url = 'https://docs.microsoft.com/en-us/windows/win32/intl/code-page-identifiers'
    write_struct(fp, 'CodePage', 'code_pages', source_url, 10)
Packit Service b1ea74
Packit Service b1ea74
def update_input_locales(fp):
    """Write the keyboard identifier and RDP keyboard layout tables to *fp*."""
    # (struct, array name, source page, base, inv) for each generated table,
    # in output order.
    tables = (
        (
            'KeyboardIdentifier',
            'keyboard_identifiers',
            'https://docs.microsoft.com/en-us/previous-versions/windows/it-pro/windows-vista/cc766503(v=ws.10)',
            0,
            False,
        ),
        (
            'RDP_KEYBOARD_LAYOUT',
            'RDP_KEYBOARD_LAYOUT_TABLE',
            'https://docs.microsoft.com/en-us/windows-hardware/manufacture/desktop/windows-language-pack-default-values',
            16,
            True,
        ),
    )
    for c_type, array_name, source_url, number_base, numbers_first in tables:
        write_struct(fp, c_type, array_name, source_url, number_base, numbers_first)
Packit Service b1ea74
Packit Service b1ea74
# Script body: regenerate language_identifiers.c in the current directory.
try:
    # Pin the encoding so the generated file does not depend on the
    # platform's default text encoding.
    with open('language_identifiers.c', 'w', encoding='utf-8') as fp:
        fp.write(intro)
        update_lang_identifiers(fp)
        update_code_pages(fp)
        update_input_locales(fp)
except Exception:
    # Catch Exception rather than everything: a bare except would also
    # swallow the SystemExit raised by sys.exit() in write_struct and
    # KeyboardInterrupt, and the script would then report success.
    print('exception caught', file=sys.stderr)
    traceback.print_exc()
    sys.exit(1)