Blame scripts/fetch_language_identifiers.py

Packit Service fa4841
#!/bin/env python3
Packit Service fa4841
#
Packit Service fa4841
# This is a helper script that fetches the current language and keyboard tables
Packit Service fa4841
# and writes the result to a C compatible struct.
Packit Service fa4841
#
Packit Service fa4841
import os
Packit Service fa4841
import sys
Packit Service fa4841
import requests
Packit Service fa4841
import numpy as np
Packit Service fa4841
import traceback
Packit Service fa4841
from bs4 import BeautifulSoup
Packit Service fa4841
from bs4 import element
Packit Service fa4841
Packit Service fa4841
# Banner written at the top of the generated C file. It must contain only the
# C comment below; stray text here would end up verbatim in the output.
intro = '''/* This file is auto generated from
 *
 * https://docs.microsoft.com/en-us/windows/win32/intl/language-identifier-constants-and-strings
 *
 * please do not edit but use ./scripts/fetch_language_identifiers.py to regenerate!
 */

'''
Packit Service fa4841
Packit Service fa4841
def parse_html(text):
    """Extract the first HTML table in *text* as header and row lists.

    The document is parsed with BeautifulSoup's ``html.parser`` backend and
    only the first ``<table>`` element is considered.

    Args:
        text: HTML source of the page.

    Returns:
        A two-element list ``[headers, rows]``. ``headers`` contains the text
        of every tag in the first ``<tr>`` of ``<thead>``. ``rows`` contains
        one list per tag in ``<tbody>``, holding the ``.string`` of each child
        tag, or ``''`` when a cell has no single string.

    Raises:
        ValueError: if the page has no table, or the table lacks a
            ``<thead>`` row or a ``<tbody>``.
    """
    soup = BeautifulSoup(text, 'html.parser')
    table = soup.find("table")
    if table is None:
        raise ValueError('no <table> element found in document')

    thead = table.find('thead')
    head = thead.find('tr') if thead is not None else None
    body = table.find('tbody')
    if head is None or body is None:
        raise ValueError('table is missing a <thead> row or a <tbody>')

    # Use the cell text rather than extending the list with the cell's child
    # nodes: nested markup inside a header would otherwise put Tag objects
    # into the list, which cannot be written to a text file later on.
    headers = [th.get_text() for th in head if isinstance(th, element.Tag)]

    languages = []
    for tr in body:
        # Skip whitespace/text nodes between the rows.
        if not isinstance(tr, element.Tag):
            continue
        languages.append(
            [td.string or '' for td in tr if isinstance(td, element.Tag)]
        )
    return [headers, languages]
Packit Service fa4841
Packit Service fa4841
def is_base(num, base):
    """Return True if the string *num* parses as an integer in *base*.

    Args:
        num: text to test.
        base: numeric base handed to ``int()``.

    Returns:
        ``True`` when ``int(num, base)`` succeeds, ``False`` when it raises
        ``ValueError``.
    """
    try:
        int(num, base)
    except ValueError:
        return False
    return True
Packit Service fa4841
Packit Service fa4841
def padhexa(v):
    """Format the integer *v* as ``0x`` followed by at least 8 hex digits.

    Values wider than 8 hex digits are not truncated. A negative value keeps
    its sign in front of the prefix (``-0x00000001``) instead of producing a
    malformed literal.
    """
    sign = '-' if v < 0 else ''
    return sign + '0x' + format(abs(v), '08x')
Packit Service fa4841
Packit Service fa4841
def write_struct(fp, struct, name, url, base, inv=False, typemap=None):
    """Fetch the table at *url* and write it to *fp* as a C array initializer.

    The output has the form ``const <struct> <name>[] = { ... };`` with one
    brace-enclosed line per table row, preceded by a comment listing the
    table's column headers.

    Args:
        fp: writable text file object.
        struct: C type name of the array elements.
        name: C identifier of the array.
        url: page to download; its first table is parsed with ``parse_html``.
        base: base used to parse numeric cells with ``int()``. Parsed numbers
            are rendered with ``padhexa`` (base 0 and 16), ``bin`` (2),
            ``oct`` (8) or ``str`` (10).
        inv: when true, numeric cells are prepended to the row instead of
            appended. String cells are always appended.
        typemap: optional per-column list of types. A cell that fails numeric
            parsing in a column whose type is not ``str`` is replaced by the
            last value remembered for that column.

    Exits the process with status 1 if the HTTP status is not OK.
    """
    # A timeout keeps the script from blocking forever on a stalled connection.
    li = requests.get(url, timeout=60)
    if li.status_code != requests.codes.ok:
        print('Could not fetch ' + str(url) + ', reponse code ' + str(li.status_code))
        sys.exit(1)
    headers, languages = parse_html(li.text)

    fp.write('const ' + str(struct) + ' ' + str(name) + '[] =\n')
    fp.write('{\n')
    fp.write('/* ')
    for h in headers:
        fp.write('\t[' + h + ']\t')
    fp.write('*/\n')

    # Pick the number formatter once instead of computing every variant for
    # every cell.
    formatter = {0: padhexa, 2: bin, 8: oct, 10: str, 16: padhexa}.get(base)

    # Last value seen per column, used to fill cells that are empty or not
    # parseable. A dict avoids a fixed limit on the number of columns;
    # columns without a remembered value render as "None".
    last = {}
    for language in languages:
        fp.write('\t{ ')
        line = ''
        for pos, e in enumerate(language):
            try:
                v = int(e, base=base)
            except ValueError:
                # Columns beyond the end of the typemap are treated as strings.
                numeric_column = bool(
                    typemap and pos < len(typemap) and typemap[pos] != str
                )
                if numeric_column:
                    line += str(last.get(pos)) + ',\t'
                elif e == "":
                    line += '"' + str(last.get(pos)) + '",\t'
                else:
                    line += '"' + e + '",\t'
                    if e != "None":
                        last[pos] = str(e)
                continue

            if formatter is None:
                # A base int() accepts but that has no formatter here is
                # emitted as the text "None" and not remembered.
                h = "None"
            else:
                h = str(formatter(v))
                last[pos] = h
            if inv:
                line = h + ', ' + line
            else:
                line += h + ', '
        # Drop the two-character separator that follows the last cell.
        fp.write(line[:-2] + '},\n')
    fp.write('};\n')
    fp.write('\n')
Packit Service fa4841
Packit Service fa4841
def update_lang_identifiers(fp):
    """Write the ``language_identifiers`` table to *fp*.

    Numeric cells are parsed as base 16; the typemap gives the expected type
    of each of the seven columns listed below.
    """
#   [Language identifier]   [Primary language]    [Prim. lang. identifier]    [Prim. lang. symbol]    [Sublanguage]   [Sublang. identifier]   [Sublang. symbol]
    write_struct(
        fp,
        'LanguageIdentifier',
        'language_identifiers',
        'https://docs.microsoft.com/en-us/windows/win32/intl/language-identifier-constants-and-strings',
        16,
        False,
        [int, str, int, str, str, int, str],
    )
Packit Service fa4841
Packit Service fa4841
def update_code_pages(fp):
    """Write the ``code_pages`` table to *fp*, parsing numeric cells as base 10."""
    write_struct(
        fp,
        'CodePage',
        'code_pages',
        'https://docs.microsoft.com/en-us/windows/win32/intl/code-page-identifiers',
        10,
    )
Packit Service fa4841
Packit Service fa4841
def update_input_locales(fp):
    """Write the ``keyboard_identifiers`` and ``RDP_KEYBOARD_LAYOUT_TABLE`` tables to *fp*.

    The first table is parsed with base 0, so ``int()`` infers the base from
    each cell's prefix. The second is parsed as base 16 with numeric cells
    placed in front of the string cells (``inv=True``).
    """
    write_struct(
        fp,
        'KeyboardIdentifier',
        'keyboard_identifiers',
        'https://docs.microsoft.com/en-us/previous-versions/windows/it-pro/windows-vista/cc766503(v=ws.10)',
        0,
    )
    write_struct(
        fp,
        'RDP_KEYBOARD_LAYOUT',
        'RDP_KEYBOARD_LAYOUT_TABLE',
        'https://docs.microsoft.com/en-us/windows-hardware/manufacture/desktop/windows-language-pack-default-values',
        16,
        True,
    )
Packit Service fa4841
Packit Service fa4841
# Regenerate language_identifiers.c in the current working directory.
try:
    with open('language_identifiers.c', 'w') as fp:
        fp.write(intro)
        update_lang_identifiers(fp)
        update_code_pages(fp)
        update_input_locales(fp)
except Exception:
    # Catch Exception rather than everything: a bare except would also
    # swallow the SystemExit raised by sys.exit(1) on a failed download and
    # let the script finish with a success status.
    print('exception cought')
    traceback.print_exc()
    # Report the failure to the caller instead of exiting with status 0.
    sys.exit(1)