Blame engine/iso639converter.py

Packit Service 1d8f1c
#!/usr/bin/python
Packit Service 1d8f1c
# vim:set fileencoding=utf-8 et sts=4 sw=4:
Packit Service 1d8f1c
#
Packit Service 1d8f1c
# ibus - Intelligent Input Bus for Linux / Unix OS
Packit Service 1d8f1c
#
Packit Service 1d8f1c
# Copyright © 2016 Takao Fujiwara <takao.fujiwara1@gmail.com>
Packit Service 1d8f1c
#
Packit Service 1d8f1c
# This library is free software; you can redistribute it and/or
Packit Service 1d8f1c
# modify it under the terms of the GNU Lesser General Public
Packit Service 1d8f1c
# License as published by the Free Software Foundation; either
Packit Service 1d8f1c
# version 2.1 of the License, or (at your option) any later version.
Packit Service 1d8f1c
#
Packit Service 1d8f1c
# This library is distributed in the hope that it will be useful,
Packit Service 1d8f1c
# but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 1d8f1c
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service 1d8f1c
# Lesser General Public License for more details.
Packit Service 1d8f1c
#
Packit Service 1d8f1c
# You should have received a copy of the GNU Lesser General Public
Packit Service 1d8f1c
# License along with this library. If not, see <http://www.gnu.org/licenses/>.
Packit Service 1d8f1c
Packit Service 1d8f1c
Packit Service 1d8f1c
# This script converts the three-letter ISO 639-2 language codes found in
# simple.xml to their two-letter ISO 639-1 equivalents.
# E.g. "eng" to "en"
Packit Service 1d8f1c
Packit Service 1d8f1c
Packit Service 1d8f1c
from xml.sax import make_parser as sax_make_parser
Packit Service 1d8f1c
from xml.sax.handler import feature_namespaces as sax_feature_namespaces
Packit Service 1d8f1c
from xml.sax.saxutils import XMLFilterBase, XMLGenerator
Packit Service 1d8f1c
from xml.sax._exceptions import SAXParseException
Packit Service 1d8f1c
Packit Service 1d8f1c
import codecs
Packit Service 1d8f1c
import getopt
Packit Service 1d8f1c
import io
Packit Service 1d8f1c
import os
Packit Service 1d8f1c
import sys
Packit Service 1d8f1c
Packit Service 1d8f1c
# Component file that is converted when no --input option is given.
INSTALLED_SIMPLE_XML = '/usr/share/ibus/component/simple.xml'
# True when the interpreter is Python 3 or newer.
PY3K = sys.version_info[0] >= 3

if not PY3K:
    # io.StringIO does not work with XMLGenerator
    from cStringIO import StringIO
    # iso_639.xml includes UTF-8
    reload(sys)
    sys.setdefaultencoding('utf-8')
else:
    from io import StringIO
Packit Service 1d8f1c
Packit Service 1d8f1c
Packit Service 1d8f1c
def usage(prgname):
    """Print the command-line help text to stdout.

    prgname is the program name shown on the "Usage:" line; the default
    input path shown is INSTALLED_SIMPLE_XML.
    """
    help_template = '''\
Usage:
  %s [OPTION...]

Options:
  -h, --help                         Show this message
  -i, --input=SIMPLE_XML             Load SIMPLE_XML file (default is:
                                         %s)
  -o, --output=FILE                  Output FILE (default is stdout)
'''
    help_text = help_template % (prgname, INSTALLED_SIMPLE_XML)
    print(help_text)
Packit Service 1d8f1c
Packit Service 1d8f1c
Packit Service 1d8f1c
class ISO639XML(XMLFilterBase):
    """SAX handler that collects language codes from 'iso_639_entry' elements.

    For every entry that carries an 'iso_639_1_code' attribute, the values of
    its 'iso_639_2B_code' and 'iso_639_2T_code' attributes are mapped to that
    two-letter code.  Entries without a two-letter code are ignored.
    """

    def __init__(self, parser=None):
        # Three-letter code -> two-letter code.
        self.__code2to1 = {}
        # Any recorded code -> value of the entry's 'name' attribute.
        self.__codetoname = {}
        XMLFilterBase.__init__(self, parser)

    def startElement(self, name, attrs):
        """Record the codes of one 'iso_639_entry'; ignore other elements."""
        if name != 'iso_639_entry':
            return
        iso639_1 = attrs.get('iso_639_1_code')
        if iso639_1 is None:
            # Nothing to convert to without a two-letter code.
            return
        n = attrs.get('name')
        self.__codetoname[iso639_1] = n
        # The B and T codes are often identical; storing the same mapping
        # twice is harmless.
        for attr_name in ('iso_639_2B_code', 'iso_639_2T_code'):
            iso639_2 = attrs.get(attr_name)
            if iso639_2 is not None:
                self.__code2to1[iso639_2] = iso639_1
                self.__codetoname[iso639_2] = n

    def code2to1(self, iso639_2):
        """Return the two-letter code recorded for iso639_2, or None."""
        return self.__code2to1.get(iso639_2)
Packit Service 1d8f1c
Packit Service 1d8f1c
Packit Service 1d8f1c
class IBusComponentXML(XMLFilterBase):
    """SAX filter that rewrites the text of <language> elements.

    Events are forwarded to 'downstream' (if given).  Text found inside a
    'language' element is passed through iso639.code2to1() and replaced by
    the returned value when that value is not None.

    parser     -- parent reader handed to XMLFilterBase
    downstream -- content handler receiving the forwarded events
    iso639     -- object providing code2to1(code)
    """

    def __init__(self, parser=None, downstream=None, iso639=None):
        XMLFilterBase.__init__(self, parser)
        self.__downstream = downstream
        self.__iso639 = iso639
        self.__is_language = False
        # Text chunks collected while inside a <language> element.  SAX may
        # deliver one text node through several characters() calls, so the
        # lookup has to wait until the whole text has been seen.
        self.__language_chunks = []

    def __flush_language(self):
        """Convert the buffered <language> text and forward it downstream."""
        if not self.__language_chunks:
            return
        text = ''.join(self.__language_chunks)
        self.__language_chunks = []
        if self.__iso639:
            iso639_1 = self.__iso639.code2to1(text)
            if iso639_1 is not None:
                text = iso639_1
        if self.__downstream:
            self.__downstream.characters(text)

    def startDocument(self):
        if self.__downstream:
            self.__downstream.startDocument()

    def endDocument(self):
        if self.__downstream:
            self.__downstream.endDocument()

    def startElement(self, name, attrs):
        # Emit pending text first so that the event order is preserved.
        self.__flush_language()
        if name == 'language':
            self.__is_language = True
        if self.__downstream:
            self.__downstream.startElement(name, attrs)

    def endElement(self, name):
        # The text of the element is complete now; convert and emit it
        # before the end tag.
        self.__flush_language()
        if name == 'language':
            self.__is_language = False
        if self.__downstream:
            self.__downstream.endElement(name)

    def characters(self, text):
        if self.__is_language:
            # Defer until the element boundary, see __flush_language().
            self.__language_chunks.append(text)
            return
        if self.__downstream:
            self.__downstream.characters(text)
Packit Service 1d8f1c
Packit Service 1d8f1c
Packit Service 1d8f1c
class ConvertEngineXML():
    """Convert the language codes of a component XML file.

    The file at 'path' is parsed on construction through IBusComponentXML
    and the regenerated XML is kept in memory until write() is called.

    path   -- component XML file to read (opened as UTF-8)
    iso639 -- object providing code2to1(code), passed to IBusComponentXML
    """

    def __init__(self, path, iso639=None):
        self.__path = path
        self.__iso639 = iso639

        self.__result = StringIO()
        downstream = XMLGenerator(self.__result, 'utf-8')
        self.__load(downstream)

    def __load(self, downstream=None):
        """Parse self.__path, sending the filtered events to downstream.

        A SAXParseException is reported on stdout and not re-raised.
        """
        parser = sax_make_parser()
        parser.setFeature(sax_feature_namespaces, 0)
        self.__handler = IBusComponentXML(parser, downstream, self.__iso639)
        parser.setContentHandler(self.__handler)
        with codecs.open(self.__path, 'r', encoding='utf-8') as f:
            try:
                parser.parse(f)
            except SAXParseException:
                # Report the path stored on the instance; no local name
                # 'path' exists in this method.
                print('Error: Invalid file format: %s' % self.__path)

    def write(self, output=None):
        """Write the converted XML to the file 'output', or to stdout.

        output -- path of the file to write as UTF-8; None selects stdout
        """
        contents = self.__result.getvalue()
        if output is not None:
            # Close the file deterministically so the data is on disk when
            # this method returns.
            with codecs.open(output, 'w', encoding='utf-8') as od:
                od.write(contents)
            return
        if PY3K:
            # Keep earlier text output ahead of the bytes written below.
            sys.stdout.flush()
            od = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
            od.write(contents)
            od.flush()
            # Detach so that discarding the wrapper does not close stdout.
            od.detach()
        else:
            od = codecs.getwriter('utf-8')(sys.stdout)
            od.write(contents)
            od.flush()
Packit Service 1d8f1c
Packit Service 1d8f1c
Packit Service 1d8f1c
def parse_iso639(path):
    """Parse the ISO 639 XML file at path and return its ISO639XML handler.

    The file is read as UTF-8.  A SAXParseException is reported on stdout
    and the handler is returned with whatever it collected so far.
    """
    with codecs.open(path, 'r', encoding='utf-8') as stream:
        reader = sax_make_parser()
        reader.setFeature(sax_feature_namespaces, 0)
        handler = ISO639XML(reader)
        reader.setContentHandler(handler)
        try:
            reader.parse(stream)
        except SAXParseException:
            print('Error: Invalid file format: %s' % path)
    return handler
Packit Service 1d8f1c
Packit Service 1d8f1c
Packit Service 1d8f1c
if __name__ == '__main__':
    # Command-line entry point: parse the options, load the ISO 639 table
    # and write the converted component file.
    prgname = os.path.basename(sys.argv[0])
    short_opts = 'hi:o:'
    long_opts = ['help', 'input=', 'output=']
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.GetoptError as err:
        print(err)
        usage(prgname)
        sys.exit(2)
    # Positional arguments are not accepted.
    if args:
        usage(prgname)
        sys.exit(2)

    source_xml = INSTALLED_SIMPLE_XML
    target = None
    for flag, value in opts:
        if flag in ('-h', '--help'):
            usage(prgname)
            sys.exit()
        if flag in ('-i', '--input'):
            source_xml = value
        elif flag in ('-o', '--output'):
            target = value

    iso639 = parse_iso639('/usr/share/xml/iso-codes/iso_639.xml')
    converter = ConvertEngineXML(source_xml, iso639)
    converter.write(target)