Blame engine/iso639converter.py

Packit 3ff832
#!/usr/bin/python
Packit 3ff832
# vim:set fileencoding=utf-8 et sts=4 sw=4:
Packit 3ff832
#
Packit 3ff832
# ibus - Intelligent Input Bus for Linux / Unix OS
Packit 3ff832
#
Packit 3ff832
# Copyright © 2016 Takao Fujiwara <takao.fujiwara1@gmail.com>
Packit 3ff832
#
Packit 3ff832
# This library is free software; you can redistribute it and/or
Packit 3ff832
# modify it under the terms of the GNU Lesser General Public
Packit 3ff832
# License as published by the Free Software Foundation; either
Packit 3ff832
# version 2.1 of the License, or (at your option) any later version.
Packit 3ff832
#
Packit 3ff832
# This library is distributed in the hope that it will be useful,
Packit 3ff832
# but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 3ff832
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 3ff832
# Lesser General Public License for more details.
Packit 3ff832
#
Packit 3ff832
# You should have received a copy of the GNU Lesser General Public
Packit 3ff832
# License along with this library. If not, see <http://www.gnu.org/licenses/>.
Packit 3ff832
Packit 3ff832
Packit 3ff832
# This script converts the three-letter ISO 639-2 language codes found in
# simple.xml to their two-letter ISO 639-1 equivalents.
# E.g. "eng" to "en"
Packit 3ff832
Packit 3ff832
Packit 3ff832
from xml.sax import make_parser as sax_make_parser
Packit 3ff832
from xml.sax.handler import feature_namespaces as sax_feature_namespaces
Packit 3ff832
from xml.sax.saxutils import XMLFilterBase, XMLGenerator
Packit 3ff832
from xml.sax._exceptions import SAXParseException
Packit 3ff832
Packit 3ff832
import codecs
Packit 3ff832
import getopt
Packit 3ff832
import io
Packit 3ff832
import os
Packit 3ff832
import sys
Packit 3ff832
Packit 3ff832
# Component file that is converted when no --input option is given.
INSTALLED_SIMPLE_XML = '/usr/share/ibus/component/simple.xml'
# True when the interpreter is Python 3 or newer.
PY3K = sys.version_info[0] >= 3

if not PY3K:
    # io.StringIO does not work with XMLGenerator
    from cStringIO import StringIO
    # iso_639.xml includes UTF-8
    reload(sys)
    sys.setdefaultencoding('utf-8')
else:
    from io import StringIO
Packit 3ff832
Packit 3ff832
Packit 3ff832
def usage(prgname):
    """Print the command-line help text to stdout.

    prgname is the program name shown in the "Usage:" line.  The default
    input path shown in the help is INSTALLED_SIMPLE_XML.
    """
    template = '\n'.join([
        'Usage:',
        '  %s [OPTION...]',
        '',
        'Options:',
        '  -h, --help                         Show this message',
        '  -i, --input=SIMPLE_XML             Load SIMPLE_XML file (default is:',
        '                                         %s)',
        '  -o, --output=FILE                  Output FILE (default is stdout)',
        '',
    ])
    print(template % (prgname, INSTALLED_SIMPLE_XML))
Packit 3ff832
Packit 3ff832
Packit 3ff832
class ISO639XML(XMLFilterBase):
    """SAX handler that collects language codes from 'iso_639_entry' elements.

    For every entry that carries an ISO 639-1 code, the handler records a
    mapping from its ISO 639-2 codes (both the B and the T variant) to the
    ISO 639-1 code, and a mapping from each of those codes to the entry name.
    Entries without an ISO 639-1 code are ignored.
    """

    def __init__(self, parser=None):
        # ISO 639-2 code -> ISO 639-1 code
        self.__code2to1 = {}
        # ISO 639-1 / ISO 639-2 code -> value of the 'name' attribute
        self.__codetoname = {}
        XMLFilterBase.__init__(self, parser)

    def startElement(self, name, attrs):
        """Record the codes of one 'iso_639_entry'; ignore other elements."""
        if name != 'iso_639_entry':
            return
        two_letter = attrs.get('iso_639_1_code')
        if two_letter is None:
            return
        language = attrs.get('name')
        self.__codetoname[two_letter] = language
        # The B and T variants are often identical; storing the same pair
        # twice leaves the tables unchanged.
        for variant in ('iso_639_2B_code', 'iso_639_2T_code'):
            three_letter = attrs.get(variant)
            if three_letter is None:
                continue
            self.__code2to1[three_letter] = two_letter
            self.__codetoname[three_letter] = language

    def code2to1(self, iso639_2):
        """Return the ISO 639-1 code for iso639_2, or None if unknown."""
        return self.__code2to1.get(iso639_2)
Packit 3ff832
Packit 3ff832
Packit 3ff832
class IBusComponentXML(XMLFilterBase):
    """SAX handler that forwards events downstream, converting language codes.

    Document, element and character events are passed on to 'downstream'
    (when one is given).  Text inside a 'language' element is looked up with
    iso639.code2to1() and replaced by the result when the lookup succeeds;
    all other text is forwarded unchanged.

    parser     -- parent reader handed to XMLFilterBase
    downstream -- content handler that receives the forwarded events
    iso639     -- object providing code2to1(code), returning the replacement
                  code or None
    """

    def __init__(self, parser=None, downstream=None, iso639=None):
        XMLFilterBase.__init__(self, parser)
        self.__downstream = downstream
        self.__iso639 = iso639
        self.__is_language = False
        # Text chunks collected while inside a 'language' element.
        self.__language_chunks = []

    def __flush_language(self):
        """Convert the buffered 'language' text and forward it downstream."""
        if not self.__language_chunks:
            return
        text = ''.join(self.__language_chunks)
        self.__language_chunks = []
        if self.__iso639:
            iso639_1 = self.__iso639.code2to1(text)
            if iso639_1 is not None:
                text = iso639_1
        if self.__downstream:
            self.__downstream.characters(text)

    def startDocument(self):
        if self.__downstream:
            self.__downstream.startDocument()

    def endDocument(self):
        if self.__downstream:
            self.__downstream.endDocument()

    def startElement(self, name, attrs):
        # Emit pending text first so event order is preserved.
        self.__flush_language()
        if name == 'language':
            self.__is_language = True
        if self.__downstream:
            self.__downstream.startElement(name, attrs)

    def endElement(self, name):
        # The text node is complete here, so it can be looked up as a whole.
        self.__flush_language()
        if name == 'language':
            self.__is_language = False
        if self.__downstream:
            self.__downstream.endElement(name)

    def characters(self, text):
        if self.__is_language:
            # A SAX parser may deliver one text node in several chunks;
            # collect them and convert at the next element boundary instead
            # of looking up each fragment separately.
            self.__language_chunks.append(text)
            return
        if self.__downstream:
            self.__downstream.characters(text)
Packit 3ff832
Packit 3ff832
Packit 3ff832
class ConvertEngineXML():
    """Parse a component XML file and keep the converted document in memory.

    The file at 'path' is parsed on construction; its events are passed
    through IBusComponentXML (which uses 'iso639' to convert language codes)
    into an XMLGenerator that writes into an in-memory buffer.  Call write()
    to emit the buffered result.
    """

    def __init__(self, path, iso639=None):
        self.__path = path
        self.__iso639 = iso639

        self.__result = StringIO()
        downstream = XMLGenerator(self.__result, 'utf-8')
        self.__load(downstream)

    def __load(self, downstream=None):
        """Parse self.__path, sending the filtered events to 'downstream'.

        A SAXParseException is reported on stdout and not re-raised, so the
        buffer then holds whatever was generated before the error.
        """
        parser = sax_make_parser()
        parser.setFeature(sax_feature_namespaces, 0)
        self.__handler = IBusComponentXML(parser, downstream, self.__iso639)
        parser.setContentHandler(self.__handler)
        f = codecs.open(self.__path, 'r', encoding='utf-8')
        try:
            parser.parse(f)
        except SAXParseException:
            # 'path' is not defined in this method; use the stored path.
            print('Error: Invalid file format: %s' % self.__path)
        finally:
            f.close()

    def write(self, output=None):
        """Write the converted XML as UTF-8.

        output -- path of the file to write, or None to write to stdout
        """
        contents = self.__result.getvalue()
        if output is not None:
            od = codecs.open(output, 'w', encoding='utf-8')
            try:
                od.write(contents)
            finally:
                # Close explicitly instead of relying on garbage collection.
                od.close()
            return
        if PY3K:
            # Push out earlier print() output so it is not reordered after
            # the bytes written directly to the underlying buffer.
            sys.stdout.flush()
            od = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
            try:
                od.write(contents)
                od.flush()
            finally:
                # Detach so that discarding the wrapper does not close
                # sys.stdout.buffer.
                od.detach()
        else:
            od = codecs.getwriter('utf-8')(sys.stdout)
            od.write(contents)
Packit 3ff832
Packit 3ff832
Packit 3ff832
def parse_iso639(path):
    """Parse the ISO 639 XML file at 'path' and return the ISO639XML handler.

    The file is read as UTF-8.  A SAXParseException is reported on stdout
    and not re-raised; the handler is returned in either case.
    """
    reader = sax_make_parser()
    reader.setFeature(sax_feature_namespaces, 0)
    table = ISO639XML(reader)
    reader.setContentHandler(table)
    with codecs.open(path, 'r', encoding='utf-8') as stream:
        try:
            reader.parse(stream)
        except SAXParseException:
            print('Error: Invalid file format: %s' % path)
    return table
Packit 3ff832
Packit 3ff832
Packit 3ff832
if __name__ == '__main__':
    # Command-line entry point: read the options, then convert the component
    # file and write the result.
    prgname = os.path.basename(sys.argv[0])
    long_options = ['help', 'input=', 'output=']
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hi:o:', long_options)
    except getopt.GetoptError as error:
        print(error)
        usage(prgname)
        sys.exit(2)
    # Positional arguments are not accepted.
    if args:
        usage(prgname)
        sys.exit(2)

    simple_xml_path = INSTALLED_SIMPLE_XML
    output_path = None
    for flag, value in opts:
        if flag in ('-h', '--help'):
            usage(prgname)
            sys.exit()
        if flag in ('-i', '--input'):
            simple_xml_path = value
        elif flag in ('-o', '--output'):
            output_path = value

    language_table = parse_iso639('/usr/share/xml/iso-codes/iso_639.xml')
    converter = ConvertEngineXML(simple_xml_path, language_table)
    converter.write(output_path)