# -*- coding: utf-8 -*-

# Copyright 2009 - 2015 Harri Pitkänen (hatapitk@iki.fi)

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

# This program converts an XML representation of autocorrect data
# into lexc format for autocorrect transducer.
#
# Usage: python triecompiler.py input.xml output.lexc

from __future__ import unicode_literals
import xml.dom.minidom
import sys

# Open the XML file
# The file is read in binary mode so that the XML parser itself decides how
# to decode the document, instead of the text layer decoding it first.
# The "with" block closes the file even when parsing raises.
with open(sys.argv[1], "rb") as xmlFile:
    autoCorrect = xml.dom.minidom.parse(xmlFile)

# Open the lexc file
# Binary mode: the entries written to this file are UTF-8 encoded bytes,
# which a text-mode file object rejects on Python 3. The header is encoded
# the same way so that everything written to the file is bytes.
outputFile = open(sys.argv[2], "wb")
outputFile.write("Multichar_Symbols\n@_SPACE_@\n\nLEXICON Root\n".encode("UTF-8"))

def formatForLexc(s):
    """Return the text *s* rewritten for use in a lexc entry.

    Every "=" character is removed, then every space is replaced by the
    "@_SPACE_@" symbol. All other characters are returned unchanged.
    """
    return s.replace("=", "").replace(" ", "@_SPACE_@")


# Read entries to lexc
# Each <replacement> element becomes one line of the form
# "incorrect:correct<TAB>#<TAB>;", written as UTF-8 encoded bytes.
try:
    for replacement in autoCorrect.getElementsByTagName("replacement"):
        # Only the first <incorrect> and the first <correct> element found
        # inside a replacement are used.
        incorrect = formatForLexc(
            replacement.getElementsByTagName("incorrect")[0].firstChild.wholeText)
        correct = formatForLexc(
            replacement.getElementsByTagName("correct")[0].firstChild.wholeText)
        entry = incorrect + u":" + correct + u"\t#\t;\n"
        outputFile.write(entry.encode("UTF-8"))
finally:
    # Close the output file even if an entry could not be processed.
    outputFile.close()