Blame vvfst/autocorrect_to_lexc.py

Packit 1f3717
# -*- coding: utf-8 -*-
Packit 1f3717
Packit 1f3717
# Copyright 2009 - 2015 Harri Pitkänen (hatapitk@iki.fi)
Packit 1f3717
Packit 1f3717
# This program is free software; you can redistribute it and/or modify
Packit 1f3717
# it under the terms of the GNU General Public License as published by
Packit 1f3717
# the Free Software Foundation; either version 2 of the License, or
Packit 1f3717
# (at your option) any later version.
Packit 1f3717
#
Packit 1f3717
# This program is distributed in the hope that it will be useful,
Packit 1f3717
# but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 1f3717
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit 1f3717
# GNU General Public License for more details.
Packit 1f3717
#
Packit 1f3717
# You should have received a copy of the GNU General Public License
Packit 1f3717
# along with this program; if not, write to the Free Software
Packit 1f3717
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
Packit 1f3717
Packit 1f3717
# This program converts an XML representation of autocorrect data
Packit 1f3717
# into lexc format for autocorrect transducer.
Packit 1f3717
#
Packit 1f3717
# Usage: python autocorrect_to_lexc.py input.xml output.lexc
Packit 1f3717
Packit 1f3717
from __future__ import unicode_literals
Packit 1f3717
import xml.dom.minidom
Packit 1f3717
import sys
Packit 1f3717
Packit 1f3717
# Open the XML file
Packit 1f3717
# Read the input as raw bytes and let the XML parser do the decoding. In text
# mode Python 3 would first decode the file using the locale's encoding, which
# need not match the encoding of the XML document itself.
# The "with" block closes the file even if reading or parsing fails.
with open(sys.argv[1], "rb") as xmlFile:
	autoCorrect = xml.dom.minidom.parseString(xmlFile.read())
Packit 1f3717
Packit 1f3717
# Open the lexc file
Packit 1f3717
# The output is opened in binary mode and fed UTF-8 encoded bytes. A text mode
# file rejects bytes on Python 3 and would make the encoding depend on the
# locale; binary mode behaves the same on Python 2 and Python 3 and always
# writes "\n" as the line terminator.
outputFile = open(sys.argv[2], "wb")
# lexc header: declare @_SPACE_@ as a single multi-character symbol and start
# the Root lexicon that the entries are appended to.
outputFile.write("Multichar_Symbols\n@_SPACE_@\n\nLEXICON Root\n".encode("UTF-8"))
Packit 1f3717
Packit 1f3717
def formatForLexc(s):
	"""Return s prepared for use on one side of a lexc entry.

	Every "=" character is removed and every space is replaced by the
	"@_SPACE_@" symbol (the one declared under Multichar_Symbols in the
	output header). All other characters are returned unchanged.
	"""
	# NOTE(review): no other characters are escaped here; presumably the
	# autocorrect data never contains characters that are special in lexc
	# (such as ":" or "%") -- confirm against the input data.
	withoutEquals = "".join(s.split("="))
	return "@_SPACE_@".join(withoutEquals.split(" "))
Packit 1f3717
Packit 1f3717
# Read entries to lexc
Packit 1f3717
def _replacementText(replacement, tagName):
	"""Return the leading text of the first tagName element inside replacement.

	replacement is a DOM element; the text is taken from the "wholeText" of
	the first child node of the first matching descendant element.

	Raises ValueError if there is no such element or if its first child is
	not a text node (for example when the element is empty).
	"""
	elements = replacement.getElementsByTagName(tagName)
	# getattr also covers firstChild being None (empty element).
	text = getattr(elements[0].firstChild, "wholeText", None) if elements else None
	if text is None:
		raise ValueError("replacement entry has no text in <" + tagName + ">: " + replacement.toxml())
	return text


# Write one lexc entry per <replacement> element, in document order.
# Each entry has the form "incorrect:correct<TAB>#<TAB>;" and is written as
# UTF-8 encoded bytes. The file is closed even if an entry turns out to be
# malformed.
try:
	for replacement in autoCorrect.getElementsByTagName("replacement"):
		incorrect = formatForLexc(_replacementText(replacement, "incorrect"))
		correct = formatForLexc(_replacementText(replacement, "correct"))
		outputFile.write((incorrect + u":" + correct + u"\t#\t;\n").encode("UTF-8"))
finally:
	outputFile.close()