Blame vvfst/generate_sukija.py

Packit 1f3717
# -*- coding: utf-8 -*-
Packit 1f3717
Packit 1f3717
# Copyright 2013-2015 Hannu Väisänen (Hannu.Vaisanen@uef.fi)
Packit 1f3717
# Program to generate old spellings and common spelling mistakes for Voikko lexicon.
Packit 1f3717
Packit 1f3717
# This program is free software; you can redistribute it and/or modify
Packit 1f3717
# it under the terms of the GNU General Public License as published by
Packit 1f3717
# the Free Software Foundation; either version 2 of the License, or
Packit 1f3717
# (at your option) any later version.
Packit 1f3717
#
Packit 1f3717
# This program is distributed in the hope that it will be useful,
Packit 1f3717
# but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 1f3717
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit 1f3717
# GNU General Public License for more details.
Packit 1f3717
#
Packit 1f3717
# You should have received a copy of the GNU General Public License
Packit 1f3717
# along with this program; if not, write to the Free Software
Packit 1f3717
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
Packit 1f3717
Packit 1f3717
Packit 1f3717
# This program generates old spellings (e.g. symbooli) and
Packit 1f3717
# common spelling errors (e.g. kirjottaa) from file "all.lexc".
Packit 1f3717
#
Packit 1f3717
# An example: from line
Packit 1f3717
# [Ln][Xp]symboli[X]symbol@P.INEN_SALLITTU.ON@:symbol@P.INEN_SALLITTU.ON@ NimisanaPaperi_a ;
Packit 1f3717
#
Packit 1f3717
# generate line
Packit 1f3717
# [Ln][Xp]symboli[X]symbool@P.INEN_SALLITTU.ON@:symbool@P.INEN_SALLITTU.ON@ NimisanaPaperi_a 
Packit 1f3717
#
Packit 1f3717
# Should I generate these lines also?
Packit 1f3717
# [Ln][Xp]symbooli[X]symbol@P.INEN_SALLITTU.ON@:symbol@P.INEN_SALLITTU.ON@ NimisanaPaperi_a ;
Packit 1f3717
# [Ln][Xp]symbooli[X]symbool@P.INEN_SALLITTU.ON@:symbool@P.INEN_SALLITTU.ON@ NimisanaPaperi_a 
Packit 1f3717
#
Packit 1f3717
# This automatic generation will generate some old
Packit 1f3717
# spellings and spelling errors that do not exist in real life.
Packit 1f3717
Packit 1f3717
# Compiling (in directory suomimalaga):
Packit 1f3717
# make vvfst-sukija; make vvfst-sukija-install DESTDIR=~/vvfst/voikkodict/
Packit 1f3717
Packit 1f3717
# Compiling (in directory suomimalaga/vvfst):
Packit 1f3717
# cd ..; make vvfst-sukija; make vvfst-sukija-install DESTDIR=~/vvfst/voikkodict/; cd vvfst
Packit 1f3717
Packit 1f3717
# Testing (in directory suomimalaga/vvfst):
Packit 1f3717
# foma -e "read att all-sukija.att" -e "save stack sukija.fst" -e "quit"
Packit 1f3717
# date; cat ~/Lataukset/koesanat?.txt | flookup -i sukija.fst | gawk 'length($0) > 0' >test.out; date
Packit 1f3717
# diff test.out ~/Lataukset/vv* | grep '<.*[+][?]' | less
Packit 1f3717
# diff test.out ~/Lataukset/vv* | grep '>.*[+][?]' | less
Packit 1f3717
# diff test.out ~/Lataukset/vv* | grep '>.*[+][?]' | gawk '{print $2}' |flookup -i sukija.fst | gawk 'length($0) > 0'
Packit 1f3717
# cp test.out ~/Lataukset/vvfst-sukija-testi.out
Packit 1f3717
Packit 1f3717
# Style- ja usage-lippujen arvot suoraan Joukahaisesta:
Packit 1f3717
# grep -A1 '<style>' ../vocabulary/joukahainen.xml|grep flag|sort -u|gawk '{printf "%s,", substr($1,7,length($1)-13)}'
Packit 1f3717
# grep -A1 '<usage>' ../vocabulary/joukahainen.xml|grep flag|sort -u|gawk '{printf "%s,", substr($1,7,length($1)-13)}'
Packit 1f3717
Packit 1f3717
Packit 1f3717
import codecs
Packit 1f3717
import getopt
Packit 1f3717
import re
Packit 1f3717
import string
Packit 1f3717
import sys
Packit 1f3717
from types import *
Packit 1f3717
sys.path.append("common")
Packit 1f3717
import generate_lex_common
Packit 1f3717
Packit 1f3717
# Options as returned by generate_lex_common.get_options(); the "destdir"
# entry names the directory that holds the lexc files opened below.
OPTIONS = generate_lex_common.get_options()

# all.lexc is read, all-sukija.lexc is written and poikkeavat-sukija.lexc
# is read; all three are handled as UTF-8 text.
infile = codecs.open(OPTIONS["destdir"] + u"/all.lexc", mode="r", encoding="UTF-8")
outfile = codecs.open(OPTIONS["destdir"] + u"/all-sukija.lexc", mode='w', encoding='UTF-8')
sukijafile = codecs.open(OPTIONS["destdir"] + u"/poikkeavat-sukija.lexc", mode='r', encoding='UTF-8')
Packit 1f3717
Packit 1f3717
# Character classes substituted for the placeholder letters C, V, A and U
# in the word patterns handed to makeRePattern().
C = u"[qwrtpsšdfghjklzžxcvbnm]"  # Consonants.
V = u"[aeiouüyåäö]"              # Vowels.
A = u"[aä]"
U = u"[uy]"


def makeRePattern(wordClass, word):
    """Return the regular expression source for a lexicon line.

    The result matches a line that starts with "[wordClass][Xp]" and whose
    base form, terminated by "[X]", ends in `word`.  Every upper-case
    C, V, A and U in the assembled pattern is expanded to the character
    class of the same name, in that order.
    """
    pattern = u"^\\[%s\\]\\[Xp\\].*%s\\[X\\]" % (wordClass, word)
    for placeholder, char_class in (('C', C), ('V', V), ('A', A), ('U', U)):
        pattern = pattern.replace(placeholder, char_class)
    return pattern


def makeRe(wordClass, word):
    """Compile the pattern built by makeRePattern() with re.UNICODE set."""
    source = makeRePattern(wordClass, word)
    return re.compile(source, re.UNICODE)
Packit 1f3717
Packit 1f3717
Packit 1f3717
def replace(s, old, new):
    """Replace `old` by `new` in `s` wherever `old` ends a field.

    Only occurrences directly followed by ":", a space, a tab or "@" are
    rewritten; the four separators are handled one after another in that
    order, each pass working on the result of the previous one.
    """
    result = s
    for separator in (u":", u" ", u"\t", u"@"):
        result = result.replace(old + separator, new + separator)
    return result
Packit 1f3717
Packit 1f3717
Packit 1f3717
def replace_and_write(line, string1, string2):
    """Write `line` to outfile after replace() has turned string1 into string2."""
    outfile.write(replace(line, string1, string2))
Packit 1f3717
Packit 1f3717
Packit 1f3717
# Compiled patterns that select the lexicon lines to be respelled.
# makeRe() takes the word class tag (Ln, Ll, Lt) and the ending of the
# base form; C, V, A and U in the ending stand for character classes.

# Nouns ending in a short vowel + consonant + i.
re_adi = makeRe(u"Ln", u".Cadi")
re_odi = makeRe(u"Ln", u".Codi")
re_ofi = makeRe(u"Ln", u".Cofi")
re_ogi = makeRe(u"Ln", u".Cogi")
re_oli = makeRe(u"Ln", u".Coli")
re_omi = makeRe(u"Ln", u".Comi")
re_oni = makeRe(u"Ln", u".Coni")
re_ori = makeRe(u"Ln", u".Cori")

# Nouns ending in a long vowel + ri.
re_OOri = makeRe(u"Ln", u".Cööri")
re_UUri = makeRe(u"Ln", u"..UUri")

# Words ending in -inen.
# NOTE(review): these use the [Ln] tag except re_orinen, which uses [Ll];
# confirm which tag is intended.
re_adinen = makeRe(u"Ln", u".Cadinen")
re_odinen = makeRe(u"Ln", u".Codinen")
re_ofinen = makeRe(u"Ln", u".Cofinen")
re_oginen = makeRe(u"Ln", u".Coginen")
re_olinen = makeRe(u"Ln", u".Colinen")
re_ominen = makeRe(u"Ln", u".Cominen")
re_oninen = makeRe(u"Ln", u".Coninen")
re_orinen = makeRe(u"Ll", u".Corinen")

re_grafia = makeRe(u"Ln", u"grafia")
re_grafinen = makeRe(u"Ll", u"grafinen")
re_logia = makeRe(u"Ln", u"logia")
re_loginen = makeRe(u"Ll", u"loginen")

re_torio = makeRe(u"Ln", u"torio")

# Verbs, in back-vowel (1) and front-vowel (2) variants.
re_oittaa1 = makeRe(u"Lt", u".Coittaa")
re_oittaa2 = makeRe(u"Lt", u".Cöittää")

re_ottaa1 = makeRe(u"Lt", u".Cottaa")
re_ottaa2 = makeRe(u"Lt", u".Cöttää")

re_oitella1 = makeRe(u"Lt", u".Coitella")
re_oitella2 = makeRe(u"Lt", u".Cöitellä")

re_otella1 = makeRe(u"Lt", u".Cotella")
re_otella2 = makeRe(u"Lt", u".Cötellä")

re_isoida = makeRe(u"Lt", u"isoida")  # Organisoida => organiseerata.

re_oitin = makeRe(u"Ln", u".Coitin")
re_aatio = makeRe(u"Ln", u".Caatio")
re_uutio = makeRe(u"Ln", u".Cuutio")
re_uusio = makeRe(u"Ln", u".Cuusio")
re_tio = makeRe(u"Ln", u"([^a]i|k)tio")  # Traditio, funktio, but not aitio.

re_toninen = makeRe(u"Ll", u".toninen")
re_iivinen = makeRe(u"Ll", u"Ciivinen")
re_aalinen = makeRe(u"Ll", u"aalinen")
re_geeninen = makeRe(u"Ll", u"geeninen")
re_oittainen = makeRe(u"Ll", u"oittainen")

# Verb lines selected by the name of their continuation lexicon.
re_nuolaista = re.compile(u"\\[Lt\\].* Nuolaista_", flags=re.UNICODE)
re_rangaista = re.compile(u"\\[Lt\\].* Rangaista_", flags=re.UNICODE)

re_Xiljoona = re.compile(u"\\A(?:\\[Bc\\]|\\[Sn\\]|@).*(b|m|tr)iljoon", flags=re.UNICODE)

re_eikAs = makeRe(u"Ll", u"eikAs")
re_8_9 = re.compile(u"\\[Xp\\](ka|y)hdeks")
re_tautua1 = makeRe(u"Lt", u"tautua")
re_tautua2 = makeRe(u"Lt", u"täytyä")
Packit 1f3717
Packit 1f3717
Packit 1f3717
# Words to be excluded.
Packit 1f3717
#
Packit 1f3717
# Base forms that must not be respelled even though they match one of the
# general patterns.  Every backslash is doubled, as elsewhere in this file,
# so the literals no longer contain the invalid escape sequences "\[" and
# "\]"; the compiled patterns are unchanged.
re_adi_x = re.compile(u"\\A\\[Ln\\]\\[Xp\\](faradi|pikofaradi|stadi)\\[X\\]")
re_ogi_x = re.compile(u"\\A\\[Ln\\]\\[Xp\\](blogi|grogi|judogi)\\[X\\]")
re_omi_x = re.compile(u"\\A\\[Ln\\]\\[Xp\\](binomi|bromi|dibromi|genomi|kromi|trinomi)\\[X\\]")
re_oni_x = re.compile(u"\\A\\[Ln\\]\\[Xp\\](ikoni)\\[X\\]")
re_ori_x = re.compile(u"\\A\\[Ln\\]\\[Xp\\](hevosori|jalostusori|reettori|siitosori)\\[X\\]")

re_logia_x = re.compile(u"\\A\\[Ln\\]\\[Xp\\](genealogia|trilogia)\\[X\\]")

re_isoida_x = re.compile(u"\\A\\[Lt\\]\\[Xp\\](dramatisoida|karakterisoida)\\[X\\]")

# Matches a word containing a, o or u; write_word() uses it to choose
# between the "_a" and "_ä" lexicon suffixes.
re_A = re.compile(u"[aou]")
Packit 1f3717
Packit 1f3717
Packit 1f3717
# Respelling rules consumed by generate_from_pattern_1().  The first item
# of every tuple is the compiled pattern a line must match; the length of
# the tuple selects what is done with a matching line:
#
#   2 items:  (pattern, lexicon)
#             "Sukija" is prepended to the lexicon name.
#   3 items:  (pattern, old, new)
#             old is replaced by new in the stem fields.
#   4 items:  (pattern, old, new, exclusion_pattern)
#             as above, unless the exclusion pattern matches the line.
#   5 items:  (pattern, old, new, old_lexicon, new_lexicon)
#             applied only to lines containing old_lexicon, which is
#             replaced by new_lexicon.
#   10 items: (pattern, old_word, new_word, old_stem, new_stem,
#              lexicon_a, new_lexicon_a, lexicon_b, new_lexicon_b,
#              exclusion_pattern)
spelling_pattern_list = [
    (re_adi, u"ad", u"aad", re_adi_x),  # Serenadi  => serenaadi.
    (re_odi, u"od", u"ood"),            # Aplodi    => aploodi.
    (re_ofi, u"of", u"oof"),            # Filosofi  => filosoofi.
    (re_ogi, u"og", u"oog", re_ogi_x),  # Arkeologi => arkeoloogi.
    (re_oli, u"ol", u"ool"),            # Symboli   => symbooli.
    (re_omi, u"om", u"oom", re_omi_x),  # Atomi     => atoomi.
    (re_oni, u"on", u"oon", re_oni_x),  # Telefoni  => telefooni.
    (re_ori, u"or", u"oor", re_ori_x),  # Pehtori   => pehtoori.

    (re_OOri, u"öör", u"ör"),           # Amatööri => amatöri.
    (re_UUri, u"uur", u"ur"),

    (re_adinen, u"adi", u"aadi"),
    (re_odinen, u"odi", u"oodi"),
    (re_ofinen, u"ofi", u"oofi"),
    (re_oginen, u"ogi", u"oogi"),
    (re_olinen, u"oli", u"ooli"),
    (re_ominen, u"omi", u"oomi"),
    (re_oninen, u"oni", u"ooni"),
    (re_orinen, u"ori", u"oori"),

    (re_grafia, u"grafi", u"graafi"),
    (re_grafinen, u"grafi", u"graafi"),
    (re_logia, u"logi", u"loogi", re_logia_x),
    (re_loginen, u"logi", u"loogi"),

    (re_torio, u"torio", u"toorio"),

    (re_oitin, u"oit", u"ot"),  # Kirjoitin => kirjotin (e.g. kirjo(i)ttimen).

    (re_oittaa1, u"o", u"ot", u"Kirjoittaa", u"Alittaa"),
    (re_oittaa2, u"ö", u"öt", u"Kirjoittaa", u"Alittaa"),
    (re_oittaa1, u"oit", u"ot", u"Alittaa", u"Alittaa"),
    (re_oittaa2, u"öit", u"öt", u"Alittaa", u"Alittaa"),
    (re_ottaa1, u"ot", u"oit", u"Alittaa", u"Alittaa"),
    (re_ottaa2, u"öt", u"öit", u"Alittaa", u"Alittaa"),
    (re_ottaa1, u"o", u"oi", u"Ammottaa", u"Ammottaa"),
    (re_ottaa2, u"ö", u"öi", u"Ammottaa", u"Ammottaa"),

    (re_oitella1, u"oit", u"ot", u"Aatella", u"Aatella"),
    (re_oitella2, u"öit", u"öt", u"Aatella", u"Aatella"),
    (re_otella1, u"ot", u"oit", u"Aatella", u"Aatella"),
    (re_otella2, u"öt", u"öit", u"Aatella", u"Aatella"),

    (re_tautua1, u"tau", u"tau", u"Kaatua", u"SukijaAntautua"),
    (re_tautua2, u"täy", u"täy", u"Kaatua", u"SukijaAntautua"),

    (re_isoida, u"isoida", u"iseerata", u"iso", u"iseer",
     u"Kanavoida", u"Saneerata", u"Voida", u"Saneerata", re_isoida_x),

    (re_toninen, u"toni", u"tooni"),
    (re_iivinen, u"iivi", u"ivi"),
    (re_aalinen, u"aali", u"ali"),
    (re_geeninen, u"geeni", u"geni"),
    (re_oittainen, u"oittai", u"ottai"),

    (re_nuolaista, u"Nuolaista_"),
    (re_rangaista, u"Rangaista_"),

    (re_eikAs, u"eik", u"ehik"),
]
Packit 1f3717
Packit 1f3717
Packit 1f3717
def word_class(line):
    """Return the lexicon name prefix for the word class tag of `line`.

    The tag is everything up to and including the first "]".  Only the
    tags [Ll], [Ln] and [Lnl] are known; any other tag raises KeyError.
    """
    names = {
        u"[Ll]": u"Laatusana",
        u"[Ln]": u"Nimisana",
        u"[Lnl]": u"NimiLaatusana",
    }
    tag_end = line.find("]") + 1
    return names[line[0:tag_end]]
Packit 1f3717
Packit 1f3717
Packit 1f3717
# Sanoja, joilla on vain muutama vanha taivutusmuoto. Generoidaan ne erikseen,
Packit 1f3717
# mutta vain sanoille, jotka ovat Joukahaisessa. Sanat ovat Nykysuomen
Packit 1f3717
# sanakirjan taivutuskaavojen numeroiden mukaisessa järjestyksessä.
Packit 1f3717
#
Packit 1f3717
# Tuomo Tuomi: Suomen kielen käänteissanakirja, 2. painos.
Packit 1f3717
# Suomalaisen Kirjallisuuden Seura 1980.
Packit 1f3717
Packit 1f3717
def write_word(line, word, lexicon):
    """Write `line` again with its continuation lexicon replaced.

    Everything before the first space of `line` is kept.  The new lexicon
    name is built from the word class name, `lexicon` and a suffix "_a"
    when `word` contains a, o or u, otherwise "_ä".
    """
    first_space = line.find(u" ")
    harmony = u"a" if re_A.search(word) else u"ä"
    fields = (line[0:first_space], word_class(line), lexicon, harmony)
    outfile.write(u"%s %s%s_%s ;\n" % fields)
Packit 1f3717
Packit 1f3717
def write_ahven(line, word):
    """Write the SukijaAhven variant of `line`, skipping [Lu] lines."""
    if line.startswith(u"[Lu]"):
        return
    write_word(line, word, u"SukijaAhven")
Packit 1f3717
Packit 1f3717
def write_kaunis(line, word):
    """Write the SukijaKaunis variant of `line` via write_word()."""
    write_word(line, word, lexicon=u"SukijaKaunis")
Packit 1f3717
Packit 1f3717
def write_altis(line, word):
    """Write the SukijaAltis variant of `line` via write_word()."""
    write_word(line, word, lexicon=u"SukijaAltis")
Packit 1f3717
Packit 1f3717
def write_virkkaa(line, word):
    """Write `line` up to its first space with the SukijaVirkkaa lexicon.

    `word` is not used.
    """
    head = line[:line.find(u" ")]
    outfile.write(u"%s SukijaVirkkaa_ä ;\n" % head)
Packit 1f3717
Packit 1f3717
def write_paistaa(line, word):
    """Write `line` up to its first space with the SukijaPaistaa lexicon.

    `word` is not used.
    """
    head = line[:line.find(u" ")]
    outfile.write(u"%s SukijaPaistaa_a ;\n" % head)
Packit 1f3717
Packit 1f3717
def write_paahtaa(line, word):
    """Write `line` up to its first space with the SukijaPaahtaa lexicon.

    `word` is not used.
    """
    head = line[:line.find(u" ")]
    outfile.write(u"%s SukijaPaahtaa_a ;\n" % head)
Packit 1f3717
Packit 1f3717
def write_lahti(line, word):
    """Write the SukijaLahti variant of `line` via write_word()."""
    write_word(line, word, lexicon=u"SukijaLahti")
Packit 1f3717
Packit 1f3717
def write_8_9(line):
    """Write `line` with "kahdeks"/"yhdeks" shortened to "kaheks"/"yheks".

    Only the text after the first "[X]" marker is changed; the part up to
    and including the marker is written as it is.  Raises ValueError when
    the line has no "[X]" marker.
    """
    split_at = line.index(u"[X]") + 3
    head = line[0:split_at]
    tail = line[split_at:].replace(u"kahdeks", u"kaheks").replace(u"yhdeks", u"yheks")
    outfile.write(u"%s%s" % (head, tail))
Packit 1f3717
Packit 1f3717
def generate_from_pattern_1(line, pattern_list):
    """Write a respelled copy of `line` for every rule that applies to it.

    Each rule is a tuple whose first item is a compiled pattern; rules
    whose pattern does not match `line` are ignored.  The tuple length
    selects the action:

    2  -- prefix the lexicon name rule[1] with "Sukija".
    3  -- replace rule[1] by rule[2].
    4  -- as 3, unless the exclusion pattern rule[3] matches.
    5  -- as 3, but only for lines containing rule[3], which is replaced
          by rule[4] first.
    10 -- unless the exclusion pattern rule[9] matches: replace rule[1],
          rule[5] and rule[7] by rule[2], rule[6] and rule[8], then
          replace rule[3] by rule[4].

    All rules are tried, so one line may produce several output lines.
    """
    for rule in pattern_list:
        if not rule[0].match(line):
            continue
        size = len(rule)
        if size == 2:
            lexicon = rule[1]
            outfile.write(line.replace(lexicon, u"Sukija" + lexicon))
        elif size == 3 or (size == 4 and not rule[3].match(line)):
            replace_and_write(line, rule[1], rule[2])
        elif size == 5 and rule[3] in line:
            relabelled = line.replace(rule[3], rule[4])
            replace_and_write(relabelled, rule[1], rule[2])
        elif size == 10 and not rule[9].match(line):
            respelled = line.replace(rule[1], rule[2])
            respelled = respelled.replace(rule[5], rule[6])
            respelled = respelled.replace(rule[7], rule[8])
            replace_and_write(respelled, rule[3], rule[4])
Packit 1f3717
Packit 1f3717
Packit 1f3717
def generate_from_pattern_2(line, pattern, string, p1, p2, s1, s2):
    """Write respelled copies of `line` when `pattern` matches it.

    For every item of `p1` the line is written with `string` replaced by
    that item.  For every item of `p2` the same is done on a copy of the
    line in which `s1` has first been replaced by `s2`.
    """
    if not pattern.match(line):
        return
    for replacement in p1:
        replace_and_write(line, string, replacement)
    for replacement in p2:
        replace_and_write(line.replace(s1, s2), string, replacement)
Packit 1f3717
Packit 1f3717
Packit 1f3717
# Vanhoja kirjoitusasuja ja yleisiä kirjoitusvirheitä,
Packit 1f3717
# jotka eivät noudata mitään kaavaa.
Packit 1f3717
#
Packit 1f3717
#    (u"", (u"", u"")),
Packit 1f3717
#
Packit 1f3717
word_list = [
Packit 1f3717
    (u"aarteisto",        (u"aarteisto",   u"aartehisto")),
Packit 1f3717
    (u"Abessinia",        (u"abessini",    u"abessiini", u"abyssini", "abyssiini")),
Packit 1f3717
    (u"Afganistan",       (u"afganistan",  u"afghanistan")),
Packit 1f3717
    (u"agaave",           (u"agaave",      u"agave")),
Packit 1f3717
    (u"aggregaatio",      (u"aggregaatio", u"agregaatio")),
Packit 1f3717
    (u"ag=gregaatti",     (u"ag[Bm]gregaat:aggregaat",     u"ag[Bm]regaat:agregaat")),
Packit 1f3717
    (u"ag=gressiivinen",  (u"ag[Bm]gressiivi:aggressiivi", u"ag[Bm]ressiivi:agressiivi",
Packit 1f3717
                                                           u"ag[Bm]ressivi:agressivi",
Packit 1f3717
                                                           u"ag[Bm]gressivi:aggressivi",
Packit 1f3717
                                                           u"ak[Bm]ressiivi:akressiivi")),
Packit 1f3717
    (u"ag=gressio",       (u"ag[Bm]gressio:aggressio", u"ag[Bm]ressio:agressio")),
Packit 1f3717
    (u"ainainen",         (u"ainai",         u"ainahi")),
Packit 1f3717
    (u"aineisto",         (u"aineisto",      u"ainehisto")),
Packit 1f3717
    (u"aivoitus",         (u"aivoitu",       u"aivotu")),
Packit 1f3717
    (u"akateemikko",      (u"akateemik",     u"akatemik")),
Packit 1f3717
    (u"akateeminen",      (u"akateemi",      u"akatemi")),
Packit 1f3717
    (u"aksiomi",          (u"aksiom",        u"aksioom")),
Packit 1f3717
    (u"aksiooma",         (u"aksioom",       u"aksiom")),
Packit 1f3717
    (u"alamainen",        (u"alamai",        u"alammai")),
Packit 1f3717
    (u"alimmainen",       (u"alimmai",       u"alimai")),
Packit 1f3717
    (u"alkali",           (u"alkal",         u"alkaal")),
Packit 1f3717
    (u"alkovi",           (u"alkov",         u"alkoov")),
Packit 1f3717
    (u"ameba",            (u"ameb",          u"ameeb")),
Packit 1f3717
    (u"amfi=teatteri",    (u"amfi[Bm]teatter:amfiteatter", u"amfii[Bm]teaatter:amfiiteaatter",
Packit 1f3717
                                                           u"amfi[Bm]teaatter:amfiteaatter")),
Packit 1f3717
    (u"apassi",           (u"apass",         u"apash")),
Packit 1f3717
    (u"apteekkari",       (u"apteekkar",     u"apteekar", u"aptekar")),
Packit 1f3717
    (u"arsenikki",        (u"arsenik",       u"arseniik")),
Packit 1f3717
    (u"assistentti",      (u"assistent",     u"asistent")),
Packit 1f3717
    (u"at=tribuutti",     (u"at[Bm]tribuut:attribuut", u"at[Bm]ribuut:atribuut")),
Packit 1f3717
    (u"Australia",        (u"Australi",      u"Austraali")),
Packit 1f3717
    (u"barbaari",         (u"barbaar",       u"barbar")),
Packit 1f3717
    (u"beduiini",         (u"beduiin",       u"beduin")),
Packit 1f3717
    (u"biljardi",         (u"biljard",       u"biljaard")),
Packit 1f3717
    (u"borssi",           (u"borss",         u"borsh")),
Packit 1f3717
    (u"dervissi",         (u"derviss",       u"dervish", u"dervisch")),
Packit 1f3717
    (u"diadeemi",         (u"diadeem",       u"diadem")),
Packit 1f3717
    (u"disko",            (u"disko",         u"disco")),
Packit 1f3717
    (u"divaani",          (u"divaan",        u"divan")),
Packit 1f3717
    (u"drakma",           (u"drakm",         u"drakhm")),
Packit 1f3717
    (u"eellimmäinen",     (u"eelimmäi",      u"eelimäi")),
Packit 1f3717
    (u"edes",             (u"edes",          u"ees")),
Packit 1f3717
    (u"eilinen",          (u"eili",          u"eilli")),
Packit 1f3717
    (u"ekumeeninen",      (u"ekumeeni",      u"ekumeni")),
Packit 1f3717
    (u"emali",            (u"emal",          u"emalj")),
Packit 1f3717
    (u"emaloida",         (u"emalo",         u"emaljo")),
Packit 1f3717
    (u"embleemi",         (u"embleem",       u"emblem")),
Packit 1f3717
    (u"emiiri",           (u"emiir",         u"emir")),
Packit 1f3717
    (u"ensimmäinen",      (u"ensimmäi",      u"ensimäi", u"ensinmäi")),
Packit 1f3717
    (u"erilainen",        (u"erilai",        u"erillai", u"erinlai")),
Packit 1f3717
    (u"erillinen",        (u"erilli",        u"erili")),
Packit 1f3717
    (u"eris=kummallinen", (u"kummalli",      u"kummali", u"kumalli")),
Packit 1f3717
    (u"erinäinen",        (u"erinäi",        u"erinnäi")),
Packit 1f3717
    (u"eteinen",          (u"etei",          u"etehi")),
Packit 1f3717
    (u"evankelinen",      (u"evankeli",      u"evankeeli")),
Packit 1f3717
    (u"fakiiri",          (u"fakiir",        u"fakir")),
Packit 1f3717
    (u"fär=saarelainen",  (u"fär[Bm]saarelai:färsaarelai", u"fäär[Bm]saarelai:fäärsaarelai")),
Packit 1f3717
    (u"galvaaninen",      (u"galvaani",      u"galvani")),
Packit 1f3717
    (u"gentlemanni",      (u"getlemann",     u"gentleman")),
Packit 1f3717
    (u"germaani",         (u"germaan",       u"german")),
Packit 1f3717
    (u"gregoriaaninen",   (u"gregoriaani",   u"gregoriani")),
Packit 1f3717
    (u"harlekiini",       (u"harlekiin",     u"harlekin")),
Packit 1f3717
    (u"harppuuna",        (u"harppuun",      u"harpuun")),
Packit 1f3717
    (u"haupitsi",         (u"haupits",       u"haubits")),
Packit 1f3717
    (u"hevonen",          (u"hevo",          u"hevoi")),
Packit 1f3717
    (u"humaaninen",       (u"humaani",       u"humani")),
Packit 1f3717
    (u"husaari",          (u"husaar",        u"husar")),
Packit 1f3717
    (u"huumori",          (u"huumor",        u"humor")),
Packit 1f3717
    (u"hygieeninen",      (u"hygieeni",      u"hygieni")),
Packit 1f3717
    (u"ideaali",          (u"ideaal",        u"ideal")),
Packit 1f3717
    (u"ilotella",         (u"ilot",          u"iloit")),
Packit 1f3717
    (u"inhimillinen",     (u"inhimilli",     u"inhimmilli")),
Packit 1f3717
    (u"inkvisiittori",    (u"inkvisiittor",  u"inkvisitor", u"inkvisiitor")),
Packit 1f3717
    (u"inkvisitio",       (u"inkvisitio",    u"inkvisiitio")),
Packit 1f3717
    (u"insinööri",        (u"insinöör",      u"insinör")),
Packit 1f3717
    (u"invalidi",         (u"invalid",       u"invaliid")),
Packit 1f3717
    (u"Italia",           (u"Ital",          u"Itaal")),
Packit 1f3717
    (u"janitsaari",       (u"janitsaar",     u"janitsar", u"janitschar", u"janitschaar", u"janitshaar")),
Packit 1f3717
    (u"juliaaninen",      (u"juliaani",      u"juliani")),
Packit 1f3717
    (u"juridinen",        (u"juridi",        u"juriidi")),
Packit 1f3717
    (u"kaanon",           (u"kaanon",        u"kanon")),
Packit 1f3717
    (u"kaleeri",        (u"kaleer",      u"kaler")),
Packit 1f3717
    (u"kamari",         (u"kamar",       u"kammar")),
Packit 1f3717
    (u"kameli",         (u"kamel",       u"kameel")),
Packit 1f3717
    (u"kamiina",        (u"kamiin",      u"kamin")),
Packit 1f3717
    (u"kampanja",       (u"kampanj",     u"kamppanj")),
Packit 1f3717
    (u"kaneli",         (u"kanel",       u"kaneel")),
Packit 1f3717
    (u"kanuuna",        (u"kanuun",      u"kanun", u"kanoon", u"kanon")),
Packit 1f3717
    (u"kaoottinen",     (u"kaootti",     u"kaaootti", u"kaaotti", u"kaotti")),
Packit 1f3717
    (u"kapteeni",       (u"kapteen",     u"kaptein")),
Packit 1f3717
    (u"kategoria",      (u"kategor",     u"kategoor")),
Packit 1f3717
    (u"kauan",          ((u"kauaa",      u"kauvaa"),
Packit 1f3717
                         (u"kauan",      u"kauvan"),
Packit 1f3717
                         (u"kauemmin",   u"kauvemmin"),
Packit 1f3717
                         (u"kauimmin",   u"kauvimmin"))),
Packit 1f3717
    (u"kauempi",        (u"kaue",        u"kauve")),
Packit 1f3717
    (u"Kaukasia",       (u"Kaukasi",     u"Kaukaasi")),
Packit 1f3717
    (u"kaunoinen",      (u"kaunoi",      u"kaunohi")),
Packit 1f3717
    (u"katolilainen",   (u"katolilai",   u"katoolilai")),
Packit 1f3717
    (u"katolinen",      (u"katoli",      u"katooli")),
Packit 1f3717
    (u"kavaljeeri",     (u"kavaljeer",   u"kavaljer", u"kavalier", u"kavaljier")),
Packit 1f3717
    (u"kenraali",       (u"kenraal",     u"kenral")),
Packit 1f3717
    (u"kerubi",         (u"kerub",       u"keruub", u"kheruub")),
Packit 1f3717
    (u"keskimmäinen",   (u"keskimmäi",   u"keskimäi")),
Packit 1f3717
    (u"kiiras=torstai", (u"torsta",      u"tuorsta")),
Packit 1f3717
    (u"kirjoitelma",    (u"kirjoitelm",  u"kirjotelm")),
Packit 1f3717
    (u"klinikka",       (u"klinik",      u"kliniik")),
Packit 1f3717
    (u"kollega",        (u"kolleg",      u"kolleeg")),
Packit 1f3717
    (u"kollegio",       (u"kollegio",    u"kolleegio")),
Packit 1f3717
    (u"komisario",      (u"komisario",   u"komisaario", u"komissaario")),
Packit 1f3717
    (u"komissaari",     (u"komissaar",   u"komissar")),
Packit 1f3717
    (u"koneisto",       (u"koneisto",    u"konehisto")),
Packit 1f3717
    (u"konossementti",  (u"konossement", u"konnossement")),
Packit 1f3717
    (u"konttori",       (u"konttor",     u"kontoor")),
Packit 1f3717
    (u"korsteeni",      (u"korsteen",    u"korstein")),
Packit 1f3717
    (u"kortteeri",      (u"kortteer",    u"kortter", u"kortier", u"korttier")),
Packit 1f3717
    (u"kraana",         (u"kraan",       u"kran")),
Packit 1f3717
    (u"kraatteri",      (u"kraatter",    u"kraater", u"krateer")),
Packit 1f3717
    (u"kranaatti",      ((u"kranaat",    u"kranat"),
Packit 1f3717
                         (u"kranaat",    u"kranat", u"NimisanaSalaatti_a", u"NimisanaRisti_a"))),
Packit 1f3717
    (u"krenatööri",     (u"krenatöör",   u"krenatör", u"kranatöör", u"kranatör", u"kranatier", u"krenatyör")),
Packit 1f3717
    (u"kritiikki",      ((u"kritiik",    u"kritik", u"NimisanaKajakki_ä", u"NimisanaRisti_ä"),
Packit 1f3717
                         (u"kritiik",    u"kriitiik"))),
Packit 1f3717
    (u"kriitikko",      (u"kriitik",     u"kriitiik", u"kritiik")),
Packit 1f3717
    (u"kulttuuri",      (u"kulttuur",    u"kultuur", "kulttur")),
Packit 1f3717
    (u"kuriiri",        (u"kuriir",      u"kurier")),
Packit 1f3717
    (u"kurtiini",       (u"kurtiin",     u"kurtin")),
Packit 1f3717
    (u"kuvernööri",     (u"kuvernöör",   u"kuvernör")),
Packit 1f3717
    (u"Kööpen=hamina",  (u"kööpen[Bm]hamin:kööpenhamin", u"köpen[Bm]hamin:köpenhamin")),
Packit 1f3717
    (u"laitimmainen",   (u"laitimmai",   u"laitimai")),
Packit 1f3717
    (u"langobardi",     (u"langobard",   u"longobard")),
Packit 1f3717
    (u"lauantai",       (u"lauanta",     u"lauvanta")),
Packit 1f3717
    (u"lauantaisin",    (u"lauantaisin", u"lauvantaisin")),
Packit 1f3717
    (u"leegio",         (u"leegio",      u"legio")),
Packit 1f3717
    (u"legioona",       (u"legioon",     u"legion")),
Packit 1f3717
    (u"legioonalainen", (u"legioonalai", u"legionalai")),
Packit 1f3717
    (u"lestadiolainen", (u"lestadiolai", u"lestaadiolai", u"laestadiolai")),
Packit 1f3717
    (u"liipaisin",      (u"liipaisi",    u"liipasi")),
Packit 1f3717
    (u"likimmäinen",    (u"likimmäi",    u"likimäi")),
Packit 1f3717
    (u"lineaarinen",    (u"lineaari",    u"lineari")),
Packit 1f3717
    (u"lordi",          (u"lord",        u"loord")),
Packit 1f3717
    (u"luterilainen",   (u"luterilai",   u"lutherilai", u"luteerilai", u"lutheerilai")),
Packit 1f3717
    (u"lähimmäinen",    (u"lähimmäi",    u"lähimäi")),
Packit 1f3717
    (u"maineikas",      (u"maineik",     u"maineek", u"mainek")),
Packit 1f3717
    (u"maisteri",       (u"maister",     u"majister")),
Packit 1f3717
    (u"majoneesi",      (u"majonees",    u"majonnees")),
Packit 1f3717
    (u"majuri",         (u"majur",       u"majuur")),
Packit 1f3717
    (u"mamselli",       (u"mamsell",     u"mamsel")),
Packit 1f3717
    (u"mansetti",       (u"manset",      u"manshet")),
Packit 1f3717
    (u"marsilainen",    (u"marsilai",    u"marssilai")),
Packit 1f3717
    (u"matrikkeli",     (u"matrikkel",   u"matrikel")),
Packit 1f3717
    (u"mekaaninen",     (u"mekaani",     u"mekani")),
Packit 1f3717
    (u"melankolinen",   (u"melankoli",   u"melankooli")),
Packit 1f3717
    (u"mieluinen",      (u"mielui",      u"mieluhi")),
Packit 1f3717
    (u"minareetti",     (u"minareet",    u"minaret")),
Packit 1f3717
    (u"modeemi",        (u"modeem",      u"modem")),
Packit 1f3717
    (u"moduuli",        (u"moduul",      u"modul")),
Packit 1f3717
    (u"molemmin=puolinen", (u"molemmin[Bm]puoli:molemminpuoli", u"molemmin[Bm]puolei:molemminpuolei")),
Packit 1f3717
    (u"moraali",           (u"moraal",      u"moral")),
Packit 1f3717
    (u"mosaiikki",      ((u"mosaiik",    u"mosaik"),
Packit 1f3717
                         (u"mosaiik",    u"mosaik", u"NimisanaTakki_a", u"NimisanaRisti_a"))),
Packit 1f3717
    (u"muhamettilainen", (u"muhamettilai", u"muhammettilai", u"mahomettilai", u"muhammedilai", u"muhamedilai",
Packit 1f3717
                          u"muhametilai",  u"muhameedilai",  u"mohametilai",  u"mohammedilai")),
Packit 1f3717
    (u"musiikki",        ((u"musiik",      u"musik"),
Packit 1f3717
                          (u"musiik",      u"musik", u"NimisanaKajakki_a", u"NimisanaPaperi_a"))),
Packit 1f3717
    (u"naiivi",          (u"naiiv",        u"naiv")),
Packit 1f3717
    (u"naiivinen",       (u"naiivi",       u"naivi")),
Packit 1f3717
    (u"non=ag=gressio",  (u"non[Bm]ag[Bm]gressio:nonaggressio", u"non[Bm]ag[Bm]ressio:nonagressio")),
Packit 1f3717
    (u"Nubia",           (u"nubi",        u"nuubi")),
Packit 1f3717
    (u"odottaa",         (u"odot",        u"oot")),
Packit 1f3717
    (u"ottomaani",       (u"ottomaan",    u"ottoman")),
Packit 1f3717
    (u"paitsi",          (u"paitsi",      u"paitse")),
Packit 1f3717
    (u"palkollinen",     (u"palkolli",    u"palkoilli")),
Packit 1f3717
    (u"paneeli",         (u"paneel",      u"panel")),
Packit 1f3717
    (u"pankkiiri",       (u"pankkiir",    u"pankkir")),
Packit 1f3717
    (u"pantomiimi",      (u"pantomiim",   u"pantomim")),
Packit 1f3717
    (u"paraati",         (u"paraat",      u"parat")),
Packit 1f3717
    (u"paratiisi",       (u"paratiis",    u"paratis", u"paradiis", u"paradis")),
Packit 1f3717
    (u"parhaisto",       (u"parhaisto",   u"parahisto")),
Packit 1f3717
    (u"paronitar",       (u"paronit",     u"paroonit")),
Packit 1f3717
    (u"pasuuna",         (u"pasuun",      u"pasun")),
Packit 1f3717
    (u"pataljoona",      (u"pataljoon",   u"pataljon")),
Packit 1f3717
    (u"patriisi",        (u"patriis",     u"patris")),
Packit 1f3717
    (u"patruuna",        (u"patruun",     u"patrun", u"patroon", u"patron")),
Packit 1f3717
    (u"perimmäinen",     (u"perimmäi",    u"perimäi")),
Packit 1f3717
    (u"persoona",        (u"persoon",     u"person")),
Packit 1f3717
    (u"piispa",          (u"piisp",       u"pisp")),
Packit 1f3717
    (u"pioneeri",        (u"pioneer",     u"pioner")),
Packit 1f3717
    (u"pioni",           (u"pion",        u"pioon")),
Packit 1f3717
    (u"pitaali",         (u"pitaal",      u"pital")),
Packit 1f3717
    (u"plataani",        (u"plataan",     u"platan")),
Packit 1f3717
    (u"plebeiji",        (u"plebeij",     u"plebej")),
Packit 1f3717
    (u"plutoona",        (u"plutoon",     u"pluton")),
Packit 1f3717
    (u"pohjimmainen",    (u"pohjimmai",   u"pohjimai")),
Packit 1f3717
    (u"poliisi",         (u"poliis",      u"polis")),
Packit 1f3717
    (u"poliitikko",      (u"poliitik",    u"politik", u"poliitiik")),
Packit 1f3717
    (u"poliittinen",     (u"poliitti",    u"politti", u"poliittii")),
Packit 1f3717
    (u"politiikka",      (u"politiik",    u"politik", u"poliitiik")),
Packit 1f3717
    (u"Polynesia",       (u"Polynesi",    u"Polyneesi")),
Packit 1f3717
    (u"posetiivi",       (u"posetiiv",    u"posetiv")),
Packit 1f3717
    (u"positiivi",       (u"positiiv",    u"positiv")),
Packit 1f3717
    (u"posliini",        (u"posliin",     u"poslin"   u"porsliin", u"porslin")),
Packit 1f3717
    (u"preettori",       (u"preettor",    u"preetor", u"pretor")),
Packit 1f3717
    (u"pretoriaani",     (u"pretoriaan",  u"pretorian")),
Packit 1f3717
    (u"probleemi",       (u"probleem",    u"problem")),
Packit 1f3717
    (u"pudottaa",        (u"pudo",        u"puo")),
Packit 1f3717
    (u"puolimmainen",    (u"puolimmai",   u"puolimai")),
Packit 1f3717
    (u"pyramidi",        (u"pyramid",     u"pyramiid")),
Packit 1f3717
    (u"päällikkö",       (u"päällik",     u"päälik")),
Packit 1f3717
    (u"päällimmäinen",   (u"päällimmäi",  u"päällimäi", u"päälimäi", u"päälimmäi")),
Packit 1f3717
    (u"päärynä",         ((u"pääryn", u"pääron", u"NimisanaPeruna_ä", u"NimisanaPeruna_aä"), )),
Packit 1f3717
    (u"rakuuna",         (u"rakuun",      u"rakun")),
Packit 1f3717
    (u"rangaistus",      (u"rangaistu",   u"rankaistu")),
Packit 1f3717
    (u"reettori",        (u"reettor",     u"reetor")),
Packit 1f3717
    (u"reunimmainen",    (u"reunimmai",   u"reunimai")),
Packit 1f3717
    (u"romanttinen",     (u"romantti",    u"romanti", u"romantilli")),
Packit 1f3717
    (u"safiiri",         (u"safiir",      u"safir")),
Packit 1f3717
    (u"saippua",         (u"saippu",      u"saipu")),
Packit 1f3717
    (u"salamanteri",     (u"salamanter",  u"salamander")),
Packit 1f3717
    (u"saman=lainen",    (u"saman[Bm]lai:samanlai", u"samal[Bm]lai:samallai")),
Packit 1f3717
    (u"samojedi",        (u"samojed",     u"samojeed")),
Packit 1f3717
    (u"sampanja",        (u"sampanj",     u"samppanj")),
Packit 1f3717
    (u"sankaruus",       (u"sankaruu",    u"sankariu")),
Packit 1f3717
    (u"saraseeni",       (u"saraseen",    u"sarasen")),
Packit 1f3717
    (u"sapatti",         (u"sapat",       u"sabat")),
Packit 1f3717
    (u"sapeli",          (u"sapel",       u"sapeel")),
Packit 1f3717
    (u"satraappi",       ((u"satraap",    u"satrap"),
Packit 1f3717
                          (u"satraap",    u"satraap", u"NimisanaSinappi_a", u"NimisanaPaperi_a"),
Packit 1f3717
                          (u"satraap",    u"satrap",  u"NimisanaSinappi_a", u"NimisanaPaperi_a"))),
Packit 1f3717
    (u"seminaari",       (u"seminaar",    u"seminar")),
Packit 1f3717
    (u"senaatti",        ((u"senaat",     u"senaat", u"NimisanaSalaatti_a", u"NimisanaPaperi_a"),
Packit 1f3717
                          (u"senaat",     u"senat",  u"NimisanaSalaatti_a", u"NimisanaPaperi_a"))),
Packit 1f3717
    (u"senaattori",      (u"senaattor",   u"senaator")),
Packit 1f3717
    (u"serafi",          (u"seraf",       u"seraaf")),
Packit 1f3717
    (u"serenad",         (u"serenad",     u"serenaat")),
Packit 1f3717
    (u"setlementti",     (u"setlement",   u"settlement")),
Packit 1f3717
    (u"shampanja",       (u"shampanj",    u"shamppanj")),
Packit 1f3717
    (u"sihteeri",        (u"sihteer",     u"sihter", u"sihtier")),
Packit 1f3717
    (u"sikari",          (u"sikar",       u"sikaar")),
Packit 1f3717
    (u"sireeni",         (u"sireen",      u"siren")),
Packit 1f3717
    (u"sitten",          (u"sitten",      u"sitte")),
Packit 1f3717
    (u"sitruuna",        (u"sitruun",     u"sitrun", u"sitroon", u"sitron")),
Packit 1f3717
    (u"sitä=paitsi",     (u"paitsi",      u"paitse")),
Packit 1f3717
    (u"siviili",         (u"siviil",      u"sivil")),
Packit 1f3717
    (u"sivilisaatio",    (u"sivilisaatio", u"siviilisaatio")),
Packit 1f3717
    (u"slaavilainen",    (u"slaavilai",    u"slavilai")),
Packit 1f3717
    (u"soolo",           (u"soolo",        u"solo")),
Packit 1f3717
    (u"soopeli",         (u"soopel",       u"sopel", u"soobel")),
Packit 1f3717
    (u"spitaali",        (u"spitaal",      u"spital")),
Packit 1f3717
    (u"stationaarinen",  ((u"stationaari", u"stationääri", u"LaatusanaNainenInen_a ", u"LaatusanaNainenInen_ä"),
Packit 1f3717
                          (u"stationaari", u"stationari"))),
Packit 1f3717
    (u"sulhanen",        (u"sulha",        u"sulhai")),
Packit 1f3717
    (u"sydämistyä",      ((u"sydämisty",   u"sydämmisty"),
Packit 1f3717
                          (u"sydämisty",   u"sydämmysty"))),
Packit 1f3717
    (u"synagoga",        (u"synagog",      u"synagoog")),
Packit 1f3717
    (u"taimmainen",      (u"taimmai",      u"takimai", u"taemmai", u"taaemmai")),
Packit 1f3717
    (u"talismaani",      (u"talismaan",    u"talisman")),
Packit 1f3717
    (u"tantieemi",       (u"tantieem",     u"tantiem")),
Packit 1f3717
    (u"teatteri",        (u"teatter",      u"teaater", u"teaatter", u"teater")),
Packit 1f3717
    (u"temperamentti",   (u"temperament",  u"tempperament")),
Packit 1f3717
    (u"tooga",           (u"toog",         u"tog")),
Packit 1f3717
    (u"topaasi",         (u"topaas",       u"topas")),
Packit 1f3717
    (u"Toscana",         (u"toscan",       u"toskan")),
Packit 1f3717
    (u"toteemi",         (u"toteem",       u"totem")),
Packit 1f3717
    (u"torpedo",         (u"torpedo",      u"torpeedo")),
Packit 1f3717
    (u"torstai",         (u"torsta",       u"tuorsta")),
Packit 1f3717
    (u"Traakia",         (u"traaki",       u"traki")),
Packit 1f3717
    (u"traakialainen",   (u"traakialai",   u"trakialai")),
Packit 1f3717
    (u"tulimmainen",     (u"tulimmai",     u"tulimai")),
Packit 1f3717
    (u"tulppaani",       (u"tulppaan",     u"tulpaan")),
Packit 1f3717
    (u"tussi",           (u"tuss",         u"tush")),
Packit 1f3717
    (u"tällainen",       ((u"tällai",      u"tällai", u"NimiLaatusanaNainenInen_a", u"NimiLaatusanaNainenInen_ä"),
Packit 1f3717
                          (u"tällai",      u"tälläi", u"NimiLaatusanaNainenInen_a", u"NimiLaatusanaNainenInen_aä"))),
Packit 1f3717
    (u"ulommainen",      (u"ulommai",      u"uloimmai")),
Packit 1f3717
    (u"upseeri",         (u"upseer",       u"upser", u"upsier")),
Packit 1f3717
    (u"upseeristo",      (u"upseeristo",   u"upsieristo")),
Packit 1f3717
    (u"vaillinainen",    (u"vaillinai",    u"vaillinnai")),
Packit 1f3717
    (u"vaivalloinen",    (u"vaivalloi",    u"vaivaloi")),
Packit 1f3717
    (u"vaientaa",        (u"vaien",        u"vaijen")),
Packit 1f3717
    (u"vampyyri",        (u"vampyyr",      u"vampyr")),
Packit 1f3717
    (u"vasemmisto",      (u"vasemmisto",   u"vasemisto")),
Packit 1f3717
    (u"viheriöidä",      (u"viheriö",      u"viherjö")),
Packit 1f3717
    (u"vihkiäinen",      (u"vihkiäi",      u"vihkijäi")),
Packit 1f3717
    (u"vihollinen",      (u"viholli",      u"vihoilli")),
Packit 1f3717
    (u"vinottainen",     (u"vinottai",     u"vinoittai")),
Packit 1f3717
    (u"visiiri",         (u"visiir",       u"visir")),
Packit 1f3717
    (u"Väisänen",        (u"väisä",        u"wäisä")),
Packit 1f3717
    (u"ylhäällä",        (u"ylhää",        u"ylähä")),
Packit 1f3717
    (u"ylimmäinen",      (u"ylimmäi",      u"ylimäi")),
Packit 1f3717
    (u"öinen",           (u"öi",           u"öilli")),
Packit 1f3717
Packit 1f3717
    (u"lainen",  lambda line, word: replace_and_write (line.replace(u"lai",u"läi"), u"NimiLaatusanaNainen_a", u"NimiLaatusanaNainen_ä")),
Packit 1f3717
Packit 1f3717
    # 38 pieni (4, 4). Juoni, moni, pieni, tyyni.
Packit 1f3717
    #
Packit 1f3717
    # Nämä ovat tiedostossa poikkeavat-sukija.lexc
Packit 1f3717
    #
Packit 1f3717
##    (u"juoni", [u"[Lnl][Xp]juoni[X]juon:juon NimiLaatusanaSukijaPieni_a ;"]),
Packit 1f3717
##    (u"moni",  [u"[Ln][Xp]moni[X]mon:mon NimisanaSukijaPieni_a ;"]),
Packit 1f3717
##    (u"pieni", [u"[Ll][Xp]pieni[X]pien:pien LaatusanaSukijaPieni_ä ;"]),
Packit 1f3717
##    (u"tyyni", [u"[Ll][Xp]tyyni[X]tyyn:tyyn LaatusanaSukijaPieni_ä ;"]),
Packit 1f3717
##    (u"peilityyni", [u"[Ll][Xp]peilityyni[X]peili[Bm]tyyn:peilityyn LaatusanaSukijaPieni_ä ;"]),
Packit 1f3717
##    (u"rasvatyyni", [u"[Ll][Xp]rasvatyyni[X]rasva[Bm]tyyn:rasvatyyn LaatusanaSukijaPieni_ä ;"]),
Packit 1f3717
Packit 1f3717
    # 39 nuori (3, 3). Tuomi, s. 182, 184.
Packit 1f3717
    #
Packit 1f3717
    (u"juuri",   [u"[Ln][Xp]juuri[X]juur[Ses][Ny]na:juurna NimisanaLiOlV_a ;",
Packit 1f3717
                  u"[Ln][Xp]juuri[X]juur[Ses][Ny]ra:juurra NimisanaLiOlV_a ;"]),
Packit 1f3717
    (u"nuori",   [u"[Lnl][Xp]nuori[X]nuor[Ses][Ny]na:nuorna NimisanaLiOlV_a ;",
Packit 1f3717
                  u"[Lnl][Xp]nuori[X]nuor[Ses][Ny]ra:nuorra NimisanaLiOlV_a ;"]),
Packit 1f3717
    (u"suuri",   [u"[Lnl][Xp]suuri[X]suur[Ses][Ny]na:suurna NimisanaLiOlV_a ;",
Packit 1f3717
                  u"[Lnl][Xp]suuri[X]suur[Ses][Ny]ra:suurra NimisanaLiOlV_a ;"]),
Packit 1f3717
Packit 1f3717
    # 46 hapsi (1, 1). Tuomi, s. 190. -- Vvfst tunnistaa muodot "hasten" ja "hapsien".
Packit 1f3717
    # hasna, hassa, hasten, hapsien   -- Nämä ovat niin harvinaisia, että tarvitseeko näitä indeksoinnissa?
Packit 1f3717
    #
Packit 1f3717
#    (u"hapsi", [u"[Ln][Xp]hapsi[X]has[Ses][Ny]na:hasna NimisanaLiOlV_a ;",
Packit 1f3717
#                u"[Ln][Xp]hapsi[X]has[Ses][Ny]sa:hassa NimisanaLiOlV_a ;"]),
Packit 1f3717
Packit 1f3717
    # 79 terve (4, 4). Tuomi s. 142, 143, 146.
Packit 1f3717
    #
Packit 1f3717
    (u"tuore", [u"[Ll][Xp]tuore[X]tuore[Ses][Ny]nna:tuorenna NimisanaLiOlV_a ;"]),
Packit 1f3717
    (u"vetre", [u"[Ll][Xp]vetre[X]vetre[Ses][Ny]nnä:vetrennä NimisanaLiOlV_ä ;"]),
Packit 1f3717
    (u"päre",  [u"[Ln][Xp]päre[X]päre[Ses][Ny]nnä:pärennä NimisanaLiOlV_ä ;"]),
Packit 1f3717
    (u"terve", [u"[Lnl][Xp]terve[X]terve[Ses][Ny]nnä:tervennä NimisanaLiOlV_ä ;"]),
Packit 1f3717
Packit 1f3717
    (u"kaivu", [u"[Ln][Xp]kaivu[X]kaivu:kaivu NimisanaPuu_a ;",
Packit 1f3717
                u"[Ln][Xp]kaivu[X]kaivu[Sill][Ny]usee:kaivuusee NimisanaLiOlN_a ;"]),
Packit 1f3717
]
Packit 1f3717
Packit 1f3717
Packit 1f3717
# Each entry is a pair (handler, base_forms).  generate_from_function() calls
# handler(line, base_form) for every lexicon line whose base form is listed
# in base_forms.
function_list = [
    # Words of the "herttua" type also have plural forms without the "o"
    # (herttuilla, etc.).
    #
    # 20 herttua (10, 10). Tuomi, pp. 114, 116, 121, 124, 125.
    #
    # The stem written out is the base form without its last character.
    (lambda line, word: outfile.write(
        u"[Ln][Xp]%s[X]%s:%s SukijaHerttua ;\n" % (word, word[:-1], word[:-1])),
     (u"aurtua", u"herttua", u"hierua", u"juolua", u"lastua",
      u"liettua", u"luusua", u"porstua", u"saarua", u"tanhua")),

    # "Vapaa" and "tienoo" are handled as inflection patterns (SukijaVapaa),
    # so the two entries below are disabled.
    #
    # 23 vapaa (8, 8). Tuomi, pp. 1, 2.
    #
    # (write_vapaa_tienoo,
    #  (u"kajaa", u"vajaa", u"vakaa", u"suklaa",
    #   u"harmaa", u"vapaa", u"nepaa", u"hurraa")),
    #
    # 24 tienoo (14, 14). Tuomi, p. 345. Inflects like "vapaa".
    #
    # (write_vapaa_tienoo,
    #  (u"kabeljoo", u"kalikoo", u"pikoo", u"talkoo", u"haloo",
    #   u"halloo", u"tienoo", u"poppoo", u"bigarroo", u"platoo",
    #   u"ehtoo", u"palttoo", u"ponttoo", u"nivoo")),

    # 33 lohi (2, 2). Tuomi, p. 151.
    # lohten, uuhten
    #
    (lambda line, word: outfile.write(
        u"[Ln][Xp]%s[X]%s:%s SukijaLohi ;\n" % (word, word[:-1], word[:-1])),
     (u"lohi",
      u"tyynenmerenlohi",   # Is in Joukahainen.
      u"uuhi")),

    # 34 lahti (2, 2). Tuomi, p. 193.
    # lahta (= lahtea), lahtein
    #
    (write_lahti, (u"haahti", u"lahti")),

    # "Ahven" inflects like "sisar", except that the singular essive is
    # also "ahvenna".
    #
    # 55 ahven (22, 23). Tuomi, pp. 246, 247, 301, 302.
    #
    # u"ien" is deliberately absent: it is handled separately (ikene, ien).
    (write_ahven,
     (u"aamen", u"ahven", u"haiven", u"huomen", u"häiven", u"höyhen",
      u"iljen", u"joutsen", u"jäsen", u"kymmen", u"kämmen", u"liemen",
      u"paimen", u"siemen", u"ruumen", u"terhen", u"taimen", u"tuumen",
      u"tyven", u"tyyven", u"uumen", u"vuomen")),

    # 69 kaunis (7, 6). Tuomi, p. 358.
    #
    # u"altis" is not listed here because it has its own handler below.
    (write_kaunis,
     (u"kallis", u"aulis", u"valmis", u"kaunis", u"tiivis")),

    (write_altis, (u"altis",)),

    # 11 paistaa (9, 9). Tuomi, pp. 1, 2, 8, 11, 12, 15, 17.
    #
    (write_virkkaa, (u"vilkkaa", u"virkkaa")),
    (write_paistaa, (u"paistaa",)),
    (write_paahtaa,
     (u"paahtaa", u"raistaa", u"saattaa", u"taittaa", u"palttaa", u"varttaa")),
]
Packit 1f3717
Packit 1f3717
def convert_to_dictionary(word_list):
    """Build a dictionary from a list of (key, value, ...) sequences.

    The first element of every entry becomes the key and the second element
    becomes the value; any further elements are ignored.  When a key occurs
    more than once, the last entry wins.
    """
    return dict((entry[0], entry[1]) for entry in word_list)
Packit 1f3717
Packit 1f3717
# Lookup table built from word_list: the first element of each entry (a base
# form such as u"pasuuna") maps to the entry's second element, which
# generate_word() expects to be a list, a tuple or a function.
sukija_dictionary = convert_to_dictionary (word_list)
Packit 1f3717
Packit 1f3717
Packit 1f3717
def error(line):
    """Report a malformed sukija_dictionary entry and terminate the program.

    Writes the offending line followed by a fixed message to standard error,
    then exits with status 1.
    """
    for text in (line, "Wrong format in sukija_dictionary.\n"):
        sys.stderr.write(text)
    sys.exit(1)
Packit 1f3717
Packit 1f3717
Packit 1f3717
def write_list(line, key, data):
    """Write every string in data to outfile, one per line.

    line -- the lexicon line being processed; only used for error reporting.
    key  -- the base form that selected data (unused here).
    data -- sequence of ready-made unicode strings.

    Any element that is not a unicode string is reported through error().
    """
    for entry in data:
        if type(entry) != UnicodeType:
            error(line)
        else:
            outfile.write(entry + u"\n")
Packit 1f3717
Packit 1f3717
Packit 1f3717
def write_tuple(line, key, g):
    """Generate spelling variants of line from a tuple-shaped dictionary entry.

    line -- the lexicon line being processed.
    key  -- the base form that selected g (unused here).
    g    -- one of two shapes:
            * (old, new1, new2, ...): strings; replace_and_write() is called
              once per replacement string with (line, old, new).
            * ((old, new), (old, new, old2, new2), ...): tuples; a pair is
              passed to replace_and_write(), while a longer tuple first
              replaces its third element with its fourth in line and then
              writes the result of replace(s, old, new) to outfile.

    Anything else is reported through error().
    """
    first_type = type(g[0])
    if first_type == UnicodeType:
        for variant in g[1:]:
            replace_and_write(line, g[0], variant)
    elif first_type == TupleType:
        for rule in g:
            if len(rule) == 2:
                replace_and_write(line, rule[0], rule[1])
            else:
                # NOTE(review): replace() is defined outside this block;
                # presumably it returns a complete output line -- confirm.
                s = line.replace(rule[2], rule[3])
                outfile.write(replace(s, rule[0], rule[1]))
    else:
        error(line)
Packit 1f3717
Packit 1f3717
Packit 1f3717
# Extract base form from a line.
Packit 1f3717
#
Packit 1f3717
# Group 1 is the text between "[Xp]" and the following "[X]"; it must be a
# non-empty run of characters other than "[".
base_form_re = re.compile (u"\\[Xp\\]([^[]+)\\[X\\]", re.UNICODE)
Packit 1f3717
Packit 1f3717
def generate_word(r, line, sukija_dictionary):
    """Generate the extra forms registered for the base form matched by r.

    r                 -- match object whose group 1 is the base form.
    line              -- the lexicon line the base form was found on.
    sukija_dictionary -- mapping from base form to a list (handled by
                         write_list), a tuple (handled by write_tuple) or a
                         callable taking (line, base_form).

    A base form that is not in sukija_dictionary is silently ignored.  A
    value of any other type is reported through error().
    """
    word = r.group(1)
    # Guard only the lookup: a KeyError raised inside one of the handlers is
    # a real error and must not be mistaken for "word not in dictionary".
    try:
        g = sukija_dictionary[word]
    except KeyError:
        # It is not an error if a word is not in sukija_dictionary.
        return

    if isinstance(g, list):
        write_list(line, word, g)
    elif isinstance(g, tuple):
        write_tuple(line, word, g)
    elif callable(g):
        g(line, word)
    else:
        error(line)
Packit 1f3717
Packit 1f3717
Packit 1f3717
def generate_from_function(r, line, function_list):
    """Call every handler in function_list that lists the matched base form.

    r             -- match object whose group 1 is the base form.
    line          -- the lexicon line the base form was found on.
    function_list -- sequence of (handler, base_forms) entries.

    Handlers are called as handler(line, base_form), in list order; a base
    form listed in several entries triggers every one of them.
    """
    # The generator is lazy, so each membership test still happens right
    # before the corresponding handler call.
    selected = (entry[0] for entry in function_list if r.group(1) in entry[1])
    for handler in selected:
        handler(line, r.group(1))
Packit 1f3717
Packit 1f3717
Packit 1f3717
def generate_xiljoona(line, out=None):
    """Write the "-iljona" spelling variants of a "-iljoona" lexicon line.

    line -- a lexicon line containing miljoona/biljoona/triljoona.
    out  -- stream to write to; defaults to the module-level outfile.

    Lines starting with "[Sn]": "Xiljoona@" and "Xiljoonat@" become
    "Xiljona@" and "Xiljonat@", and the result is written once.

    All other lines: the line is written with every "iljoon" replaced by
    "iljon".  If the line additionally starts with "@" and contains
    "iljoonien:", two more lines are written with "iljoonien" replaced by
    "iljoonain" and by "iljonain".
    """
    if out is None:
        out = outfile

    if line.startswith("[Sn]"):
        u = line
        for old_stem, new_stem in ((u"miljoona", u"miljona"),
                                   (u"biljoona", u"biljona"),
                                   (u"triljoona", u"triljona")):
            for tail in (u"@", u"t@"):
                u = u.replace(old_stem + tail, new_stem + tail)
        out.write(u)
        return

    out.write(line.replace(u"iljoon", u"iljon"))
    # str.find() returns -1 (which is truthy) when the substring is missing,
    # so it must not be used as a boolean; test membership instead.
    if line.startswith(u"@") and u"iljoonien:" in line:
        out.write(line.replace(u"iljoonien", u"iljoonain"))
        out.write(line.replace(u"iljoonien", u"iljonain"))
Packit 1f3717
Packit 1f3717
Packit 1f3717
# Matches "@", one of P/D/C, ".EI_VERTM", an optional ".ON" and a closing "@"
# (for example "@P.EI_VERTM.ON@" or "@D.EI_VERTM@").  The copy loop at the
# end of the file deletes every such match from each input line.
ei_vertm = re.compile (u"@[PDC][.]EI_VERTM([.]ON)?@", re.UNICODE)
Packit 1f3717
Packit 1f3717
Packit 1f3717
# Extra text for specific lexicon headers: when an output line is exactly one
# of the keys, write_sukija_additions() writes the matching value right after
# it.
sukija_additions = {
    # Word-class lexicons.
    u"LEXICON Asemosana\n": u"SukijaAsemosana ;\n",
    u"LEXICON Suhdesana\n": u"SukijaSuhdesana ;\n",
    u"LEXICON LukusananErikoisjälkiliite\n":
        u"SukijaLukusananErikoisjälkiliite ;\n",

    # Vocabulary lexicons.
    u"LEXICON Sanasto\n": u"SukijaSanasto;\n",
    u"LEXICON Sanasto_em\n": u"SukijaPoikkeavat_em ;\n",
    u"LEXICON Sanasto_ep\n": u"SukijaPoikkeavat_ep ;\n",
    u"LEXICON Sanasto_l\n": u"SukijaPoikkeavat_l ;\n",
    u"LEXICON Sanasto_n\n": u"SukijaPoikkeavat_n ;\n",
    u"LEXICON Sanasto_nl\n": u"SukijaPoikkeavat_nl ;\n",
    u"LEXICON Sanasto_p\n": u"Sukija_p ;\n",
    u"LEXICON Sanasto_t\n": u"SukijaPoikkeavat_t ;\n",
    u"LEXICON Sanasto_s\n": u"SukijaPoikkeavat_s ;\n",

    # Possessive-suffix lexicons.  A disabled alternative additionally put
    # u"[O1y]in:in Liitesana_X  ;\n" in front of each of these three values.
    u"LEXICON Omistusliite_a\n": u"[O2y]s:s Liitesana_a  ;\n",
    u"LEXICON Omistusliite_ä\n": u"[O2y]s:s Liitesana_ä  ;\n",
    u"LEXICON Omistusliite_aä\n": u"[O2y]s:s Liitesana_aä ;\n",
}
Packit 1f3717
Packit 1f3717
Packit 1f3717
def write_sukija_additions(line, sukija_additions):
    """Write the addition registered for line, if there is one.

    line             -- a line that has just been copied to outfile.
    sukija_additions -- mapping from complete lines to the text that is
                        written to outfile after them.

    Lines that are not keys of the mapping produce no output.
    """
    addition = sukija_additions.get(line)
    if addition is not None:
        outfile.write(addition)
Packit 1f3717
Packit 1f3717
Packit 1f3717
# Copy Voikko vocabulary and insert forms that Sukija needs.
Packit 1f3717
#
Packit 1f3717
# Argument sets for generate_from_pattern_2(); each one is passed after the
# current line.
_pattern_2_arguments = (
    (re_uusio, u"uusio", (u"usio",),          (u"usion",  u"usioon"),  u"NimisanaAutio_a", u"NimisanaPaperi_a"),
    (re_tio,   u"tio",   (u"tsio",),          (u"tsion",  u"tsioon"),  u"NimisanaAutio_a", u"NimisanaPaperi_a"),
    (re_aatio, u"aatio", (u"atio", u"atsio"), (u"atsion", u"atsioon"), u"NimisanaAutio_a", u"NimisanaPaperi_a"),
    (re_uutio, u"uutio", (u"utio", u"utsio"), (u"utsion", u"utsioon"), u"NimisanaAutio_a", u"NimisanaPaperi_a"),
)

# Read infile line by line until readline() returns the empty string.
for line in iter(infile.readline, u""):
    # The 4th infinitive (puhu+minen) is not an inflected form of a verb,
    # so such lines are dropped.
    if line.startswith(u"[Tn4]mi@"):
        continue

    line = ei_vertm.sub(u"", line)
    if u"=" in line:
        line = line.replace(u"@P.YS_EI_JATKOA.ON@", u"")

    if OPTIONS["sukija-ys"]:
        for flag in (u"@P.YS_EI_JATKOA.ON@",
                     u"@D.YS_EI_JATKOA@",
                     u"@C.YS_EI_JATKOA@"):
            line = line.replace(flag, u"")

    # Copy the (cleaned) line first, then append everything generated from it.
    outfile.write(line)
    write_sukija_additions(line, sukija_additions)

    generate_from_pattern_1(line, spelling_pattern_list)
    for arguments in _pattern_2_arguments:
        generate_from_pattern_2(line, *arguments)

    if re_8_9.search(line):
        write_8_9(line)

    r = base_form_re.search(line)
    if r:
        generate_word(r, line, sukija_dictionary)
        generate_from_function(r, line, function_list)
    if re_Xiljoona.search(line):
        generate_xiljoona(line)
infile.close()

outfile.write(u"\n\n\n")

# Append sukijafile, keeping only the text before the first "!" on each line.
# NOTE(review): on lines containing "!" the trailing newline is cut off along
# with the rest of the line -- confirm that this is intended.
for line in iter(sukijafile.readline, u""):
    outfile.write(line.partition(u"!")[0])
sukijafile.close()

outfile.close()