#!/bin/bash
# ----------------------------------------------------------------------------------
#
# Shukllachiska Kichwa del Ecuador, hunspell format dictionary generator
# Copyright (C) 2009 Arno Teigseth, Henry David Lara
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. See LICENSE.txt. If not, see <http://www.gnu.org/licenses/>.
#
# The author(s) can be contacted at arno at teigseth dot no
#
# ----------------------------------------------------------------------------------
#
# This is *NOT* the .dic file, but a script to generate such a file, taking the
# words from qu_EC.dic.MASTER. See the file qu_EC.dic.MASTER for dictionary creation rules.
#
# Generate qu_EC.dic file
# File locations used by this script
TMPFILE="/tmp/quiwork"        # Scratch file: cleaned master list, later the assembled entries
VFILE="/tmp/quiworkVerbs"     # Verb entries (plus the -yana forms built from adjectives)
AFILE="/tmp/quiworkADJs"      # Adjective entries
OFILE="/tmp/quiworkOther"     # Entries that are neither verbs nor adjectives
OUTFILE="qu_EC.dic"           # Generated dictionary
MASTERFILE="qu_EC.dic.MASTER" # Hand-maintained word list the dictionary is built from
#Functions
fixtilde () # Print a dictionary entry, adding an unaccented twin for accented words
{
  # Usage: fixtilde ENTRY
  #   ENTRY is a word optionally followed by "/" and flags (e.g. "cárcel/n").
  #
  # Output (stdout):
  #   - ENTRY contains none of á ó é ú í: ENTRY is printed unchanged.
  #   - otherwise two lines are printed, with everything from the first "/"
  #     onwards dropped:
  #       <word>/               the non-compound version, accents kept
  #       <unaccented word>//x  the compound version, accents removed
  #
  # Implemented with shell pattern matching and parameter expansion only, so
  # it does not depend on the locale (sed's y/// command rejects the accent
  # table outside a UTF-8 locale) and forks no processes per word.
  local entry=$1
  local word plain

  case $entry in
    *á*|*ó*|*é*|*ú*|*í*)
      # Does contain accents, print both versions
      word=${entry%%/*}          # Drop the first "/" and everything after it
      # Print non-compound-version
      printf '%s/\n' "$word"
      # Print compound-version
      plain=${word//á/a}
      plain=${plain//ó/o}
      plain=${plain//é/e}
      plain=${plain//ú/u}
      plain=${plain//í/i}
      printf '%s//x\n' "$plain"
      ;;
    *)
      # No accents, just print the entry
      printf '%s\n' "$entry"
      ;;
  esac
}
fixname () # Create names entries
{
  # Usage: fixname NAME
  #
  # Output (stdout):
  #   - NAME contains none of á ó é ú í: one line, "NAME//".
  #   - otherwise two lines:
  #       NAME/               the non-compound version, accents kept
  #       <unaccented NAME>//x  the compound version, accents removed
  #
  # Implemented with shell pattern matching and parameter expansion only, so
  # it does not depend on the locale (sed's y/// command rejects the accent
  # table outside a UTF-8 locale) and forks no processes per name.
  local name=$1
  local plain

  case $name in
    *á*|*ó*|*é*|*ú*|*í*)
      # Does contain accents, print both versions
      # Print non-compound-version
      printf '%s/\n' "$name"
      # Print compound-version
      plain=${name//á/a}
      plain=${plain//ó/o}
      plain=${plain//é/e}
      plain=${plain//ú/u}
      plain=${plain//í/i}
      printf '%s//x\n' "$plain"
      ;;
    *)
      # No accents, just print the entry
      printf '%s//\n' "$name"
      ;;
  esac
}
variant () # Create infix variants
{
  # Usage: variant ENTRY
  #   ENTRY has the form <verb>//<rules>, e.g. "tupana//r>v".
  #
  # Prints the base verb followed by one line per infixed form, each as
  # "<form>//v". Which forms are produced depends on the characters present
  # in <rules>:
  #   r  -ri-     ,  -mu-     h  -chi-     >  -kri-
  #   -  -ku-     +  -ra- / -riya-         w  -wa-
  # Combined infixes (e.g. -rimu-, -chiku-) are only produced when the rule
  # characters of both parts are present.
  #
  # All working variables are local, so calling this function leaves the
  # caller's environment untouched.
  local verb="${1%%//*}"   # Entry up to the first "//" (the base verb)
  local stem="${verb%na}"  # Stem without -na
  local rules="${1##*//}"  # Everything after the last "//"

  # OUT Base verb -na
  printf '%s\n' "${verb}//v"

  # OUT -ri- infix and variants
  if [[ $rules == *r* ]]; then
    printf '%s\n' "${stem}rina//v"
    # OUT -rimu- infix
    if [[ $rules == *,* ]]; then
      printf '%s\n' "${stem}rimuna//v"
    fi
    # OUT -richi- infix
    if [[ $rules == *h* ]]; then
      printf '%s\n' "${stem}richina//v"
    fi
    # OUT -rikri- infix
    # 2sam 10:10 "tuparikrichun"
    if [[ $rules == *\>* ]]; then
      printf '%s\n' "${stem}rikrina//v"
    fi
    # OUT -riku- infix and variants
    if [[ $rules == *-* ]]; then
      printf '%s\n' "${stem}rikuna//v"
    fi
  fi

  # OUT -kri- infix and variants
  if [[ $rules == *\>* ]]; then
    printf '%s\n' "${stem}krina//v"
  fi

  # OUT -ra- infix and variants (-ra- and -riya- are equivalent)
  if [[ $rules == *+* ]]; then
    printf '%s\n' "${stem}rana//v" "${stem}riyana//v"
  fi

  # OUT -ku- infix and variants
  if [[ $rules == *-* ]]; then
    printf '%s\n' "${stem}kuna//v" "${stem}nakuna//v"
    # OUT -kumu- infix and variants
    if [[ $rules == *,* ]]; then
      printf '%s\n' "${stem}kumuna//v"
      # OUT -kumuri- infix and variants
      # (my lesson 77: "uraykumurirkakunachu")
      if [[ $rules == *r* ]]; then
        printf '%s\n' "${stem}kumurina//v"
      fi
    fi
    # OUT -kuchi- infix
    if [[ $rules == *h* ]]; then
      printf '%s\n' "${stem}kuchina//v"
    fi
  fi

  # OUT -mu- infix and variants
  if [[ $rules == *,* ]]; then
    printf '%s\n' "${stem}muna//v"
    # OUT -muwa- infix
    if [[ $rules == *w* ]]; then
      printf '%s\n' "${stem}muwana//v"
    fi
    # OUT -muku- infix
    if [[ $rules == *-* ]]; then
      printf '%s\n' "${stem}mukuna//v" "${stem}namukuna//v"
    fi
  fi

  # OUT -wa- infix and variants
  if [[ $rules == *w* ]]; then
    printf '%s\n' "${stem}wana//v"
  fi

  # OUT -chi- infix and variants
  if [[ $rules == *h* ]]; then
    printf '%s\n' "${stem}china//v"
    # OUT -chiwa- infix
    if [[ $rules == *w* ]]; then
      printf '%s\n' "${stem}chiwana//v"
    fi
    # OUT -chiri- infix
    # 2sam 10:8 "alli|chiri|nakurka"
    if [[ $rules == *r* ]]; then
      printf '%s\n' "${stem}chirina//v"
    fi
    # OUT -chiku- infix
    if [[ $rules == *-* ]]; then
      printf '%s\n' "${stem}chikuna//v" "${stem}chinakuna//v"
    fi
    # OUT -chikri- infix and variants
    if [[ $rules == *\>* ]]; then
      printf '%s\n' "${stem}chikrina//v"
    fi
  fi
}
## END OF FUNCTIONS
#
# Refuse to run without the master word list; otherwise an existing
# $OUTFILE would be overwritten with an empty dictionary.
if [[ ! -r "$MASTERFILE" ]]; then
  echo "Cannot read master file: $MASTERFILE" >&2
  exit 1
fi
# The word loops below deliberately split their input on whitespace, but the
# words must never be treated as filename patterns, so turn globbing off.
set -f
# Start from an empty "other words" file (-f: no error on the first run)
rm -f -- "$OFILE"
: > "$OFILE"
# Prep: remove comments and rtrim whitespace
sed -e 's/#.*$//' -e 's/ *$//' "$MASTERFILE" | grep -v '^$' > "$TMPFILE"
# Find verbs
echo Forking verbs
grep 'v$' "$TMPFILE" > "$VFILE"
# Find adjectives
echo Forking adjectives
# "ungido" is an adjective, but the corresponding verb is
# "ungina", NOT "ungidoyana"
#
# NOTE using MASTERFILE not TEMPFILE here
# (the #NOCONJ marker lives in a comment, which TMPFILE no longer contains)
grep '/a' "$MASTERFILE" | grep -v '#NOCONJ' \
  | sed -e 's/#.*$//' -e 's/ *$//' | grep -v '^$' | grep 'a$' > "$AFILE"
echo Adding adjectives -yana form to verbs file
sed 's|/.*$|yana//r>+-,whv|' "$AFILE" >> "$VFILE"
# Add alli->alliYANA
#cat $AFILE |sed 's/\/.*$//'|grep -v k$ |sed 's/$/yana\/\/r\>+-,whv/' >> $VFILE
# achik->achiYANA (not achiKyana)
#cat $AFILE |sed 's/\/.*$//'|grep k$ |sed 's/k$/yana\/\/r\>+-,whv/' >> $VFILE
#echo Adding adjectives -naya form to dic file
#cat $AFILE |sed 's/\/.*$/naya\/\//' > $OFILE
#
# -naya is really for nouns:
# yakunaya = thirsty
# Find other words
echo 'Writing non-[verb/adjective]s'
# NOTE(review): entries ending in "a" are filtered out here and reach the
# output only through the -yana verb forms added above - confirm intended.
for n in $(grep -v 'v$' "$TMPFILE" | grep -v 'a$'); do
  # The words from MASTER ending in /x should really go out here
  #echo Adding word $n
  echo -n "."
  fixtilde "$n" >> "$OFILE"
done
# used to be just
# cat $TMPFILE | grep -v v$ >> $OFILE
# but cárcel should be added as two:
# cárcel/
# carcel//x
# Prep outfile: TMPFILE is reused from here on to collect the final entries
rm -f -- "$TMPFILE"
# Put infixes onto verbs, into outfile.
for i in $(<"$VFILE"); do
  echo "Creating variants of $i"
  variant "$i" >> "$TMPFILE" # Create variants of verb, according to the rules
done
# Append names
for n in $(<names.txt); do
  echo "Adding name $n"
  fixtilde "$n" >> "$TMPFILE"
done
# Create outfile
cat "$OFILE" >> "$TMPFILE"
{
  # Count it: the first line of the .dic file is the number of entries
  sort -u "$TMPFILE" | grep -c '$'
  # Sort it, removing duplicates
  sort -u "$TMPFILE"
} > "$OUTFILE"
#Clean up
#rm $TMPFILE
#rm $VFILE
#rm $AFILE
#rm $OFILE