#!/bin/bash
# ----------------------------------------------------------------------------------
#
#    Shukllachiska Kichwa del Ecuador, hunspell format dictionary generator
#    Copyright (C) 2009 Arno Teigseth, Henry David Lara
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Affero General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program. See LICENSE.txt. If not, see <http://www.gnu.org/licenses/>.
#
#    The author(s) can be contacted at arno at teigseth dot no
#
# ----------------------------------------------------------------------------------
#
# This is *NOT* the .dic file, but a script to generate such a file, taking the
# words from qu_EC.dic.MASTER. See the file qu_EC.dic.MASTER for dictionary creation rules.
#
# Generate qu_EC.dic file

# Variables
# Intermediate work files live in a private directory created by mktemp, so
# their names cannot be guessed (or pre-created as symlinks) by other users
# of a shared temporary directory.
WORKDIR=$(mktemp -d "${TMPDIR:-/tmp}/quiwork.XXXXXX") || {
  echo "Cannot create temporary work directory" >&2
  exit 1
}
# Remove the work files when the script exits.
# Set QU_KEEP_WORKFILES=1 to keep them for inspection.
trap '[ -n "${QU_KEEP_WORKFILES:-}" ] || rm -rf -- "$WORKDIR"' EXIT

TMPFILE=$WORKDIR/quiwork        # general scratch file
VFILE=$WORKDIR/quiworkVerbs     # verb entries
AFILE=$WORKDIR/quiworkADJs      # adjective entries
OFILE=$WORKDIR/quiworkOther     # all other entries
OUTFILE=qu_EC.dic               # generated dictionary
MASTERFILE=qu_EC.dic.MASTER     # master word list (input)
#Functions
 #######################################
 # Print the dictionary line(s) for one entry, handling accented vowels.
 #
 # If the entry contains any of á ó é ú í, two lines are printed:
 #   1. the word (the text before the first "/") followed by "/"
 #      (non-compound version);
 #   2. the same word with the accents replaced by plain vowels, followed
 #      by "//x" (compound version).
 # Anything after the first "/" of an accented entry is discarded.
 # An entry without accents is printed unchanged.
 #
 # Arguments: $1 - one dictionary entry, e.g. "cárcel/" or "wasi//n"
 # Outputs:   the resulting line(s) on stdout
 #######################################
 fixtilde () {
   local entry=$1
   local word plain

   # One pattern per vowel instead of a bracket expression: this matches the
   # exact characters even in a non-UTF-8 locale, and never globs filenames.
   case $entry in
     *á*|*ó*|*é*|*ú*|*í*)
       word=${entry%%/*}          # text before the first "/"
       plain=${word//á/a}
       plain=${plain//ó/o}
       plain=${plain//é/e}
       plain=${plain//ú/u}
       plain=${plain//í/i}
       # Print non-compound-version
       printf '%s/\n' "$word"
       # Print compound-version
       printf '%s//x\n' "$plain"
       ;;
     *)
       # No accents, just print the entry
       printf '%s\n' "$entry"
       ;;
   esac
 }
 #######################################
 # Print the dictionary line(s) for one name.
 #
 # If the name contains any of á ó é ú í, two lines are printed:
 #   1. the name followed by "/" (non-compound version);
 #   2. the name with the accents replaced by plain vowels, followed by
 #      "//x" (compound version).
 # A name without accents is printed once, followed by "//".
 #
 # Arguments: $1 - one name, e.g. "José"
 # Outputs:   the resulting line(s) on stdout
 #######################################
 fixname () {
   local name=$1
   local plain

   # One pattern per vowel instead of a bracket expression: this matches the
   # exact characters even in a non-UTF-8 locale, and never globs filenames.
   case $name in
     *á*|*ó*|*é*|*ú*|*í*)
       plain=${name//á/a}
       plain=${plain//ó/o}
       plain=${plain//é/e}
       plain=${plain//ú/u}
       plain=${plain//í/i}
       # Print non-compound-version
       printf '%s/\n' "$name"
       # Print compound-version
       printf '%s//x\n' "$plain"
       ;;
     *)
       # No accents, just print the entry
       printf '%s//\n' "$name"
       ;;
   esac
 }
 #######################################
 # Create infix variants of one verb.
 #
 # The argument has the form "<verb>//<rules>". The verb is the text before
 # the first "//"; the rules are the text after the last "//". The stem is
 # the verb with a trailing "na" removed. The base verb is always printed;
 # each rule character found in <rules> enables further "<stem><infix>na//v"
 # lines:
 #   r  -ri-        >  -kri-       +  -ra- / -riya-
 #   -  -ku-        ,  -mu-        w  -wa-        h  -chi-
 # Combined infixes (e.g. -rimu-, -kumuri-, -chiku-) are printed only when
 # all of the rule characters involved are present.
 #
 # NOTE(review): if the argument contains no "//", the whole argument is used
 # as the rule string, so letters of the verb itself act as rules. This
 # matches the previous sed-based behavior; confirm that it is intended.
 #
 # Arguments: $1 - verb entry, e.g. "tupana//r>v"
 # Outputs:   one dictionary line per variant on stdout
 #######################################
 variant () {
   local entry=$1
   local verb=${entry%%//*}   # Remove the first // and whatever follows.
   local rules=${entry##*//}  # Remove everything up to the last //.
   local stem=${verb%na}      # Stem without -na

   # OUT Base verb -na
   printf '%s\n' "${verb}//v"

   # OUT -ri- infix and variants
   if [[ $rules == *r* ]]; then
     printf '%s\n' "${stem}rina//v"

     # OUT -rimu- infix
     if [[ $rules == *,* ]]; then
       printf '%s\n' "${stem}rimuna//v"
     fi

     # OUT -richi- infix
     if [[ $rules == *h* ]]; then
       printf '%s\n' "${stem}richina//v"
     fi

     # OUT -rikri- infix
     # 2sam 10:10 "tuparikrichun"
     if [[ $rules == *\>* ]]; then
       printf '%s\n' "${stem}rikrina//v"
     fi

     # OUT -riku- infix and variants
     if [[ $rules == *-* ]]; then
       printf '%s\n' "${stem}rikuna//v"
     fi
   fi

   # OUT -kri- infix and variants
   if [[ $rules == *\>* ]]; then
     printf '%s\n' "${stem}krina//v"
   fi

   # OUT -ra- infix and variants (-ra- and -riya- are equivalent)
   if [[ $rules == *+* ]]; then
     printf '%s\n' "${stem}rana//v" "${stem}riyana//v"
   fi

   # OUT -ku- infix and variants
   if [[ $rules == *-* ]]; then
     printf '%s\n' "${stem}kuna//v" "${stem}nakuna//v"

     # OUT -kumu- infix and variants
     if [[ $rules == *,* ]]; then
       printf '%s\n' "${stem}kumuna//v"

       # OUT -kumuri- infix and variants
       # (my lesson 77: "uraykumurirkakunachu")
       if [[ $rules == *r* ]]; then
         printf '%s\n' "${stem}kumurina//v"
       fi
     fi

     # OUT -kuchi- infix
     if [[ $rules == *h* ]]; then
       printf '%s\n' "${stem}kuchina//v"
     fi
   fi

   # OUT -mu- infix and variants
   if [[ $rules == *,* ]]; then
     printf '%s\n' "${stem}muna//v"

     # OUT -muwa- infix
     if [[ $rules == *w* ]]; then
       printf '%s\n' "${stem}muwana//v"
     fi

     # OUT -muku- infix
     if [[ $rules == *-* ]]; then
       printf '%s\n' "${stem}mukuna//v" "${stem}namukuna//v"
     fi
   fi

   # OUT -wa- infix and variants
   if [[ $rules == *w* ]]; then
     printf '%s\n' "${stem}wana//v"
   fi

   # OUT -chi- infix and variants
   if [[ $rules == *h* ]]; then
     printf '%s\n' "${stem}china//v"

     # OUT -chiwa- infix
     if [[ $rules == *w* ]]; then
       printf '%s\n' "${stem}chiwana//v"
     fi

     # OUT -chiri- infix
     # 2sam 10:8 "alli|chiri|nakurka"
     if [[ $rules == *r* ]]; then
       printf '%s\n' "${stem}chirina//v"
     fi

     # OUT -chiku- infix
     if [[ $rules == *-* ]]; then
       printf '%s\n' "${stem}chikuna//v" "${stem}chinakuna//v"
     fi

     # OUT -chikri- infix and variants
     if [[ $rules == *\>* ]]; then
       printf '%s\n' "${stem}chikrina//v"
     fi
   fi
 }
## END OF FUNCTIONS

#
# Stop early when the master word list is missing: otherwise the script
# would silently build a dictionary without any of the master words.
if [[ ! -r $MASTERFILE ]]; then
  echo "Cannot read master file: $MASTERFILE" >&2
  exit 1
fi

# Start with an empty "other words" file; entries are appended to it below.
# Truncating (rather than rm) stays quiet when the file does not exist yet.
: > "$OFILE"

#  Prep: remove comments and rtrim whitespace
sed -e 's/#.*$//' -e 's/ *$//' < "$MASTERFILE" | grep -v '^$' > "$TMPFILE"


# Find verbs
echo Forking verbs
grep 'v$' < "$TMPFILE" > "$VFILE"

# Find adjectives
echo Forking adjectives
# "ungido" is an adjective, but the corresponding verb is
# "ungina", NOT "ungidoyana"
#
# NOTE using MASTERFILE not TMPFILE here: the '#NOCONJ' marker is a comment,
# so it is only visible before comments are stripped.
grep '/a' < "$MASTERFILE" \
  | grep -v '#NOCONJ' \
  | sed -e 's/#.*$//' -e 's/ *$//' \
  | grep -v '^$' \
  | grep 'a$' > "$AFILE"

echo Adding adjectives -yana form to verbs file
# Everything from the first "/" on is replaced by "yana" plus the full rule set.
sed 's|/.*$|yana//r>+-,whv|' < "$AFILE" >> "$VFILE"

# Add alli->alliYANA
#cat $AFILE |sed 's/\/.*$//'|grep -v k$ |sed 's/$/yana\/\/r\>+-,whv/' >> $VFILE

# achik->achiYANA (not achiKyana)
#cat $AFILE |sed 's/\/.*$//'|grep k$ |sed 's/k$/yana\/\/r\>+-,whv/' >> $VFILE


#echo Adding adjectives -naya form to dic file
#cat $AFILE |sed 's/\/.*$/naya\/\//' > $OFILE
#
# -naya is really for nouns:
# yakunaya = thirsty


# Find other words
echo 'Writing non-[verb/adjective]s'

# Each whitespace-separated token is one entry. "read -a" splits the line the
# same way an unquoted expansion would, but without glob-expanding entries.
grep -v 'v$' < "$TMPFILE" | grep -v 'a$' \
  | while read -r -a words || (( ${#words[@]} > 0 )); do
      for n in "${words[@]}"; do
        # The words from MASTER ending in /x should really go out here

        #echo Adding word $n
        printf '.'
        fixtilde "$n" >> "$OFILE"
      done
    done


# used to be just
# cat $TMPFILE | grep -v v$ >> $OFILE
# but cárcel should be added as two:
# cárcel/
# carcel//x



# Prep outfile: the scratch file is reused to collect all generated entries.
: > "$TMPFILE"

# Put infixes onto verbs, into outfile.
while read -r -a words || (( ${#words[@]} > 0 )); do
  for i in "${words[@]}"; do
    echo "Creating variants of $i"
    variant "$i" >> "$TMPFILE" # Create variants of verb, according to the rules
  done
done < "$VFILE"

# Append names
# NOTE(review): names go through fixtilde, not fixname, although fixname is
# described as the function that creates names entries - confirm intent.
if [[ -r names.txt ]]; then
  while read -r -a words || (( ${#words[@]} > 0 )); do
    for n in "${words[@]}"; do
      echo "Adding name $n"
      fixtilde "$n" >> "$TMPFILE"
    done
  done < names.txt
else
  echo "Cannot read names.txt, no names added" >&2
fi

# Create outfile
cat -- "$OFILE" >> "$TMPFILE"

# Sort it once, removing duplicates ("sort -o" may safely name its own input).
sort -u -o "$TMPFILE" -- "$TMPFILE"

# Count it: the first line of the dictionary is the number of entries.
grep -c '$' < "$TMPFILE" > "$OUTFILE"

# Followed by the sorted, de-duplicated entries.
cat -- "$TMPFILE" >> "$OUTFILE"
#Clean up
#rm $TMPFILE
#rm $VFILE
#rm $AFILE
#rm $OFILE