|
Packit |
ca9683 |
#!/usr/bin/perl -w
|
|
Packit |
ca9683 |
# Copyright (C) 2002-2003 Nadav Har'El and Dan Kenigsberg
|
|
Packit |
ca9683 |
#
|
|
Packit |
ca9683 |
# Merges several dictionaries with prefix hints, into one dictionary with
|
|
Packit |
ca9683 |
# or'ed prefix hints.
|
|
Packit |
ca9683 |
# Usage: cat dict1 dict2 ... | pmerge -p prefixesout > wordsout
|
|
Packit |
ca9683 |
|
|
Packit |
ca9683 |
use IO::File;
|
|
Packit |
ca9683 |
use Carp;
|
|
Packit |
ca9683 |
|
|
Packit |
ca9683 |
require "PrefixBits.pl";
|
|
Packit |
ca9683 |
|
|
Packit |
ca9683 |
# The $PS_* globals are presumably defined by PrefixBits.pl, which is only
# loaded at run time by the "require" above. At compile time "perl -w"
# therefore sees some of them mentioned a single time and warns "used only
# once: possible typo". Declaring them with "our" is the documented way to
# silence that warning, and unlike a self-assignment it does nothing at run
# time.
our ($PS_L, $PS_B, $PS_VERB, $PS_NONDEF, $PS_IMPER, $PS_MISC);
|
|
Packit |
ca9683 |
|
|
Packit |
ca9683 |
use Getopt::Std;
|
|
Packit |
ca9683 |
# Command-line options:
#   -p FILE   (required) file that receives the merged prefix specifiers.
my %opts;
if (!getopts('p:', \%opts)) {
    # getopts returns false when it meets an invalid option.
    exit(1);
}

my $out_prefixes = $opts{p};

# Refuse to start without -p. Otherwise the whole input would be read and
# merged before the missing output file name is noticed at open time.
if (!defined $out_prefixes || $out_prefixes eq '') {
    print STDERR "Usage: cat dict1 dict2 ... | $0 -p prefixesout > wordsout\n";
    exit(1);
}

my $specifier;   # prefix bits chosen for the input line being processed
my %specifiers;  # word => bitwise OR of the specifiers of all its lines
|
|
Packit |
ca9683 |
|
|
Packit |
ca9683 |
# Read the concatenated dictionaries line by line. For each line, pick the
# prefix specifier it implies, reduce the line to the bare word, and OR the
# specifier into that word's entry in %specifiers.
while (defined(my $entry = <>)) {
    chomp $entry;
    #next if /---/; # TODO: this isn't needed. remove it.
    #s/-$//o; # TODO: dan added this. remove it.
    $entry =~ s/\+ / /o; # The Makefile was supposed to remove those, but still...

    if ($entry =~ s/^L//o) {
        # A leading "L" marker is stripped and selects the L specifier.
        $specifier = $PS_L;
    } elsif ($entry =~ s/^B//o) {
        # A leading "B" marker is stripped and selects the B specifier.
        $specifier = $PS_B;
    } elsif ($entry !~ /^[א-ת]/o) {
        next; # not a word
    } elsif ($entry =~ s/-$//o) {
        # In wolig.pl's simple output (without -d), this specified smichut,
        # and we shouldn't allow prefixes with he hayedia. This case is
        # useful for smichut words in extrawords.
        $specifier = $PS_NONDEF;
    } elsif ($entry =~ / פ,/o) {
        # Lines tagged with this letter (presumably verbs, going by the
        # specifier names): the first matching description decides.
        $specifier =
              $entry =~ / .*ציווי/o ? $PS_IMPER
            : $entry !~ / .*הווה/o  ? $PS_VERB
            : ($entry =~ / .*סמיכות/o || $entry =~ m:,כינוי/:o) ? $PS_NONDEF
            :                          $PS_ALL;
    } elsif ($entry =~ /[ ,][עת],/) {
        # Lines tagged with one of these two letters: some descriptions
        # restrict the word to the NONDEF specifier, the rest get ALL.
        $specifier =
            ($entry =~ / .*סמיכות/o || $entry =~ m:,של/:o || $entry =~ / .*פרטי/o)
            ? $PS_NONDEF
            : $PS_ALL;
    } else {
        $specifier = $PS_ALL;
    }

    $entry =~ s/ .*$//; # remove all the "-d" explanations after the word
    $specifiers{$entry} |= $specifier;
}
|
|
Packit |
ca9683 |
|
|
Packit |
ca9683 |
# Emit the merged dictionary: the sorted word list goes to standard output,
# one word per line, and the prefix file named by -p receives one character
# per word, in the same order, whose code is that word's merged specifier.
my @words = sort(keys %specifiers);

my $F = IO::File->new;
$F->open($out_prefixes, "w")
    or croak "Couldn't write -p parameter '$out_prefixes': $!";
print {$F} map { chr($specifiers{$_}) } @words
    or croak "Error writing prefix file '$out_prefixes': $!";
# Buffered write errors (e.g. a full disk) only surface at close time, so the
# close result must be checked before reporting success.
$F->close
    or croak "Error closing prefix file '$out_prefixes': $!";

print map { $_."\n" } @words;

exit 0;
|