#!/usr/bin/perl -w
# Copyright (C) 2002-2003 Nadav Har'El and Dan Kenigsberg
#
# Merges several dictionaries with prefix hints, into one dictionary with
# or'ed prefix hints.
# Usage: cat dict1 dict2 ... | pmerge -p prefixesout > wordsout

use IO::File;
use Carp;

# Supplies the $PS_* prefix-specifier values used throughout this script.
require "PrefixBits.pl";

# "perl -w" warns about variables only used once (it assumes they are a
# typo). Assigning the variables to themselves mentions each one twice and
# so gets rid of this warning.
($PS_L,$PS_B,$PS_VERB,$PS_NONDEF,$PS_IMPER,$PS_MISC)=
	($PS_L,$PS_B,$PS_VERB,$PS_NONDEF,$PS_IMPER,$PS_MISC);

use Getopt::Std;
my %opts;
# -p - output prefix file.
if(!getopts('p:', \%opts)){
	exit(1);
}

# The prefix hints are always written to the file named by -p, so the option
# is mandatory. Reject a missing or empty value right away instead of failing
# with an obscure open error only after all of the input has been read.
my $out_prefixes=$opts{p};
if(!defined($out_prefixes) || $out_prefixes eq ''){
	print STDERR "Usage: cat dict1 dict2 ... | $0 -p prefixesout > wordsout\n";
	exit(1);
}

# Maps each bare word to the bitwise OR of the prefix specifiers implied by
# every input line that mentioned it.
my %specifiers;

LINE: while(defined(my $entry = <>)){
	chomp $entry;
	$entry =~ s/\+ / /; # The Makefile was supposed to remove those, but still...

	my $hint;	# specifier implied by this one line
	if($entry =~ s/^L//){
		# Explicit "L" marker at the start of the line (stripped here).
		$hint = $PS_L;
	} elsif($entry =~ s/^B//){
		# Explicit "B" marker at the start of the line (stripped here).
		$hint = $PS_B;
	} elsif($entry !~ /^[א-ת]/){
		next LINE; # not a word
	} elsif($entry =~ s/-$//){
		# In wolig.pl's simple output (without -d), a trailing "-" specified
		# smichut, and we shouldn't allow prefixes with he hayedia. This
		# case is useful for smichut words in extrawords.
		$hint = $PS_NONDEF;
	} elsif($entry =~ / פ,/){
		# Line whose "-d" explanation carries the " פ," tag: the tests run
		# in order, so the imperative check wins, then "anything that is
		# not present tense", and only present-tense forms reach the
		# smichut/kinui check.
		$hint =
			  $entry =~ / .*ציווי/ ? $PS_IMPER
			: $entry !~ / .*הווה/ ? $PS_VERB
			: ($entry =~ / .*סמיכות/ || $entry =~ m:,כינוי/:) ? $PS_NONDEF
			: $PS_ALL;
	} elsif($entry =~ /[ ,][עת],/){
		# Line tagged "ע," or "ת,": smichut, "של" and "פרטי" forms are
		# restricted to $PS_NONDEF, everything else accepts all prefixes.
		$hint =
			($entry =~ / .*סמיכות/ || $entry =~ m:,של/: || $entry =~ / .*פרטי/)
				? $PS_NONDEF
				: $PS_ALL;
	} else {
		$hint = $PS_ALL;
	}

	$entry =~ s/ .*$//;	# remove all the "-d" explanations after the word
	$specifiers{$entry} |= $hint;
}

# Emit the merged dictionary. The sorted word list goes to stdout, one word
# per line; the prefix file named by -p receives exactly one byte per word
# (chr of that word's or'ed specifier), in the same order as the word list.
my @words = sort(keys %specifiers);

my $F = IO::File->new;
$F->open($out_prefixes,"w")
	or croak "Couldn't write -p parameter '$out_prefixes': $!";
# The prefix file is binary data: make sure no I/O layer rewrites a hint byte
# (e.g. newline translation of a byte whose value happens to be 10).
binmode($F);
print {$F} map { chr($specifiers{$_}) } @words
	or croak "Couldn't write to -p parameter '$out_prefixes': $!";
# Buffered write errors (such as a full disk) only surface at close time.
$F->close
	or croak "Couldn't finish writing -p parameter '$out_prefixes': $!";

print map { $_."\n" } @words
	or croak "Couldn't write word list to standard output: $!";
close(STDOUT)
	or croak "Couldn't finish writing word list to standard output: $!";

exit 0;