#!/usr/bin/perl
# Author: Mashrab Kuvatov <kmashrab@uni-bremen.de>
#
# new_words.pl - outputs the words in wordlist2 which are not in wordlist1.
use Encode;

use strict;
use warnings;

# Usage: new_words.pl wordlist1 wordlist2
#
# Prints every line of wordlist2 whose word is not present in wordlist1,
# neither verbatim nor after lowercasing it.  Both files are decoded as
# UTF-8, one word per line, and the output is written as UTF-8.

binmode( STDOUT, ":utf8" );
if ( @ARGV != 2 ) {
	print "Usage: new_words.pl wordlist1 wordlist2\nOutputs words in wordlist2 which are not in wordlist1.\n";
	print Encode::decode_utf8("\nФойдаланиш: new_words.pl wordlist1 wordlist2\n");
	print Encode::decode_utf8("wordlist2'дан wordlist1'га кирмаган сўзларни кўрсатади.\n");
	exit;
}

my ( $known_path, $candidate_path ) = @ARGV;

# Read wordlist 1 straight into a lookup table.  Three-argument open keeps
# a file name such as ">foo" or "cmd |" from being treated as a mode.
my %is_word;
open( my $known_fh, '<', $known_path )
	or die "Cannot open file: $known_path ($!)\n";
while ( my $raw_line = <$known_fh> ) {
	$is_word{ word_key( Encode::decode_utf8($raw_line) ) } = 1;
}
close($known_fh);

# Stream wordlist 2 and report each line whose word is unknown.
open( my $candidate_fh, '<', $candidate_path )
	or die "Cannot open file: $candidate_path ($!)\n";
while ( my $raw_line = <$candidate_fh> ) {
	my $new_word = Encode::decode_utf8($raw_line);
	my $key      = word_key($new_word);

	# The line is printed exactly as it was read; only the lookup uses the
	# terminator-free key.
	print $new_word unless $is_word{$key} || $is_word{ lc($key) };
}
close($candidate_fh);

# Returns the given line without its trailing line terminator (LF or CRLF),
# so that a final line lacking a newline, or a list saved with CRLF line
# endings, still compares equal to the same word in the other list.
sub word_key {
	my ($line) = @_;
	$line =~ s/\r?\n\z//;
	return $line;
}