Blame describe_unicodes

Packit 0ea5da
#!/usr/bin/perl
Packit 0ea5da
Packit 0ea5da
#     describe_unicodes -- add human readable descriptions to unicodes
Packit 0ea5da
#     Copyright © 2005 Anton Zinoviev <anton@lml.bas.bg>
Packit 0ea5da
Packit 0ea5da
#     This program is free software; you can redistribute it and/or modify
Packit 0ea5da
#     it under the terms of the GNU General Public License as published by
Packit 0ea5da
#     the Free Software Foundation; either version 2 of the License, or
Packit 0ea5da
#     (at your option) any later version.
Packit 0ea5da
Packit 0ea5da
#     This program is distributed in the hope that it will be useful,
Packit 0ea5da
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 0ea5da
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit 0ea5da
#     GNU General Public License for more details.
Packit 0ea5da
Packit 0ea5da
#     If you have not received a copy of the GNU General Public License
Packit 0ea5da
#     along with this program, write to the Free Software Foundation, Inc.,
Packit 0ea5da
#     59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
Packit 0ea5da
Packit 0ea5da
use strict;
Packit 0ea5da
Packit 0ea5da
# Compressed charmap file that is decompressed with zcat and parsed below
# to obtain the description of each Unicode code point.
my $utf = "/usr/share/i18n/charmaps/UTF-8.gz";
Packit 0ea5da
Packit 0ea5da
# Write a diagnostic message to standard error.
#
# All arguments are interpolated into a single string, so they are joined
# with the list separator ($", a single space unless it has been changed).
# No newline is appended; the caller supplies one if it is wanted.
sub debug {
    my $message = "@_";
    print STDERR $message;
}
Packit 0ea5da
Packit 0ea5da
# Command-line handling.
#
# The program takes exactly one argument, the FILE to annotate.
#   * No argument at all: there is nothing to process, so the usage summary
#     is printed on standard error and the program exits with status 1
#     (previously this only surfaced later as a failed open of an empty
#     file name).
#   * "--help" or "-h" as first argument: the usage summary is printed on
#     standard error and the program exits with status 0.
#   * Otherwise the first argument is taken as the file name.
my $usage = <<EOT;
Usage:	describe_unicodes FILE

Adds a comment lines in FILE for every substring of the form "U+NNNN",
where NNNN are hexadecimal digits.
EOT

unless (@ARGV) {
    print STDERR $usage;
    exit 1;
}

if ($ARGV[0] eq "--help" || $ARGV[0] eq "-h") {
    print STDERR $usage;
    exit 0
}

my $file = $ARGV[0];
Packit 0ea5da
Packit 0ea5da
# Load the character descriptions from the compressed charmap $utf into
# %descriptions, keyed by the numeric value of the code point.
#
# Everything up to and including the "CHARMAP" line is skipped.  Every
# following line, until "END CHARMAP", must contain an entry of the form
#
#     <Uxxxx>           BYTES  DESCRIPTION
#     <Uxxxx>..<Uyyyy>  BYTES  DESCRIPTION
#
# The second form denotes a range; its description is recorded for every
# code point from xxxx to yyyy inclusive.  Any other line is reported as a
# syntax error together with its line number.
my $line = 0;
my %descriptions;
# List-form pipe open: zcat is executed directly with $utf as its single
# argument, so the path is never interpreted by a shell.
open(my $charmap, "-|", "zcat", $utf) or die "$0: zcat $utf: $!\n";
while (<$charmap>) {
    $line++;
    last if (/^[[:space:]]*CHARMAP[[:space:]]*$/);
}
while (<$charmap>) {
    $line++;
    last if (/^[[:space:]]*END[[:space:]]*CHARMAP[[:space:]]*$/);
    # The optional leading group picks up the start of a range; without it
    # the pattern would match only the last <U...> on a range line and the
    # rest of the range would be left without a description.
    my ($range_start, $range_end, $description) =
	/(?:<U([0-9a-fA-F]+)>\.\.)?<U([0-9a-fA-F]+)>[[:space:]]+[^[:space:]]+[[:space:]]+(.*)/
	or die "$0: $utf: syntax error on line $line: $_\n";
    my $last_code = hex ($range_end);
    my $first_code = defined $range_start ? hex ($range_start) : $last_code;
    # A reversed range is malformed; still record the code point that a
    # single-entry line would have produced.
    $first_code = $last_code if ($first_code > $last_code);
    foreach my $code ($first_code .. $last_code) {
	$descriptions{$code} = $description;
    }
}
# The exit status of zcat is deliberately not checked: reading stops at
# "END CHARMAP", so zcat may be terminated before it has written all of
# its output.
close $charmap;
Packit 0ea5da
Packit 0ea5da
# Rewrite $file in place, adding a "# U+NNNN:   description" comment line
# after every line that mentions a code point as "U+NNNN".
#
# The whole file is first read into @output:
#   * description comments written by an earlier run are dropped, so the
#     program can be run repeatedly on the same file;
#   * every other line is kept unchanged;
#   * the rest of a line after "#" is ignored when looking for code points;
#   * each "U+NNNN" found on a line yields one description comment, placed
#     after that line.  A code point without a known description gets an
#     empty description.
# Then the file is overwritten with the collected lines.
#
# Three-argument open is used so that a file name beginning with ">" or
# ending with "|" is treated as a plain name and never as an open mode or
# a command.
my @output;
open(my $in, "<", $file) or die "$0: $file: $!\n";
while (<$in>) {
    push (@output, $_) unless (/^\# U\+[0-9a-fA-F]+:   [^ ]/);
    s/#.*//;
    while (/U\+([0-9a-fA-F]+)/g) {
	my $digits = $1;
	my $description = $descriptions{hex ($digits)};
	$description = "" unless (defined $description);
	push (@output, sprintf ("# U+%s:   %s\n", $digits, $description));
    }
}
close $in;

open(my $out, ">", $file) or die "$0: $file: $!\n";
print {$out} @output or die "$0: $file: $!\n";
# Buffered write errors (for example a full disk) are only reported when
# the handle is closed, so the result of close must be checked.
close $out or die "$0: $file: $!\n";
Packit 0ea5da