#!/usr/bin/perl

# describe_unicodes -- add human readable descriptions to unicodes
# Copyright © 2005 Anton Zinoviev <anton@lml.bas.bg>

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# If you have not received a copy of the GNU General Public License
# along with this program, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

use strict;
use warnings;

# Compressed character map that is read through zcat further down; its
# CHARMAP section supplies the description for each code point.
my $utf = "/usr/share/i18n/charmaps/UTF-8.gz";

# debug LIST
#
# Write the arguments to STDERR.  The list is interpolated into a string,
# so its elements are joined with the list separator $" (a single space
# unless it has been changed); no newline is appended.
sub debug {
    print STDERR "@_";
}

# Usage handling.  The help text goes to STDERR for -h/--help (exit status
# 0) and also when no FILE argument is given (exit status 1), so that the
# script never goes on to open an undefined file name.  @ARGV is tested
# first so that $ARGV[0] is never compared while undefined.
if (!@ARGV || $ARGV[0] eq "--help" || $ARGV[0] eq "-h") {
    print STDERR <<EOT;
Usage: describe_unicodes FILE

Adds a comment lines in FILE for every substring of the form "U+NNNN",
where NNNN are hexadecimal digits.
EOT
    exit(@ARGV ? 0 : 1);
}

my $file = $ARGV[0];

# Build %descriptions (numeric code point => description) from the CHARMAP
# section of $utf.
my $line = 0;    # number of lines read so far, used in error messages
my %descriptions;

# List-form pipe open: zcat is started directly, without going through a
# shell, and the handle is lexical instead of a package-wide bareword.
open my $charmap, '-|', 'zcat', $utf or die "$0: zcat $utf: $!\n";

# Skip everything up to and including the "CHARMAP" header line.
while (<$charmap>) {
    $line++;
    last if (/^[[:space:]]*CHARMAP[[:space:]]*$/);
}

# Every line up to "END CHARMAP" must contain "<UNNNN>", one more
# whitespace-free field and then the description.  The hexadecimal NNNN is
# stored as a number so that lookups do not depend on letter case or
# leading zeros.
while (<$charmap>) {
    $line++;
    last if (/^[[:space:]]*END[[:space:]]*CHARMAP[[:space:]]*$/);
    /\<U([0-9a-fA-F]+)\>[[:space:]]+[^[:space:]]+[[:space:]]+(.*)/
        or die "$0: $utf: syntax error on line $line: $_\n";
    $descriptions{hex ($1)} = $2;
}

# Read whatever follows the section so that zcat can run to completion;
# closing the pipe while it still has output pending could kill it with
# SIGPIPE and mask its real exit status.
1 while <$charmap>;

# close() on a pipe returns false when the command exited with a non-zero
# status, e.g. because $utf is missing or corrupt.  Without this check the
# script would carry on with an empty or partial %descriptions.
unless (close $charmap) {
    die "$0: zcat $utf: $!\n" if $!;
    die "$0: zcat $utf: exited with status " . ($? >> 8) . "\n";
}

# Rewrite FILE in place: keep every line except previously generated
# "# U+NNNN: description" comments, and after each line emit one fresh
# comment per "U+NNNN" reference found outside of comments on that line.
my @output;

# Three-argument open: $file comes from the command line, so characters
# such as a leading ">" or a trailing "|" must not be taken as an open mode.
open my $in, '<', $file or die "$0: $file: $!\n";
while (my $text = <$in>) {
    # Generated comments are dropped here and regenerated below, so
    # repeated runs do not pile up duplicates.
    push (@output, $text) unless ($text =~ /^\# U\+[0-9a-fA-F]+: [^ ]/);

    # References that appear inside comments are not described.
    $text =~ s/#.*//;

    while ($text =~ /U\+([0-9a-fA-F]+)/g) {
        my $description = $descriptions{hex ($1)};
        # Code points missing from the charmap get an empty description.
        $description = "" unless defined $description;
        push (@output, sprintf ("# U+%s: %s\n", $1, $description));
    }
}
close $in;

# The whole file has been read into @output, so it can be truncated and
# written back now.
open my $out, '>', $file or die "$0: $file: $!\n";
print {$out} @output or die "$0: $file: $!\n";
# Buffered write errors (a full disk, for instance) are only reported by
# close(), so its result has to be checked as well.
close $out or die "$0: $file: $!\n";