Blob Blame History Raw
use Unicode::UCD qw(charinfo);

my $output = 'eucJP-ascii.ucm';
rename $output, "$output.old" if -e $output;
open EUCJP, '>', $output or die $!;
my $prologue = <<'EOF';
#
# eucJP-ascii.ucm
#
<code_set_name>  "eucJP-ascii"
<code_set_alias> "eucjp-ascii"
<code_set_alias> "x-eucjp-open-19970715-ascii"
<mb_cur_min> 1
<mb_cur_max> 3
<subchar> \xA2\xAE
<uconv_class> "MBCS"
#
CHARMAP
EOF
print EUCJP $prologue;

my $dir = $ARGV[0] || 'www.opengroup.or.jp/jvc/cde';
my %ucs = ();
my %rev = ();
my %ext = ();

# C0/C1 controls (except SS2/SS3) and DEL.
foreach my $c ((0x00..0x1F, 0x7F, 0x80..0x8D, 0x90..0x9F)) {
    $ucs{sprintf "%04X", $c} = [sprintf "\\x%02X", $c];
    $rev{sprintf "\\x%02X", $c} = [sprintf "%04X", $c];
}
# eucJP-ascii mappings.
foreach my $map (qw(0201A 0208A 13th 0212A udc ibmext)) {
  open MAP, "$dir/eucJP-$map.txt" or die $!;
  while (<MAP>) {
    chomp $_;
    my ($euc, $ucs) = split /\s+/;
    $euc =~ s/^0x// || die "$_";
    $ucs =~ s/^0x// || die "$_";
    my @euc = grep { $_ } split /([0-9A-F]{2})/, $euc;
    $euc = "\\x".join("\\x", @euc);
    $ucs{$ucs} ||= [];
    push @{$ucs{$ucs}}, $euc;
    $rev{$euc} ||= [];
    push @{$rev{$euc}}, $ucs;
  }
  close MAP;
}
# eucJP-ms reverse fallbacks.
foreach my $ext (qw(0208M 0212M)) {
  open EXT, "$dir/eucJP-$ext.txt" or die $!;
  while (<EXT>) {
    chomp $_;
    my ($euc, $ucs) = split /\s+/;
    $euc =~ s/^0x// || die "$_";
    $ucs =~ s/^0x// || die "$_";
    my @euc = grep { $_ } split /([0-9A-F]{2})/, $euc;
    $euc = "\\x".join("\\x", @euc);
    next if defined $ucs{$ucs};
    $ext{$ucs} = $euc;
    $ucs{$ucs} = undef;
  }
  close EXT;
}

# Output.
foreach my $u (sort keys %ucs) {
    $name = charinfo(hex("0x$u"))->{name} ||
	    ('E000' le $u and $u le 'F8FF' and '<Private Use>') || '';
    unless (defined $ucs{$u}) {
	print EUCJP "<U$u> $ext{$u} |1 # $name\n";
	next;
    }
    my @u = @{$ucs{$u}};
    if ($#u == 0) {
	print EUCJP "<U$u> $u[0] |0 # $name\n";
    } else {
	print EUCJP "<U$u> ".shift(@u)." |0 # $name\n";
	foreach my $c (@u) {
	    print EUCJP "<U$u> $c |3 # $name\n";
	}
    }
}
# Verify duplicated mapping.
my $dup = 0;
foreach my $e (sort keys %rev) {
    my @e = @{$rev{$e}};
    if ($#e != 0) {
	print STDERR "$e <U".join(">,<U", @e).">\n";
	$dup++;
    }
}
warn "$dup duplicated mapping" if $dup;

my $epilogue = <<'EOF';
END CHARMAP
EOF
print EUCJP $epilogue;
close EUCJP;