#!/usr/bin/env perl
=head1 NAME
xmlchar2utf8trans - convert XSLT 2.0 character maps to utf8trans character maps
=head1 SYNOPSIS
xmlchar2utf8trans [xslt-charmap-file]
=head1 DESCRIPTION
This script converts a XML character map in XSLT 2.0 format
to the plain text format used by utf8trans(1).
Note: The parsing of the XSLT 2.0 character map is really
minimal, and does not conform to the full specification.
=cut
use XML::SAX::ParserFactory;
use strict;
package CharmapProcessor;
use base qw(XML::SAX::Base);
sub start_document {
my ($self, $elem) = @_;
$self->{imported_charmaps} = {};
$self->{local_charmaps} = {};
}
sub xsl_import_element {
my ($self, $elem) = @_;
my $href = $elem->{Attributes}->{'{}href'};
if(not defined $href) {
$self->report_static_error("<xsl:import> does not have required 'href' attribute");
return;
}
my $import_charmap_proc = CharmapProcessor->new;
my $import_parser = XML::SAX::ParserFactory->parser(
DocumentHandler => $import_charmap_proc);
$import_parser->parse_uri($href->{Value});
copy_hash_entries($self->{imported_charmaps},
$import_charmap_proc->{imported_charmaps});
copy_hash_entries($self->{imported_charmaps},
$import_charmap_proc->{local_charmaps});
}
sub copy_hash_entries
{
my ($to, $from) = @_;
my @conflicts = 0;
while( my ($k,$v) = each %{$from} ) {
push(@conflicts, $k) if exists($to->{$k});
$to->{$k} = $v;
}
return @conflicts;
}
sub xsl_include_element {
my ($self, $elem) = @_;
my $href = $elem->{Attributes}->{'{}href'};
if(not defined $href) {
$self->report_static_error("<xsl:import> does not have required 'href' attribute");
return;
}
my $include_parser = XML::SAX::ParserFactory->parser(
DocumentHandler => $self);
$include_parser->parse_uri($href->{Value});
}
sub xsl_output_character_element {
my ($self, $elem) = @_;
if(not defined $self->{current_charmap}) {
print STDERR "no current charmap\n";
return;
}
my $char = ord($elem->{Attributes}->{'{}character'}->{Value});
my $string = $elem->{Attributes}->{'{}string'}->{Value};
push(@{$self->{current_charmap}}, [ $char, $string ]);
}
sub xsl_character_map_element {
my ($self, $elem) = @_;
my $charmap_name = $elem->{Attributes}->{'{}name'}->{Value};
$self->{current_charmap} = [];
$self->{local_charmaps}->{$charmap_name} = $self->{current_charmap};
}
sub xsl_output_element {
my ($self, $elem) = @_;
if(exists $elem->{Attributes}->{'{}use-character-maps'}) {
# FIXME doesn't handle namespaced names yet !
$_ = $elem->{Attributes}->{'{}use-character-maps'}->{Value};
my @k = split;
$self->{use_charmaps} = \@k;
}
}
sub start_element {
my ($self, $elem) = @_;
if($elem->{NamespaceURI} eq 'http://www.w3.org/1999/XSL/Transform')
{
if($elem->{LocalName} eq 'output-character') {
&xsl_output_character_element;
} elsif($elem->{LocalName} eq 'character-map') {
&xsl_character_map_element;
} elsif($elem->{LocalName} eq 'import') {
&xsl_import_element;
} elsif($elem->{LocalName} eq 'include') {
&xsl_include_element;
} elsif($elem->{LocalName} eq 'output') {
&xsl_output_element;
}
}
}
sub report_static_error {
my ($self, $error_code, $message) = @_;
print STDERR "$0: $error_code: $message\n";
}
sub set_document_locator {
my ($self, $arg) = @_;
$self->{locator} = $arg->{Locator};
}
# sub comment {
# my ($self, $node) = @_;
# my @lines = split(/\n/, $node->{Data});
#
# print "\n" if @lines > 1;
#
# foreach my $line (@lines) {
# $line =~ s/^[ \t]+//;
# print "# $line\n";
# }
# }
package main;
my $charmap_proc = CharmapProcessor->new;
unshift(@ARGV, '-') unless @ARGV;
my $parser = XML::SAX::ParserFactory->parser(
DocumentHandler => $charmap_proc);
binmode(STDOUT, ":utf8") unless $] < 5.008;
foreach my $file (@ARGV)
{
if($file eq '-') {
$parser->parse_file(\*STDIN);
} else {
$parser->parse_uri($file);
}
}
if(!exists $charmap_proc->{use_charmaps}) {
print STDERR "no character maps are indicated to be used";
}
foreach my $k (@{$charmap_proc->{use_charmaps}}) {
my $charmap = $charmap_proc->{local_charmaps}->{$k};
foreach my $entry (@$charmap) {
$entry->[1] =~ tr/\n\0/ /;
printf "%x\t%s\n", $entry->[0], $entry->[1];
}
}