|
Packit |
e4b6da |
#!/usr/bin/env perl
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
=head1 NAME
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
xmlchar2utf8trans - convert XSLT 2.0 character maps to utf8trans character maps
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
=head1 SYNOPSIS
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
xmlchar2utf8trans [xslt-charmap-file]
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
=head1 DESCRIPTION
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
This script converts a XML character map in XSLT 2.0 format
|
|
Packit |
e4b6da |
to the plain text format used by utf8trans(1).
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
Note: The parsing of the XSLT 2.0 character map is really
|
|
Packit |
e4b6da |
minimal, and does not conform to the full specification.
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
=cut
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
use XML::SAX::ParserFactory;
|
|
Packit |
e4b6da |
use strict;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
package CharmapProcessor;
|
|
Packit |
e4b6da |
use base qw(XML::SAX::Base);
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub start_document {
|
|
Packit |
e4b6da |
my ($self, $elem) = @_;
|
|
Packit |
e4b6da |
$self->{imported_charmaps} = {};
|
|
Packit |
e4b6da |
$self->{local_charmaps} = {};
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub xsl_import_element {
|
|
Packit |
e4b6da |
my ($self, $elem) = @_;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $href = $elem->{Attributes}->{'{}href'};
|
|
Packit |
e4b6da |
if(not defined $href) {
|
|
Packit |
e4b6da |
$self->report_static_error("<xsl:import> does not have required 'href' attribute");
|
|
Packit |
e4b6da |
return;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $import_charmap_proc = CharmapProcessor->new;
|
|
Packit |
e4b6da |
my $import_parser = XML::SAX::ParserFactory->parser(
|
|
Packit |
e4b6da |
DocumentHandler => $import_charmap_proc);
|
|
Packit |
e4b6da |
$import_parser->parse_uri($href->{Value});
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
copy_hash_entries($self->{imported_charmaps},
|
|
Packit |
e4b6da |
$import_charmap_proc->{imported_charmaps});
|
|
Packit |
e4b6da |
copy_hash_entries($self->{imported_charmaps},
|
|
Packit |
e4b6da |
$import_charmap_proc->{local_charmaps});
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub copy_hash_entries
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
my ($to, $from) = @_;
|
|
Packit |
e4b6da |
my @conflicts = 0;
|
|
Packit |
e4b6da |
while( my ($k,$v) = each %{$from} ) {
|
|
Packit |
e4b6da |
push(@conflicts, $k) if exists($to->{$k});
|
|
Packit |
e4b6da |
$to->{$k} = $v;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
return @conflicts;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub xsl_include_element {
|
|
Packit |
e4b6da |
my ($self, $elem) = @_;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $href = $elem->{Attributes}->{'{}href'};
|
|
Packit |
e4b6da |
if(not defined $href) {
|
|
Packit |
e4b6da |
$self->report_static_error("<xsl:import> does not have required 'href' attribute");
|
|
Packit |
e4b6da |
return;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $include_parser = XML::SAX::ParserFactory->parser(
|
|
Packit |
e4b6da |
DocumentHandler => $self);
|
|
Packit |
e4b6da |
$include_parser->parse_uri($href->{Value});
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub xsl_output_character_element {
|
|
Packit |
e4b6da |
my ($self, $elem) = @_;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if(not defined $self->{current_charmap}) {
|
|
Packit |
e4b6da |
print STDERR "no current charmap\n";
|
|
Packit |
e4b6da |
return;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $char = ord($elem->{Attributes}->{'{}character'}->{Value});
|
|
Packit |
e4b6da |
my $string = $elem->{Attributes}->{'{}string'}->{Value};
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
push(@{$self->{current_charmap}}, [ $char, $string ]);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub xsl_character_map_element {
|
|
Packit |
e4b6da |
my ($self, $elem) = @_;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $charmap_name = $elem->{Attributes}->{'{}name'}->{Value};
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
$self->{current_charmap} = [];
|
|
Packit |
e4b6da |
$self->{local_charmaps}->{$charmap_name} = $self->{current_charmap};
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub xsl_output_element {
|
|
Packit |
e4b6da |
my ($self, $elem) = @_;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if(exists $elem->{Attributes}->{'{}use-character-maps'}) {
|
|
Packit |
e4b6da |
# FIXME doesn't handle namespaced names yet !
|
|
Packit |
e4b6da |
$_ = $elem->{Attributes}->{'{}use-character-maps'}->{Value};
|
|
Packit |
e4b6da |
my @k = split;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
$self->{use_charmaps} = \@k;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub start_element {
|
|
Packit |
e4b6da |
my ($self, $elem) = @_;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if($elem->{NamespaceURI} eq 'http://www.w3.org/1999/XSL/Transform')
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
if($elem->{LocalName} eq 'output-character') {
|
|
Packit |
e4b6da |
&xsl_output_character_element;
|
|
Packit |
e4b6da |
} elsif($elem->{LocalName} eq 'character-map') {
|
|
Packit |
e4b6da |
&xsl_character_map_element;
|
|
Packit |
e4b6da |
} elsif($elem->{LocalName} eq 'import') {
|
|
Packit |
e4b6da |
&xsl_import_element;
|
|
Packit |
e4b6da |
} elsif($elem->{LocalName} eq 'include') {
|
|
Packit |
e4b6da |
&xsl_include_element;
|
|
Packit |
e4b6da |
} elsif($elem->{LocalName} eq 'output') {
|
|
Packit |
e4b6da |
&xsl_output_element;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub report_static_error {
|
|
Packit |
e4b6da |
my ($self, $error_code, $message) = @_;
|
|
Packit |
e4b6da |
print STDERR "$0: $error_code: $message\n";
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub set_document_locator {
|
|
Packit |
e4b6da |
my ($self, $arg) = @_;
|
|
Packit |
e4b6da |
$self->{locator} = $arg->{Locator};
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
# sub comment {
|
|
Packit |
e4b6da |
# my ($self, $node) = @_;
|
|
Packit |
e4b6da |
# my @lines = split(/\n/, $node->{Data});
|
|
Packit |
e4b6da |
#
|
|
Packit |
e4b6da |
# print "\n" if @lines > 1;
|
|
Packit |
e4b6da |
#
|
|
Packit |
e4b6da |
# foreach my $line (@lines) {
|
|
Packit |
e4b6da |
# $line =~ s/^[ \t]+//;
|
|
Packit |
e4b6da |
# print "# $line\n";
|
|
Packit |
e4b6da |
# }
|
|
Packit |
e4b6da |
# }
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
package main;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $charmap_proc = CharmapProcessor->new;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
unshift(@ARGV, '-') unless @ARGV;
|
|
Packit |
e4b6da |
my $parser = XML::SAX::ParserFactory->parser(
|
|
Packit |
e4b6da |
DocumentHandler => $charmap_proc);
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
binmode(STDOUT, ":utf8") unless $] < 5.008;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
foreach my $file (@ARGV)
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
if($file eq '-') {
|
|
Packit |
e4b6da |
$parser->parse_file(\*STDIN);
|
|
Packit |
e4b6da |
} else {
|
|
Packit |
e4b6da |
$parser->parse_uri($file);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if(!exists $charmap_proc->{use_charmaps}) {
|
|
Packit |
e4b6da |
print STDERR "no character maps are indicated to be used";
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
foreach my $k (@{$charmap_proc->{use_charmaps}}) {
|
|
Packit |
e4b6da |
my $charmap = $charmap_proc->{local_charmaps}->{$k};
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
foreach my $entry (@$charmap) {
|
|
Packit |
e4b6da |
$entry->[1] =~ tr/\n\0/ /;
|
|
Packit |
e4b6da |
printf "%x\t%s\n", $entry->[0], $entry->[1];
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|