Blame charmaps/xmlcharmap2utf8trans

Packit e4b6da
#!/usr/bin/env perl
Packit e4b6da
Packit e4b6da
=head1 NAME
Packit e4b6da
Packit e4b6da
xmlchar2utf8trans - convert XSLT 2.0 character maps to utf8trans character maps
Packit e4b6da
Packit e4b6da
=head1 SYNOPSIS
Packit e4b6da
Packit e4b6da
xmlchar2utf8trans [xslt-charmap-file]
Packit e4b6da
Packit e4b6da
=head1 DESCRIPTION
Packit e4b6da
Packit e4b6da
This script converts a XML character map in XSLT 2.0 format
Packit e4b6da
to the plain text format used by utf8trans(1).
Packit e4b6da
Packit e4b6da
Note:  The parsing of the XSLT 2.0 character map is really
Packit e4b6da
minimal, and does not conform to the full specification.
Packit e4b6da
Packit e4b6da
=cut
Packit e4b6da
Packit e4b6da
use XML::SAX::ParserFactory;
Packit e4b6da
use strict;
Packit e4b6da
Packit e4b6da
package CharmapProcessor;
Packit e4b6da
use base qw(XML::SAX::Base);
Packit e4b6da
Packit e4b6da
sub start_document {
Packit e4b6da
    my ($self, $elem) = @_;
Packit e4b6da
    $self->{imported_charmaps} = {};
Packit e4b6da
    $self->{local_charmaps} = {};
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub xsl_import_element {
Packit e4b6da
    my ($self, $elem) = @_;
Packit e4b6da
Packit e4b6da
    my $href = $elem->{Attributes}->{'{}href'};
Packit e4b6da
    if(not defined $href) {
Packit e4b6da
        $self->report_static_error("<xsl:import> does not have required 'href' attribute");
Packit e4b6da
        return;
Packit e4b6da
    }
Packit e4b6da
Packit e4b6da
    my $import_charmap_proc = CharmapProcessor->new;
Packit e4b6da
    my $import_parser = XML::SAX::ParserFactory->parser(
Packit e4b6da
                        DocumentHandler => $import_charmap_proc);
Packit e4b6da
    $import_parser->parse_uri($href->{Value});
Packit e4b6da
    
Packit e4b6da
    copy_hash_entries($self->{imported_charmaps},
Packit e4b6da
                      $import_charmap_proc->{imported_charmaps});
Packit e4b6da
    copy_hash_entries($self->{imported_charmaps},
Packit e4b6da
                      $import_charmap_proc->{local_charmaps});
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub copy_hash_entries
Packit e4b6da
{
Packit e4b6da
    my ($to, $from) = @_;
Packit e4b6da
    my @conflicts = 0;
Packit e4b6da
    while( my ($k,$v) = each %{$from} ) {
Packit e4b6da
        push(@conflicts, $k) if exists($to->{$k});
Packit e4b6da
        $to->{$k} = $v;
Packit e4b6da
    }
Packit e4b6da
    return @conflicts;
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub xsl_include_element {
Packit e4b6da
    my ($self, $elem) = @_;
Packit e4b6da
Packit e4b6da
    my $href = $elem->{Attributes}->{'{}href'};
Packit e4b6da
    if(not defined $href) {
Packit e4b6da
        $self->report_static_error("<xsl:import> does not have required 'href' attribute");
Packit e4b6da
        return;
Packit e4b6da
    }
Packit e4b6da
Packit e4b6da
    my $include_parser = XML::SAX::ParserFactory->parser(
Packit e4b6da
                        DocumentHandler => $self);
Packit e4b6da
    $include_parser->parse_uri($href->{Value});
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub xsl_output_character_element {
Packit e4b6da
    my ($self, $elem) = @_;
Packit e4b6da
Packit e4b6da
    if(not defined $self->{current_charmap}) {
Packit e4b6da
        print STDERR "no current charmap\n";
Packit e4b6da
        return;
Packit e4b6da
    }
Packit e4b6da
Packit e4b6da
    my $char = ord($elem->{Attributes}->{'{}character'}->{Value});
Packit e4b6da
    my $string = $elem->{Attributes}->{'{}string'}->{Value};
Packit e4b6da
Packit e4b6da
    push(@{$self->{current_charmap}}, [ $char, $string ]);
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub xsl_character_map_element {
Packit e4b6da
    my ($self, $elem) = @_;
Packit e4b6da
Packit e4b6da
    my $charmap_name = $elem->{Attributes}->{'{}name'}->{Value};
Packit e4b6da
Packit e4b6da
    $self->{current_charmap} = [];
Packit e4b6da
    $self->{local_charmaps}->{$charmap_name} = $self->{current_charmap};
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub xsl_output_element {
Packit e4b6da
    my ($self, $elem) = @_;
Packit e4b6da
Packit e4b6da
    if(exists $elem->{Attributes}->{'{}use-character-maps'}) {
Packit e4b6da
      # FIXME doesn't handle namespaced names yet !
Packit e4b6da
      $_ = $elem->{Attributes}->{'{}use-character-maps'}->{Value};
Packit e4b6da
      my @k = split;
Packit e4b6da
Packit e4b6da
      $self->{use_charmaps} = \@k;
Packit e4b6da
    }
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub start_element {
Packit e4b6da
    my ($self, $elem) = @_;
Packit e4b6da
    
Packit e4b6da
    if($elem->{NamespaceURI} eq 'http://www.w3.org/1999/XSL/Transform')
Packit e4b6da
    {
Packit e4b6da
        if($elem->{LocalName} eq 'output-character') {
Packit e4b6da
            &xsl_output_character_element;
Packit e4b6da
        } elsif($elem->{LocalName} eq 'character-map') {
Packit e4b6da
            &xsl_character_map_element;
Packit e4b6da
        } elsif($elem->{LocalName} eq 'import') {
Packit e4b6da
            &xsl_import_element;
Packit e4b6da
        } elsif($elem->{LocalName} eq 'include') {
Packit e4b6da
            &xsl_include_element;
Packit e4b6da
        } elsif($elem->{LocalName} eq 'output') {
Packit e4b6da
            &xsl_output_element;
Packit e4b6da
        } 
Packit e4b6da
    }
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub report_static_error {
Packit e4b6da
    my ($self, $error_code, $message) = @_;
Packit e4b6da
    print STDERR "$0: $error_code: $message\n";
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub set_document_locator {
Packit e4b6da
    my ($self, $arg) = @_;
Packit e4b6da
    $self->{locator} = $arg->{Locator};
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
# sub comment {
Packit e4b6da
#     my ($self, $node) = @_;
Packit e4b6da
#     my @lines = split(/\n/, $node->{Data});
Packit e4b6da
#     
Packit e4b6da
#     print "\n" if @lines > 1;
Packit e4b6da
#     
Packit e4b6da
#     foreach my $line (@lines) {
Packit e4b6da
#         $line =~ s/^[ \t]+//;
Packit e4b6da
#         print "# $line\n";
Packit e4b6da
#     }
Packit e4b6da
# }
Packit e4b6da
Packit e4b6da
package main;
Packit e4b6da
Packit e4b6da
my $charmap_proc = CharmapProcessor->new;
Packit e4b6da
Packit e4b6da
unshift(@ARGV, '-') unless @ARGV;
Packit e4b6da
my $parser = XML::SAX::ParserFactory->parser(
Packit e4b6da
                DocumentHandler => $charmap_proc);
Packit e4b6da
Packit e4b6da
binmode(STDOUT, ":utf8") unless $] < 5.008;
Packit e4b6da
Packit e4b6da
foreach my $file (@ARGV)
Packit e4b6da
{
Packit e4b6da
    if($file eq '-') {
Packit e4b6da
        $parser->parse_file(\*STDIN);
Packit e4b6da
    } else {
Packit e4b6da
        $parser->parse_uri($file);
Packit e4b6da
    }
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
if(!exists $charmap_proc->{use_charmaps}) {
Packit e4b6da
  print STDERR "no character maps are indicated to be used";
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
foreach my $k (@{$charmap_proc->{use_charmaps}}) {
Packit e4b6da
  my $charmap = $charmap_proc->{local_charmaps}->{$k};
Packit e4b6da
Packit e4b6da
  foreach my $entry (@$charmap) {
Packit e4b6da
    $entry->[1] =~ tr/\n\0/  /;
Packit e4b6da
    printf "%x\t%s\n", $entry->[0], $entry->[1];
Packit e4b6da
  }
Packit e4b6da
}