|
Packit |
e4b6da |
# vim:sw=4 sta showmatch
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
use strict;
|
|
Packit |
e4b6da |
use vars qw($VERSION);
|
|
Packit |
e4b6da |
$VERSION = '0.8.6';
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
package XML::Handler::SGMLSpl::Node;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub new {
|
|
Packit |
e4b6da |
my ($class, $type, $saxhash, $parent) = @_;
|
|
Packit |
e4b6da |
$saxhash->{type} = $type;
|
|
Packit |
e4b6da |
$saxhash->{parent} = $parent;
|
|
Packit |
e4b6da |
$saxhash->{ext} = {};
|
|
Packit |
e4b6da |
return bless($saxhash, $class);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
# Element name
|
|
Packit |
e4b6da |
sub name {
|
|
Packit |
e4b6da |
my ($self) = @_;
|
|
Packit |
e4b6da |
if($self->{type} eq 'element') {
|
|
Packit |
e4b6da |
return $self->{LocalName};
|
|
Packit |
e4b6da |
} else {
|
|
Packit |
e4b6da |
return undef;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub attr {
|
|
Packit |
e4b6da |
my ($self, $name) = @_;
|
|
Packit |
e4b6da |
if($name !~ /^\{/) { $name = '{}' . $name; }
|
|
Packit |
e4b6da |
return $self->{Attributes}->{$name}->{Value};
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub within {
|
|
Packit |
e4b6da |
my ($self,$name) = @_;
|
|
Packit |
e4b6da |
for ($self = $self->{parent}; $self; $self = $self->{parent}) {
|
|
Packit |
e4b6da |
return $self if ($self->name eq $name);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
return undef;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub in {
|
|
Packit |
e4b6da |
my ($self,$name) = @_;
|
|
Packit |
e4b6da |
if ($self->{parent} and $self->{parent}->name eq $name) {
|
|
Packit |
e4b6da |
return $self->{parent};
|
|
Packit |
e4b6da |
} else {
|
|
Packit |
e4b6da |
return undef;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub ext { return shift->{ext} }
|
|
Packit |
e4b6da |
sub parent { return shift->{parent} }
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
# one of document, element, text, processing-instruction, comment, whitespace
|
|
Packit |
e4b6da |
sub type { return shift->{type} }
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
package XML::Handler::SGMLSpl;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub new {
|
|
Packit |
e4b6da |
my ($class, $user_data) = @_;
|
|
Packit |
e4b6da |
my $self = {
|
|
Packit |
e4b6da |
rules => {},
|
|
Packit |
e4b6da |
mode => [ '' ],
|
|
Packit |
e4b6da |
namespaces => {}, # public
|
|
Packit |
e4b6da |
user_data => $user_data
|
|
Packit |
e4b6da |
};
|
|
Packit |
e4b6da |
return bless($self, $class);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#
|
|
Packit |
e4b6da |
# Rule parsing
|
|
Packit |
e4b6da |
#
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub add_rule {
|
|
Packit |
e4b6da |
my $sub = pop;
|
|
Packit |
e4b6da |
my ($self, $pattern, $mode) = @_;
|
|
Packit |
e4b6da |
$mode = '' if !defined($mode);
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
# Init hashes if not there already
|
|
Packit |
e4b6da |
if(!defined $self->{rules}->{$mode}) {
|
|
Packit |
e4b6da |
$self->{rules}->{$mode} = {
|
|
Packit |
e4b6da |
t_elem_open => {},
|
|
Packit |
e4b6da |
t_elem_close => {},
|
|
Packit |
e4b6da |
t_sdata => {},
|
|
Packit |
e4b6da |
};
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if($pattern eq 'text()') {
|
|
Packit |
e4b6da |
$self->{rules}->{$mode}->{t_text} = $sub;
|
|
Packit |
e4b6da |
} elsif($pattern eq 'processing-instruction()') {
|
|
Packit |
e4b6da |
$self->{rules}->{$mode}->{t_pi} = $sub;
|
|
Packit |
e4b6da |
} elsif($pattern eq 'comment()') {
|
|
Packit |
e4b6da |
$self->{rules}->{$mode}->{t_comment} = $sub;
|
|
Packit |
e4b6da |
} elsif($pattern eq 'sdata()') {
|
|
Packit |
e4b6da |
$self->{rules}->{$mode}->{t_sdata}->{''} = $sub;
|
|
Packit |
e4b6da |
} elsif($pattern eq '/<') {
|
|
Packit |
e4b6da |
$self->{rules}->{$mode}->{t_doc_start} = $sub;
|
|
Packit |
e4b6da |
} elsif($pattern eq '/>') {
|
|
Packit |
e4b6da |
$self->{rules}->{$mode}->{t_doc_end} = $sub;
|
|
Packit |
e4b6da |
} elsif($pattern =~ /^(\{([^}]+)\}(.+))<$/) {
|
|
Packit |
e4b6da |
$self->{rules}->{$mode}->{t_elem_open}->{$1} = $sub;
|
|
Packit |
e4b6da |
} elsif($pattern =~ /^(\{([^}]+)\}(.+))>$/) {
|
|
Packit |
e4b6da |
$self->{rules}->{$mode}->{t_elem_close}->{$1} = $sub;
|
|
Packit |
e4b6da |
} elsif($pattern =~ /^(([^:]+):)?([^:]+)<$/) {
|
|
Packit |
e4b6da |
my $x = '{' . $self->{namespaces}->{$2} . '}' . $3;
|
|
Packit |
e4b6da |
$self->{rules}->{$mode}->{t_elem_open}->{$x} = $sub;
|
|
Packit |
e4b6da |
} elsif($pattern =~ /^(([^:]+):)?([^:]+)>$/) {
|
|
Packit |
e4b6da |
my $x = '{' . $self->{namespaces}->{$2} . '}' . $3;
|
|
Packit |
e4b6da |
$self->{rules}->{$mode}->{t_elem_close}->{$x} = $sub;
|
|
Packit |
e4b6da |
} elsif($pattern =~ /^\|(.+)\|$/) {
|
|
Packit |
e4b6da |
$self->{rules}->{$mode}->{t_sdata}->{$1} = $sub;
|
|
Packit |
e4b6da |
} else {
|
|
Packit |
e4b6da |
die "Unknown pattern type!";
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#
|
|
Packit |
e4b6da |
# Modes
|
|
Packit |
e4b6da |
#
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub push_mode {
|
|
Packit |
e4b6da |
my ($self, $mode) = @_;
|
|
Packit |
e4b6da |
push(@{$self->{mode}}, $mode);
|
|
Packit |
e4b6da |
return $self->{mode}->[-2];
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub mode {
|
|
Packit |
e4b6da |
my ($self) = @_;
|
|
Packit |
e4b6da |
return $self->{mode}->[-1];
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub pop_mode {
|
|
Packit |
e4b6da |
my ($self) = @_;
|
|
Packit |
e4b6da |
return pop(@{$self->{mode}});
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#
|
|
Packit |
e4b6da |
# Locators
|
|
Packit |
e4b6da |
#
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub set_document_locator {
|
|
Packit |
e4b6da |
my ($self, $arg) = @_;
|
|
Packit |
e4b6da |
$self->{locator} = $arg->{Locator};
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub get_locator {
|
|
Packit |
e4b6da |
my ($self) = @_;
|
|
Packit |
e4b6da |
return $self->{locator};
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#
|
|
Packit |
e4b6da |
# Helpful utility method: displays location,
|
|
Packit |
e4b6da |
# with optional node address
|
|
Packit |
e4b6da |
#
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub warn_location
|
|
Packit |
e4b6da |
{
|
|
Packit |
e4b6da |
my $self = shift;
|
|
Packit |
e4b6da |
my $msg = pop;
|
|
Packit |
e4b6da |
my $node = shift;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $location = $self->get_locator && $self->get_locator->location();
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my ($sysid, $linenum) = ('-', '');
|
|
Packit |
e4b6da |
if($location) {
|
|
Packit |
e4b6da |
$sysid = $location->{SystemId};
|
|
Packit |
e4b6da |
$linenum = $location->{LineNumber};
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $nodeinfo;
|
|
Packit |
e4b6da |
if(defined $node) {
|
|
Packit |
e4b6da |
if($node->type() eq 'element') {
|
|
Packit |
e4b6da |
$nodeinfo = $node->name;
|
|
Packit |
e4b6da |
} elsif($node->type eq 'text') {
|
|
Packit |
e4b6da |
$nodeinfo = $node->parent->name;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
if(defined $nodeinfo) {
|
|
Packit |
e4b6da |
warn "$0:${sysid}:${linenum}:${nodeinfo}: ${msg}\n";
|
|
Packit |
e4b6da |
} else {
|
|
Packit |
e4b6da |
warn "$0:${sysid}:${linenum}: ${msg}\n";
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#
|
|
Packit |
e4b6da |
# Standard handlers: not intended for user
|
|
Packit |
e4b6da |
#
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub start_document {
|
|
Packit |
e4b6da |
my ($self) = @_;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $doc = XML::Handler::SGMLSpl::Node->new(
|
|
Packit |
e4b6da |
'document', {}, undef);
|
|
Packit |
e4b6da |
$self->{current_node} = $doc;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
&{($self->{rules}->{$self->mode}->{t_doc_start} || sub{})}
|
|
Packit |
e4b6da |
($self->{user_data}, $doc, $self);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub end_document {
|
|
Packit |
e4b6da |
my ($self) = @_;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $doc = $self->{current_node};
|
|
Packit |
e4b6da |
$self->{current_node} = undef;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
return &{($self->{rules}->{$self->mode}->{t_doc_end} || sub{})}
|
|
Packit |
e4b6da |
($self->{user_data}, $doc, $self);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub start_element {
|
|
Packit |
e4b6da |
my ($self, $arg) = @_;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $elem = XML::Handler::SGMLSpl::Node->new(
|
|
Packit |
e4b6da |
'element', $arg, $self->{current_node});
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
$self->{current_node} = $elem;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
$elem->{_last_mode} = $self->mode;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $key = '{' . $arg->{NamespaceURI} . '}' . $arg->{LocalName};
|
|
Packit |
e4b6da |
my $default_key = '{' . $arg->{NamespaceURI} . '}*';
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $elemrules = $self->{rules}->{$self->mode}->{t_elem_open};
|
|
Packit |
e4b6da |
&{($elemrules->{$key} || $elemrules->{$default_key} || sub{})}
|
|
Packit |
e4b6da |
($self->{user_data}, $elem, $self);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub end_element {
|
|
Packit |
e4b6da |
my ($self, $arg) = @_;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $elem = $self->{current_node};
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $key = '{' . $arg->{NamespaceURI} . '}' . $arg->{LocalName};
|
|
Packit |
e4b6da |
my $default_key = '{' . $arg->{NamespaceURI} . '}*';
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
# We always enter the end element handler with the same mode
|
|
Packit |
e4b6da |
# as we had entered the start element handler.
|
|
Packit |
e4b6da |
# In most cases this is the more sane behavior, even though
|
|
Packit |
e4b6da |
# it's inconsistent.
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $elemrules = $self->{rules}->{$elem->{_last_mode}}->{t_elem_close};
|
|
Packit |
e4b6da |
&{($elemrules->{$key} || $elemrules->{$default_key} || sub{})}
|
|
Packit |
e4b6da |
($self->{user_data}, $elem, $self);
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
$self->{current_node} = $elem->parent;
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub characters {
|
|
Packit |
e4b6da |
my ($self, $arg) = @_;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $textnode = XML::Handler::SGMLSpl::Node->new(
|
|
Packit |
e4b6da |
'text', $arg, $self->{current_node});
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
&{($self->{rules}->{$self->mode}->{t_text} || sub{})}
|
|
Packit |
e4b6da |
($self->{user_data}, $textnode, $self);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub processing_instruction {
|
|
Packit |
e4b6da |
my ($self, $arg) = @_;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $pi = XML::Handler::SGMLSpl::Node->new(
|
|
Packit |
e4b6da |
'processing-instruction', $arg, $self->{current_node});
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
&{($self->{rules}->{$self->mode}->{t_pi} || sub{})}
|
|
Packit |
e4b6da |
($self->{user_data}, $pi, $self);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub ignorable_whitespace {
|
|
Packit |
e4b6da |
my ($self, $arg) = @_;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $textnode = XML::Handler::SGMLSpl::Node->new(
|
|
Packit |
e4b6da |
'whitespace', $arg, $self->{current_node});
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
&{($self->{rules}->{$self->mode}->{t_text} || sub{})}
|
|
Packit |
e4b6da |
($self->{user_data}, $textnode, $self);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub comment {
|
|
Packit |
e4b6da |
my ($self, $arg) = @_;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $comment = XML::Handler::SGMLSpl::Node->new(
|
|
Packit |
e4b6da |
'comment', $arg, $self->{current_node});
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
&{($self->{rules}->{$self->mode}->{t_comment} || sub{})}
|
|
Packit |
e4b6da |
($self->{user_data}, $comment, $self);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
#
|
|
Packit |
e4b6da |
# SDATA entities (SGML)
|
|
Packit |
e4b6da |
#
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
sub internal_entity_ref {
|
|
Packit |
e4b6da |
my ($self, $arg) = @_;
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $sdata = XML::Handler::SGMLSpl::Node->new(
|
|
Packit |
e4b6da |
'sdata', $arg, $self->{current_node});
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
my $sdatarules = $self->{rules}->{$self->mode}->{t_sdata};
|
|
Packit |
e4b6da |
&{($sdatarules->{$arg->{LocalName}} || $sdatarules->{''} || sub{})}
|
|
Packit |
e4b6da |
($self->{user_data}, $sdata, $self);
|
|
Packit |
e4b6da |
}
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
# FIXME: Write a man page.
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
# Autoload methods go after =cut, and are processed by the autosplit program.
|
|
Packit |
e4b6da |
|
|
Packit |
e4b6da |
1;
|