Blame perl/XML/Handler/SGMLSpl.pm

Packit e4b6da
# vim:sw=4 sta showmatch
Packit e4b6da
Packit e4b6da
use strict;
Packit e4b6da
use vars qw($VERSION);
Packit e4b6da
$VERSION = '0.8.6';
Packit e4b6da
Packit e4b6da
Packit e4b6da
Packit e4b6da
Packit e4b6da
package XML::Handler::SGMLSpl::Node;
Packit e4b6da
Packit e4b6da
sub new {
Packit e4b6da
    my ($class, $type, $saxhash, $parent) = @_;
Packit e4b6da
    $saxhash->{type} = $type;
Packit e4b6da
    $saxhash->{parent} = $parent;
Packit e4b6da
    $saxhash->{ext} = {};
Packit e4b6da
    return bless($saxhash, $class);
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
# Element name
Packit e4b6da
sub name {
Packit e4b6da
    my ($self) = @_;
Packit e4b6da
    if($self->{type} eq 'element') {
Packit e4b6da
	return $self->{LocalName};
Packit e4b6da
    } else {
Packit e4b6da
	return undef;
Packit e4b6da
    }
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub attr {
Packit e4b6da
    my ($self, $name) = @_;
Packit e4b6da
    if($name !~ /^\{/) { $name = '{}' . $name; }
Packit e4b6da
    return $self->{Attributes}->{$name}->{Value};
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub within {
Packit e4b6da
    my ($self,$name) = @_;
Packit e4b6da
    for ($self = $self->{parent}; $self; $self = $self->{parent}) {
Packit e4b6da
	return $self if ($self->name eq $name);
Packit e4b6da
    }
Packit e4b6da
Packit e4b6da
    return undef;
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub in {
Packit e4b6da
    my ($self,$name) = @_;
Packit e4b6da
    if ($self->{parent} and $self->{parent}->name eq $name) {
Packit e4b6da
	return $self->{parent};
Packit e4b6da
    } else {
Packit e4b6da
	return undef;
Packit e4b6da
    }
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub ext { return shift->{ext} }
Packit e4b6da
sub parent { return shift->{parent} }
Packit e4b6da
Packit e4b6da
# one of document, element, text, processing-instruction, comment, whitespace
Packit e4b6da
sub type { return shift->{type} }
Packit e4b6da
Packit e4b6da
Packit e4b6da
Packit e4b6da
Packit e4b6da
package XML::Handler::SGMLSpl;
Packit e4b6da
Packit e4b6da
sub new {
Packit e4b6da
    my ($class, $user_data) = @_;
Packit e4b6da
    my $self = {
Packit e4b6da
	rules => {},
Packit e4b6da
	mode => [ '' ],
Packit e4b6da
        namespaces => {},       # public
Packit e4b6da
	user_data => $user_data
Packit e4b6da
    };
Packit e4b6da
    return bless($self, $class);
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
#
Packit e4b6da
# Rule parsing
Packit e4b6da
#
Packit e4b6da
Packit e4b6da
sub add_rule {
Packit e4b6da
    my $sub = pop;
Packit e4b6da
    my ($self, $pattern, $mode) = @_;
Packit e4b6da
    $mode = '' if !defined($mode);
Packit e4b6da
Packit e4b6da
    # Init hashes if not there already
Packit e4b6da
    if(!defined $self->{rules}->{$mode}) {
Packit e4b6da
	$self->{rules}->{$mode} = {
Packit e4b6da
	    t_elem_open	 => {},
Packit e4b6da
	    t_elem_close => {},
Packit e4b6da
	    t_sdata => {},
Packit e4b6da
	};
Packit e4b6da
    }
Packit e4b6da
Packit e4b6da
    if($pattern eq 'text()') {
Packit e4b6da
        $self->{rules}->{$mode}->{t_text} = $sub;
Packit e4b6da
    } elsif($pattern eq 'processing-instruction()') {
Packit e4b6da
        $self->{rules}->{$mode}->{t_pi} = $sub;
Packit e4b6da
    } elsif($pattern eq 'comment()') {
Packit e4b6da
        $self->{rules}->{$mode}->{t_comment} = $sub;
Packit e4b6da
    } elsif($pattern eq 'sdata()') {
Packit e4b6da
        $self->{rules}->{$mode}->{t_sdata}->{''} = $sub;
Packit e4b6da
    } elsif($pattern eq '/<') {
Packit e4b6da
        $self->{rules}->{$mode}->{t_doc_start} = $sub;
Packit e4b6da
    } elsif($pattern eq '/>') {
Packit e4b6da
        $self->{rules}->{$mode}->{t_doc_end} = $sub;
Packit e4b6da
    } elsif($pattern =~ /^(\{([^}]+)\}(.+))<$/) {
Packit e4b6da
        $self->{rules}->{$mode}->{t_elem_open}->{$1} = $sub;
Packit e4b6da
    } elsif($pattern =~ /^(\{([^}]+)\}(.+))>$/) {
Packit e4b6da
        $self->{rules}->{$mode}->{t_elem_close}->{$1} = $sub;
Packit e4b6da
    } elsif($pattern =~ /^(([^:]+):)?([^:]+)<$/) {
Packit e4b6da
        my $x = '{' . $self->{namespaces}->{$2} . '}' . $3;
Packit e4b6da
        $self->{rules}->{$mode}->{t_elem_open}->{$x} = $sub;
Packit e4b6da
    } elsif($pattern =~ /^(([^:]+):)?([^:]+)>$/) {
Packit e4b6da
        my $x = '{' . $self->{namespaces}->{$2} . '}' . $3;
Packit e4b6da
        $self->{rules}->{$mode}->{t_elem_close}->{$x} = $sub;
Packit e4b6da
    } elsif($pattern =~ /^\|(.+)\|$/) {
Packit e4b6da
        $self->{rules}->{$mode}->{t_sdata}->{$1} = $sub;
Packit e4b6da
    } else {
Packit e4b6da
        die "Unknown pattern type!";
Packit e4b6da
    }
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
#
Packit e4b6da
# Modes
Packit e4b6da
#
Packit e4b6da
Packit e4b6da
sub push_mode {
Packit e4b6da
    my ($self, $mode) = @_;
Packit e4b6da
    push(@{$self->{mode}}, $mode);
Packit e4b6da
    return $self->{mode}->[-2];
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub mode {
Packit e4b6da
    my ($self) = @_;
Packit e4b6da
    return $self->{mode}->[-1];
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub pop_mode {
Packit e4b6da
    my ($self) = @_;
Packit e4b6da
    return pop(@{$self->{mode}});
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
#
Packit e4b6da
# Locators
Packit e4b6da
#
Packit e4b6da
Packit e4b6da
sub set_document_locator {
Packit e4b6da
    my ($self, $arg) = @_;
Packit e4b6da
    $self->{locator} = $arg->{Locator};
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub get_locator {
Packit e4b6da
    my ($self) = @_;
Packit e4b6da
    return $self->{locator};
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
#
Packit e4b6da
# Helpful utility method: displays location,
Packit e4b6da
# with optional node address
Packit e4b6da
#
Packit e4b6da
Packit e4b6da
sub warn_location
Packit e4b6da
{
Packit e4b6da
    my $self = shift;
Packit e4b6da
    my $msg = pop;
Packit e4b6da
    my $node = shift;
Packit e4b6da
Packit e4b6da
    my $location = $self->get_locator && $self->get_locator->location();
Packit e4b6da
Packit e4b6da
    my ($sysid, $linenum) = ('-', '');
Packit e4b6da
    if($location) {
Packit e4b6da
	$sysid = $location->{SystemId};
Packit e4b6da
	$linenum = $location->{LineNumber};
Packit e4b6da
    }
Packit e4b6da
Packit e4b6da
    my $nodeinfo;
Packit e4b6da
    if(defined $node) {
Packit e4b6da
	if($node->type() eq 'element') {
Packit e4b6da
	    $nodeinfo = $node->name;
Packit e4b6da
	} elsif($node->type eq 'text') {
Packit e4b6da
	    $nodeinfo = $node->parent->name;
Packit e4b6da
	}
Packit e4b6da
    }
Packit e4b6da
Packit e4b6da
    if(defined $nodeinfo) {
Packit e4b6da
	warn "$0:${sysid}:${linenum}:${nodeinfo}: ${msg}\n";
Packit e4b6da
    } else {
Packit e4b6da
	warn "$0:${sysid}:${linenum}: ${msg}\n";
Packit e4b6da
    }
Packit e4b6da
}
Packit e4b6da
    
Packit e4b6da
Packit e4b6da
Packit e4b6da
 
Packit e4b6da
Packit e4b6da
#
Packit e4b6da
# Standard handlers: not intended for user
Packit e4b6da
#
Packit e4b6da
Packit e4b6da
sub start_document {
Packit e4b6da
    my ($self) = @_;
Packit e4b6da
Packit e4b6da
    my $doc = XML::Handler::SGMLSpl::Node->new(
Packit e4b6da
		'document', {}, undef);
Packit e4b6da
    $self->{current_node} = $doc;
Packit e4b6da
    
Packit e4b6da
    &{($self->{rules}->{$self->mode}->{t_doc_start} || sub{})}
Packit e4b6da
	($self->{user_data}, $doc, $self);
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub end_document {
Packit e4b6da
    my ($self) = @_;
Packit e4b6da
Packit e4b6da
    my $doc = $self->{current_node};
Packit e4b6da
    $self->{current_node} = undef;
Packit e4b6da
    
Packit e4b6da
    return &{($self->{rules}->{$self->mode}->{t_doc_end} || sub{})}
Packit e4b6da
	($self->{user_data}, $doc, $self);
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub start_element {
Packit e4b6da
    my ($self, $arg) = @_;
Packit e4b6da
Packit e4b6da
    my $elem = XML::Handler::SGMLSpl::Node->new(
Packit e4b6da
		    'element', $arg, $self->{current_node});
Packit e4b6da
		    
Packit e4b6da
    $self->{current_node} = $elem;
Packit e4b6da
Packit e4b6da
    $elem->{_last_mode} = $self->mode;
Packit e4b6da
Packit e4b6da
    my $key = '{' . $arg->{NamespaceURI} . '}' . $arg->{LocalName};
Packit e4b6da
    my $default_key = '{' . $arg->{NamespaceURI} . '}*';
Packit e4b6da
    
Packit e4b6da
    my $elemrules = $self->{rules}->{$self->mode}->{t_elem_open};
Packit e4b6da
    &{($elemrules->{$key} || $elemrules->{$default_key} || sub{})}
Packit e4b6da
       ($self->{user_data}, $elem, $self);
Packit e4b6da
}
Packit e4b6da
    
Packit e4b6da
sub end_element {
Packit e4b6da
    my ($self, $arg) = @_;
Packit e4b6da
Packit e4b6da
    my $elem = $self->{current_node};
Packit e4b6da
Packit e4b6da
    my $key = '{' . $arg->{NamespaceURI} . '}' . $arg->{LocalName};
Packit e4b6da
    my $default_key = '{' . $arg->{NamespaceURI} . '}*';
Packit e4b6da
Packit e4b6da
    # We always enter the end element handler with the same mode
Packit e4b6da
    # as we had entered the start element handler.
Packit e4b6da
    # In most cases this is the more sane behavior, even though
Packit e4b6da
    # it's inconsistent.
Packit e4b6da
Packit e4b6da
    my $elemrules = $self->{rules}->{$elem->{_last_mode}}->{t_elem_close};
Packit e4b6da
    &{($elemrules->{$key} || $elemrules->{$default_key} || sub{})}
Packit e4b6da
       ($self->{user_data}, $elem, $self);
Packit e4b6da
       
Packit e4b6da
    $self->{current_node} = $elem->parent;
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub characters {
Packit e4b6da
    my ($self, $arg) = @_;
Packit e4b6da
Packit e4b6da
    my $textnode = XML::Handler::SGMLSpl::Node->new(
Packit e4b6da
		    'text', $arg, $self->{current_node});
Packit e4b6da
    
Packit e4b6da
    &{($self->{rules}->{$self->mode}->{t_text} || sub{})}
Packit e4b6da
	($self->{user_data}, $textnode, $self);
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub processing_instruction {
Packit e4b6da
    my ($self, $arg) = @_;
Packit e4b6da
Packit e4b6da
    my $pi = XML::Handler::SGMLSpl::Node->new(
Packit e4b6da
		    'processing-instruction', $arg, $self->{current_node});
Packit e4b6da
    
Packit e4b6da
    &{($self->{rules}->{$self->mode}->{t_pi} || sub{})}
Packit e4b6da
	($self->{user_data}, $pi, $self);
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub ignorable_whitespace {
Packit e4b6da
    my ($self, $arg) = @_;
Packit e4b6da
Packit e4b6da
    my $textnode = XML::Handler::SGMLSpl::Node->new(
Packit e4b6da
		    'whitespace', $arg, $self->{current_node});
Packit e4b6da
    
Packit e4b6da
    &{($self->{rules}->{$self->mode}->{t_text} || sub{})}
Packit e4b6da
	($self->{user_data}, $textnode, $self);
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
sub comment {
Packit e4b6da
    my ($self, $arg) = @_;
Packit e4b6da
Packit e4b6da
    my $comment = XML::Handler::SGMLSpl::Node->new(
Packit e4b6da
		    'comment', $arg, $self->{current_node});
Packit e4b6da
    
Packit e4b6da
    &{($self->{rules}->{$self->mode}->{t_comment} || sub{})}
Packit e4b6da
	($self->{user_data}, $comment, $self);
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
#
Packit e4b6da
# SDATA entities (SGML)
Packit e4b6da
#
Packit e4b6da
Packit e4b6da
sub internal_entity_ref {
Packit e4b6da
    my ($self, $arg) = @_;
Packit e4b6da
Packit e4b6da
    my $sdata = XML::Handler::SGMLSpl::Node->new(
Packit e4b6da
		    'sdata', $arg, $self->{current_node});
Packit e4b6da
Packit e4b6da
    my $sdatarules = $self->{rules}->{$self->mode}->{t_sdata};
Packit e4b6da
    &{($sdatarules->{$arg->{LocalName}} || $sdatarules->{''} || sub{})}
Packit e4b6da
       ($self->{user_data}, $sdata, $self);
Packit e4b6da
}
Packit e4b6da
Packit e4b6da
# FIXME: Write a man page.
Packit e4b6da
Packit e4b6da
# Autoload methods go after =cut, and are processed by the autosplit program.
Packit e4b6da
Packit e4b6da
1;