Blob Blame History Raw
#
#  CharEnts.pm
#
#  $Id: CharEnts.pm,v 1.1.1.1 2001/05/24 15:57:41 sano Exp $
#
#  SGML Character Entity utilities -- interface to Perl module
#  Text::EntityMap.
#
package LinuxDocTools::CharEnts;
use strict;

=head1 NAME

LinuxDocTools::CharEnts - Interface to Text::EntityMap

=head1 SYNOPSIS

  my $char_maps = load_char_maps ('.2ext', [ Text::EntityMap::sdata_dirs() ]);

  $value = parse_data ($value, $char_maps, $escape_sub);

=head1 DESCRIPTION

This module provides a simple interface to the entity map handling provided by
B<Text::EntityMap>.

=head1 FUNCTIONS

=over 4

=cut

use Text::EntityMap;
use Exporter;

use vars qw(@ISA @EXPORT $VERSION);
@ISA = qw(Exporter);
@EXPORT = qw(load_char_maps parse_data);
$VERSION = sprintf("%d.%02d", q$Revision: 1.1.1.1 $ =~ /(\d+)\.(\d+)/);

# `%warn_map' tracks entities that were not able to be mapped so they
# are only warned once.
my %warn_map = ();

=item parse_data ($data, $char_map, $escape_sub)

B<parse_data> takes a string of I<$data> in the output format of
B<nsgmls> (see SP's C<sgmlsout.htm> document) without the leading dash.
B<parse_data> calls I<$char_map>'s lookup method for each sdata
entity reference. If the entity reference is undefined, it is
left alone (without the (n)sgmls C<\|>). For all remaining data,
B<parse_data> calls back into I<$escape_sub> to properly escape
characters for the backend formatter. Strings returned from the
lookup method are assumed to be already escaped.

This routine is derived from David Megginson's SGMLSpm.

=cut

sub parse_data {
    my ($data, $char_map, $escape_sub) = @_;
    my ($result) = "";

    my $sdata_flag = 0;
    my $out = '';

    while ($data =~ /\\(\\|n|\||[0-7]{1,3})/) {
	$out .= $`;
	$data = $';

	if ($1 eq '|') {
	    # beginning or end of SDATA
	    if ("$out" ne '') {
		if ($sdata_flag) {
		    my ($mapping) = $char_map->lookup ($out);
		    if (defined $mapping) {
			# escape `\' in mapping for ASP
			$mapping =~ s/\\/\\\\/g;
			$result .= $mapping;
		    } else {
			if (!$warn_map{$out}) {
			    warn "parse_data: no entity map for \`$out'\n";
			    $warn_map{$out} = 1;
			}
			# output the entity reference inside of `{}'
			$result .= &$escape_sub ("{" . $out . "}");
		    }
		} else {
		    $result .= &$escape_sub ($out);
		}
		$out = '';
	    }
	    $sdata_flag = !$sdata_flag;

	} elsif ($1 eq 'n') {
	    # record end

	    # pass '\\n' through to ASP
	    $result .= &$escape_sub ($out) . '\\n';
	    $out = '';
	} elsif ($1 eq '\\') {
	    # backslash

	    $result .= &$escape_sub ($out);

	    $out = '[bsol  ]';	# bsol == entity name for backslash
	    my ($mapping) = $char_map->lookup ($out);
	    if (defined $mapping) {
		# escape `\' in mapping for ASP
		$mapping =~ s/\\/\\\\/g;
		$result .= $mapping;
	    } else {
		if (!$warn_map{$out}) {
		    warn "parse_data: no entity map for \`$out'\n";
		    $warn_map{$out} = 1;
		}
		# output the entity reference inside of `{}'
		$result .= &$escape_sub ("{" . $out . "}");
	    }
	    $out = '';
	} else {
	    # other octal character
	    $result .= &$escape_sub ($out . chr(oct($1)));
	    $out = '';
	}
    }
    $out .= $data;
    if ("$out" ne '') {
	$result .= &$escape_sub ($out);
    }

    return ($result);
}

=item load_char_maps ($format, $paths)

B<load_char_maps> takes an EntityMap format suffix and loads all of the
character entity replacement sets for that suffix into an EntityMapGroup.
It searches every directory in I<@{$path}>.

=cut

sub load_char_maps {
    my ($format, $paths) = @_;

    my (@char_maps) = ();
    my ($path, $file_name, $char_map);

    foreach $path (@{$paths}) {
	if (-d $path) {
	    opendir (SDATADIR, $path)
		|| die "load_char_map: opening directory \`$path' for reading: $!\n";
	    foreach $file_name (readdir (SDATADIR)) {
		next if ($file_name !~ /$format$/);
		eval {$char_map = Text::EntityMap->load ("$path/$file_name")}
  		    || die "load_char_map: loading \`$path/$file_name'\n$@\n";
		push (@char_maps, $char_map);
	    }
	    closedir (SDATADIR);
	}
    }

    warn "load_char_maps: no entity maps found\n"
	if ($#char_maps == -1);

    return (Text::EntityMap->group (@char_maps));
}

=back

=head1 AUTHOR

Ken MacLeod, C<E<lt>ken@bitsko.slc.ut.usE<gt>>

=cut
1;