|
Packit |
d0f5c2 |
#
|
|
Packit |
d0f5c2 |
# $Id: Encoder.pm,v 2.3 2013/09/14 07:51:59 dankogai Exp $
|
|
Packit |
d0f5c2 |
#
|
|
Packit |
d0f5c2 |
package Encode::Encoder;
|
|
Packit |
d0f5c2 |
use strict;
|
|
Packit |
d0f5c2 |
use warnings;
|
|
Packit |
d0f5c2 |
our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
require Exporter;
|
|
Packit |
d0f5c2 |
our @ISA = qw(Exporter);
|
|
Packit |
d0f5c2 |
our @EXPORT_OK = qw ( encoder );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
our $AUTOLOAD;
|
|
Packit |
d0f5c2 |
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
|
|
Packit |
d0f5c2 |
use Encode qw(encode decode find_encoding from_to);
|
|
Packit |
d0f5c2 |
use Carp;
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
sub new {
|
|
Packit |
d0f5c2 |
my ( $class, $data, $encname ) = @_;
|
|
Packit |
d0f5c2 |
unless ($encname) {
|
|
Packit |
d0f5c2 |
$encname = Encode::is_utf8($data) ? 'utf8' : '';
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
else {
|
|
Packit |
d0f5c2 |
my $obj = find_encoding($encname)
|
|
Packit |
d0f5c2 |
or croak __PACKAGE__, ": unknown encoding: $encname";
|
|
Packit |
d0f5c2 |
$encname = $obj->name;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
my $self = {
|
|
Packit |
d0f5c2 |
data => $data,
|
|
Packit |
d0f5c2 |
encoding => $encname,
|
|
Packit |
d0f5c2 |
};
|
|
Packit |
d0f5c2 |
bless $self => $class;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
sub encoder { __PACKAGE__->new(@_) }
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
sub data {
|
|
Packit |
d0f5c2 |
my ( $self, $data ) = @_;
|
|
Packit |
d0f5c2 |
if ( defined $data ) {
|
|
Packit |
d0f5c2 |
$self->{data} = $data;
|
|
Packit |
d0f5c2 |
return $data;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
else {
|
|
Packit |
d0f5c2 |
return $self->{data};
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
sub encoding {
|
|
Packit |
d0f5c2 |
my ( $self, $encname ) = @_;
|
|
Packit |
d0f5c2 |
if ($encname) {
|
|
Packit |
d0f5c2 |
my $obj = find_encoding($encname)
|
|
Packit |
d0f5c2 |
or confess __PACKAGE__, ": unknown encoding: $encname";
|
|
Packit |
d0f5c2 |
$self->{encoding} = $obj->name;
|
|
Packit |
d0f5c2 |
return $self;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
else {
|
|
Packit |
d0f5c2 |
return $self->{encoding};
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
sub bytes {
|
|
Packit |
d0f5c2 |
my ( $self, $encname ) = @_;
|
|
Packit |
d0f5c2 |
$encname ||= $self->{encoding};
|
|
Packit |
d0f5c2 |
my $obj = find_encoding($encname)
|
|
Packit |
d0f5c2 |
or confess __PACKAGE__, ": unknown encoding: $encname";
|
|
Packit |
d0f5c2 |
$self->{data} = $obj->decode( $self->{data}, 1 );
|
|
Packit |
d0f5c2 |
$self->{encoding} = '';
|
|
Packit |
d0f5c2 |
return $self;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
sub DESTROY { # defined so it won't autoload.
|
|
Packit |
d0f5c2 |
DEBUG and warn shift;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
sub AUTOLOAD {
|
|
Packit |
d0f5c2 |
my $self = shift;
|
|
Packit |
d0f5c2 |
my $type = ref($self)
|
|
Packit |
d0f5c2 |
or confess "$self is not an object";
|
|
Packit |
d0f5c2 |
my $myname = $AUTOLOAD;
|
|
Packit |
d0f5c2 |
$myname =~ s/.*://; # strip fully-qualified portion
|
|
Packit |
d0f5c2 |
my $obj = find_encoding($myname)
|
|
Packit |
d0f5c2 |
or confess __PACKAGE__, ": unknown encoding: $myname";
|
|
Packit |
d0f5c2 |
DEBUG and warn $self->{encoding}, " => ", $obj->name;
|
|
Packit |
d0f5c2 |
if ( $self->{encoding} ) {
|
|
Packit |
d0f5c2 |
from_to( $self->{data}, $self->{encoding}, $obj->name, 1 );
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
else {
|
|
Packit |
d0f5c2 |
$self->{data} = $obj->encode( $self->{data}, 1 );
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
$self->{encoding} = $obj->name;
|
|
Packit |
d0f5c2 |
return $self;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
use overload
|
|
Packit |
d0f5c2 |
q("") => sub { $_[0]->{data} },
|
|
Packit |
d0f5c2 |
q(0+) => sub { use bytes(); bytes::length( $_[0]->{data} ) },
|
|
Packit |
d0f5c2 |
fallback => 1,
|
|
Packit |
d0f5c2 |
;
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
1;
|
|
Packit |
d0f5c2 |
__END__
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 NAME
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Encode::Encoder -- Object Oriented Encoder
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 SYNOPSIS
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
use Encode::Encoder;
|
|
Packit |
d0f5c2 |
# Encode::encode("ISO-8859-1", $data);
|
|
Packit |
d0f5c2 |
Encode::Encoder->new($data)->iso_8859_1; # OOP way
|
|
Packit |
d0f5c2 |
# shortcut
|
|
Packit |
d0f5c2 |
use Encode::Encoder qw(encoder);
|
|
Packit |
d0f5c2 |
encoder($data)->iso_8859_1;
|
|
Packit |
d0f5c2 |
# you can stack them!
|
|
Packit |
d0f5c2 |
encoder($data)->iso_8859_1->base64; # provided base64() is defined
|
|
Packit |
d0f5c2 |
# you can use it as a decoder as well
|
|
Packit |
d0f5c2 |
encoder($base64)->bytes('base64')->latin1;
|
|
Packit |
d0f5c2 |
# stringified
|
|
Packit |
d0f5c2 |
print encoder($data)->utf8->latin1; # prints the string in latin1
|
|
Packit |
d0f5c2 |
# numified
|
|
Packit |
d0f5c2 |
encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data)
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 ABSTRACT
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
B<Encode::Encoder> allows you to use Encode in an object-oriented
|
|
Packit |
d0f5c2 |
style. This is not only more intuitive than a functional approach,
|
|
Packit |
d0f5c2 |
but also handier when you want to stack encodings. Suppose you want
|
|
Packit |
d0f5c2 |
your UTF-8 string converted to Latin1 then Base64: you can simply say
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
my $base64 = encoder($utf8)->latin1->base64;
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
instead of
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
my $latin1 = encode("latin1", $utf8);
|
|
Packit |
d0f5c2 |
my $base64 = encode_base64($utf8);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
or the lazier and more convoluted
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
my $base64 = encode_base64(encode("latin1", $utf8));
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 Description
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Here is how to use this module.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=over 4
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item *
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
There are at least two instance variables stored in a hash reference,
|
|
Packit |
d0f5c2 |
{data} and {encoding}.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item *
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
When there is no method, it takes the method name as the name of the
|
|
Packit |
d0f5c2 |
encoding and encodes the instance I<data> with I<encoding>. If successful,
|
|
Packit |
d0f5c2 |
the instance I<encoding> is set accordingly.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item *
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
You can retrieve the result via -E<gt>data but usually you don't have to
|
|
Packit |
d0f5c2 |
because the stringify operator ("") is overridden to do exactly that.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=back
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head2 Predefined Methods
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
This module predefines the methods below:
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=over 4
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item $e = Encode::Encoder-E<gt>new([$data, $encoding]);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
returns an encoder object. Its data is initialized with $data if
|
|
Packit |
d0f5c2 |
present, and its encoding is set to $encoding if present.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
When $encoding is omitted, it defaults to utf8 if $data is already in
|
|
Packit |
d0f5c2 |
utf8 or "" (empty string) otherwise.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item encoder()
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
is an alias of Encode::Encoder-E<gt>new(). This one is exported on demand.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item $e-E<gt>data([$data])
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
When $data is present, sets the instance data to $data and returns the
|
|
Packit |
d0f5c2 |
object itself. Otherwise, the current instance data is returned.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item $e-E<gt>encoding([$encoding])
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
When $encoding is present, sets the instance encoding to $encoding and
|
|
Packit |
d0f5c2 |
returns the object itself. Otherwise, the current instance encoding is
|
|
Packit |
d0f5c2 |
returned.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item $e-E<gt>bytes([$encoding])
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
decodes instance data from $encoding, or the instance encoding if
|
|
Packit |
d0f5c2 |
omitted. If the conversion is successful, the instance encoding
|
|
Packit |
d0f5c2 |
will be set to "".
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
The name I<bytes> was deliberately picked to avoid namespace tainting
|
|
Packit |
d0f5c2 |
-- this module may be used as a base class so method names that appear
|
|
Packit |
d0f5c2 |
in Encode::Encoding are avoided.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=back
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head2 Example: base64 transcoder
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
This module is designed to work with L<Encode::Encoding>.
|
|
Packit |
d0f5c2 |
To make the Base64 transcoder example above really work, you could
|
|
Packit |
d0f5c2 |
write a module like this:
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
package Encode::Base64;
|
|
Packit |
d0f5c2 |
use parent 'Encode::Encoding';
|
|
Packit |
d0f5c2 |
__PACKAGE__->Define('base64');
|
|
Packit |
d0f5c2 |
use MIME::Base64;
|
|
Packit |
d0f5c2 |
sub encode{
|
|
Packit |
d0f5c2 |
my ($obj, $data) = @_;
|
|
Packit |
d0f5c2 |
return encode_base64($data);
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
sub decode{
|
|
Packit |
d0f5c2 |
my ($obj, $data) = @_;
|
|
Packit |
d0f5c2 |
return decode_base64($data);
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
1;
|
|
Packit |
d0f5c2 |
__END__
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
And your caller module would be something like this:
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
use Encode::Encoder;
|
|
Packit |
d0f5c2 |
use Encode::Base64;
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# now you can really do the following
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
encoder($data)->iso_8859_1->base64;
|
|
Packit |
d0f5c2 |
encoder($base64)->bytes('base64')->latin1;
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head2 Operator Overloading
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
This module overloads two operators, stringify ("") and numify (0+).
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Stringify dumps the data inside the object.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Numify returns the number of bytes in the instance data.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
They come in handy when you want to print or find the size of data.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 SEE ALSO
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
L<Encode>,
|
|
Packit |
d0f5c2 |
L<Encode::Encoding>
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=cut
|