Blame lib/Encode/Encoder.pm

Packit d0f5c2
#
Packit d0f5c2
# $Id: Encoder.pm,v 2.3 2013/09/14 07:51:59 dankogai Exp $
Packit d0f5c2
#
Packit d0f5c2
package Encode::Encoder;
Packit d0f5c2
use strict;
Packit d0f5c2
use warnings;
Packit d0f5c2
our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
Packit d0f5c2
Packit d0f5c2
require Exporter;
Packit d0f5c2
our @ISA       = qw(Exporter);
Packit d0f5c2
our @EXPORT_OK = qw ( encoder );
Packit d0f5c2
Packit d0f5c2
our $AUTOLOAD;
Packit d0f5c2
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
Packit d0f5c2
use Encode qw(encode decode find_encoding from_to);
Packit d0f5c2
use Carp;
Packit d0f5c2
Packit d0f5c2
sub new {
Packit d0f5c2
    my ( $class, $data, $encname ) = @_;
Packit d0f5c2
    unless ($encname) {
Packit d0f5c2
        $encname = Encode::is_utf8($data) ? 'utf8' : '';
Packit d0f5c2
    }
Packit d0f5c2
    else {
Packit d0f5c2
        my $obj = find_encoding($encname)
Packit d0f5c2
          or croak __PACKAGE__, ": unknown encoding: $encname";
Packit d0f5c2
        $encname = $obj->name;
Packit d0f5c2
    }
Packit d0f5c2
    my $self = {
Packit d0f5c2
        data     => $data,
Packit d0f5c2
        encoding => $encname,
Packit d0f5c2
    };
Packit d0f5c2
    bless $self => $class;
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
sub encoder { __PACKAGE__->new(@_) }
Packit d0f5c2
Packit d0f5c2
sub data {
Packit d0f5c2
    my ( $self, $data ) = @_;
Packit d0f5c2
    if ( defined $data ) {
Packit d0f5c2
        $self->{data} = $data;
Packit d0f5c2
        return $data;
Packit d0f5c2
    }
Packit d0f5c2
    else {
Packit d0f5c2
        return $self->{data};
Packit d0f5c2
    }
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
sub encoding {
Packit d0f5c2
    my ( $self, $encname ) = @_;
Packit d0f5c2
    if ($encname) {
Packit d0f5c2
        my $obj = find_encoding($encname)
Packit d0f5c2
          or confess __PACKAGE__, ": unknown encoding: $encname";
Packit d0f5c2
        $self->{encoding} = $obj->name;
Packit d0f5c2
        return $self;
Packit d0f5c2
    }
Packit d0f5c2
    else {
Packit d0f5c2
        return $self->{encoding};
Packit d0f5c2
    }
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
sub bytes {
Packit d0f5c2
    my ( $self, $encname ) = @_;
Packit d0f5c2
    $encname ||= $self->{encoding};
Packit d0f5c2
    my $obj = find_encoding($encname)
Packit d0f5c2
      or confess __PACKAGE__, ": unknown encoding: $encname";
Packit d0f5c2
    $self->{data} = $obj->decode( $self->{data}, 1 );
Packit d0f5c2
    $self->{encoding} = '';
Packit d0f5c2
    return $self;
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
sub DESTROY {    # defined so it won't autoload.
Packit d0f5c2
    DEBUG and warn shift;
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
sub AUTOLOAD {
Packit d0f5c2
    my $self = shift;
Packit d0f5c2
    my $type = ref($self)
Packit d0f5c2
      or confess "$self is not an object";
Packit d0f5c2
    my $myname = $AUTOLOAD;
Packit d0f5c2
    $myname =~ s/.*://;    # strip fully-qualified portion
Packit d0f5c2
    my $obj = find_encoding($myname)
Packit d0f5c2
      or confess __PACKAGE__, ": unknown encoding: $myname";
Packit d0f5c2
    DEBUG and warn $self->{encoding}, " => ", $obj->name;
Packit d0f5c2
    if ( $self->{encoding} ) {
Packit d0f5c2
        from_to( $self->{data}, $self->{encoding}, $obj->name, 1 );
Packit d0f5c2
    }
Packit d0f5c2
    else {
Packit d0f5c2
        $self->{data} = $obj->encode( $self->{data}, 1 );
Packit d0f5c2
    }
Packit d0f5c2
    $self->{encoding} = $obj->name;
Packit d0f5c2
    return $self;
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
use overload
Packit d0f5c2
  q("") => sub { $_[0]->{data} },
Packit d0f5c2
  q(0+) => sub { use bytes(); bytes::length( $_[0]->{data} ) },
Packit d0f5c2
  fallback => 1,
Packit d0f5c2
  ;
Packit d0f5c2
Packit d0f5c2
1;
Packit d0f5c2
__END__
Packit d0f5c2
Packit d0f5c2
=head1 NAME
Packit d0f5c2
Packit d0f5c2
Encode::Encoder -- Object Oriented Encoder
Packit d0f5c2
Packit d0f5c2
=head1 SYNOPSIS
Packit d0f5c2
Packit d0f5c2
  use Encode::Encoder;
Packit d0f5c2
  # Encode::encode("ISO-8859-1", $data); 
Packit d0f5c2
  Encode::Encoder->new($data)->iso_8859_1; # OOP way
Packit d0f5c2
  # shortcut
Packit d0f5c2
  use Encode::Encoder qw(encoder);
Packit d0f5c2
  encoder($data)->iso_8859_1;
Packit d0f5c2
  # you can stack them!
Packit d0f5c2
  encoder($data)->iso_8859_1->base64;  # provided base64() is defined
Packit d0f5c2
  # you can use it as a decoder as well
Packit d0f5c2
  encoder($base64)->bytes('base64')->latin1;
Packit d0f5c2
  # stringified
Packit d0f5c2
  print encoder($data)->utf8->latin1;  # prints the string in latin1
Packit d0f5c2
  # numified
Packit d0f5c2
  encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data)
Packit d0f5c2
Packit d0f5c2
=head1 ABSTRACT
Packit d0f5c2
Packit d0f5c2
B<Encode::Encoder> allows you to use Encode in an object-oriented
Packit d0f5c2
style.  This is not only more intuitive than a functional approach,
Packit d0f5c2
but also handier when you want to stack encodings.  Suppose you want
Packit d0f5c2
your UTF-8 string converted to Latin1 then Base64: you can simply say
Packit d0f5c2
Packit d0f5c2
  my $base64 = encoder($utf8)->latin1->base64;
Packit d0f5c2
Packit d0f5c2
instead of
Packit d0f5c2
Packit d0f5c2
  my $latin1 = encode("latin1", $utf8);
Packit d0f5c2
  my $base64 = encode_base64($utf8);
Packit d0f5c2
Packit d0f5c2
or the lazier and more convoluted
Packit d0f5c2
Packit d0f5c2
  my $base64 = encode_base64(encode("latin1", $utf8));
Packit d0f5c2
Packit d0f5c2
=head1 Description
Packit d0f5c2
Packit d0f5c2
Here is how to use this module.
Packit d0f5c2
Packit d0f5c2
=over 4
Packit d0f5c2
Packit d0f5c2
=item *
Packit d0f5c2
Packit d0f5c2
There are at least two instance variables stored in a hash reference,
Packit d0f5c2
{data} and {encoding}.
Packit d0f5c2
Packit d0f5c2
=item *
Packit d0f5c2
Packit d0f5c2
When there is no method, it takes the method name as the name of the
Packit d0f5c2
encoding and encodes the instance I<data> with I<encoding>.  If successful,
Packit d0f5c2
the instance I<encoding> is set accordingly.
Packit d0f5c2
Packit d0f5c2
=item *
Packit d0f5c2
Packit d0f5c2
You can retrieve the result via -E<gt>data but usually you don't have to 
Packit d0f5c2
because the stringify operator ("") is overridden to do exactly that.
Packit d0f5c2
Packit d0f5c2
=back
Packit d0f5c2
Packit d0f5c2
=head2 Predefined Methods
Packit d0f5c2
Packit d0f5c2
This module predefines the methods below:
Packit d0f5c2
Packit d0f5c2
=over 4
Packit d0f5c2
Packit d0f5c2
=item $e = Encode::Encoder-E<gt>new([$data, $encoding]);
Packit d0f5c2
Packit d0f5c2
returns an encoder object.  Its data is initialized with $data if
Packit d0f5c2
present, and its encoding is set to $encoding if present.
Packit d0f5c2
Packit d0f5c2
When $encoding is omitted, it defaults to utf8 if $data is already in
Packit d0f5c2
utf8 or "" (empty string) otherwise.
Packit d0f5c2
Packit d0f5c2
=item encoder()
Packit d0f5c2
Packit d0f5c2
is an alias of Encode::Encoder-E<gt>new().  This one is exported on demand.
Packit d0f5c2
Packit d0f5c2
=item $e-E<gt>data([$data])
Packit d0f5c2
Packit d0f5c2
When $data is present, sets the instance data to $data and returns the
Packit d0f5c2
object itself.  Otherwise, the current instance data is returned.
Packit d0f5c2
Packit d0f5c2
=item $e-E<gt>encoding([$encoding])
Packit d0f5c2
Packit d0f5c2
When $encoding is present, sets the instance encoding to $encoding and
Packit d0f5c2
returns the object itself.  Otherwise, the current instance encoding is
Packit d0f5c2
returned.
Packit d0f5c2
Packit d0f5c2
=item $e-E<gt>bytes([$encoding])
Packit d0f5c2
Packit d0f5c2
decodes instance data from $encoding, or the instance encoding if
Packit d0f5c2
omitted.  If the conversion is successful, the instance encoding
Packit d0f5c2
will be set to "".
Packit d0f5c2
Packit d0f5c2
The name I<bytes> was deliberately picked to avoid namespace tainting
Packit d0f5c2
-- this module may be used as a base class so method names that appear
Packit d0f5c2
in Encode::Encoding are avoided.
Packit d0f5c2
Packit d0f5c2
=back
Packit d0f5c2
Packit d0f5c2
=head2 Example: base64 transcoder
Packit d0f5c2
Packit d0f5c2
This module is designed to work with L<Encode::Encoding>.
Packit d0f5c2
To make the Base64 transcoder example above really work, you could
Packit d0f5c2
write a module like this:
Packit d0f5c2
Packit d0f5c2
  package Encode::Base64;
Packit d0f5c2
  use parent 'Encode::Encoding';
Packit d0f5c2
  __PACKAGE__->Define('base64');
Packit d0f5c2
  use MIME::Base64;
Packit d0f5c2
  sub encode{ 
Packit d0f5c2
      my ($obj, $data) = @_; 
Packit d0f5c2
      return encode_base64($data);
Packit d0f5c2
  }
Packit d0f5c2
  sub decode{
Packit d0f5c2
      my ($obj, $data) = @_; 
Packit d0f5c2
      return decode_base64($data);
Packit d0f5c2
  }
Packit d0f5c2
  1;
Packit d0f5c2
  __END__
Packit d0f5c2
Packit d0f5c2
And your caller module would be something like this:
Packit d0f5c2
Packit d0f5c2
  use Encode::Encoder;
Packit d0f5c2
  use Encode::Base64;
Packit d0f5c2
Packit d0f5c2
  # now you can really do the following
Packit d0f5c2
Packit d0f5c2
  encoder($data)->iso_8859_1->base64;
Packit d0f5c2
  encoder($base64)->bytes('base64')->latin1;
Packit d0f5c2
Packit d0f5c2
=head2 Operator Overloading
Packit d0f5c2
Packit d0f5c2
This module overloads two operators, stringify ("") and numify (0+).
Packit d0f5c2
Packit d0f5c2
Stringify dumps the data inside the object.
Packit d0f5c2
Packit d0f5c2
Numify returns the number of bytes in the instance data.
Packit d0f5c2
Packit d0f5c2
They come in handy when you want to print or find the size of data.
Packit d0f5c2
Packit d0f5c2
=head1 SEE ALSO
Packit d0f5c2
Packit d0f5c2
L<Encode>,
Packit d0f5c2
L<Encode::Encoding>
Packit d0f5c2
Packit d0f5c2
=cut