Blame lib/Archive/Zip/MemberRead.pm

Packit 0bf95d
package Archive::Zip::MemberRead;
Packit 0bf95d
Packit 0bf95d
=head1 NAME
Packit 0bf95d
Packit 0bf95d
Archive::Zip::MemberRead - A wrapper that lets you read Zip archive members as if they were files.
Packit 0bf95d
Packit 0bf95d
=cut
Packit 0bf95d
Packit 0bf95d
=head1 SYNOPSIS
Packit 0bf95d
Packit 0bf95d
  use Archive::Zip;
Packit 0bf95d
  use Archive::Zip::MemberRead;
Packit 0bf95d
  $zip = Archive::Zip->new("file.zip");
Packit 0bf95d
  $fh  = Archive::Zip::MemberRead->new($zip, "subdir/abc.txt");
Packit 0bf95d
  while (defined($line = $fh->getline()))
Packit 0bf95d
  {
Packit 0bf95d
      print $fh->input_line_number . "#: $line\n";
Packit 0bf95d
  }
Packit 0bf95d
Packit 0bf95d
  $read = $fh->read($buffer, 32*1024);
Packit 0bf95d
  print "Read $read bytes as :$buffer:\n";
Packit 0bf95d
Packit 0bf95d
=head1 DESCRIPTION
Packit 0bf95d
Packit 0bf95d
The Archive::Zip::MemberRead module lets you read Zip archive member data
Packit 0bf95d
just like you read data from files.
Packit 0bf95d
Packit 0bf95d
=head1 METHODS
Packit 0bf95d
Packit 0bf95d
=over 4
Packit 0bf95d
Packit 0bf95d
=cut
Packit 0bf95d
Packit 0bf95d
use strict;
Packit 0bf95d
Packit 0bf95d
use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
Packit 0bf95d
Packit 0bf95d
use vars qw{$VERSION};
Packit 0bf95d
Packit 0bf95d
my $nl;
Packit 0bf95d
Packit 0bf95d
BEGIN {
Packit 0bf95d
    $VERSION = '1.60';
Packit 0bf95d
    $VERSION = eval $VERSION;
Packit 0bf95d
Packit 0bf95d
# Requirement for newline conversion. Should check for e.g., DOS and OS/2 as well, but am too lazy.
Packit 0bf95d
    $nl = $^O eq 'MSWin32' ? "\r\n" : "\n";
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
=item Archive::Zip::Member::readFileHandle()
Packit 0bf95d
Packit 0bf95d
You can get a C<Archive::Zip::MemberRead> from an archive member by
Packit 0bf95d
calling C<readFileHandle()>:
Packit 0bf95d
Packit 0bf95d
  my $member = $zip->memberNamed('abc/def.c');
Packit 0bf95d
  my $fh = $member->readFileHandle();
Packit 0bf95d
  while (defined($line = $fh->getline()))
Packit 0bf95d
  {
Packit 0bf95d
      # ...
Packit 0bf95d
  }
Packit 0bf95d
  $fh->close();
Packit 0bf95d
Packit 0bf95d
=cut
Packit 0bf95d
Packit 0bf95d
sub Archive::Zip::Member::readFileHandle {
Packit 0bf95d
    return Archive::Zip::MemberRead->new(shift());
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
=item Archive::Zip::MemberRead->new($zip, $fileName)
Packit 0bf95d
Packit 0bf95d
=item Archive::Zip::MemberRead->new($zip, $member)
Packit 0bf95d
Packit 0bf95d
=item Archive::Zip::MemberRead->new($member)
Packit 0bf95d
Packit 0bf95d
Construct a new Archive::Zip::MemberRead on the specified member.
Packit 0bf95d
Packit 0bf95d
  my $fh = Archive::Zip::MemberRead->new($zip, 'fred.c')
Packit 0bf95d
Packit 0bf95d
=cut
Packit 0bf95d
Packit 0bf95d
sub new {
Packit 0bf95d
    my ($class, $zip, $file) = @_;
Packit 0bf95d
    my ($self, $member);
Packit 0bf95d
Packit 0bf95d
    if ($zip && $file)    # zip and filename, or zip and member
Packit 0bf95d
    {
Packit 0bf95d
        $member = ref($file) ? $file : $zip->memberNamed($file);
Packit 0bf95d
    } elsif ($zip && !$file && ref($zip))    # just member
Packit 0bf95d
    {
Packit 0bf95d
        $member = $zip;
Packit 0bf95d
    } else {
Packit 0bf95d
        die(
Packit 0bf95d
            'Archive::Zip::MemberRead::new needs a zip and filename, zip and member, or member'
Packit 0bf95d
        );
Packit 0bf95d
    }
Packit 0bf95d
Packit 0bf95d
    $self = {};
Packit 0bf95d
    bless($self, $class);
Packit 0bf95d
    $self->set_member($member);
Packit 0bf95d
    return $self;
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
sub set_member {
Packit 0bf95d
    my ($self, $member) = @_;
Packit 0bf95d
Packit 0bf95d
    $self->{member} = $member;
Packit 0bf95d
    $self->set_compression(COMPRESSION_STORED);
Packit 0bf95d
    $self->rewind();
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
sub set_compression {
Packit 0bf95d
    my ($self, $compression) = @_;
Packit 0bf95d
    $self->{member}->desiredCompressionMethod($compression) if $self->{member};
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
=item setLineEnd(expr)
Packit 0bf95d
Packit 0bf95d
Set the line end character to use. This is set to \n by default
Packit 0bf95d
except on Windows systems where it is set to \r\n. You will
Packit 0bf95d
only need to set this on systems which are not Windows or Unix
Packit 0bf95d
based and require a line end different from \n.
Packit 0bf95d
This is a class method so call as C<Archive::Zip::MemberRead>->C<setLineEnd($nl)>
Packit 0bf95d
Packit 0bf95d
=cut
Packit 0bf95d
Packit 0bf95d
sub setLineEnd {
Packit 0bf95d
    shift;
Packit 0bf95d
    $nl = shift;
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
=item rewind()
Packit 0bf95d
Packit 0bf95d
Rewinds an C<Archive::Zip::MemberRead> so that you can read from it again
Packit 0bf95d
starting at the beginning.
Packit 0bf95d
Packit 0bf95d
=cut
Packit 0bf95d
Packit 0bf95d
sub rewind {
Packit 0bf95d
    my $self = shift;
Packit 0bf95d
Packit 0bf95d
    $self->_reset_vars();
Packit 0bf95d
    $self->{member}->rewindData() if $self->{member};
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
sub _reset_vars {
Packit 0bf95d
    my $self = shift;
Packit 0bf95d
Packit 0bf95d
    $self->{line_no} = 0;
Packit 0bf95d
    $self->{at_end}  = 0;
Packit 0bf95d
Packit 0bf95d
    delete $self->{buffer};
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
=item input_record_separator(expr)
Packit 0bf95d
Packit 0bf95d
If the argument is given, input_record_separator for this
Packit 0bf95d
instance is set to it. The current setting (which may be
Packit 0bf95d
the global $/) is always returned.
Packit 0bf95d
Packit 0bf95d
=cut
Packit 0bf95d
Packit 0bf95d
sub input_record_separator {
Packit 0bf95d
    my $self = shift;
Packit 0bf95d
    if (@_) {
Packit 0bf95d
        $self->{sep} = shift;
Packit 0bf95d
        $self->{sep_re} =
Packit 0bf95d
          _sep_as_re($self->{sep});    # Cache the RE as an optimization
Packit 0bf95d
    }
Packit 0bf95d
    return exists $self->{sep} ? $self->{sep} : $/;
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
# Return the input_record_separator in use as an RE fragment
Packit 0bf95d
# Note that if we have a per-instance input_record_separator
Packit 0bf95d
# we can just return the already converted value. Otherwise,
Packit 0bf95d
# the conversion must be done on $/ every time since we cannot
Packit 0bf95d
# know whether it has changed or not.
Packit 0bf95d
sub _sep_re {
Packit 0bf95d
    my $self = shift;
Packit 0bf95d
Packit 0bf95d
    # Important to phrase this way: sep's value may be undef.
Packit 0bf95d
    return exists $self->{sep} ? $self->{sep_re} : _sep_as_re($/);
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
# Convert the input record separator into an RE and return it.
Packit 0bf95d
sub _sep_as_re {
Packit 0bf95d
    my $sep = shift;
Packit 0bf95d
    if (defined $sep) {
Packit 0bf95d
        if ($sep eq '') {
Packit 0bf95d
            return "(?:$nl){2,}";
Packit 0bf95d
        } else {
Packit 0bf95d
            $sep =~ s/\n/$nl/og;
Packit 0bf95d
            return quotemeta $sep;
Packit 0bf95d
        }
Packit 0bf95d
    } else {
Packit 0bf95d
        return undef;
Packit 0bf95d
    }
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
=item input_line_number()
Packit 0bf95d
Packit 0bf95d
Returns the current line number, but only if you're using C<getline()>.
Packit 0bf95d
Using C<read()> will not update the line number.
Packit 0bf95d
Packit 0bf95d
=cut
Packit 0bf95d
Packit 0bf95d
sub input_line_number {
Packit 0bf95d
    my $self = shift;
Packit 0bf95d
    return $self->{line_no};
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
=item close()
Packit 0bf95d
Packit 0bf95d
Closes the given file handle.
Packit 0bf95d
Packit 0bf95d
=cut
Packit 0bf95d
Packit 0bf95d
sub close {
Packit 0bf95d
    my $self = shift;
Packit 0bf95d
Packit 0bf95d
    $self->_reset_vars();
Packit 0bf95d
    $self->{member}->endRead();
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
=item buffer_size([ $size ])
Packit 0bf95d
Packit 0bf95d
Gets or sets the buffer size used for reads.
Packit 0bf95d
Default is the chunk size used by Archive::Zip.
Packit 0bf95d
Packit 0bf95d
=cut
Packit 0bf95d
Packit 0bf95d
sub buffer_size {
Packit 0bf95d
    my ($self, $size) = @_;
Packit 0bf95d
Packit 0bf95d
    if (!$size) {
Packit 0bf95d
        return $self->{chunkSize} || Archive::Zip::chunkSize();
Packit 0bf95d
    } else {
Packit 0bf95d
        $self->{chunkSize} = $size;
Packit 0bf95d
    }
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
=item getline()
Packit 0bf95d
Packit 0bf95d
Returns the next line from the currently open member.
Packit 0bf95d
Makes sense only for text files.
Packit 0bf95d
A read error is considered fatal enough to die.
Packit 0bf95d
Returns undef on eof. All subsequent calls would return undef,
Packit 0bf95d
unless a rewind() is called.
Packit 0bf95d
Note: The line returned has the input_record_separator (default: newline) removed.
Packit 0bf95d
Packit 0bf95d
=item getline( { preserve_line_ending => 1 } )
Packit 0bf95d
Packit 0bf95d
Returns the next line including the line ending.
Packit 0bf95d
Packit 0bf95d
=cut
Packit 0bf95d
Packit 0bf95d
sub getline {
Packit 0bf95d
    my ($self, $argref) = @_;
Packit 0bf95d
Packit 0bf95d
    my $size = $self->buffer_size();
Packit 0bf95d
    my $sep  = $self->_sep_re();
Packit 0bf95d
Packit 0bf95d
    my $preserve_line_ending;
Packit 0bf95d
    if (ref $argref eq 'HASH') {
Packit 0bf95d
        $preserve_line_ending = $argref->{'preserve_line_ending'};
Packit 0bf95d
        $sep =~ s/\\([^A-Za-z_0-9])+/$1/g;
Packit 0bf95d
    }
Packit 0bf95d
Packit 0bf95d
    for (; ;) {
Packit 0bf95d
        if (   $sep
Packit 0bf95d
            && defined($self->{buffer})
Packit 0bf95d
            && $self->{buffer} =~ s/^(.*?)$sep//s) {
Packit 0bf95d
            my $line = $1;
Packit 0bf95d
            $self->{line_no}++;
Packit 0bf95d
            if ($preserve_line_ending) {
Packit 0bf95d
                return $line . $sep;
Packit 0bf95d
            } else {
Packit 0bf95d
                return $line;
Packit 0bf95d
            }
Packit 0bf95d
        } elsif ($self->{at_end}) {
Packit 0bf95d
            $self->{line_no}++ if $self->{buffer};
Packit 0bf95d
            return delete $self->{buffer};
Packit 0bf95d
        }
Packit 0bf95d
        my ($temp, $status) = $self->{member}->readChunk($size);
Packit 0bf95d
        if ($status != AZ_OK && $status != AZ_STREAM_END) {
Packit 0bf95d
            die "ERROR: Error reading chunk from archive - $status";
Packit 0bf95d
        }
Packit 0bf95d
        $self->{at_end} = $status == AZ_STREAM_END;
Packit 0bf95d
        $self->{buffer} .= $$temp;
Packit 0bf95d
    }
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
=item read($buffer, $num_bytes_to_read)
Packit 0bf95d
Packit 0bf95d
Simulates a normal C<read()> system call.
Packit 0bf95d
Returns the no. of bytes read. C<undef> on error, 0 on eof, I<e.g.>:
Packit 0bf95d
Packit 0bf95d
  $fh = Archive::Zip::MemberRead->new($zip, "sreeji/secrets.bin");
Packit 0bf95d
  while (1)
Packit 0bf95d
  {
Packit 0bf95d
    $read = $fh->read($buffer, 1024);
Packit 0bf95d
    die "FATAL ERROR reading my secrets !\n" if (!defined($read));
Packit 0bf95d
    last if (!$read);
Packit 0bf95d
    # Do processing.
Packit 0bf95d
    ....
Packit 0bf95d
   }
Packit 0bf95d
Packit 0bf95d
=cut
Packit 0bf95d
Packit 0bf95d
#
Packit 0bf95d
# All these $_ are required to emulate read().
Packit 0bf95d
#
Packit 0bf95d
sub read {
Packit 0bf95d
    my $self = $_[0];
Packit 0bf95d
    my $size = $_[2];
Packit 0bf95d
    my ($temp, $status, $ret);
Packit 0bf95d
Packit 0bf95d
    ($temp, $status) = $self->{member}->readChunk($size);
Packit 0bf95d
    if ($status != AZ_OK && $status != AZ_STREAM_END) {
Packit 0bf95d
        $_[1] = undef;
Packit 0bf95d
        $ret = undef;
Packit 0bf95d
    } else {
Packit 0bf95d
        $_[1] = $$temp;
Packit 0bf95d
        $ret = length($$temp);
Packit 0bf95d
    }
Packit 0bf95d
    return $ret;
Packit 0bf95d
}
Packit 0bf95d
Packit 0bf95d
1;
Packit 0bf95d
Packit 0bf95d
=back
Packit 0bf95d
Packit 0bf95d
=head1 AUTHOR
Packit 0bf95d
Packit 0bf95d
Sreeji K. Das E<lt>sreeji_k@yahoo.comE<gt>
Packit 0bf95d
Packit 0bf95d
See L<Archive::Zip> by Ned Konz without which this module does not make
Packit 0bf95d
any sense! 
Packit 0bf95d
Packit 0bf95d
Minor mods by Ned Konz.
Packit 0bf95d
Packit 0bf95d
=head1 COPYRIGHT
Packit 0bf95d
Packit 0bf95d
Copyright 2002 Sreeji K. Das.
Packit 0bf95d
Packit 0bf95d
This program is free software; you can redistribute it and/or modify it under
Packit 0bf95d
the same terms as Perl itself.
Packit 0bf95d
Packit 0bf95d
=cut