package FileSlurp_12;

use strict;

use Carp ;
use Fcntl qw( :DEFAULT ) ;
use POSIX qw( :fcntl_h ) ;
use Symbol ;

# Exporter setup. 'use vars' (rather than 'our') is kept on purpose: the
# BEGIN block further down still caters for very old perls.

use base 'Exporter' ;
use vars qw( %EXPORT_TAGS @EXPORT_OK $VERSION @EXPORT ) ;

# everything in the 'all' tag is also exported by default

%EXPORT_TAGS = ( 'all' => [
    qw( read_file write_file overwrite_file append_file read_dir ) ] ) ;

@EXPORT = ( @{ $EXPORT_TAGS{'all'} } );

# slurp is only exported when asked for by name

@EXPORT_OK = qw( slurp ) ;

$VERSION = '9999.13';

# true when running on a win32 perl; read_file uses it to decide whether
# to translate CR/LF pairs into newlines for text reads

my $is_win32 = $^O =~ /win32/i ;

# Install subs for various constants that aren't set in older perls
# (< 5.005). Fcntl on old perls uses Exporter to define subs without a
# () prototype. These can't be overridden with the constant pragma or
# we get a prototype mismatch. Hence this less than aesthetically
# appealing BEGIN block.
#
# Each group is probed by calling one representative constant inside an
# eval; if that call dies or returns undef the whole group is installed
# with hard-coded values.

BEGIN {

    # seek whence values

    unless( eval { defined SEEK_SET() } ) {
        *SEEK_SET = sub { 0 };
        *SEEK_CUR = sub { 1 };
        *SEEK_END = sub { 2 };
    }

    # only O_BINARY is probed here, but when it is missing O_RDONLY and
    # O_WRONLY are (re)installed along with it as 0 and 1

    unless( eval { defined O_BINARY() } ) {
        *O_BINARY = sub { 0 };
        *O_RDONLY = sub { 0 };
        *O_WRONLY = sub { 1 };
    }

    # the remaining open flags differ per OS, so pick the values by
    # matching $^O. an OS that matches none of the patterns gets no
    # fallback at all.

    unless ( eval { defined O_APPEND() } ) {

        if ( $^O =~ /olaris/ ) {
            *O_APPEND = sub { 8 };
            *O_CREAT = sub { 256 };
            *O_EXCL = sub { 1024 };
        }
        elsif ( $^O =~ /inux/ ) {
            *O_APPEND = sub { 1024 };
            *O_CREAT = sub { 64 };
            *O_EXCL = sub { 128 };
        }
        elsif ( $^O =~ /BSD/i ) {
            *O_APPEND = sub { 8 };
            *O_CREAT = sub { 512 };
            *O_EXCL = sub { 2048 };
        }
    }
}

# debugging aids: uncomment to see which constant values are in effect

# print "OS [$^O]\n" ;

# print "O_BINARY = ", O_BINARY(), "\n" ;
# print "O_RDONLY = ", O_RDONLY(), "\n" ;
# print "O_WRONLY = ", O_WRONLY(), "\n" ;
# print "O_APPEND = ", O_APPEND(), "\n" ;
# print "O_CREAT   ", O_CREAT(), "\n" ;
# print "O_EXCL   ", O_EXCL(), "\n" ;

# slurp is just another name for read_file

*slurp = \&read_file ;

# read_file( $file_name_or_handle, %args )
#
# Reads a whole file, or everything left on an open handle, with sysread.
#
# Arguments:
#   $file_name - a path that is opened with sysopen, or a reference
#                (glob ref or IO:: object) that is read from directly
#   %args      - optional settings:
#       buf_ref    => scalar ref to use as the read buffer. it is reset
#                     to '' before reading.
#       blk_size   => read size used for handles and for files whose
#                     size test comes back false (default 1024 * 1024)
#       binmode    => true to OR O_BINARY into the open mode and to skip
#                     the win32 CR/LF translation
#       array_ref  => true to get back a reference to an array of lines
#       scalar_ref => true to get back a reference to the slurped text
#       err_mode   => handed to _error() to choose how failures are
#                     reported
#
# Returns (first matching rule wins):
#   array_ref set     - array ref of lines
#   list context      - list of lines
#   scalar_ref set    - the buffer reference
#   scalar context    - the slurped text
#   void context      - nothing (the data is in the caller's buf_ref)
#
# Lines are split on $/: '' means paragraph mode, undef means the whole
# buffer is a single record, anything else is matched literally.
#
# All failures are reported by a magic goto to _error(), so what the
# caller sees on error is whatever _error() does for the given err_mode.

sub read_file {

    my( $file_name, %args ) = @_ ;

# set the buffer to either the passed in one or ours and init it to the
# null string

    my $buf ;
    my $buf_ref = $args{'buf_ref'} || \$buf ;
    ${$buf_ref} = '' ;

    my( $read_fh, $size_left, $blk_size ) ;

# check if we are reading from a handle (glob ref or IO:: object)

    if ( ref $file_name ) {

# slurping a handle so use it and don't open anything.
# set the block size so we know it is a handle and read that amount

        $read_fh = $file_name ;
        $blk_size = $args{'blk_size'} || 1024 * 1024 ;
        $size_left = $blk_size ;

# DEEP DARK MAGIC. this checks the UNTAINT IO flag of a
# glob/handle. only the DATA handle is untainted (since it is from
# trusted data in the source file). this allows us to test if this is
# the DATA handle and then to do a sysseek to make sure it gets
# slurped correctly. on some systems, the buffered i/o pointer is not
# left at the same place as the fd pointer. this sysseek makes them
# the same so slurping with sysread will work.

        eval{ require B } ;

        if ( $@ ) {

            @_ = ( \%args, <<ERR ) ;
Can't find B.pm with this Perl: $!.
That module is needed to slurp the DATA handle.
ERR
            goto &_error ;
        }

        if ( B::svref_2object( $read_fh )->IO->IoFLAGS & 16 ) {

# set the seek position to the current tell.

            sysseek( $read_fh, tell( $read_fh ), SEEK_SET ) ||
                croak "sysseek $!" ;
        }
    }
    else {

# a regular file. set the sysopen mode

        my $mode = O_RDONLY ;
        $mode |= O_BINARY if $args{'binmode'} ;

#printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ;

# open the file and handle any error

        $read_fh = gensym ;
        unless ( sysopen( $read_fh, $file_name, $mode ) ) {
            @_ = ( \%args, "read_file '$file_name' - sysopen: $!");
            goto &_error ;
        }

# get the size of the file for use in the read loop. if the size test
# is false (0 or undef) fall back to block reads until EOF, just as we
# do for a handle.

        $size_left = -s $read_fh ;

        unless( $size_left ) {

            $blk_size = $args{'blk_size'} || 1024 * 1024 ;
            $size_left = $blk_size ;
        }
    }

# infinite read loop. we exit when we are done slurping

    while( 1 ) {

# do the read (appending to the buffer) and see how much we got

        my $read_cnt = sysread( $read_fh, ${$buf_ref},
                $size_left, length ${$buf_ref} ) ;

        if ( defined $read_cnt ) {

# good read. see if we hit EOF (nothing left to read)

            last if $read_cnt == 0 ;

# loop if we are doing block reads. we don't track $size_left then.

            next if $blk_size ;

# count down how much we read and loop if we have more to read.

            $size_left -= $read_cnt ;
            last if $size_left <= 0 ;
            next ;
        }

# handle the read error

        @_ = ( \%args, "read_file '$file_name' - sysread: $!");
        goto &_error ;
    }

# fix up cr/lf to be a newline if this is a windows text file

    ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$args{'binmode'} ;

# work out how to split the buffer into records. paragraph mode ($/ eq
# '') needs a real pattern. any other separator is escaped so regex
# metacharacters in $/ are matched literally. an undefined $/ means
# there is no separator and the whole buffer is one record.

    my $sep_re ;
    if ( defined $/ ) {
        $sep_re = $/ eq '' ? '\n\n+' : quotemeta $/ ;
    }

# a split on a lookbehind of the separator can't be used here since it
# would need variable length lookbehind. the m// form works.

    my $get_lines = sub {
        return unless length ${$buf_ref} ;
        return ${$buf_ref} unless defined $sep_re ;
        return ${$buf_ref} =~ /(.*?$sep_re|.+)/sg ;
    } ;

# this is the 5 returns in a row. each handles one possible
# combination of caller context and requested return type

# caller wants to get an array ref of lines

    return [ $get_lines->() ] if $args{'array_ref'} ;

# caller wants a list of lines (normal list context)

    if ( wantarray ) {
        my @lines = $get_lines->() ;
        return @lines ;
    }

# caller wants a scalar ref to the slurped text

    return $buf_ref if $args{'scalar_ref'} ;

# caller wants a scalar with the slurped text (normal scalar context)

    return ${$buf_ref} if defined wantarray ;

# caller passed in an i/o buffer by reference (normal void context)

    return ;
}

# error handling section
#
# all the error handling uses magic goto so the caller will get the
# error message as if from their code and not this module. if we just
# did a call on the error code, the carp/croak would report it from
# this module since the error sub is one level down on the call stack
# from read_file/write_file/read_dir.

# maps an err_mode name to the Carp function that reports the error.
# any mode that is not a key here is treated as quiet.

my %err_func = (
    'carp'  => \&carp,
    'croak' => \&croak,
) ;

# _error( \%args, $err_msg )
#
# Reports $err_msg according to $args->{'err_mode'}:
#   unset/false or 'croak' - croak with the message
#   'carp'                 - carp with the message, then return undef
#   anything else          - stay quiet and return nothing
#
# It is meant to be entered with 'goto &_error' after setting @_.

sub _error {

    my( $args, $err_msg ) = @_ ;

# get the error function to use

    my $func = $err_func{ $args->{'err_mode'} || 'croak' } ;

# if we didn't find it in our error function hash, they must have set
# it to quiet and we don't do anything.

    return unless $func ;

# call the carp/croak function

    $func->($err_msg) ;

# return a hard undef (in list context this will be a single value of
# undef which is not a legal in-band value). this is deliberate, so it
# must not be turned into a bare return.

    return undef ;
}

1;