| #!/usr/bin/perl |
| ############################################################################## |
| # Tool for using regular expressions against the contents of files in a tar |
| # archive. See 'ptargrep --help' for more documentation. |
| # |
| |
| BEGIN { pop @INC if $INC[-1] eq '.' } |
| use strict; |
| use warnings; |
| |
| use Pod::Usage qw(pod2usage); |
| use Getopt::Long qw(GetOptions); |
| use Archive::Tar qw(); |
| use File::Path qw(mkpath); |
| |
| my(%opt, $pattern); |
| |
| if(!GetOptions(\%opt, |
| 'basename|b', |
| 'ignore-case|i', |
| 'list-only|l', |
| 'verbose|v', |
| 'help|?', |
| )) { |
| pod2usage(-exitval => 1, -verbose => 0); |
| } |
| |
| |
| pod2usage(-exitstatus => 0, -verbose => 2) if $opt{help}; |
| |
| pod2usage(-exitval => 1, -verbose => 0, |
| -message => "No pattern specified", |
| ) unless @ARGV; |
| make_pattern( shift(@ARGV) ); |
| |
| pod2usage(-exitval => 1, -verbose => 0, |
| -message => "No tar files specified", |
| ) unless @ARGV; |
| |
| process_archive($_) foreach @ARGV; |
| |
| exit 0; |
| |
| |
| sub make_pattern { |
| my($pat) = @_; |
| |
| if($opt{'ignore-case'}) { |
| $pattern = qr{(?im)$pat}; |
| } |
| else { |
| $pattern = qr{(?m)$pat}; |
| } |
| } |
| |
| |
| sub process_archive { |
| my($filename) = @_; |
| |
| _log("Processing archive: $filename"); |
| my $next = Archive::Tar->iter($filename); |
| while( my $f = $next->() ) { |
| next unless $f->is_file; |
| match_file($f) if $f->size > 0; |
| } |
| } |
| |
| |
| sub match_file { |
| my($f) = @_; |
| my $path = $f->name; |
| my $prefix = $f->prefix; |
| if (defined $prefix) { |
| $path = File::Spec->catfile($prefix, $path); |
| } |
| |
| _log("filename: %s (%d bytes)", $path, $f->size); |
| |
| my $body = $f->get_content(); |
| if($body !~ $pattern) { |
| _log(" no match"); |
| return; |
| } |
| |
| if($opt{'list-only'}) { |
| print $path, "\n"; |
| return; |
| } |
| |
| save_file($path, $body); |
| } |
| |
| |
| sub save_file { |
| my($path, $body) = @_; |
| |
| _log(" found match - extracting"); |
| my($fh); |
| my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z}; |
| if($dir and not $opt{basename}) { |
| _log(" writing to $dir/$file"); |
| $dir =~ s{\A/}{./}; |
| mkpath($dir) unless -d $dir; |
| open $fh, '>', "$dir/$file" or die "open($dir/$file): $!"; |
| } |
| else { |
| _log(" writing to ./$file"); |
| open $fh, '>', $file or die "open($file): $!"; |
| } |
| print $fh $body; |
| close($fh); |
| } |
| |
| |
| sub _log { |
| return unless $opt{verbose}; |
| my($format, @args) = @_; |
| warn sprintf($format, @args) . "\n"; |
| } |
| |
| |
| __END__ |
| |
| =head1 NAME |
| |
| ptargrep - Apply pattern matching to the contents of files in a tar archive |
| |
| =head1 SYNOPSIS |
| |
| ptargrep [options] <pattern> <tar file> ... |
| |
| Options: |
| |
| --basename|-b ignore directory paths from archive |
| --ignore-case|-i do case-insensitive pattern matching |
| --list-only|-l list matching filenames rather than extracting matches |
| --verbose|-v write debugging message to STDERR |
| --help|-? detailed help message |
| |
| =head1 DESCRIPTION |
| |
| This utility allows you to apply pattern matching to B<the contents> of files |
| contained in a tar archive. You might use this to identify all files in an |
| archive which contain lines matching the specified pattern and either print out |
| the pathnames or extract the files. |
| |
| The pattern will be used as a Perl regular expression (as opposed to a simple |
| grep regex). |
| |
| Multiple tar archive filenames can be specified - they will each be processed |
| in turn. |
| |
| =head1 OPTIONS |
| |
| =over 4 |
| |
| =item B<--basename> (alias -b) |
| |
| When matching files are extracted, ignore the directory path from the archive |
| and write to the current directory using the basename of the file from the |
| archive. Beware: if two matching files in the archive have the same basename, |
| the second file extracted will overwrite the first. |
| |
| =item B<--ignore-case> (alias -i) |
| |
| Make pattern matching case-insensitive. |
| |
| =item B<--list-only> (alias -l) |
| |
| Print the pathname of each matching file from the archive to STDOUT. Without |
| this option, the default behaviour is to extract each matching file. |
| |
| =item B<--verbose> (alias -v) |
| |
| Log debugging info to STDERR. |
| |
| =item B<--help> (alias -?) |
| |
| Display this documentation. |
| |
| =back |
| |
| =head1 COPYRIGHT |
| |
| Copyright 2010 Grant McLean E<lt>grantm@cpan.orgE<gt> |
| |
| This program is free software; you can redistribute it and/or modify it |
| under the same terms as Perl itself. |
| |
| =cut |
| |
| |
| |