#!perl

# dups: simple script for showing duplicate files

=head1 NAME

dups - Show Duplicate Files

=head1 SYNOPSIS

 Usage: dups files ...

dups is a fast script for discovering duplicate files.  It
achieves its efficiency by comparing file digests rather than the
file contents themselves, the latter being much larger in general.

The NIST Secure Hash Algorithm (SHA) is highly collision-resistant,
meaning that two files with the same SHA digest are almost
certainly identical.

The dups script works by computing the SHA-1 digest of each file
and looking for matches.  The search can reveal more than one set
of duplicates, so the output is written as follows:

	match1_file1
	match1_file2
	match1_file3
	etc.

	match2_file1
	match2_file2
	etc.

=head1 AUTHOR

Mark Shelor <mshelor@cpan.org>

=head1 SEE ALSO

Perl module L<Digest::SHA> or L<Digest::SHA::PurePerl>

=cut
|
|
Packit |
fa4fcc |
|
|
Packit |
fa4fcc |
use strict;
use warnings;

use Digest::SHA;

die "usage: dups files ...\n" unless @ARGV;

# Consider only plain files; directories and other non-file
# arguments are silently ignored.
my @files = grep { -f $_ } @ARGV;

# Map each SHA-1 hex digest to the list of files having that digest.
# Digest::SHA->new with no argument selects SHA-1.
my %dups;
for my $file (@files) {
	# Read in binary mode ("b") so digests are consistent across
	# platforms.  addfile() croaks on an unreadable file, so trap
	# the failure and skip that file rather than aborting the run.
	my $digest = eval { Digest::SHA->new->addfile($file, "b")->hexdigest };
	unless (defined $digest) {
		warn "dups: can't digest $file: $@";
		next;
	}
	push(@{$dups{$digest}}, $file);
}
|
|
Packit |
fa4fcc |
|
|
Packit |
fa4fcc |
# Report each group of two or more files sharing a digest.  The first
# member of a group starts at the left margin, the remaining members
# are tab-indented beneath it, and groups are separated by a blank line.
for my $group (values %dups) {
	next unless @$group > 1;
	print join("\n\t", @$group), "\n\n";
}