|
Packit |
d0f5c2 |
#!./perl
|
|
Packit |
d0f5c2 |
use 5.008001;
|
|
Packit |
d0f5c2 |
BEGIN { pop @INC if $INC[-1] eq '.' }
|
|
Packit |
d0f5c2 |
use strict;
|
|
Packit |
d0f5c2 |
use warnings;
|
|
Packit |
d0f5c2 |
use Encode;
|
|
Packit |
d0f5c2 |
use Getopt::Std;
|
|
Packit |
d0f5c2 |
use Carp;
|
|
Packit |
d0f5c2 |
use Encode::Guess;
|
|
Packit |
d0f5c2 |
# Ask Getopt::Std for its standard-conforming --help/--version handling
# (see the Getopt::Std documentation for the exact semantics).
$Getopt::Std::STANDARD_HELP_VERSION = 1;

# Command-line switches:
#   -h       show usage and exit
#   -u       print nothing for files whose encoding could not be guessed
#   -S       list every encoding name usable with -s, then exit
#   -s LIST  suspect encodings, separated by ':' or ','
my %opt;
getopts( "huSs:", \%opt );
my @suspect_list;
list_valid_suspects() and exit if $opt{S};

# Split on EITHER separator.  The former pattern /:,/ matched only the
# literal two-character sequence ":,", so "-s euc-jp,shiftjis" reached
# guess_encoding() as a single, invalid suspect name.
@suspect_list = split /[:,]/, $opt{s} if $opt{s};
HELP_MESSAGE() if $opt{h};
HELP_MESSAGE() unless @ARGV;
do_guess($_) for @ARGV;
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# Slurp the named file as raw bytes and return its whole content as a
# single string.  Croaks with "FILENAME:OS-ERROR" if the file cannot be
# opened.
sub read_file {
    my ($path) = @_;
    open my $in, '<:raw', $path or croak "$path:$!";

    # Undefine the record separator only for the read itself so the
    # whole file comes back from one readline call.
    my $bytes = do { local $/; <$in> };
    close $in;
    return $bytes;
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# Guess the encoding of one file and print "FILENAME<TAB>ENCODING\n".
#
# The file is read via read_file() and checked against @suspect_list by
# Encode::Guess::guess_encoding().  When no encoding could be determined
# the line reads "FILENAME<TAB>unknown", or nothing is printed at all if
# the -u switch was given.  Always returns 1.
sub do_guess {
    my $filename = shift;
    my $data     = read_file($filename);
    my $enc      = guess_encoding( $data, @suspect_list );

    # Per the Encode::Guess docs a successful guess is an object (a
    # reference); anything else signals failure.
    my $guessed = ref($enc);
    return 1 if !$guessed && $opt{u};

    my $label = 'unknown';
    if ($guessed) {
        $label = $enc->mime_name();

        # Not every encoding has a registered MIME name; fall back to the
        # canonical Encode name instead of printing an empty field (and
        # triggering an "uninitialized value" warning).
        $label = $enc->name() unless defined $label;
    }
    print "$filename\t$label\n";
    return 1;
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# Print every encoding name reported by Encode->encodings(":all"), one
# per line.  Always returns 1 so the caller can chain it with "and".
sub list_valid_suspects {
    my @names = Encode->encodings(":all");
    print join( "\n", @names ), "\n";
    return 1;
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# Show usage by replacing the current process with "pod2usage" run on
# this script ($0).  Dies with the OS error if pod2usage cannot be
# executed; on success this sub never returns.
sub HELP_MESSAGE {
    my @usage_cmd = ( 'pod2usage', $0 );
    exec @usage_cmd or die "pod2usage: $!";
}
|
|
Packit |
d0f5c2 |
__END__
|
|
Packit |
d0f5c2 |
=head1 NAME
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
encguess - guess character encodings of files
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 VERSION
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
$Id: encguess,v 0.2 2016/08/04 03:15:58 dankogai Exp $
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 SYNOPSIS
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
  encguess [switches] filename...
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head2 SWITCHES
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=over 2
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item -h
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
show this message and exit.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item -s
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
specify a list of "suspect encoding types" to test,
|
|
Packit |
d0f5c2 |
separated by either C<:> or C<,>
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item -S
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
output a list of all acceptable encoding types that can be used with
|
|
Packit |
d0f5c2 |
the -s param
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item -u
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
suppress display of unidentified types
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=back
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head2 EXAMPLES:
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=over 2
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item *
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Guess encoding of a file named C<test.txt>, using only the default
|
|
Packit |
d0f5c2 |
suspect types.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
  encguess test.txt
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item *
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Guess the encoding type of a file named C<test.txt>, using the suspect
|
|
Packit |
d0f5c2 |
types C<euc-jp,shiftjis,7bit-jis>.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
  encguess -s euc-jp,shiftjis,7bit-jis test.txt
  encguess -s euc-jp:shiftjis:7bit-jis test.txt
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item *
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Guess the encoding type of several files, do not display results for
|
|
Packit |
d0f5c2 |
unidentified files.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
  encguess -us euc-jp,shiftjis,7bit-jis test*.txt
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=back
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 DESCRIPTION
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
The encoding identification is done by checking one encoding type at a
|
|
Packit |
d0f5c2 |
time until all but the right type are eliminated. The set of encoding
|
|
Packit |
d0f5c2 |
types to try is defined by the -s parameter and defaults to ascii,
|
|
Packit |
d0f5c2 |
utf8 and UTF-16/32 with BOM. This can be overridden by passing one or
|
|
Packit |
d0f5c2 |
more encoding types via the -s parameter. If you need to pass in
multiple suspect encoding types, separate them with C<:> or C<,>.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 SEE ALSO
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
L<Encode::Guess>, L<Encode::Detect>
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 LICENSE AND COPYRIGHT
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Copyright 2015 Michael LaGrasta and Dan Kogai.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
This program is free software; you can redistribute it and/or modify it
|
|
Packit |
d0f5c2 |
under the terms of the Artistic License (2.0). You may obtain a
|
|
Packit |
d0f5c2 |
copy of the full license at:
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
L<http://www.perlfoundation.org/artistic_license_2_0>
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=cut
|