|
Packit |
d0f5c2 |
BEGIN {
|
|
Packit |
d0f5c2 |
require Config; import Config;
|
|
Packit |
d0f5c2 |
if ($Config{'extensions'} !~ /\bEncode\b/) {
|
|
Packit |
d0f5c2 |
print "1..0 # Skip: Encode was not built\n";
|
|
Packit |
d0f5c2 |
exit 0;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
if (ord("A") == 193) {
|
|
Packit |
d0f5c2 |
print "1..0 # Skip: EBCDIC\n";
|
|
Packit |
d0f5c2 |
exit 0;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
$| = 1;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
use strict;
|
|
Packit |
d0f5c2 |
use File::Basename;
|
|
Packit |
d0f5c2 |
use File::Spec;
|
|
Packit |
d0f5c2 |
use Encode qw(decode encode find_encoding _utf8_off);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
#use Test::More qw(no_plan);
|
|
Packit |
d0f5c2 |
use Test::More tests => 32;
|
|
Packit |
d0f5c2 |
BEGIN { use_ok("Encode::Guess") }
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
my $ascii = join('' => map {chr($_)}(0x21..0x7e));
|
|
Packit |
d0f5c2 |
my $latin1 = join('' => map {chr($_)}(0xa1..0xfe));
|
|
Packit |
d0f5c2 |
my $utf8on = join('' => map {chr($_)}(0x3000..0x30fe));
|
|
Packit |
d0f5c2 |
my $utf8off = $utf8on; _utf8_off($utf8off);
|
|
Packit |
d0f5c2 |
my $utf16 = encode('UTF-16', $utf8on);
|
|
Packit |
d0f5c2 |
my $utf32 = encode('UTF-32', $utf8on);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
like(guess_encoding(''), qr/empty string/io, 'empty string');
|
|
Packit |
d0f5c2 |
is(guess_encoding($ascii)->name, 'ascii', 'ascii');
|
|
Packit |
d0f5c2 |
like(guess_encoding($latin1), qr/No appropriate encoding/io, 'no ascii');
|
|
Packit |
d0f5c2 |
is(guess_encoding($latin1, 'latin1')->name, 'iso-8859-1', 'iso-8859-1');
|
|
Packit |
d0f5c2 |
is(guess_encoding($utf8on)->name, 'utf8', 'utf8 w/ flag');
|
|
Packit |
d0f5c2 |
is(guess_encoding($utf8off)->name, 'utf8', 'utf8 w/o flag');
|
|
Packit |
d0f5c2 |
is(guess_encoding($utf16)->name, 'UTF-16', 'UTF-16');
|
|
Packit |
d0f5c2 |
is(guess_encoding($utf32)->name, 'UTF-32', 'UTF-32');
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
my $jisx0201 = File::Spec->catfile(dirname(__FILE__), 'jisx0201.utf');
|
|
Packit |
d0f5c2 |
my $jisx0208 = File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf');
|
|
Packit |
d0f5c2 |
my $jisx0212 = File::Spec->catfile(dirname(__FILE__), 'jisx0212.utf');
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
open my $fh, $jisx0208 or die "$jisx0208: $!";
|
|
Packit |
d0f5c2 |
binmode($fh);
|
|
Packit |
d0f5c2 |
$utf8off = join('' => <$fh>);
|
|
Packit |
d0f5c2 |
close $fh;
|
|
Packit |
d0f5c2 |
$utf8on = decode('utf8', $utf8off);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
my @jp = qw(7bit-jis shiftjis euc-jp);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Encode::Guess->set_suspects(@jp);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
for my $jp (@jp){
|
|
Packit |
d0f5c2 |
my $test = encode($jp, $utf8on);
|
|
Packit |
d0f5c2 |
is(guess_encoding($test)->name, $jp, "JP:$jp");
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
is (decode('Guess', encode('euc-jp', $utf8on)), $utf8on, "decode('Guess')");
|
|
Packit |
d0f5c2 |
eval{ encode('Guess', $utf8on) };
|
|
Packit |
d0f5c2 |
like($@, qr/not defined/io, "no encode()");
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
{
|
|
Packit |
d0f5c2 |
my $warning;
|
|
Packit |
d0f5c2 |
local $SIG{__WARN__} = sub { $warning = shift };
|
|
Packit |
d0f5c2 |
my $euc_jp = my $euc_jp_clone = encode('euc-jp', $utf8on);
|
|
Packit |
d0f5c2 |
Encode::from_to($euc_jp, 'Guess', 'euc-jp');
|
|
Packit |
d0f5c2 |
is $euc_jp_clone, $euc_jp, "from_to(..., 'Guess')";
|
|
Packit |
d0f5c2 |
ok !$warning, "no warning";
|
|
Packit |
d0f5c2 |
diag $warning if $warning;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
my %CJKT =
|
|
Packit |
d0f5c2 |
(
|
|
Packit |
d0f5c2 |
'euc-cn' => File::Spec->catfile(dirname(__FILE__), 'gb2312.utf'),
|
|
Packit |
d0f5c2 |
'euc-jp' => File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'),
|
|
Packit |
d0f5c2 |
'euc-kr' => File::Spec->catfile(dirname(__FILE__), 'ksc5601.utf'),
|
|
Packit |
d0f5c2 |
'big5-eten' => File::Spec->catfile(dirname(__FILE__), 'big5-eten.utf'),
|
|
Packit |
d0f5c2 |
);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Encode::Guess->set_suspects(keys %CJKT);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
for my $name (keys %CJKT){
|
|
Packit |
d0f5c2 |
open my $fh, $CJKT{$name} or die "$CJKT{$name}: $!";
|
|
Packit |
d0f5c2 |
binmode($fh);
|
|
Packit |
d0f5c2 |
$utf8off = join('' => <$fh>);
|
|
Packit |
d0f5c2 |
close $fh;
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
my $test = encode($name, decode('utf8', $utf8off));
|
|
Packit |
d0f5c2 |
is(guess_encoding($test)->name, $name, "CJKT:$name");
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
my $ambiguous = "\x{5c0f}\x{98fc}\x{5f3e}";
|
|
Packit |
d0f5c2 |
my $english = "The quick brown fox jumps over the black lazy dog.";
|
|
Packit |
d0f5c2 |
for my $utf (qw/UTF-16 UTF-32/){
|
|
Packit |
d0f5c2 |
for my $bl (qw/BE LE/){
|
|
Packit |
d0f5c2 |
my $test = encode("$utf$bl" => $english);
|
|
Packit |
d0f5c2 |
is(guess_encoding($test)->name, "$utf$bl", "$utf$bl");
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
for my $bl (qw/BE LE/){
|
|
Packit |
d0f5c2 |
my $test = encode("UTF-16$bl" => $ambiguous);
|
|
Packit |
d0f5c2 |
my $result = guess_encoding($test);
|
|
Packit |
d0f5c2 |
ok(! ref($result), "UTF-16$bl:$result");
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Encode::Guess->set_suspects();
|
|
Packit |
d0f5c2 |
for my $jp (@jp){
|
|
Packit |
d0f5c2 |
# intentionally set $1 a priori -- see Changes
|
|
Packit |
d0f5c2 |
my $test = "English";
|
|
Packit |
d0f5c2 |
'$1' =~ m/^(.*)/o;
|
|
Packit |
d0f5c2 |
is(guess_encoding($test, ($jp))->name, 'ascii',
|
|
Packit |
d0f5c2 |
"ascii vs $jp (\$1 messed)");
|
|
Packit |
d0f5c2 |
$test = encode($jp, $test . "\n\x{65e5}\x{672c}\x{8a9e}");
|
|
Packit |
d0f5c2 |
is(guess_encoding($test, ($jp))->name,
|
|
Packit |
d0f5c2 |
$jp, "$jp vs ascii (\$1 messed)");
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
__END__;
|