|
Packit |
d0f5c2 |
package Encode::Alias;
|
|
Packit |
d0f5c2 |
use strict;
|
|
Packit |
d0f5c2 |
use warnings;
|
|
Packit |
d0f5c2 |
our $VERSION = do { my @r = ( q$Revision: 2.24 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
|
|
Packit |
d0f5c2 |
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
use Exporter 'import';
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# Public, encouraged API is exported by default
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
our @EXPORT =
|
|
Packit |
d0f5c2 |
qw (
|
|
Packit |
d0f5c2 |
define_alias
|
|
Packit |
d0f5c2 |
find_alias
|
|
Packit |
d0f5c2 |
);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
our @Alias; # ordered matching list
|
|
Packit |
d0f5c2 |
our %Alias; # cached known aliases
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
sub find_alias {
|
|
Packit |
d0f5c2 |
my $class = shift;
|
|
Packit |
d0f5c2 |
my $find = shift;
|
|
Packit |
d0f5c2 |
unless ( exists $Alias{$find} ) {
|
|
Packit |
d0f5c2 |
$Alias{$find} = undef; # Recursion guard
|
|
Packit |
d0f5c2 |
for ( my $i = 0 ; $i < @Alias ; $i += 2 ) {
|
|
Packit |
d0f5c2 |
my $alias = $Alias[$i];
|
|
Packit |
d0f5c2 |
my $val = $Alias[ $i + 1 ];
|
|
Packit |
d0f5c2 |
my $new;
|
|
Packit |
d0f5c2 |
if ( ref($alias) eq 'Regexp' && $find =~ $alias ) {
|
|
Packit |
d0f5c2 |
DEBUG and warn "eval $val";
|
|
Packit |
d0f5c2 |
$new = eval $val;
|
|
Packit |
d0f5c2 |
DEBUG and $@ and warn "$val, $@";
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
elsif ( ref($alias) eq 'CODE' ) {
|
|
Packit |
d0f5c2 |
DEBUG and warn "$alias", "->", "($find)";
|
|
Packit |
d0f5c2 |
$new = $alias->($find);
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
elsif ( lc($find) eq lc($alias) ) {
|
|
Packit |
d0f5c2 |
$new = $val;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
if ( defined($new) ) {
|
|
Packit |
d0f5c2 |
next if $new eq $find; # avoid (direct) recursion on bugs
|
|
Packit |
d0f5c2 |
DEBUG and warn "$alias, $new";
|
|
Packit |
d0f5c2 |
my $enc =
|
|
Packit |
d0f5c2 |
( ref($new) ) ? $new : Encode::find_encoding($new);
|
|
Packit |
d0f5c2 |
if ($enc) {
|
|
Packit |
d0f5c2 |
$Alias{$find} = $enc;
|
|
Packit |
d0f5c2 |
last;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# case insensitive search when canonical is not in all lowercase
|
|
Packit |
d0f5c2 |
# RT ticket #7835
|
|
Packit |
d0f5c2 |
unless ( $Alias{$find} ) {
|
|
Packit |
d0f5c2 |
my $lcfind = lc($find);
|
|
Packit |
d0f5c2 |
for my $name ( keys %Encode::Encoding, keys %Encode::ExtModule )
|
|
Packit |
d0f5c2 |
{
|
|
Packit |
d0f5c2 |
$lcfind eq lc($name) or next;
|
|
Packit |
d0f5c2 |
$Alias{$find} = Encode::find_encoding($name);
|
|
Packit |
d0f5c2 |
DEBUG and warn "$find => $name";
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
if (DEBUG) {
|
|
Packit |
d0f5c2 |
my $name;
|
|
Packit |
d0f5c2 |
if ( my $e = $Alias{$find} ) {
|
|
Packit |
d0f5c2 |
$name = $e->name;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
else {
|
|
Packit |
d0f5c2 |
$name = "";
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
warn "find_alias($class, $find)->name = $name";
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
return $Alias{$find};
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
sub define_alias {
|
|
Packit |
d0f5c2 |
while (@_) {
|
|
Packit |
d0f5c2 |
my $alias = shift;
|
|
Packit |
d0f5c2 |
my $name = shift;
|
|
Packit |
d0f5c2 |
unshift( @Alias, $alias => $name ) # newer one has precedence
|
|
Packit |
d0f5c2 |
if defined $alias;
|
|
Packit |
d0f5c2 |
if ( ref($alias) ) {
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# clear %Alias cache to allow overrides
|
|
Packit |
d0f5c2 |
my @a = keys %Alias;
|
|
Packit |
d0f5c2 |
for my $k (@a) {
|
|
Packit |
d0f5c2 |
if ( ref($alias) eq 'Regexp' && $k =~ $alias ) {
|
|
Packit |
d0f5c2 |
DEBUG and warn "delete \$Alias\{$k\}";
|
|
Packit |
d0f5c2 |
delete $Alias{$k};
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
elsif ( ref($alias) eq 'CODE' && $alias->($k) ) {
|
|
Packit |
d0f5c2 |
DEBUG and warn "delete \$Alias\{$k\}";
|
|
Packit |
d0f5c2 |
delete $Alias{$k};
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
elsif (defined $alias) {
|
|
Packit |
d0f5c2 |
DEBUG and warn "delete \$Alias\{$alias\}";
|
|
Packit |
d0f5c2 |
delete $Alias{$alias};
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
elsif (DEBUG) {
|
|
Packit |
d0f5c2 |
require Carp;
|
|
Packit |
d0f5c2 |
Carp::croak("undef \$alias");
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# HACK: Encode must be used after define_alias is declarated as Encode calls define_alias
|
|
Packit |
d0f5c2 |
use Encode ();
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# Allow latin-1 style names as well
|
|
Packit |
d0f5c2 |
# 0 1 2 3 4 5 6 7 8 9 10
|
|
Packit |
d0f5c2 |
our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# Allow winlatin1 style names as well
|
|
Packit |
d0f5c2 |
our %Winlatin2cp = (
|
|
Packit |
d0f5c2 |
'latin1' => 1252,
|
|
Packit |
d0f5c2 |
'latin2' => 1250,
|
|
Packit |
d0f5c2 |
'cyrillic' => 1251,
|
|
Packit |
d0f5c2 |
'greek' => 1253,
|
|
Packit |
d0f5c2 |
'turkish' => 1254,
|
|
Packit |
d0f5c2 |
'hebrew' => 1255,
|
|
Packit |
d0f5c2 |
'arabic' => 1256,
|
|
Packit |
d0f5c2 |
'baltic' => 1257,
|
|
Packit |
d0f5c2 |
'vietnamese' => 1258,
|
|
Packit |
d0f5c2 |
);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
init_aliases();
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
sub undef_aliases {
|
|
Packit |
d0f5c2 |
@Alias = ();
|
|
Packit |
d0f5c2 |
%Alias = ();
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
sub init_aliases {
|
|
Packit |
d0f5c2 |
undef_aliases();
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# Try all-lower-case version should all else fails
|
|
Packit |
d0f5c2 |
define_alias( qr/^(.*)$/ => '"\L$1"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# UTF/UCS stuff
|
|
Packit |
d0f5c2 |
define_alias( qr/^(unicode-1-1-)?UTF-?7$/i => '"UTF-7"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' );
|
|
Packit |
d0f5c2 |
define_alias(
|
|
Packit |
d0f5c2 |
qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"',
|
|
Packit |
d0f5c2 |
qr/^UCS-?4-?(BE|LE|)?$/i => 'uc("UTF-32$1")',
|
|
Packit |
d0f5c2 |
qr/^iso-10646-1$/i => '"UCS-2BE"'
|
|
Packit |
d0f5c2 |
);
|
|
Packit |
d0f5c2 |
define_alias(
|
|
Packit |
d0f5c2 |
qr/^UTF-?(16|32)-?BE$/i => '"UTF-$1BE"',
|
|
Packit |
d0f5c2 |
qr/^UTF-?(16|32)-?LE$/i => '"UTF-$1LE"',
|
|
Packit |
d0f5c2 |
qr/^UTF-?(16|32)$/i => '"UTF-$1"',
|
|
Packit |
d0f5c2 |
);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# ASCII
|
|
Packit |
d0f5c2 |
define_alias( qr/^(?:US-?)ascii$/i => '"ascii"' );
|
|
Packit |
d0f5c2 |
define_alias( 'C' => 'ascii' );
|
|
Packit |
d0f5c2 |
define_alias( qr/\b(?:ISO[-_]?)?646(?:[-_]?US)?$/i => '"ascii"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# Allow variants of iso-8859-1 etc.
|
|
Packit |
d0f5c2 |
define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# At least HP-UX has these.
|
|
Packit |
d0f5c2 |
define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# More HP stuff.
|
|
Packit |
d0f5c2 |
define_alias(
|
|
Packit |
d0f5c2 |
qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i =>
|
|
Packit |
d0f5c2 |
'"${1}8"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# The Official name of ASCII.
|
|
Packit |
d0f5c2 |
define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# This is a font issue, not an encoding issue.
|
|
Packit |
d0f5c2 |
# (The currency symbol of the Latin 1 upper half
|
|
Packit |
d0f5c2 |
# has been redefined as the euro symbol.)
|
|
Packit |
d0f5c2 |
define_alias( qr/^(.+)\@euro$/i => '"$1"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i =>
|
|
Packit |
d0f5c2 |
'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef'
|
|
Packit |
d0f5c2 |
);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
define_alias(
|
|
Packit |
d0f5c2 |
qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
|
|
Packit |
d0f5c2 |
hebrew|arabic|baltic|vietnamese)$/ix =>
|
|
Packit |
d0f5c2 |
'"cp" . $Encode::Alias::Winlatin2cp{lc($1)}'
|
|
Packit |
d0f5c2 |
);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# Common names for non-latin preferred MIME names
|
|
Packit |
d0f5c2 |
define_alias(
|
|
Packit |
d0f5c2 |
'ascii' => 'US-ascii',
|
|
Packit |
d0f5c2 |
'cyrillic' => 'iso-8859-5',
|
|
Packit |
d0f5c2 |
'arabic' => 'iso-8859-6',
|
|
Packit |
d0f5c2 |
'greek' => 'iso-8859-7',
|
|
Packit |
d0f5c2 |
'hebrew' => 'iso-8859-8',
|
|
Packit |
d0f5c2 |
'thai' => 'iso-8859-11',
|
|
Packit |
d0f5c2 |
);
|
|
Packit |
d0f5c2 |
# RT #20781
|
|
Packit |
d0f5c2 |
define_alias(qr/\btis-?620\b/i => '"iso-8859-11"');
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
|
|
Packit |
d0f5c2 |
# And Microsoft has their own naming (again, surprisingly).
|
|
Packit |
d0f5c2 |
# And windows-* is registered in IANA!
|
|
Packit |
d0f5c2 |
define_alias(
|
|
Packit |
d0f5c2 |
qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# Sometimes seen with a leading zero.
|
|
Packit |
d0f5c2 |
# define_alias( qr/\bcp037\b/i => '"cp37"');
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# Mac Mappings
|
|
Packit |
d0f5c2 |
# predefined in *.ucm; unneeded
|
|
Packit |
d0f5c2 |
# define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
|
|
Packit |
d0f5c2 |
define_alias( qr/^(?:x[_-])?mac[_-](.*)$/i => '"mac$1"' );
|
|
Packit |
d0f5c2 |
# http://rt.cpan.org/Ticket/Display.html?id=36326
|
|
Packit |
d0f5c2 |
define_alias( qr/^macintosh$/i => '"MacRoman"' );
|
|
Packit |
d0f5c2 |
# https://rt.cpan.org/Ticket/Display.html?id=78125
|
|
Packit |
d0f5c2 |
define_alias( qr/^macce$/i => '"MacCentralEurRoman"' );
|
|
Packit |
d0f5c2 |
# Ououououou. gone. They are different!
|
|
Packit |
d0f5c2 |
# define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# Standardize on the dashed versions.
|
|
Packit |
d0f5c2 |
define_alias( qr/\bkoi8[\s\-_]*([ru])$/i => '"koi8-$1"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
unless ($Encode::ON_EBCDIC) {
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# for Encode::CN
|
|
Packit |
d0f5c2 |
define_alias( qr/\beuc.*cn$/i => '"euc-cn"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/\bcn.*euc$/i => '"euc-cn"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
|
|
Packit |
d0f5c2 |
# CP936 doesn't have vendor-addon for GBK, so they're identical.
|
|
Packit |
d0f5c2 |
define_alias( qr/^gbk$/i => '"cp936"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# This fixes gb2312 vs. euc-cn confusion, practically
|
|
Packit |
d0f5c2 |
define_alias( qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# for Encode::JP
|
|
Packit |
d0f5c2 |
define_alias( qr/\bjis$/i => '"7bit-jis"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/\beuc.*jp$/i => '"euc-jp"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/\bjp.*euc$/i => '"euc-jp"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/\bujis$/i => '"euc-jp"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/\bshift.*jis$/i => '"shiftjis"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/\bsjis$/i => '"shiftjis"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/\bwindows-31j$/i => '"cp932"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# for Encode::KR
|
|
Packit |
d0f5c2 |
define_alias( qr/\beuc.*kr$/i => '"euc-kr"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/\bkr.*euc$/i => '"euc-kr"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# This fixes ksc5601 vs. euc-kr confusion, practically
|
|
Packit |
d0f5c2 |
define_alias( qr/(?:x-)?uhc$/i => '"cp949"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/\bks_c_5601-1987$/i => '"cp949"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# for Encode::TW
|
|
Packit |
d0f5c2 |
define_alias( qr/\bbig-?5$/i => '"big5-eten"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/\bbig5-?et(?:en)?$/i => '"big5-eten"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/\btca[-_]?big5$/i => '"big5-eten"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/\bbig5-?hk(?:scs)?$/i => '"big5-hkscs"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# https://github.com/dankogai/p5-encode/issues/37
|
|
Packit |
d0f5c2 |
define_alias(qr/cp65000/i => '"UTF-7"');
|
|
Packit |
d0f5c2 |
define_alias(qr/cp65001/i => '"utf-8-strict"');
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# utf8 is blessed :)
|
|
Packit |
d0f5c2 |
define_alias( qr/\bUTF-8$/i => '"utf-8-strict"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# At last, Map white space and _ to '-'
|
|
Packit |
d0f5c2 |
define_alias( qr/^([^\s_]+)[\s_]+([^\s_]*)$/i => '"$1-$2"' );
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
1;
|
|
Packit |
d0f5c2 |
__END__
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
|
|
Packit |
d0f5c2 |
# TODO: HP-UX '15' encodings japanese15 korean15 roi15
|
|
Packit |
d0f5c2 |
# TODO: Cyrillic encoding ISO-IR-111 (useful?)
|
|
Packit |
d0f5c2 |
# TODO: Armenian encoding ARMSCII-8
|
|
Packit |
d0f5c2 |
# TODO: Hebrew encoding ISO-8859-8-1
|
|
Packit |
d0f5c2 |
# TODO: Thai encoding TCVN
|
|
Packit |
d0f5c2 |
# TODO: Vietnamese encodings VPS
|
|
Packit |
d0f5c2 |
# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
|
|
Packit |
d0f5c2 |
# ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
|
|
Packit |
d0f5c2 |
# Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
|
|
Packit |
d0f5c2 |
# Kannada Khmer Korean Laotian Malayalam Mongolian
|
|
Packit |
d0f5c2 |
# Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 NAME
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Encode::Alias - alias definitions to encodings
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 SYNOPSIS
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
use Encode;
|
|
Packit |
d0f5c2 |
use Encode::Alias;
|
|
Packit |
d0f5c2 |
define_alias( "newName" => ENCODING);
|
|
Packit |
d0f5c2 |
define_alias( qr/.../ => ENCODING);
|
|
Packit |
d0f5c2 |
define_alias( sub { return ENCODING if ...; } );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 DESCRIPTION
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Allows newName to be used as an alias for ENCODING. ENCODING may be
|
|
Packit |
d0f5c2 |
either the name of an encoding or an encoding object (as described
|
|
Packit |
d0f5c2 |
in L<Encode>).
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Currently the first argument to define_alias() can be specified in the
|
|
Packit |
d0f5c2 |
following ways:
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=over 4
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item As a simple string.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item As a qr// compiled regular expression, e.g.:
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
In this case, if I<ENCODING> is not a reference, it is C<eval>-ed
|
|
Packit |
d0f5c2 |
in order to allow C<$1> etc. to be substituted. The example is one
|
|
Packit |
d0f5c2 |
way to alias names as used in X11 fonts to the MIME names for the
|
|
Packit |
d0f5c2 |
iso-8859-* family. Note the double quotes inside the single quotes.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
(or, you don't have to do this yourself because this example is predefined)
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
If you are using a regex here, you have to use the quotes as shown or
|
|
Packit |
d0f5c2 |
it won't work. Also note that regex handling is tricky even for the
|
|
Packit |
d0f5c2 |
experienced. Use this feature with caution.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=item As a code reference, e.g.:
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
define_alias( sub {shift =~ /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
The same effect as the example above in a different way. The coderef
|
|
Packit |
d0f5c2 |
takes the alias name as an argument and returns a canonical name on
|
|
Packit |
d0f5c2 |
success or undef if not. Note the second argument is ignored if provided.
|
|
Packit |
d0f5c2 |
Use this with even more caution than the regex version.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=back
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head3 Changes in code reference aliasing
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
As of Encode 1.87, the older form
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
no longer works.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Encode up to 1.86 internally used "local $_" to implement this older
|
|
Packit |
d0f5c2 |
form. But consider the code below;
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
use Encode;
|
|
Packit |
d0f5c2 |
$_ = "eeeee" ;
|
|
Packit |
d0f5c2 |
while (/(e)/g) {
|
|
Packit |
d0f5c2 |
my $utf = decode('aliased-encoding-name', $1);
|
|
Packit |
d0f5c2 |
print "position:",pos,"\n";
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Prior to Encode 1.86 this fails because of "local $_".
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head2 Alias overloading
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
You can override predefined aliases by simply applying define_alias().
|
|
Packit |
d0f5c2 |
The new alias is always evaluated first, and when necessary,
|
|
Packit |
d0f5c2 |
define_alias() flushes the internal cache to make the new definition
|
|
Packit |
d0f5c2 |
available.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# redirect SHIFT_JIS to MS/IBM Code Page 932, which is a
|
|
Packit |
d0f5c2 |
# superset of SHIFT_JIS
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
define_alias( qr/shift.*jis$/i => '"cp932"' );
|
|
Packit |
d0f5c2 |
define_alias( qr/sjis$/i => '"cp932"' );
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
If you want to zap all predefined aliases, you can use
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Encode::Alias->undef_aliases;
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
to do so. And
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Encode::Alias->init_aliases;
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
gets the factory settings back.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Note that define_alias() will not be able to override the canonical name
|
|
Packit |
d0f5c2 |
of encodings. Encodings are first looked up by canonical name before
|
|
Packit |
d0f5c2 |
potential aliases are tried.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 SEE ALSO
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
L<Encode>, L<Encode::Supported>
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=cut
|
|
Packit |
d0f5c2 |
|