From d0f5c2cd2ba05c065613df51eb570ee45b3c8a0f Mon Sep 17 00:00:00 2001 From: Packit Date: Sep 16 2020 11:50:08 +0000 Subject: perl-Encode-2.97 base --- diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..bdbf08d --- /dev/null +++ b/AUTHORS @@ -0,0 +1,66 @@ +# To give due honour to those who have made the Encode module what it +# is today, here are easily-from-changelogs-extractable people and their +# (hopefully) current and preferred email addresses (as of early 2002, +# if known). +# +# The use of this database for anything else than Encode and/or Perl +# development is strictly forbidden. (Passive distribution with the Perl +# source code kit or CPAN is, of course, allowed.) +# +# This list is in alphabetical order. +-- +Alex Davies +Andreas J. Koenig +Anton Tagunov +Autrijus Tang +Benjamin Goldberg +Bjoern Hoehrmann +Bjoern Jacke +Chris Nandor +Craig A. Berry +Curtis Jewell +Dan Kogai +Dave Evans +Deng Liu +Dominic Dunlop +Elizabeth Mattijsen +Gerrit P. Haase +Gisle Aas +Graham Barr +Gurusamy Sarathy +H.Merijn Brand +Hugo van der Sanden +Inaba Hiroto +Jarkko Hietaniemi +Jesse Vincent +Jungshik Shin +KONNO Hiroharu +Laszlo Molnar +MATSUNO Tokuhiro +MORIYAMA Masayuki +Makamaka +Mark-Jason Dominus +Mattia Barbon +Michael G Schwern +Miron Cuperman +Nicholas Clark +Nick Ing-Simmons +Paul Marquess +Peter Prymmer +Philip Newton +Piotr Fusik +Rafael Garcia-Suarez +Robin Barker +SADAHIRO Tomoyuki +SUGAWARA Hajime +SUZUKI Norio +Simon Cozens +Slaven Rezic +Spider Boardman +Steve Hay +Steve Peters +Tatsuhiko Miyagawa +Tels +Tony Cook +Vadim Konovalov +Yitzchak Scott-Thoennes diff --git a/Byte/Byte.pm b/Byte/Byte.pm new file mode 100644 index 0000000..d105aa2 --- /dev/null +++ b/Byte/Byte.pm @@ -0,0 +1,120 @@ +package Encode::Byte; +use strict; +use warnings; +use Encode; +our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . 
"%02d" x $#r, @r }; + +use XSLoader; +XSLoader::load( __PACKAGE__, $VERSION ); + +1; +__END__ + +=head1 NAME + +Encode::Byte - Single Byte Encodings + +=head1 SYNOPSIS + + use Encode qw/encode decode/; + $greek = encode("iso-8859-7", $utf8); # loads Encode::Byte implicitly + $utf8 = decode("iso-8859-7", $greek); # ditto + +=head1 ABSTRACT + +This module implements various single byte encodings. For most cases it uses +\x80-\xff (upper half) to map non-ASCII characters. Encodings +supported are as follows. + + Canonical Alias Description + -------------------------------------------------------------------- + # ISO 8859 series + (iso-8859-1 is in built-in) + iso-8859-2 latin2 [ISO] + iso-8859-3 latin3 [ISO] + iso-8859-4 latin4 [ISO] + iso-8859-5 [ISO] + iso-8859-6 [ISO] + iso-8859-7 [ISO] + iso-8859-8 [ISO] + iso-8859-9 latin5 [ISO] + iso-8859-10 latin6 [ISO] + iso-8859-11 + (iso-8859-12 is nonexistent) + iso-8859-13 latin7 [ISO] + iso-8859-14 latin8 [ISO] + iso-8859-15 latin9 [ISO] + iso-8859-16 latin10 [ISO] + + # Cyrillic + koi8-f + koi8-r cp878 [RFC1489] + koi8-u [RFC2319] + + # Vietnamese + viscii + + # all cp* are also available as ibm-*, ms-*, and windows-* + # also see L + + cp424 + cp437 + cp737 + cp775 + cp850 + cp852 + cp855 + cp856 + cp857 + cp860 + cp861 + cp862 + cp863 + cp864 + cp865 + cp866 + cp869 + cp874 + cp1006 + cp1250 WinLatin2 + cp1251 WinCyrillic + cp1252 WinLatin1 + cp1253 WinGreek + cp1254 WinTurkish + cp1255 WinHebrew + cp1256 WinArabic + cp1257 WinBaltic + cp1258 WinVietnamese + + # Macintosh + # Also see L + MacArabic + MacCentralEurRoman + MacCroatian + MacCyrillic + MacFarsi + MacGreek + MacHebrew + MacIcelandic + MacRoman + MacRomanian + MacRumanian + MacSami + MacThai + MacTurkish + MacUkrainian + + # More vendor encodings + AdobeStandardEncoding + nextstep + hp-roman8 + +=head1 DESCRIPTION + +To find how to use this module in detail, see L. 
+ +=head1 SEE ALSO + +L + +=cut diff --git a/Byte/Makefile.PL b/Byte/Makefile.PL new file mode 100644 index 0000000..6824bbb --- /dev/null +++ b/Byte/Makefile.PL @@ -0,0 +1,200 @@ +use 5.7.2; +use strict; +use ExtUtils::MakeMaker; +use File::Spec::Functions; + +my $name = 'Byte'; +my %tables = ( + byte_t => + [ + # misc. vendors + # 'gsm0338.ucm', now in Encode::GSM0338 + 'nextstep.ucm', + 'hp-roman8.ucm', + 'viscii.ucm', + 'adobeStdenc.ucm', + # koi8 + 'koi8-f.ucm', 'koi8-r.ucm', 'koi8-u.ucm', + # Mac + qw( + macArabic.ucm + macCentEuro.ucm + macCroatian.ucm + macCyrillic.ucm + macFarsi.ucm + macGreek.ucm + macHebrew.ucm + macIceland.ucm + macRoman.ucm + macROMnn.ucm + macRUMnn.ucm + macSami.ucm + macThai.ucm + macTurkish.ucm + macUkraine.ucm + ), + ], + ); + +my %not_here = + map {$_ => 1} +( + '8859-1.ucm', 'cp1252.ucm', # default + qw(cp037.ucm cp1026.ucm cp1047.ucm cp500.ucm cp875.ucm), # EBCDIC + qw(cp932.ucm cp936.ucm cp949.ucm cp950.ucm), # CJK + ); + +opendir(ENC,catdir(updir(),'ucm')) or die $!; +while (defined(my $file = readdir(ENC))) +{ + $file =~ /^(8859|cp).*\.ucm$/io or next; + $not_here{$file} and next; + push(@{$tables{byte_t}},$file); +} +closedir(ENC); + +WriteMakefile( + INC => "-I../Encode", + NAME => 'Encode::'.$name, + VERSION_FROM => "$name.pm", + OBJECT => '$(O_FILES)', + 'dist' => { + COMPRESS => 'gzip -9f', + SUFFIX => 'gz', + DIST_DEFAULT => 'all tardist', + }, + MAN3PODS => {}, + # OS 390 winges about line numbers > 64K ??? 
+ XSOPT => '-nolinenumbers', + ); + +package MY; + +sub post_initialize +{ + my ($self) = @_; + my %o; + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) + { + $o{$e.$x} = 1; + } + $o{"$name$x"} = 1; + $self->{'O_FILES'} = [sort keys %o]; + my @files = ("$name.xs"); + $self->{'C'} = ["$name.c"]; + $self->{SOURCE} .= " $name.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/; + $self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')]; + my %xs; + foreach my $table (sort keys %tables) { + push (@{$self->{'C'}},"$table.c"); + # Do NOT add $table.h etc. to H_FILES unless we own up as to how they + # get built. + foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) { + push (@files,$table.$ext); + } + $self->{SOURCE} .= " $table.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/; + } + $self->{'XS'} = { "$name.xs" => "$name.c" }; + $self->{'clean'}{'FILES'} .= join(' ',@files); + open(XS,">$name.xs") || die "Cannot open $name.xs:$!"; + print XS <<'END'; +#define PERL_NO_GET_CONTEXT +#include +#include +#include +#include "encode.h" +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.h"\n]; + } + print XS <<"END"; + +static void +Encode_XSEncoding(pTHX_ encode_t *enc) +{ + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *iv = newSViv(PTR2IV(enc)); + SV *sv = sv_bless(newRV_noinc(iv),stash); + int i = 0; + /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's + constness, in the hope that perl won't mess with it. 
*/ + assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0); + SvFLAGS(iv) |= SVp_POK; + SvPVX(iv) = (char*) enc->name[0]; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) + { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding",G_DISCARD); + SvREFCNT_dec(sv); +} + +MODULE = Encode::$name PACKAGE = Encode::$name +PROTOTYPES: DISABLE +BOOT: +{ +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.exh"\n]; + } + print XS "}\n"; + close(XS); + return "# Built $name.xs\n\n"; +} + +sub postamble +{ + my $self = shift; + my $dir = $self->catdir($self->updir,'ucm'); + my $str = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n"; + $str .= "$name.c : $name.xs "; + foreach my $table (sort keys %tables) + { + $str .= " $table.c"; + } + $str .= "\n\n"; + $str .= "$name\$(OBJ_EXT) : $name.c\n\n"; + + my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs'); + foreach my $table (sort keys %tables) + { + my $numlines = 1; + my $lengthsofar = length($str); + my $continuator = ''; + $str .= "$table.c : $enc2xs Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : ''; + $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform; + my $ucopts = '-"Q" -"O"'; + $str .= + qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n}; + open (FILELIST, ">$table.fnm") + || die "Could not open $table.fnm: $!"; + foreach my $file (@{$tables{$table}}) + { + print FILELIST $self->catfile($dir,$file) . 
"\n"; + } + close(FILELIST); + } + return $str; +} + diff --git a/CN/CN.pm b/CN/CN.pm new file mode 100644 index 0000000..830f345 --- /dev/null +++ b/CN/CN.pm @@ -0,0 +1,74 @@ +package Encode::CN; +BEGIN { + if ( ord("A") == 193 ) { + die "Encode::CN not supported on EBCDIC\n"; + } +} +use strict; +use warnings; +use Encode; +our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +use XSLoader; +XSLoader::load( __PACKAGE__, $VERSION ); + +# Relocated from Encode.pm + +use Encode::CN::HZ; + +# use Encode::CN::2022_CN; + +1; +__END__ + +=head1 NAME + +Encode::CN - China-based Chinese Encodings + +=head1 SYNOPSIS + + use Encode qw/encode decode/; + $euc_cn = encode("euc-cn", $utf8); # loads Encode::CN implicitly + $utf8 = decode("euc-cn", $euc_cn); # ditto + +=head1 DESCRIPTION + +This module implements China-based Chinese charset encodings. +Encodings supported are as follows. + + Canonical Alias Description + -------------------------------------------------------------------- + euc-cn /\beuc.*cn$/i EUC (Extended Unix Character) + /\bcn.*euc$/i + /\bGB[-_ ]?2312(?:\D.*$|$)/i (see below) + gb2312-raw The raw (low-bit) GB2312 character map + gb12345-raw Traditional chinese counterpart to + GB2312 (raw) + iso-ir-165 GB2312 + GB6345 + GB8565 + additions + MacChineseSimp GB2312 + Apple Additions + cp936 Code Page 936, also known as GBK + (Extended GuoBiao) + hz 7-bit escaped GB2312 encoding + -------------------------------------------------------------------- + +To find how to use this module in detail, see L. + +=head1 NOTES + +Due to size concerns, C (an extension to C) is distributed +separately on CPAN, under the name L. That module +also contains extra Taiwan-based encodings. + +=head1 BUGS + +When you see C on mails and web pages, they really +mean C encodings. To fix that, C is aliased to C. +Use C when you really mean it. 
+ +The ASCII region (0x00-0x7f) is preserved for all encodings, even though +this conflicts with mappings by the Unicode Consortium. + +=head1 SEE ALSO + +L + +=cut diff --git a/CN/Makefile.PL b/CN/Makefile.PL new file mode 100644 index 0000000..094f016 --- /dev/null +++ b/CN/Makefile.PL @@ -0,0 +1,176 @@ +use 5.7.2; +use strict; +use ExtUtils::MakeMaker; +use strict; + +my %tables = (euc_cn_t => ['euc-cn.ucm', + 'cp936.ucm', + 'macChinsimp.ucm', + ], + '2312_t' => ['gb2312.ucm'], + '12345_t' => ['gb12345.ucm'], + ir_165_t => ['ir-165.ucm'], + ); + +unless ($ENV{AGGREGATE_TABLES}){ + my @ucm; + for my $k (keys %tables){ + push @ucm, @{$tables{$k}}; + } + %tables = (); + my $seq = 0; + for my $ucm (sort @ucm){ + # 8.3 compliance ! + my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++); + $tables{$t} = [ $ucm ]; + } +} + +my $name = 'CN'; + +WriteMakefile( + INC => "-I../Encode", + NAME => 'Encode::'.$name, + VERSION_FROM => "$name.pm", + OBJECT => '$(O_FILES)', + 'dist' => { + COMPRESS => 'gzip -9f', + SUFFIX => 'gz', + DIST_DEFAULT => 'all tardist', + }, + MAN3PODS => {}, + # OS 390 winges about line numbers > 64K ??? + XSOPT => '-nolinenumbers', + XSPROTOARG => '-noprototypes', + ); + +package MY; + +sub post_initialize +{ + my ($self) = @_; + my %o; + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) + { + $o{$e.$x} = 1; + } + $o{"$name$x"} = 1; + $self->{'O_FILES'} = [sort keys %o]; + my @files = ("$name.xs"); + $self->{'C'} = ["$name.c"]; + $self->{SOURCE} .= " $name.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/; + $self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')]; + my %xs; + foreach my $table (sort keys %tables) { + push (@{$self->{'C'}},"$table.c"); + # Do NOT add $table.h etc. to H_FILES unless we own up as to how they + # get built. 
+ foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) { + push (@files,$table.$ext); + } + $self->{SOURCE} .= " $table.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/; + } + $self->{'XS'} = { "$name.xs" => "$name.c" }; + $self->{'clean'}{'FILES'} .= join(' ',@files); + open(XS,">$name.xs") || die "Cannot open $name.xs:$!"; + print XS <<'END'; +#define PERL_NO_GET_CONTEXT +#include +#include +#include +#include "encode.h" +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.h"\n]; + } + print XS <<"END"; + +static void +Encode_XSEncoding(pTHX_ encode_t *enc) +{ + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *iv = newSViv(PTR2IV(enc)); + SV *sv = sv_bless(newRV_noinc(iv),stash); + int i = 0; + /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's + constness, in the hope that perl won't mess with it. */ + assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0); + SvFLAGS(iv) |= SVp_POK; + SvPVX(iv) = (char*) enc->name[0]; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) + { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding",G_DISCARD); + SvREFCNT_dec(sv); +} + +MODULE = Encode::$name PACKAGE = Encode::$name +PROTOTYPES: DISABLE +BOOT: +{ +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.exh"\n]; + } + print XS "}\n"; + close(XS); + return "# Built $name.xs\n\n"; +} + +sub postamble +{ + my $self = shift; + my $dir = $self->catdir($self->updir,'ucm'); + my $str = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n"; + $str .= "$name.c : $name.xs "; + foreach my $table (sort keys %tables) + { + $str .= " $table.c"; + } + $str .= "\n\n"; + $str .= "$name\$(OBJ_EXT) : $name.c\n\n"; + + my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs'); + foreach my $table (sort keys %tables) + { + my $numlines = 1; + my $lengthsofar = length($str); + my 
$continuator = ''; + $str .= "$table.c : $enc2xs Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : ''; + $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform; + my $ucopts = '-"Q"'; + $str .= + qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n}; + open (FILELIST, ">$table.fnm") + || die "Could not open $table.fnm: $!"; + foreach my $file (@{$tables{$table}}) + { + print FILELIST $self->catfile($dir,$file) . "\n"; + } + close(FILELIST); + } + return $str; +} + diff --git a/Changes b/Changes new file mode 100644 index 0000000..9de550c --- /dev/null +++ b/Changes @@ -0,0 +1,2962 @@ +# Revision history for Perl extension Encode. +# +# $Id: Changes,v 2.97 2018/02/21 12:14:33 dankogai Exp dankogai $ +# +$Revision: 2.97 $ $Date: 2018/02/21 12:14:33 $ +! Encode.xs + Pulled: New perls that fixes + https://github.com/dankogai/p5-encode/issues/129 + https://rt.cpan.org/Ticket/Display.html?id=124399 + https://github.com/dankogai/p5-encode/pull/130 + +2.96 2018/02/11 05:35:26 +! Encode.pm encoding.pm Unicode/Unicode.pm + VERSION++ to make bleadperl happy + + +2.95 2018/02/08 00:26:15 +! Encode.pm Encode.xs Encode/encode.h Unicode/Unicode.pm + Unicode/Unicode.xs encengine.c + Pulled: new perls + https://github.com/dankogai/p5-encode/pull/128 + +2.94 2018/01/09 05:53:00 +! lib/Encode/Alias.pm + Fixed: deep recursion in Encode::find_encoding when decoding + bad MIME header + https://github.com/dankogai/p5-encode/pull/127 +! 
Encode.pm + Pulled: Include more information about Encode::is_utf8() that it + should not be normally used + https://github.com/dankogai/p5-encode/pull/126 + Pulled: Remove misleading documentation about UTF8 flag + https://github.com/dankogai/p5-encode/pull/125 + +2.93 2017/10/06 22:21:53 +! lib/Encode/MIME/Name.pm t/mime-name.t + Pulled: Add "euc-cn" => "EUC-CN" alias to Encode::MIME::Name + https://github.com/dankogai/p5-encode/pull/124 +! encoding.pm + Pulled: Propagate fatal errors from the encoding pragma back to the caller + Resolves rt #100427 + https://github.com/dankogai/p5-encode/pull/123 + https://rt.cpan.org/Ticket/Display.html?id=100427 +! lib/Encode/CN/HZ.pm lib/Encode/JP/JIS7.pm lib/Encode/MIME/Header.pm + t/decode.t + Pulled: Uninitialized value fixes #122 + https://github.com/dankogai/p5-encode/pull/122 +! Makefile.PL + Pulled: Fix -Werror=declaration-after-statement for gcc 4.1.2 + https://github.com/dankogai/p5-encode/pull/121 + +2.92 2017/07/18 07:15:29 +! Encode.pm MANIFEST lib/Encode/Alias.pm ++ t/use-Encode-Alias.t + Pulled: Fix loading Encode::Alias before Encode + https://github.com/dankogai/p5-encode/pull/118 +! Makefile.PL + Pulled: Fix gccversion Argument "630 20170516" isn't numeric + https://github.com/dankogai/p5-encode/pull/118 +! lib/Encode/MIME/Header.pm t/mime-header.t + Pulled: Encode::MIME::Header: Fix parsing quoted-printable text + in strict mode + https://github.com/dankogai/p5-encode/pull/115 +! Encode.pm + use define_encoding() instead of tweaking $Encode::Encoding{utf8}. + https://github.com/dankogai/p5-encode/commit/208d094b8cf82da488495400ea9a518841fd007a#commitcomment-22698036 + +2.91 2017/06/22 08:11:05 +! Encode.pm + Addressed: RT#122167: use parent q{Encode::Encoding}; fails: + Can't locate object + https://rt.cpan.org/Ticket/Display.html?id=122167 +! Makefile.PL + Pulled: fix gcc warnings for older gcc < 4.0 + https://github.com/dankogai/p5-encode/pull/114 + +2.90 2017/06/10 17:23:50 +! 
Makefile.PL + Pulled: Include all contributors into META + https://github.com/dankogai/p5-encode/pull/111 +! bin/enc2xs bin/ucmlint encoding.pm + lib/Encode/Encoding.pm lib/Encode/GSM0338.pm t/CJKT.t + Pulled: Where possible do not depend on value of $@, + instead use return value of eval + https://github.com/dankogai/p5-encode/pull/110 +! Encode.xs + Pulled: Fix more XS problems in Encode.xs file + https://github.com/dankogai/p5-encode/pull/109 +! encoding.pm lib/Encode/Encoding.pm t/guess.t + Pulled: Small fixes + https://github.com/dankogai/p5-encode/pull/108 +! Encode.pm Makefile.PL + Pulled: Load modules Encode::MIME::Name and Storable normally + https://github.com/dankogai/p5-encode/pull/107 +! Unicode/Unicode.pm lib/Encode/Alias.pm lib/Encode/Encoding.pm + lib/Encode/Unicode/UTF7.pm + Pulled: Remove no warnings 'redefine'; and correctly loaddependences + https://github.com/dankogai/p5-encode/pull/106 +! Encode.pm Encode.xs Unicode/Unicode.pm Unicode/Unicode.xs + Pulled: Remove PP stubs and reformat predefine_encodings() + https://github.com/dankogai/p5-encode/pull/104 +! Encode.pm Encode.xs + Pulled: Run Encode XS BOOT code at compile time + https://github.com/dankogai/p5-encode/pull/103 +! Encode.pm Unicode/Unicode.pm lib/Encode/Encoding.pm + lib/Encode/Guess.pm lib/Encode/JP/JIS7.pm lib/Encode/MIME/Header.pm + lib/Encode/MIME/Header/ISO_2022_JP.pm + Pulled: Use Encode::define_encoding and propagate carp/croak message + https://github.com/dankogai/p5-encode/pull/102 +! t/truncated_utf8.t t/utf8messages.t + Pulled: Fixes for older perl versions + https://github.com/dankogai/p5-encode/pull/101 +! Encode.xs encoding.pm t/enc_eucjp.t t/enc_utf8.t + Pulled: cperl fixes: encoding undeprecated, no strict hashpairs + https://github.com/dankogai/p5-encode/pull/100 +! MANIFEST + Pulled: Add missing tests into MANIFEST file + https://github.com/dankogai/p5-encode/pull/99 +! 
Encode.xs t/fallback.t + Pulled: Cleanup code for handling fallback/replacement characters + https://github.com/dankogai/p5-encode/pull/98 + +2.89 2017/04/21 05:20:14 +! Encode.pm Encode.xs MANIFEST t/enc_eucjp.t t/enc_utf8.t ++ t/utf8messages.t + Pulled: Fixes for Encode::utf8 + https://github.com/dankogai/p5-encode/pull/97 +! Encode.pm + Pulled: Fix documentation about CHECK coderef + https://github.com/dankogai/p5-encode/pull/96 +! Encode.xs + Pulled: For efficiency use newSVpvn() instead of newSVpv() + in do_fallback_cb() + https://github.com/dankogai/p5-encode/pull/95 +! Encode.xs + Pulled Call Encode callback function with integer argument correctly + https://github.com/dankogai/p5-encode/pull/94 +! lib/Encode/CN/HZ.pm lib/Encode/GSM0338.pm lib/Encode/JP/JIS7.pm + lib/Encode/KR/2022_KR.pm lib/Encode/MIME/Header.pm + lib/Encode/MIME/Header/ISO_2022_JP.pm lib/Encode/Unicode/UTF7.pm + t/undef.t + Pulled: Fix all Encode modules so their encode(undef) and decode(undef) + calls returns undef + https://github.com/dankogai/p5-encode/pull/93 ++ t/whatwg-aliases.json t/whatwg-aliases.t + Pulled: New (failing) tests for aliases defined in WHATWG Encoding spec #92 + https://github.com/dankogai/p5-encode/pull/92 +! Encode.pm + Pulled: Update documentation for UTF-8 + https://github.com/dankogai/p5-encode/pull/91 +! Encode.xs t/truncated_utf8.t + Pulled: Consume correct number of bytes on malformed +! Encode.pm Unicode/Unicode.pm + Pulled: document str2bytes and bytes2str + https://github.com/dankogai/p5-encode/pull/86 +! Encode.xs t/fallback.t t/truncated_utf8.t + Pulled: Fix appending correct number of Unicode replacement characters + https://github.com/dankogai/p5-encode/pull/84 + +2.88 2016/11/29 23:29:23 +! t/taint.t + Pulled: Fix test t/taint.t to pass when Encode::ConfigLocal is present + https://github.com/dankogai/p5-encode/pull/83 +! 
Makefile.PL Unicode/Makefile.PL bin/enc2xs lib/Encode/Alias.pm + t/Aliases.t t/enc_data.t t/enc_module.t t/encoding.t t/jperl.t + Pulled: various fixes + https://github.com/dankogai/p5-encode/pull/82 +! t/mime-header.t + Pulled: Fix test t/mime-header.t to pass on HP-UX 11.23/64 U + with perl v5.8.3 + https://github.com/dankogai/p5-encode/pull/81 +! t/Encode.t + Pulled: Extend COW tests for UTF-8 and Latin1 + https://github.com/dankogai/p5-encode/pull/80 +! Encode.xs Unicode/Unicode.xs + Pulled: Rmv impediment to compiling under C++11 + https://github.com/dankogai/p5-encode/pull/78 +! Encode.xs Unicode/Unicode.xs + Pulled: Do not use expressions in macros SvTRUE, SvPV, SvIV, + attr and attr_true + https://github.com/dankogai/p5-encode/pull/77 +! Unicode/Unicode.xs t/magic.t + Pulled: Fix handling of undef, COW and magic scalar argument + in Unicode.xs + https://github.com/dankogai/p5-encode/pull/76 +! Encode.xs encoding.pm + Fix 2 of 3 problems Steve Hay found. + 1. C89 compiler failures (patch attached). + 2. encoding.pm has changed slightly but has no $VERSION++ + Message-Id: + +2.87 2016/10/28 05:03:52 +! Encode.xs t/taint.t + Pulled: Disable _utf8_on and _utf8_off for tainted values + https://github.com/dankogai/p5-encode/pull/74 +! Encode.xs MANIFEST t/rt65541.t t/rt76824.t t/rt86327.t + Pulled: Fix crash 'panic: sv_setpvn called with negative strlen' + https://github.com/dankogai/p5-encode/pull/73 +! Encode.xs MANIFEST t/rt113164.t + Pulled: Fix crash caused by undefined behaviour between + two sequence points + https://github.com/dankogai/p5-encode/pull/72 +! Encode.xs MANIFEST lib/Encode/CN/HZ.pm lib/Encode/Encoder.pm + t/decode.t t/magic.t t/rt85489.t t/utf8ref.t + Pulled: Fix handling of undef, ref, typeglob, UTF8, COW and magic + scalar argument in all XS functions + https://github.com/dankogai/p5-encode/pull/70 +! 
Encode/_T.e2x t/at-cn.t t/at-tw.t t/enc_data.t t/enc_module.t + t/encoding-locale.t t/encoding.t t/jperl.t t/mime-name.t t/undef.t + Pulled: Fix unit tests + https://github.com/dankogai/p5-encode/pull/69 +! Encode.pm lib/Encode/MIME/Header.pm lib/Encode/MIME/Name.pm + t/mime-header.t t/mime-name.t t/taint.t + Pulled: Encode::MIME::Header clean up + https://github.com/dankogai/p5-encode/pull/68 +! Encode.xs + Pulled: Generate CHECK value functions with newCONSTSUB() + instead with direct XS + https://github.com/dankogai/p5-encode/pull/67 +! Encode.xs + Pulled: Encode::utf8: Fix count of replacement characters + for overflowed and overlong UTF-8 sequences + https://github.com/dankogai/p5-encode/pull/65 +! Encode.xs t/fallback.t t/utf8strict.t + Pulled: Encode::utf8: Fix processing invalid UTF-8 subsequences + https://github.com/dankogai/p5-encode/pull/63 +! Encode.pm t/utf8ref.t + Pulled: Fix return value of Encode::encode_utf8(undef) + https://rt.cpan.org/Ticket/Display.html?id=116904 + https://github.com/dankogai/p5-encode/pull/62 + +2.86 2016/08/10 18:08:45 +! encoding.pm t/enc_data.t t/enc_eucjp.t t/enc_module.t t/enc_utf8.t + t/encoding.t t/jperl.t + Fixed: #116196: [PATCH] Synchronize encoding.pm with blead + https://rt.cpan.org/Ticket/Display.html?id=116196 +! Byte/Makefile.PL + Patched: #111421: Won't build with statically built perls + https://rt.cpan.org/Public/Bug/Display.html?id=111421 +! Encode.xs encoding.pm + Pulled: Fixes for 5.8.x compilation failures + https://github.com/dankogai/p5-encode/pull/60 +! Encode.xs + Patched: RT#116817 [PATCH] Avoid a C++ comment + https://rt.cpan.org/Ticket/Display.html?id=116817 + +2.85 2016/08/04 03:15:58 +! Encode.pm bin/enc2xs bin/encguess bin/piconv bin/ucmlint bin/unidump + Pulled: CVE-2016-1238: avoid loading optional modules from . + https://github.com/dankogai/p5-encode/pull/58 +! 
Encode.pm t/utf8warnings.t + Pulled: Rethrow 'utf8' warnings in from_to as well #57 + https://github.com/dankogai/p5-encode/pull/57 +! Encode.xs + Pulled and fixed: + Encode::utf8: Performance optimization for strict UTF-8 encoder #56 + https://github.com/dankogai/p5-encode/pull/56 +! t/Encode.t + s/use Test/use Test::More/ +! t/Encode.t t/decode.t + Skip tests that pass typeglobs to decode if perl < v5.16 +! Encode.xs t/cow.t + Patched: #115540 (from_to affecting COW strings) + https://rt.cpan.org/Ticket/Display.html?id=115540 +! Encode.xs t/Encode.t t/decode.t + Merged: RT#115168: + [PATCH] Passing regex globals to decode() results in wrong result + https://rt.cpan.org/Ticket/Display.html?id=115168 +! Makefile.pl + Pulled: t/encoding-locale.t fails with Test::More@0.80 or before. + https://github.com/dankogai/p5-encode/pull/55 +! Encode.pm + Pulled: In-place modifications made explicit in docs for encode(), + decode() and decode_utf8() + https://github.com/dankogai/p5-encode/pull/54 + +2.84 2016/04/11 07:17:02 +! lib/Encode/MIME/Header.pm + Pulled: Encode::MIME::Header: + Update description that this module is only for unstructured header + https://github.com/dankogai/p5-encode/pull/53 +! lib/Encode/MIME/Header.pm t/mime-header.t + Pulled: Encode::MIME::Header: Fix valid_q_chars, '-' needs to be escaped + https://github.com/dankogai/p5-encode/pull/52 + +2.83 2016/03/24 07:49:54 +! lib/Encode/MIME/Header.pm t/mime-header.t + Both decoder and encoder are rewritten by Pali Rohár. + Encoder should be now fully compliant of RFC 2047. + Decoder is less strict to be able to decode + strings generated by old versions of this module. + https://github.com/dankogai/p5-encode/pull/51 +! t/mime-header.t + Add more test vectors from RFC2047, pp.11-12 +! lib/Encode/Supported.pod + merge: Autrijus -> Audrey + https://github.com/dankogai/p5-encode/pull/50 + +2.82 2016/02/06 20:17:24 +! 
lib/Encode/MIME/Header.pm + lib/Encode/MIME/Header/ISO_2022_JP.pm + t/mime-header.t + Reverted to 2.80 upon the request of whom submitted pull/48 + +2.81 2016/02/06 19:25:22 +! lib/Encode/MIME/Header.pm + lib/Encode/MIME/Header/ISO_2022_JP.pm + t/mime-header.t + Merged: Encode::MIME::Header: Fix decoder and rewrite encoder + > Encoder should be now fully compliant of RFC 2047. + > Decoder is less strict to be able to decode strings + > generated by old versions of this module. + https://github.com/dankogai/p5-encode/pull/48 + ! t/mime-header.t + merge t/mime-header.t @ https://github.com/asjo/p5-encode + https://github.com/asjo/p5-encode/commit/19dcbff63e71909ffda7c151a73c5baaffe2976c + ! t/mime-header.t + Add more test vectors from RFC2047, pp.11-12 + +2.80 2016/01/25 14:54:13 +! lib/Encode/MIME/Header.pm t/mime-header.t + Address #111417: 2.79 breaks Email-MIME-1.936 tests + https://rt.cpan.org/Ticket/Display.html?id=111417 + +2.79 2016/01/22 06:44:53 +! lib/Encode/MIME/Header.pm t/mime-header.t + Address: #88717: + encode('MIME-Header') does not find word boundaries correctly + By addressing this age-old bug, many other open RTs will be closed. + https://rt.cpan.org/Ticket/Display.html?id=88717 +! lib/Encode/MIME/Header.pm + Address RT#107775: Inserts an empty line in an encoded header field + https://rt.cpan.org/Ticket/Display.html?id=107775 +! lib/Encode/Alias.pm + Pulled: Update Alias.pm + https://github.com/dankogai/p5-encode/pull/47 +! Encode.xs Unicode/Unicode.xs + Pulled: static funcs in Encode.xs and Unicode.xs + https://github.com/dankogai/p5-encode/pull/46 +! Unicode/Unicode.pm + Pulled: Unicode.pm: Fix POD error + https://github.com/dankogai/p5-encode/pull/45 +- META.yml +! MANIFEST + META.yml should not be included in the dist file. + It is also obsolete. + +2.78 2015/09/24 02:19:21 +! Makefile.PL + Mend pull/42 again. This time correctly. +! 
lib/Encode/Supported.pod + Applied: RT#107146: [PATCH] fix a spelling mistake + https://rt.cpan.org/Public/Bug/Display.html?id=107146 + +2.77 2015/09/15 13:53:27 +! Unicode/Unicode.xs Unicode/Unicode.pm + Address RT#107043: If no BOM is found, the routine dies. + When you decode from UTF-(16|32) without -BE or LE without BOM, + Encode now assumes BE accordingly to RFC2781 and the Unicode + Standard version 8.0 + https://rt.cpan.org/Public/Bug/Display.html?id=107043 +! Makefile.PL encoding.t + Mend pull/42 +! Encode.xs Makefile.PL encoding.pm encoding.t + Pulled: precompile 1252 table as that is now the Pod::Simple default + https://github.com/dankogai/p5-encode/pull/42 + +2.76 2015/07/31 02:18:28 +! ucm/koi8-u.ucm + Pulled: Fix 0x95 + https://github.com/dankogai/p5-encode/pull/41 + +2.75 2015/06/30 09:59:53 +! Unicode/Unicode.pm Unicode/Unicode.xs encoding.pm + VERSION++'ed to make bleadperl happy + Message-Id: + +2.74 2015/06/25 +! Unicode/Unicode.xs + Applied: #101486: [PATCH] reduce compiler warnings and stderr noise (again) + https://rt.cpan.org/Ticket/Display.html?id=101486 +! bin/enc2xs + Applied patch: #105471: make Encode build with -pedantic + https://rt.cpan.org/Ticket/Display.html?id=105471 +! Byte/Makefile.PL + CN/Makefile.PL + EBCDIC/Makefile.PL + JP/Makefile.PL + KR/Makefile.PL + Makefile.PL + Symbol/Makefile.PL + TW/Makefile.PL + Applied patch: #102826: non-deterministic Makefiles + https://rt.cpan.org/Ticket/Display.html?id=102826 + +2.73 2015/04/15 23:14:01 +! lib/Encode/MIME/Header.pm + Addressed: RT#104422 + decode('MIME-header') does not properly join similar Q encoded-words + https://rt.cpan.org/Ticket/Display.html?id=104422 + +2.73 2015/04/15 23:14:01 +! MANIFEST ++ t/isa.t +! Encode.pm + Addressed RT#103253: Encode::XS does not inherit from Encode::Encoding + https://rt.cpan.org/Public/Bug/Display.html?id=103253 +! encoding.pm ++ t/encoding-locale.t + Pulled: Rewrite of encoding::_get_locale_encoding for more portability #40 +! 
encoding.pm + Pulled: encoding.pm: more inlining #39 + https://github.com/dankogai/p5-encode/pull/39 + +2.72 2015/03/14 02:44:39 +! encoding.pm + Copied from bleadperl to be in sync with it again. + http://www.nntp.perl.org/group/perl.perl5.porters/2015/03/msg226576.html + +2.71 2015/03/12 00:03:52 +! encoding.pm + Pulled: Don't fail 'no encoding' on EBCDIC + https://github.com/dankogai/p5-encode/pull/38 +! lib/Encode/Alias.pm t/Aliases.t + Add cp65000 => UTF-7 and cp65001 => utf-8-strict + https://github.com/dankogai/p5-encode/issues/37 +! encoding.pm + Sync w/ bleadperl + https://github.com/dankogai/p5-encode/pull/36 +! bin/encguess + Pulled: show encguess example per #33 + https://github.com/dankogai/p5-encode/pull/34 + +2.70 2015/02/05 10:53:00 +! Makefile.PL + add bin/encguess to EXE_FILES + +2.69 2015/02/05 10:35:11 +! bin/encguess + Refactored so that + * does not depend on non-core module (File::Slurp in particular) + * PODified document + * -s "encA encB" to -s encA,encB which is more shell-friendly + * and more +! MANIFEST ++ bin/encguess + Pulled: Added CLI wrapper for Encode::Guess + https://github.com/dankogai/p5-encode/pull/32 +! Unicode/Unicode.pm + Pulled: Bump $VERSION in module changed since Encode-2.60 + https://github.com/dankogai/p5-encode/pull/31 + +2.68 2015/01/22 10:17:32 +! Pulled: Fix C++ build on Windows with VC++ + https://github.com/dankogai/p5-encode/pull/30 + https://rt.cpan.org/Public/Bug/Display.html?id=82897 +! lib/Encode/MIME/Header.pm t/taint.t + Pulled: maintain taint flag when encoding MIME on old perl + https://github.com/dankogai/p5-encode/pull/29 +! Encode.pm + POD fixes + https://github.com/dankogai/p5-encode/pull/27 +! bin/enc2xs + Addressed: RT#101345: [PATCH] reduce compiler warnings and stderr noise + enc2xs no longer emits verbose messages to STDERR + unless -v switch or $ENV{ENC2XS_VERBOSE} is set. + https://rt.cpan.org/Public/Bug/Display.html?id=101345 + +2.67 2014/12/04 20:13:00 +! 
t/taint.t + Now skips nonexistent methods like Encode::Detect->encode() should + that be installed. This resolves RT#100105. + https://rt.cpan.org/Ticket/Display.html?id=100105 + +2.66 2014/12/02 23:30:34 $ +! bin/enc2xs + Resolved RT#100656: enc2xs -C fails if URL::Encode::XS is installed + https://rt.cpan.org/Ticket/Display.html?id=100656 + +2.65 2014/11/27 14:08:33 +! Changes Encode.xs bin/enc2xs + Applied 3 patches from jhi: + 0001-For-C-don-t-use-the-array-size-in-forward-declaratiotion + 0002-Unused-variables + 0003-1-needs-casting-to-STRLEN + Message-Id: <54753674.6070909@iki.fi> + +2.64 2014/10/29 15:37:54 +! t/utf8warnings.t MANIFEST + Retouch pull #26 so it works with perl < 5.14 +! Encode.pm ++ t/utf8warnings.t + Pulled: Catch and re-issue utf8 warnings at a higher level + https://github.com/dankogai/p5-encode/pull/26 +! Encode.xs + Pulled: Validate continuations in the incremental UTF-X decoder + https://github.com/dankogai/p5-encode/pull/25 + +2.63 2014/10/19 07:01:15 +! Encode.xs + Applied: RT #99264: call_pv() can reallocate the stack + https://rt.cpan.org/Ticket/Display.html?id=99264 +! Byte/Makefile.PL CN/Makefile.PL EBCDIC/Makefile.PL Encode.xs + JP/Makefile.PL KR/Makefile.PL Symbol/Makefile.PL TW/Makefile.PL + bin/enc2xs encengine.c + Pulled: add PERL_NO_GET_CONTEXT to all dynamic libs + https://github.com/dankogai/p5-encode/pull/24 + +2.62 2014/05/31 12:12:39 +! Encode.pm + s/2013/2014/ on COPYRIGHT section +! Byte/Makefile.PL + CN/Makefile.PL + EBCDIC/Makefile.PL + Encode/Makefile_PL.e2x + Encode.xs + JP/Makefile.PL + KR/Makefile.PL + Symbol/Makefile.PL + TW/Makefile.PL + bin/enc2xs + Merged from perl.git: "Fix Encode 2.60 with g++" + http://perl5.git.perl.org/perl.git/commit/89c2544cd3 + +2.61 2014/05/31 09:48:48 +! bin/piconv + Applied: piconv nit + + Better error handling when the encoding name is nonexistent + Message-Id: <537139A0.1000503@iki.fi> +! 
Encode.xs + Applied: RT #95466: + fallback definition of SvIsCOW() is wrong + (and hence breaks on 5.8.2 and earlier) + https://rt.cpan.org/Ticket/Display.html?id=95466 + +2.60 2014/04/29 16:25:06 +! Byte/Makefile.PL + CN/Makefile.PL + EBCDIC/Makefile.PL + Encode/Makefile_PL.e2x + Encode/encode.h + JP/Makefile.PL + KR/Makefile.PL + Symbol/Makefile.PL + TW/Makefile.PL + bin/enc2xs + encengine.c + Applied: more Fix Windows build (of Encode) with VC++ 6.0 + http://perl5.git.perl.org/perl.git/commit/9e9002efd1609c7d154f98af43a026320df7582c +! Unicode/Unicode.xs + Addressed: sign extension issue found by Coverity #21 + https://github.com/dankogai/p5-encode/issues/21 +! Encode/encode.h Encode.xs Unicode/Unicode.xs + removed #define U8 U8 + https://rt.perl.org/Ticket/Display.html?id=121554 + http://perl5.git.perl.org/perl.git/commit/2f2b4ff2c154a8e461857f2e82cb815c238d0d94 + +2.59 2014/04/06 17:23:55 +! Byte/Makefile.PL + CN/Makefile.PL + EBCDIC/Makefile.PL + Encode.pm + Encode.xs + Encode/Makefile_PL.e2x + JP/Makefile.PL + KR/Makefile.PL + Symbol/Makefile.PL + TW/Makefile.PL + bin/enc2xs + Restored the signature of Encode_XSEncoding() to address RT#94478 + * While https://github.com/dankogai/p5-encode/pull/20 + pulls the symnames via argument thus breaks the compatibility + with Encode::XX modules with *.ucm, the restored version + pulls the symnames via enc->name[0] so the added 2nd argument + is no longer needed. + https://rt.cpan.org/Public/Bug/Display.html?id=94478 + +2.58 2014/03/28 02:37:42 +! bin/piconv + Addressed: piconv bug of decoding UTF-16 (with fix) + https://github.com/dankogai/p5-encode/issues/19 +! Byte/Makefile.PL + CN/Makefile.PL + EBCDIC/Makefile.PL + Encode.pm + Encode.xs + Encode/Makefile_PL.e2x + JP/Makefile.PL + KR/Makefile.PL + Symbol/Makefile.PL + TW/Makefile.PL + bin/enc2xs + Pulled: Remap symname [RT #94221] + https://github.com/dankogai/p5-encode/pull/20 + https://rt.cpan.org/Public/Bug/Display.html?id=94221 +!
Encode.pm + Pulled: [doc] clarify that CHECK coderefs return octets #18 + https://github.com/dankogai/p5-encode/pull/18 + +2.57 2014/01/03 04:52:36 +! encengine.c + Pulled: sun compiler (maybe others) doesn't like UTF-8 in the source + https://github.com/dankogai/p5-encode/pull/17 +! bin/enc2xs + Merged RT#91763: POD errors + https://rt.cpan.org/Ticket/Display.html?id=91763 + +2.56 2013/12/22 13:40:00 +! Encode.pm t/Encode.t + Merged RT#91569: [PATCH] decode_utf8 and non-PVs + https://rt.cpan.org/Ticket/Display.html?id=91569 + +2.55 2013/09/14 07:51:59 +! Encode.pm + Makefile.PL + Unicode/Unicode.pm + lib/Encode/Alias.pm + lib/Encode/CN/HZ.pm + lib/Encode/Encoder.pm + lib/Encode/Encoding.pm + lib/Encode/GSM0338.pm + lib/Encode/Guess.pm + lib/Encode/JP/JIS7.pm + lib/Encode/KR/2022_KR.pm + lib/Encode/MIME/Header.pm + lib/Encode/MIME/Header/ISO_2022_JP.pm + lib/Encode/Unicode/UTF7.pm + t/Encoder.t + replaced 'use base' with 'use parent' + base.pm is an heavy module for what it is used for. + Fortunately it has a tiny replacement, parent.pm + that is on CPAN but also in perl core since 5.10.1. + https://github.com/dankogai/p5-encode/pull/15 + +2.54 2013/08/29 16:47:39 +! Encode.xs ++ t/cow.t + Addressed: COW breakage with _utf8_on() + https://rt.cpan.org/Ticket/Display.html?id=88230 +! Encode.pm + Reverted the document accordingly to #11 + https://github.com/dankogai/p5-encode/pull/10 ++ t/decode.t + Unit test for decoding behavior change in #11 + https://github.com/dankogai/p5-encode/pull/12 + +2.53 2013/08/29 15:20:31 +! Encode.pm + Merged: Do not short-circuit decode_utf8 with utf8 flags + https://github.com/dankogai/p5-encode/pull/11 + Merged: document decode_utf8 behaviour more precise + https://github.com/dankogai/p5-encode/pull/10 +! Makefile.PL + Added repository cpan metadata + https://github.com/dankogai/p5-encode/pull/9 + +2.52 2013/08/14 02:29:54 +! ucm/*.ucm + Addressed: + Unicode Mappping tables are missing Unicode Inc. 
license notification + All files including "as long as this notice remains attached" now + have that notice attached in the comment section. (cp* and mac* + do not since their source files do not include that notice) + https://rt.cpan.org/Ticket/Display.html?id=87340 +! lib/Encode/MIME/Header.pm + t/mime-header.t + Addressed: encoding "0" with MIME-Headers gets a blank string + https://rt.cpan.org/Ticket/Display.html?id=87831 +! Encode.pm + Addressed: Documentation buglet + https://rt.cpan.org/Ticket/Display.html?id=84992 +! Byte/Makefile.PL CN/Makefile.PL EBCDIC/Makefile.PL + Encode/Makefile_PL.e2x JP/Makefile.PL KR/Makefile.PL + Symbol/Makefile.PL TW/Makefile.PL + Applied: Patch to output #includes in deterministic order + https://rt.cpan.org/Ticket/Display.html?id=86974 + +2.51 2013/04/29 22:19:11 +! Encode.xs + Addressed: Encode.xs doesn't compile with Microsoft C compiler + https://rt.cpan.org/Public/Bug/Display.html?id=84920 +! MANIFEST + Addressed: t/taint.t missing + https://rt.cpan.org/Public/Bug/Display.html?id=84919 + +2.50 2013/04/26 18:30:46 +! Encode.xs Unicode/Unicode.xs + lib/Encode/Unicode/UTF7.pm lib/CN/HZ.pm lib/Encode/GSM0338.pm + t/taint.t + Addressed: Encode::encode and Encode::decode + gratuitously launders tainted data + Taintedness now propagates as it should. + https://rt.cpan.org/Ticket/Display.html?id=84879 +! encoding.pm + Addressed: 5.18 deprecation + https://rt.cpan.org/Ticket/Display.html?id=84709 +! bin/piconv + Applied: Update piconv documentation + https://rt.cpan.org/Ticket/Display.html?id=84695 + +2.49 2013/03/05 03:12:49 +! Encode.xs + Addressed: Encoding objects leak memory if decoding fails + https://github.com/dankogai/p5-encode/issues/8 + +2.48 2013/02/18 02:23:56 +! encoding.pm + t/Mod_EUCJP.pm t/enc_data.t t/enc_eucjp.t t/enc_module.t t/enc_utf8.t + t/encoding.t t/jperl.t + [PATCH] Deprecate encoding.pm + https://rt.cpan.org/Ticket/Display.html?id=81255 +! 
Encode/Supported.pod + Fixed: Pod errors + https://rt.cpan.org/Ticket/Display.html?id=81426 +! Encode.pm t/Encode.t + [PATCH] Fix for shared hash key scalars + https://rt.cpan.org/Ticket/Display.html?id=80608 +! Encode.pm + Fixed: Uninitialized value warning from Encode->encodings() + https://rt.cpan.org/Ticket/Display.html?id=80181 +! Makefile.PL + Install to 'site' instead of 'perl' when perl version is 5.11+ + https://rt.cpan.org/Ticket/Display.html?id=78917 +! Encode/Makefile_PL.e2x + find enc2xs.bat if it works on windows. + https://github.com/dankogai/p5-encode/pull/7 +! t/piconv.t + Fix finding piconv in t/piconv.t + https://github.com/dankogai/p5-encode/pull/6 + +2.47 2012/08/15 05:36:16 +! Encode.pm + POD Fixes: Copyright and mail address +! Makefile.PL + Added LICENSE => 'perl' +! lib/Encode/GSM0338.pm t/gsm0338.t + REALLY fixed RT#75670: Wrong decoding for GSM 3.38 character \x09 + ucm/gsm0338.ucm is dropped from MANIFEST since 2.25 + but I was fixing the wrong file! + https://rt.cpan.org/Ticket/Display.html?id=75670 + +! 2.46 2012/08/12 05:49:30 +! Encode.pm + Fixed: RT#78917 for I18N-Charset: Fails with Encode 2.45 + To be more exact, 2.45 broke Encode->encodings(':all') + https://rt.cpan.org/Ticket/Display.html?id=78917 + +2.45 2012/08/05 23:08:49 +! lib/Encode/Alias.pm + Addressed RT#78125: Missed Mac Alias x-mac-ce + https://rt.cpan.org/Ticket/Display.html?id=78125 +! lib/Encode/Unicode/UTF7.pm + Applied the patch in RT#76711 + https://rt.cpan.org/Ticket/Display.html?id=76711 +! ucm/gsm0338.ucm + Addressed RT#75670: Wrong decoding for GSM 3.38 character \x09 + https://rt.cpan.org/Ticket/Display.html?id=75670 +! Encode.pm + Applied the patch in RT#72519 + https://rt.cpan.org/Ticket/Display.html?id=72519 +! Unicode/Unicode.xs + t/Unicode.t + Bug fixes in Unicode.xs by chansen + https://github.com/dankogai/p5-encode/pull/5 +! Encode.pm + various POD improvements by daxim + https://github.com/dankogai/p5-encode/pull/4 + +2.44 2011/08/09 07:49:44 +! 
Unicode/Unicode.xs + Addressed the following: + Date: Fri, 22 Jul 2011 13:58:43 +0200 + From: Robert Zacek + To: perl5-security-report@perl.org + Subject: Unicode.xs!decode_xs n-byte heap-overflow +! Encode.pm encoding.pm +! lib/Encode/Alias.pm lib/Encode/Encoder.pm lib/Encode/Guess.pm + Applied: RT#69735: patch for use constant DEBUG => + https://rt.cpan.org/Ticket/Update.html?id=69735 + +2.43 2011/05/21 23:14:43 +! lib/Encode/Alias.pm + Addressed RT#68361: Encode::Bytes x-mac-... aliases missing + https://rt.cpan.org/Ticket/Display.html?id=68361 +! Encode.pm + Applied the 0001-Fix-typo-in-pod.patch + https://rt.cpan.org/Ticket/Update.html?id=64381 + Addressed RT#65796 Deep recursion error finding invalid charset + https://rt.cpan.org/Ticket/Update.html?id=65796 + Applied a jumbo doc patch by Tom Christiansen + Message-Id: <14795.1304618434@chthon> + +2.42 2010/12/31 22:48:48 +! Encode.xs +! Unicode/Unicode.xs + Applied: RT#64371: Update for 5.14 API changes + http://rt.cpan.org/Ticket/Display.html?id=64371 + +2.41 2010/12/23 11:05:58 +! lib/Encode/MIME/Header.pm + Applied: RT#63387 encode of MIME-Header inserts too much whitespace + http://rt.cpan.org/Ticket/Display.html?id=63387 +! t/Aliases.t lib/Encode/Alias.pm + Applied: RT#63286: Various Encode::Alias improvements + http://rt.cpan.org/Ticket/Display.html?id=63286 + +2.40 2010/09/18 18:39:51 +! Encode.pm Encode.xs ++ t/utf8ref.t + Addressed: RT#59981: find_encoding("UTF-8")->encode crashes + decode_utf8() is now a little faster, too. + http://rt.cpan.org/Ticket/Display.html?id=59981 + http://rt.cpan.org/Ticket/Display.html?id=58541 +! lib/Encode/Unicode/UTF7.pm + Addressed: RT#56443 utf-8 flag is not turned off after calling + Encode::encode('UTF-7', $string) to encode an ascii string + http://rt.cpan.org/Ticket/Display.html?id=56443 +! t/utf8strict.t + Addressed: RT#57799 + http://rt.cpan.org/Ticket/Display.html?id=57799 +! 
lib/Encode/Guess.pm + Addressed: RT#46080: guess_encoding documentation + http://rt.cpan.org/Ticket/Display.html?id=46080 +! ucm/nextstep.ucm + Addressed: RT#59668: nextstep encoding is broken - missing ASCII characters + http://rt.cpan.org/Ticket/Display.html?id=59668 +! lib/Encode/MIME/Header.pm t/mime-header.t + Addressed: RT#52103: Encode::MIME::Header encoded words not separated by + white space + http://rt.cpan.org/Ticket/Display.html?id=52103 +! t/guess.t lib/Encode/Guess.pm + Addressed: Encode: silenced a warning by from_to(..., 'Guess', ...) + http://coderepos.org/share/changeset/37731 + +2.39 2009/11/26 09:23:59 +! Encode.xs t/fallback.t + $utf8 = decode('utf8', $malformed, sub{ ... }) # now works! + http://rt.cpan.org/Ticket/Display.html?id=51204 +! t/CJKT.t t/guess.t t/perlio.t + $ENV{'PERL_CORE'} tricks removed since they are no longer necessary. + Message-Id: <20091116161513.GA25556@bestpractical.com> + +2.38 2009/11/16 14:08:13 +! Encode.xs + Addressed: Encode memory corruption [perl #70528] + Message-Id: +! t/Unicode.t Unicode/Unicode.xs + Patched: #51263: set magic is not applied when modifying encode arguments + http://rt.cpan.org/Ticket/Display.html?id=51263 +! Encode.xs + Patched: #51204: Callback CHECK not supported for UTF-8 decoder/encoder + http://rt.cpan.org/Ticket/Display.html?id=51204 +! Byte/Byte.pm CN/CN.pm Changes JP/JP.pm KR/KR.pm TW/TW.pm + Unicode/Unicode.pm bin/enc2xs lib/Encode/Supported.pod + Fix URLs + http://rt.cpan.org/Ticket/Display.html?id=49776 +! t/CJKT.t t/guess.t t/perlio.t t/piconv.t + $PERL_CORE trick is now off for perl 5.11 or better. + Message-Id: + Message-Id: + Message-Id: <20090907154908.GS60303@plum.flirble.org> + Message-Id: <20090907161509.GN8057@iabyn.com> + +2.37 2009/09/06 14:32:21 +! Encode.xs + fixed: compilation failure on compilers not supporting C99 + http://rt.cpan.org/Ticket/Display.html?id=49466 + +2.36 2009/09/06 09:03:07 +! 
Encode.xs + fixed: 'find_encoding("utf8")->decode(undef)' causes segmentation fault + http://rt.cpan.org/Ticket/Display.html?id=49462 + +2.35 2009/07/13 02:06:30 +! lib/Encode/MIME/Header.pm + Addressed RT #40027: + decode of MIME-Header removes too much whitespace + http://rt.cpan.org/Ticket/Display.html?id=40027 + http://rt.cpan.org/Ticket/Display.html?id=42902 +! t/piconv.t + Addressed by CSJEWELL: t/piconv.t loops infinitely on Win32 + http://rt.cpan.org/Ticket/Display.html?id=47760 + +2.34 2009/07/08 13:34:15 +! bin/piconv + duplicate-BOM problem now fixed. + Message-Id: <10ECB9B7-006E-4570-9EB6-51C49F04ADCF@dan.co.jp> +! bin/piconv ++ t/piconv.t + patches and tests by SREZIC + Message-Id: <4A5366DA.8050801@iconmobile.com> +! Makefile.PL + man* removed on behalf of blead + Message-Id: <20090326135219.GU18164@plum.flirble.org> + +2.33 2009/03/25 07:55:57 +! lib/Encode/MIME/Header.pm + Decontaminated $& which sneaked in on 2.31. + Message-Id: <67FC9F3A39C746DA95AAB6BB01539099@robmhp> + Message-Id: <693254b90903242352x2dc26ba6p5e68deb871fa88ae@mail.gmail.com> + http://coderepos.org/share/changeset/31542 + +2.32 2009/03/07 07:32:37 +! lib/Encode/Alias.pm t/Alias.t + Encode now resolves 'en_US.UTF-8' to utf-8-strict like 'ja_JP.euc' + Those who set locale on their shells should be happier now. +! AUTHORS + added tokuhirom +! Encode.pm + "encode(undef, 'str') should die earlier" + http://coderepos.org/share/changeset/30790 + +2.31 2009/02/16 06:18:09 +! lib/Encode/MIME/Header.pm + "Revert [29767] and [29771] since it breaks perl 5.8" by miyagawa + http://coderepos.org/share/changeset/30111 + +2.30 2009/02/15 17:44:13 +! encoding.pm + fixed regexes, et cetera. by drry + http://coderepos.org/share/changeset/29767 +! lib/Encode/MIME/Header.pm + Addressed: Encode::MIME::Header::decode should respect CHECK + http://rt.cpan.org/Ticket/Display.html?id=43204 + http://coderepos.org/share/changeset/29767 + +2.29 2009/02/01 13:14:37 +! 
Encode.pm + VERSION++ just to make PAUSE happy + Message-Id: <877i4anwwt.fsf@k75.linux.bogus> + +2.28 Date: 2009/02/01 12:30:18 +! Unicode/Unicode.xs + Latest refactoring broke the backward compatibility + w/ Perl 5.8.6 and before now restored + Message-Id: <1233185156.DABa130.74940@basic2.hostingcompartido.com> + Message-Id: <693254b90902010027x277a5d0fm4f5700ba2f276239@mail.gmail.com> +! lib/Encode/MIME/Header.pm + Addressed: Split header lines are joined incorrectly + http://rt.cpan.org/Ticket/Display.html?id=42902 + +2.27 2009/01/21 22:55:07 +! lib/Encode/MIME/Header.pm t/mime-header.t + Addressed: Encode::MIME::Header MIME-Q encoding truncates + trailing zeros in some circumstances + http://rt.cpan.org/Ticket/Display.html?id=42627 +! lib/Encode/Alias.pm + Added alias: unicode-1-1-utf-7 + http://rt.cpan.org/Ticket/Display.html?id=38558 +! Encode.pm + Documented: _utf8_on() does not work for tainted values + http://rt.cpan.org/Ticket/Display.html?id=41163 +! bin/enc2xs + s[oss.software.ibm.com/icu][www.icu-project.org]g + http://rt.cpan.org/Ticket/Display.html?id=40245 +! lib/Encode/Guess.pm t/guess.t + Addressed: Empty file should produce an error message + http://rt.cpan.org/Ticket/Display.html?id=38652 +! Unicode/Unicode.xs AUTHORS + Refactored by Alex Davies + http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2007-10/msg00745.html + Message-Id: <7637669B2E3D46B187591747DA27F4C8@Amelie> + +2.26 2008/07/01 20:56:17 +! Encode.pm + Absence of Encode::ConfigLocal no longer carps no matter what. + http://bugzilla.redhat.com/show_bug.cgi?id=435505#c2 + http://rt.cpan.org/Ticket/Display.html?id=28638 + http://rt.cpan.org/Ticket/Display.html?id=11511 +! lib/Encode/JIS7.pm + use encoding 'utf8' and 'iso-2022-jp' glitches on perl 5.10 + Thanks, MIYAGAWA + Message-Id: <693254b90807011224h3ab50d76v50c6fea87baf223c@mail.gmail.com> +! lib/Encode/Alias.pm t/Aliases.t + 'macintosh' not recognized as MacRoman + http://rt.cpan.org/Ticket/Display.html?id=36326 +!
Makefile.PL + s{INC => "-I./Encode"} + {INC => '-I' . File::Spec->catfile( '.', 'Encode' )} + To prevent some platforms from forgetting to include Encode/encode.h. + http://rt.cpan.org/Ticket/Display.html?id=36348 + +2.25 2008/05/07 20:56:05 +! Encode.pm + added ':default' to Exporter option. +! lib/Encode/GSM0338.pm + GSM0338 now handles coderef in CHECK + http://rt.cpan.org/Ticket/Display.html?id=31335 +! Makefile.PL + Perl 5.10/Encode 2.24: Tiny typo in Encode's Makefile.PL arg processing + Message-Id: <961C2A4F-92B3-416D-A9F9-E7B0ADA9F134@fsck.com> +! lib/Encode/Alias.pm + "This fix for Encode::Alias should make Solaris happy:" + Message-ID: <47D886D9.6060001@iki.fi> + +2.24 2008/03/12 09:51:11 +! lib/Encode/Config.pm + adds and fixes also adds cp858 support. +! Encode.pm encoding.pm lib/Encode/Alias.pm ucm/cp858.ucm + Merged perl@33486. + > Change 33486 by rgs@scipion on 2008/03/12 08:50:11 + An unfortunate side-effect of Encode and Encode::Alias use'ing each + other, and Encode::Alias exporting functions into Encode for it to use + as methods, broke the loading of the find_alias() Encode method in some + cases since 5.10. Breaking the recursive inheritance fixes it. + Message-Id: +! Encode.pm + POD fix by tels + Message-Id: <200711281835.36125@bloodgate.com> +! bin/ucmlint + Fix by MIYAGAWA via CodeRepos + http://coderepos.org/share/changeset/1791 +! encoding.pm t/mime_header_iso2022jp.t + ported back from Perl 5.10-RC1 + +2.23 2007/05/29 18:15:32 +! Encode.xs + got rid of global fallback_cb; encode_method() now takes one more + argument which is a coderef to fallback. This should make + encode_method() thread-safe. +! Encode.pm + Added perluniintro, perlunifaq, and perlunitut to POD +! Encode.xs + Plug a memory leak in Encode -- by rgs + Message-Id: +! Unicode/Unicode.pm + POD fixes on UTF-16LE + http://aspn.activestate.com/ASPN/Mail/Message/perl5-porters/3486118 +! 
Makefile.PL + man page generation is now conditional; yes by default but no if $PERL_CORE + Message-Id: + +2.22 2007/05/29 07:35:27 +! Encode.pm + from_to() does not honor the check while decoding. That's a feature. + To make sure it is a feature it is mentioned in the POD. + http://rt.cpan.org/NoAuth/Bug.html?id=27277 +! Makefile.pl + Encode used to suppress man page generation. Now it generates them. + http://rt.cpan.org/NoAuth/Bug.html?id=27200 +! Encode.pm Encode.xs t/fallback.t + Addressed: (de|en)code("ascii", "\x{3000}", sub{ $_[0] }) segfaults + Reported by MIYAGAWA + +2.21 2007/05/12 06:42:19 ++ lib/Encode/MIME/Name.pm t/mime-name.t +! Encode.pm Encode.xs lib/Encode/Encoding.pm + new method: mime_name() + inspired by: MIYAGAWA +! t/encoding.t + Subject: Re: Compress::Zlib, pack "C" and utf-8 [PATCH] + From: Marc Lehmann + Date: Thu, 12 Apr 2007 08:41:53 +0200 + Message-ID: <20070412064153.GA22475@schmorp.de> + http://public.activestate.com/cgi-bin/perlbrowse/p/31194 +! Unicode/Unicode.pm + POD fix. + Message-Id: <20070417220547.GA11999@zetta.zet> + +2.20 2007/04/22 14:56:12 +! Encode.pm + Pod fixes. Now find_encoding() is explained in more detail. ++ lib/Encode/GSM0338.pm +- ucm/gsm0338.ucm +! lib/Encode/Supported.pod lib/Encode/Config.pm Bytes/Makefile.PL t/gsm0338.t + ETSI GSM 03.38 support is relocated from Encode::Byte to Encode::GSM0338. + This encoding is so kaputt it is unfit for Encode::XS! + Though it was okay for general cases and escape sequences, + '\0' => '@' IFF '\0\0' => '\0' had glitches. + So kaputt even t/gsm0338 wrongly interpreted that. + ref. http://www.csoft.co.uk/sms/character_sets/gsm.htm +! encoding.pm t/Aliases.t + Imported from bleedperl #31015 + +2.19 2007/04/06 12:53:41 +! lib/Encode/JP/JIS7.pm ++ t/jis7-fallback.t + encode('iso-2022-jp') fallback support added by MIYAGAWA++ + decode()'s fallback remains unchanged (FB_PERLQQ) since UTF-8 + contains all characters in iso-2022-jp so there's no need for fancy stuff.
+ Message-Id: <693254b90704060526s6d850320h71cdda50dfbf7eba@mail.gmail.com> +! Encode.pm + #25216 ([PATCH] Encode.pm: postpone the load of Encode::Encoding) + http://rt.cpan.org/NoAuth/Bug.html?id=25216 +! lib/Encode/MIME/Header.pm t/mime-header.t + #24418 (Encode::MIME::Header: wrong encoding with latin1 characters) + http://rt.cpan.org/NoAuth/Bug.html?id=24418 +! Encode.pm + #23876 (Add documentation for LEAVE_SRC) + http://rt.cpan.org/NoAuth/Bug.html?id=23876 +! lib/Encode/Alias.pm t/Aliases.t + #20781: Thai encoding needs alias for tis-620 + http://rt.cpan.org/NoAuth/Bug.html?id=20781 +! bin/piconv AUTHORS + #20344: piconv: wrong conversion of utf-16le encoded files (with PATCH) + http://rt.cpan.org/NoAuth/Bug.html?id=20344 +! Encode.pm Encode.xs bin/enc2xs encoding.pm t/Aliases.t t/utf8strict.t + Imported from bleedperl's 2.18_01 + +2.18 2006/06/03 20:28:48 +! bin/enc2xs + overhauled the -C option + - added ascii-ctrl', 'null', 'utf-8-strict' to core + - auto-generated Encode::ConfigLocal no longer use v-string for version + - now searches modules via File::Find so Encode/JP/Mobile is happy +! Byte/Byte.pm CN/CN.pm EBCDIC/EBCDIC.pm JP/JP.pm KR/KR.pm Symbol/Symbol.pm + use strict added; though all they do is load XS, it's + still better a practice +! *.pm + use warnings added to all of them for better practices' sake. + +2.17 2006/05/09 17:10:09 +! encode.pm + 'chin' =~ /^zh_CN|chin(?:a|ese)?$/i is true + but chin is not china or chinese. + http://d.hatena.ne.jp/jankogai/20060508/1147090316 +! Encode.xs + Integrated maintperl change (27824|27824) which I overlooked + -- sorry, Nicholas and Coverity Scan. + Message-Id: <200604152115.k3FLF1Ar014538@smtp3.ActiveState.com> + Message-Id: <200605091615.k49GF1gJ016777@smtp3.ActiveState.com> + +2.16 2006/05/03 18:24:10 +! bin/piconv + --xmlcref and --htmlcref added. +! Encode.pm + Copyright Notice Added. + http://rt.cpan.org/NoAuth/Bug.html?id=19056 +! * + Replaced remaining ^\t with q( ) x 4. -- Perl Best Practice pp. 
20 + And all .pm's are now perltidy-ed. + +2.15 2006/04/06 15:44:11 +! Unicode/Unicode.xs + Addressed: UTF-16, UTF-32, UCS, UTF-7 decoders mishandle illegal characters + http://rt.cpan.org/NoAuth/Bug.html?id=18556 +! Encode.pm + added str2bytes() as an alias to encode() and bytes2str() as an alias + to decode() + http://rt.cpan.org/NoAuth/Bug.html?id=17103 +! Encode.xs + Change 26922: Avoid warning with MS Visual C compiler. + Message-Id: <200601231245.k0NCj2dw009484@smtp3.ActiveState.com> +! t/perlio.t + Change 26067: As using -C to turn on utf8 IO is equivalent to the open pragma + Message-Id: <200511092227.jA9MRcYD009025@smtp3.ActiveState.com> + +2.14 2006/01/15 15:43:36 +! Makefile.PL + Change 26295: Don't build manpages for Encode and Unicode::Normalize + Message-Id: <200512071540.jB7Fe4Gt017960@smtp3.ActiveState.com> +! Encode.pm + Change 26081: Pod nit in Encode.pm, found by Marc Lehmann in RT #36949. + Message-Id: <200511110357.jAB3vZcP023647@smtp3.ActiveState.com> +! Encode.xs Encode/encode.h bin/enc2xs encengine.c + Change 25821: Mark more static Encode data structures as const. + Change 25823: use more 'const' in the Encode data structures. + Message-Id: <200510221243.j9MChTSu027711@smtp3.ActiveState.com> + Message-Id: <200510221343.j9MDhTk9001245@smtp3.ActiveState.com> + +2.13 2006/01/15 15:06:36 +! AUTHORS + Miyagawa's mail address updated + Message-Id: <693254b90601150535o767e10bai4f4732c275b4ebe0@mail.gmail.com> +! lib/Encode/MIME/Header.pm + #16413: Encode::MIME::Headers patch to solve what is probably someone else's bug + http://rt.cpan.org/NoAuth/Bug.html?id=16413 +! lib/Encode/MIME/Header.pm t/mime-header.t + Applied: RT #16258: Support for RFC 2184 language tag + http://rt.cpan.org/NoAuth/Bug.html?id=16258 +! Encode.pm + Fixed RT #14559: fix for #8872 introduces new "bug" + http://rt.cpan.org/NoAuth/Bug.html?id=14559 +! Encode.pm ++ t/from_to.t + from_to() now makes use of $check more naturally. 
+ Message-Id: <693254b90601150535o767e10bai4f4732c275b4ebe0@mail.gmail.com> + + +2.12 2005/09/08 14:17:17 +! Encode.xs Encode.pm t/fallback.t + Now accepts coderef for CHECK! +! ucm/8859-7.ucm + Updated to newer version at unicode.org + http://rt.cpan.org/NoAuth/Bug.html?id=14222 +! lib/Encode/Supported.pod + More POD typo fixed. + <42F5E243.80500@gmail.com> +! encoding.pm + More POD typo leftover fixed. + Message-Id: + +2.11 2005/08/05 10:58:25 +! AUTHORS CHANGES + To reflect changes below +! Encode.pm encoding.pm + lib/Encode/Alias.pm lib/Encode/PerlIO.pod lib/Encode/Supported.pod + Typo fixed by Piotr Fusik in Change 25261 & 25266 + Message-ID: <001401c595bd$dccb5d80$0bd34dd5@piec> +! Encode.xs + Addresses "BUG REPORT: panic in Encode.xs". + Message-Id: <42EDDA97.2010608@hyper.to> ++ lib/Encode/MIME/Header/ISO_2022_JP.pm mime_header_iso2022jp.t +! lib/Encode/MIME/Header.pm lib/Encode/Config.pm + Encoding 'MIME-Header-ISO_2022_JP' is introduced by Makamaka + Message-Id: <200507311557.j6VFvE2K034605@www231.sakura.ne.jp> +! Encode/encode.h Encode.pm Encode.xs + PerlIO's "encoding(utf-8-strict)" got a problem w/ partial character. + Found and addressed by KONNO Hiroharu + See also ext/PerlIO/encoding/encoding.pm + Message-Id: + +2.10 2005/05/16 18:46:36 +! Encode.pm + fixed decode_utf8() according to RT#8872 + http://rt.cpan.org/NoAuth/Bug.html?id=8872 +! Encode.xs AUTHORS + s/SvIVX/SvIV_set/ by Steve Peters. + Message-Id: <2297.67.96.185.36.1114626315.squirrel@webmail3.pair.com> +! AUTHORS + GAAS was missing! +! Encode.pm + New Pod section: "UTF-8 vs utf8"; explains utf-8-strict ++ t/utf8strict.t + Tests utf-8-strict, according to + "UTF-8 decoder capability and stress test" by Markus Kuhn + http://smontagu.damowmow.com/utf8test.html + Note that malformed and overlong sequences are not tested here + because perl already does that for you, utf-8-strict or not. +! Encode.pm Encode/encode.h t/fallback.t + Addressed "encode(..., Encode::LEAVE_SRC) does not work".
+ Now FB_(PERLQQ|HTMLCREF|XMLCREF) implies LEAVE_SRC so + you can (en|de)code constant strings with these fallbacks. + http://rt.cpan.org/NoAuth/Bug.html?id=8736 +! Encode.pm Encode.xs lib/Encode/Alias.pm t/Aliases.t + Make Encode.pm support the real UTF-8, by GAAS + Message-Id: + Message-Id: +! Encode.pm Encode.xs + post-2.09 comment patches from GAAS applied. + Message-Id: + Message-Id: + +2.09 2004/12/03 19:16:53 +! Encode.pm Encode.xs + Addressed " :encoding(utf8) broken in perl-5.8.6". + Message-Id: +! Encode.pm + Addressed "(de|en)code($valid_encoding, undef) does not warn". + http://rt.cpan.org/NoAuth/Bug.html?id=8723 +! Encode.pm t/Encode.t + Addressed "Can't encode URI". When a reference is fed to (en|de)code, + Encode now stringifies instead of returning undef. + http://rt.cpan.org/NoAuth/Bug.html?id=8725 +! Encode.xs t/fallback.t + Addressed "FB_HTMLCREF and FB_XMLCREF for the UTF-8 decoder". + http://rt.cpan.org/NoAuth/Bug.html?id=8694 +! Encode.pm + Addressed "s/digit/number/". + http://rt.cpan.org/NoAuth/Bug.html?id=8695 +! Encode.pm + Addressed "while (defined(read )) { ... } is an infinite loop". + http://rt.cpan.org/NoAuth/Bug.html?id=8696 +! Encode.pm + Addressed "What the heck is UCM?". + Document fixed so that it no longer contains "UCM-Based Encodings". + http://rt.cpan.org/NoAuth/Bug.html?id=8697 + +2.08 2004/10/24 13:00:29 +! Encode.xs lib/Encode/Encoding.pm Unicode/Unicode.{pm,xs} + Resolved the issue that was raised by 2.07 -- Encode::utf8 fallbacks + that was introduced messed up PerlIO::encoding. + * To do so, ->renew() is renewed and ->renewed() was introduced to + tell whether the caller is PerlIO or not. + Message-Id: <94B2EB12-25B7-11D9-9E6A-000A95DBB50A@dan.co.jp> + +2.07 2004/10/22 19:35:52 +! lib/Encode/Encoding.pm + "Remove Carp from warnings.pm" that influences Encode, by Tels. + Message-Id: <200410161618.29779@bloodgate.com> +! Encode.xs AUTHORS t/fallback.t + Now Encode::utf8's fallbacks are compliant to Encode standard.
+ Thank Bjoern Hoehrmann for persistently convincing me. + Message-Id: <41a61aea.638409494@smtp.bjoern.hoehrmann.de> +! Encode.pm + POD further revised. + +2.06 2004/10/22 06:23:11 +! ucm/mac* + RT #8083 reports that MacThai mapping was obsolete + Updated all mac* encodings according to the URI below. + One remaining mystery is that MacRomanian vs. MacRumanian. + MacRumanian is not found in unicode.org... + http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ +! Encode.pm t/Encode.t + Fixed RT #8081: "decode(..., bless{},'x') segfault" + Two more tests added to test that. + http://rt.cpan.org/NoAuth/Bug.html?id=8081 +! Encode.pm + POD revised according to RT #7966 + http://rt.cpan.org/NoAuth/Bug.html?id=7966 +! Unicode/Unicode.pm + POD updated explaining why Encode::Unicode always croaks on error + rather than giving users choices. + http://rt.cpan.org/NoAuth/Bug.html?id=7892 + +2.05 2004/10/19 04:55:01 +! encoding.pm + "unnuke" jhi's patch in bleedperl, with minor correction by dankogai. + Message-ID: <41210A84.6060506@iki.fi> + Message-ID: <20041018233442.7418113f@r2d2> + Message-Id: <2BA3DAC4-218A-11D9-906D-000A95DBB50A@dan.co.jp> + +2.04 2004/10/16 21:22:44 +! Makefile.PL + From: craigberry@mac.com + Subject: [PATCH ext/Encode/Makefile.PL] make Encode.c dependency explicit + Message-Id: <41716868.7000102@mac.com> + +2.03 2004/10/06 05:07:20 +! lib/Encode/Alias.pm + Resolved some alias case sensitivity glitches reported via RT. + http://rt.cpan.org/NoAuth/Bug.html?id=7835 +! bin/piconv + Resolved Win32 glitches reported via RT. + (Fixed by dankogai and tested by Steve Hay) + http://rt.cpan.org/Ticket/Display.html?id=7831 +! JP/JP.pm lib/Encode/Alias.pm lib/Encode/Supported.pod AUTHORS + /\bwindows-31j$/i is now an alias of CP932, by Steve Hay. + http://rt.cpan.org/NoAuth/Bug.html?id=6695 + +2.02 2004/08/31 10:55:34 +! ucm/big5-hkscs.ucm AUTHORS t/big5-hkscs.enc t/big5-hkscs.utf + New map submitted by Deng Liu and Autrijus. Test data needed
Test data needed + to be upgrade as well, done by dankogai + Message-Id: <20040824204828.GB6999@aut.dyndns.org> +! bin/ucmsort + Now works for characters U+10000 and above. This fix was needed + to "tidy" the original map that was submitted. +! bin/enc2xs + "ucmsort" now mentioned in pod + +2.01 2004/05/25 16:27:14 +! bin/enc2xs AUTHORS + From: domo@computer.org + Subject: [PATCH] Correct statistics from enc2xs + <4AF60A4A-B8BB-11D8-BF99-000A27839BD6@computer.org> + +! lib/Encode/Alias.pm + Addressed "False [] range "\s-" in regex;" in Encode::Alias.pm + <200405271148.i4RBm4KY026529@mail.mvnet.de> + +2.01 2004/05/25 16:27:14 +! lib/Encode/CN/HZ.pm lib/Encode/Unicode/UTF7.pm + "If someone thinks utf8::upgrade($1) should be croaked like + chom?p($1),please try the following patch for Encode.pm." + -- sadahiro-san + <20040522212704.C068.BQW10602@nifty.com> + +2.0 2004/05/16 20:55:15 +* version updated to 2.00 + -- sorry, no big feature change. I just hate version 1.100 :) +! lib/Encode/Guess.pm + Unicode/Unicode.pm + addressed UTF-(8|32LE) + BOM misguessing + https://rt.cpan.org/Ticket/Display.html?id=6279 +! Encode.pm + s/is_utif8/is_utf8/ in POD +! Encode/lib/Encode/CN/HZ.pm + Fixes "make test" failure after the patch to pp_hot.c + by Sadahiro-san + Message-Id: <20040222182357.6B39.BQW10602@nifty.com> +! bin/piconv + From: autrijus@autrijus.org + Subject: [PATCH] "piconv -C 512" badly broken + Message-Id: <1072870210.769.5.camel@localhost> + +1.99 2003/12/29 02:47:16 +! Unicode/Unicode.xs + find_encoding("UTF-16BE")->encode("abc") now null terminates + http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2003-10/threads.html#00258 +! Encode.pm + prototype bug in decode_utf8() fixed + Message-Id: <600A4CDA-F004-11D7-B570-000393AE4244@dan.co.jp> +! Encode.pm /MANIFEST encoding.pm lib/Encode/Supported.pod + t/at-cn.t t/at-tw.t t/gsm0338.t ucm/gsm0338.ucm ++ t/gsm0338.t + Merged from maintperl@21987 + +1.98 2003/08/20 11:15:31 +! 
lib/Encode/MIME/Header.pm AUTHORS t/mime-header.t + Dave Evans has found and corrected a bug in Encode::MIME::Header. + Test suite added by Dan Kogai. + Message-Id: <3F43440B.7060606@rudolf.org.uk> +! encoding.pm + Typo fixes rolled back in from bleedperl +! t/at-cn.t t/at-tw.t + v-strings, now depreciated in perl 5.8.1, is replaced by sadahiro + Message-Id: <20030805002313.9880.BQW10602@nifty.com> +! bin/enc2xs + argv case nit for VMS by Craig + Message-ID: <3F2B02DE.10207@mac.com> +! t/enc_eucjp.t t/enc_utf8.t AUTHORS + Encode test fixes for VMS by Peter Prymmer + Message-ID: +! lib/Encode/Alias.pm t/Aliases.t + koi-8 aliases bug detected and patched by sadahiro. + Further fix and test suite by dankogai + Message-Id: <20030713102228.C76A.BQW10602@nifty.com> + +1.97 2003/07/08 21:52:14 +! encoding.pm lib/Encode/Guess.pm lib/Encode/Alias.pm + lib/Encode/JP/JIS7.pm lib/Encode/Encoder.pm Encode.pm + $DEBUG replaced with DEBUG() so perl optimizes better, + by Rafael with further fixes by dankogai + Message-Id: <20030705222023.1f24e041.rgarciasuarez@free.fr> +! lib/Encode/Aliases.pm + Was: define_alias( qr/\bGB[-_ ]?2312(?:\D.*$|$)/i => '"euc-cn"' ); + Now: define_alias( qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"' ); + So new hash seeding introduced in bleedperl works. + Message-Id: <20030629100937.GD20285@vipunen.hut.fi> +! lib/Encode/Guess.pm + $Encode::Guess::NoUTFAutoGuess is added so you can turn off + automatic utf(8|16|32) guessing -- originally by Autrijus + Message-Id: <20030626162731.GA2077@not.autrijus.org> +! Encode.pm + Addressed the following; + Subject: [perl #22835] FB_QUIET doesn't work with Encode::encode + Message-Id: + +1.96 2003/06/18 09:29:02 +! lib/Encode/JP/JP.pm t/guess.t + m/(...)/ in void context then $1 is considered a Bad Thing + Message-Id: +! Encode.pm + Mentions in POD that as of perl 5.8.1 utf8::is_utf8() is + also available. +! 
encengine.c + More typecast from maintperl@19739 + Message-Id: <200306110645.h5B6j5D2009640@smtp3.ActiveState.com> +! t/perlio.t + Tests 37 & 38 failed on Win32 -- yet another CRLF issue + Message-Id: <200306090733.h597XQPA031646@smtp3.ActiveState.com> +! t/Encode.t + Now skips for EBCDIC platform. + Message-Id: +! t/perlio.t + Craig's patch applied that addresses "Many systems (DOS, VMS) cannot + have more than one C<.> in their filenames." -- perlport. + Message-Id: <3ED79E01.8050401@mac.com> +! bin/piconv + Found and fixed the back that -p,--perlqq does not work. + Induced by the change from Getopt::Std to Getopt::Long. +! encoding.pm + Addressed [cpan #2629] Wrong assumption in numeric comparison + Message-Id: +! Encode.pm Encode.xs Unicode/Unicode.pm Unicode/Unicode.xs + lib/Encode/Encoding.pm t/perlio.t + ! API Change: ->new_sequence() => ->renew() + + Encode::Unicode makes use of it so it can handle BOM on PerlIO + + Encode::XS and Encode::utf8 now supports ->renew() + + Encode::Encoding now documents this with examples + - Non-XS (en|de)code stripped out of Encode::Unicode + Message-Id: <146957DB-8C39-11D7-9C91-000393AE4244@dan.co.jp> + +1.95 2003/05/21 08:41:11 +! ucm/8859-*.ucm + Since bogus entries were found in iso-8859-6, all entries are + re-generated once again out of + http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-*.TXT + Thank David Graff for the discovery + Message-Id: <200305201819.h4KIJRRU013746@unagi.cis.upenn.edu> ++ lib/Encode/Unicode/UTF7.pm +! lib/Encode/Config.pm lib/Encode/Alias.pm Unicode/Unicode.pm t/Unicode.t + lib/Encode/Supported.pod + UTF-7 support is now added. With this Encode now has all transcoding + methods in Unicode::String. + +1.94 2003/05/10 18:13:59 +! lib/Encode/MIME/Header.pm + A more sophisticated solution for double-encoding by dankogai +! lib/Encode/MIME/Header.pm AUTHORS + Two bugs fixed by Bjoern Jacke + * "Double Encoding" was not possible + i.e. 
encode("MIME-B" => "=?UTF-8?B?w4RwZmVs?=") + * encode("MIME-Q") had UTF-8 flag on + Message-Id: +! lib/Encode/MIME/Header.pm AUTHORS + Two occurances of "croak ()" fixed as "croak qq()". + Simon Cozens is added to AUTHORS as a result. + Message-Id: <20030509103708.GA30664@deep-dark-truthful-mirror.pad> +! bin/piconv + POD fixes that reflect enhancements by jhi +! bin/piconv + Two enhancements by jhi. + + Now uses Getopt::Long so it accepts long name options + (--from for -f, for example) + + New option: -r,--resolve + Message-Id: <20030505114149.GA227075@kosh.hut.fi> +! MANIFEST META.yml + META.yml added upon request of Schwern + Message-Id: +! AUTHORS + Enache Adrian removed upon request -- to live longer than Encode + and/or FreeBSD (toy-)?thread :) + Message-Id: <20030425015701.GA2069@ratsnest.hole> +! t/enc_module.t + "close STDOUT unless $^O eq 'freebsd';" once again relocated + to keep VMS happy in which case "$^O eq 'freebsd'" is required + to keep FreeBSD+thread happy. Sigh. + Message-Id: <3EA88ADC.3000300@mac.com> + +1.93 2003/04/24 17:43:16 +! t/enc_eucjp.t + added "no warnings 'pack'" in for loop to keep bleedperl from + complaining "Character in 'C' format wrapped in pack". +! Makefile.PL + More elegant perl core detection inspired by Ilya Zakharevich + (but further elaborated for general cases). +! lib/Encode/Encoding.pm lib/Encode/PerlIO.pod + POD fixes. +! t/euc-jp.ucm + like cp9??, \x80-\x9F (control + 0x80) are zapped so they + are less likely to be confused w/ ISO-8859-* +! t/CJKT.t + RT tests added (vendor encodings are exemplified) + -- that successfully found a flaw on iso-2022-kr before the patch. +! lib/Encode/CJKConstants.pm lib/Encode/KR/2022_KR.pm + decode("ISO-2022-KR") has been buggy but no one ever sited + that since no one seems to be using it. Bugs discovered by + SADAHIRO-san + Message-Id: <20030416231757.A545.BQW10602@nifty.com> +! lib/Encode/CN/HZ.pm t/perlio.t + HZ is now perlio_ok, thanks to SADAHIRO-san. 
perlio.t modified + so it adds test for HZ. + Message-Id: <20030416231757.A545.BQW10602@nifty.com> +! lib/Encode/Guess.pm + Now guesses UTF-(16|32)(BE|LE) when the string contains \x00. + So long as the string contains \x{00}-\x{ff} it does not fail. + See perldoc for details. + Message-Id: + +1.92 2003/03/31 03:27:27 +! ucm/big5-eten.ucm ucm/big5-hkscs.ucm + Extraneous single-byte chars in range \x80-\xA0 and \xFA-\xFF + removed. FYI, IBM's ICU has none of these for java-Big5-1.3_P.ucm + but glibc-BIG5-2.1.2.ucm does. + Message-Id: <20030325215213.4CA1.BQW10602@nifty.com> +! ucm/cp932.ucm ucm/cp936.ucm ucm/cp949.ucm ucm/cp950.ucm + Maps regenerated again but this time based upon + http://oss.software.ibm.com/cvs/icu/charset/data/ucm/ + (But where is THE DOCUMENT by MICROSOFT?) +! t/enc_module.t AUTHORS + failure with threaded Perl on FreeBSD addressed. + Enache Adrian is added to AUTHORS for this. + Message-Id: <20030322230131.GA813@ratsnest.hole> +! lib/Encode/Guess.pm + Some POD fixes. +! t/CJKT.t + Change 18989: Make the :bytes conditional on PerlIO. + further Modified by Dan Kogai + <200303161730.h2GHU5B16265@smtp3.ActiveState.com> +! t/enc_module.t + Chnage 18966: another fix for failing test on windows ("use encoding" + puts STDIN in :raw mode, so chomp() wasn't stripping the CR), by gsar + Message-Id: <200303140545.h2E5j5B08856@smtp3.ActiveState.com> +! t/CJKT.t + Change 18970: Hopefully this works also in Win32, by jhi + Message-Id: <200303140745.h2E7j6B22729@smtp3.ActiveState.com> + Change 18965: fix CJKT.t failures on windows due to incorrect + binmode(), by gsar + Message-Id: <200303140530.h2E5U5B07046@smtp3.ActiveState.com> + +1.91 2003/03/09 20:07:37 +! encoding.pm + even more proofread by jhi. + Message-Id: <20030309194323.GT20843@kosh.hut.fi> +! t/enc_module.t + -use lib 't'; + +use lib qw(t ext/Encode/t ../ext/Encode/t); + Message-Id: <20030309182057.GR20843@kosh.hut.fi> +! AUTHORS + s/Hirohito/Hiroto/ig; Sorry, Hiroto-san. 
+ Message-Id: <20030309181748.GP20843@kosh.hut.fi> +! encoding.pm + s/logner/longer/ + Message-Id: <20030309181907.GQ20843@kosh.hut.fi> + +1.90 2003/03/09 17:32:43 +! encoding.pm ++ t/enc_data.t + Inaba-san has added a patch for perl 5.8.1 or later that makes + encoding.pm work for filehandle. t/enc_data.t is to test + that. POD is further revised. + Message-Id: <200303091515.h29FF6B03903@smtp3.ActiveState.com> +! encoding.pm t/enc_module.t + encoding vs. ${^UNICODE} resolved. POD revised accordingly. + Message-Id: <20030306112940.GN20652@kosh.hut.fi> + +1.89 2003/02/28 +! Encode.xs + signed vs. unsigned issue discovered by Craig on OpenVM + Message-Id: +! encoding.pm AUTHORS ++ t/Mod_EUCJP.pm t/enc_module.enc t/enc_module.t + Because binmode() stacks layers instead of overwrite, you have to + ":raw :encoding()" in encoding.pm or your are in trouble when you + call encoding.pm multiple times. There are several workarounds + but Inaba-san's idea is in. SUGAWARA Hajime , + who was the first to address this problem was added to AUTHORS. + The test suites was added for this, which is a modified version + of SUGAWARA-san's scripts + Message-Id: <3E5CF695.6AE07852@st.rim.or.jp> + +1.88 2003/02/20 14:42:34 +! Encode.xs + one signedness nit for Encode by jhi + <200302161933.h1GJX876018710@kosh.hut.fi> +! ucm/viscii.ucm + VISCII map was incorrect; fixed by Sadahiro-san + Message-Id: <20030216120828.47D3.BQW10602@nifty.com> +! t/enc_eucjp.t t/enc_utf8.t AUTHORS + You can't unlink files that are opened in cygwin but the last + file handle opened in t/enc_*.t left open. Patch submitted + by Yitzchak and he was added to AUTHORS. + Message-Id: +! t/CJKT.t + now works with 'LC_ALL=en_US.UTF-8 PERL_UTF8_LOCALE=1' + Message-Id: <20030206104513.GA11081@kosh.hut.fi> +! Unicode/Unicode.xs + For 1.88: Unicode.xs =~ s/regog/recog/ -- jhi + Message-Id: <20030206045153.GA6826@kosh.hut.fi> + +1.87 2003/02/06 01:52:11 +! 
AUTHORS + * Inaba "Sensei" Hirohito added (I thought I have done so a long + ago but apparently I did not). + * SUZUKI Norio added for verious and useful bug reports. +! Byte/Byte.pm KR/KR.pm Unicode/Unicode.pm + lib/Encode/Encoder.pm lib/Encode/CJKConstants.pm + podchecked so all warnings are gone except for L. +! encoding.pm t/enc_eucjp.t + * t/uni/tr_utf8.t now t ok on maintperl (sorry, jhi) + * Filter option overhaul + * POD revision +! Encode.pm Encode.xs encengine.c Encode/encode.h + lib/Encode/Encoding.pm lib/Encode/JP/JIS7.pm + Merged inaba-san's patch that fixes "use encoding 'shiftjis'" + without filter. podchecked by Dan Kogai. + Message-Id: <3E3BC46B.6C687CFD@st.rim.or.jp> +! lib/Encode/Alias.pm + decode('alias', $1) went wild because of local $_ in find_alias() + the evil local $_ is eradicated but that changes find_alias() + format for coderef aliasing. See Encode::Alias for details + Message-Id: <200302051704.AA00042@kipp0.nifty.com> + +1.86 2003/01/22 03:29:07 +! encoding.pm + * Don't forget to canonize when you attempt an exact match! + Message-Id: <73E7F801-2DAA-11D7-BF9A-000393AE4244@dan.co.jp> + * ${^ENCODING} exception is off for $] > 5.008 + Message-Id: <20030122110617T.inaba.hiroto@toshiba-it.co.jp> +! t/enc_utf8.t + $] check commented out so it runs on 5.8.0 + +1.85 2003/01/21 22:19:14 +! encoding.pm + ${^ENCODING} exception is now explicit rather than handled by regex. ++ t/enc_eucjp.t t/enc_utf8.t + Test suite for the better "encoding" pragma support for bleedperl. + On 5.8.0, they will just be skipped. + +1.84 2003/01/10 12:00:16 +! encoding.pm + ${^ENCODING} is no longer set for utf so encoding is no longer fun :) + (That is to prevent duplicate encoding first by IO then ${^ENCODING}) + Message-Id: <20030108213737.GK331043@lyta.hut.fi> +! Unicode/Unicode.xs + %_ fixes saves the resulting .so .05% smaller, by NC + Message-Id: <20021226225709.GF284@Bagpuss.unfortu.net> +! 
Encode.pm + Silence Encode on undef, by Andreas + Message-Id: + Message-Id: +! Unicode/Unicode.xs + s/regognised/recognised/ . British spelling left intact to pay + respect to two British Nicks :) + Message-Id: <20021203020454.GK2274@kosh.hut.fi> + +1.83 2002/11/18 17:28:49 +! Encode.xs lib/Encode/JIS7.pm + Even more patches from Inaba-san has been applied. With this + patch t/uni/tr_7jis.t and t/uni/t_utf8.t of bleedperl will work. + Message-Id: <20021115105514D.inaba.hiroto@toshiba-it.co.jp> + +1.82 2002/11/14 23:06:12 +! Encode.xs + Encode::utf8 (XS Version) assertion botch first found in Cygwin, + later found in perls w/ -Dusemymalloc was fixed by NC. + Message-Id: <20021114210349.GA288@Bagpuss.unfortu.net> + +1.81 2002/11/08 18:29:27 +! Encode.pm Encode.xs + Non-XS version of Encode::utf8 is back (with XS being default). + Encode::predefine_encodings(0) to turn off XS. + This is primarily to cope w/ Cygwin smoke but Sadahiro-san has + found that it was Test::More causing the problem, not Encode. + But I have already made it configurable so it may be useful in + some rare cases.... + Message-Id: <20021107210110.2EE4.BQW10602@nifty.com>, et al. +! bin/enc2xs + The ingenious patch by Nicholas Clark that reduces shlib sizes by + 50% with no penalty and backward compatibility preserved, is in. + Message-Id: <20021103231324.GE288@Bagpuss.unfortu.net> + +1.80 2002/10/21 20:39:09 +! Encode.xs t/mime-header.t + Even more patches from NI-XS regarding Encode::utf8->decode(). + And one more test to t/mime-header.t to prove it + Message-Id: + +1.79 2002/10/21 06:05:37 +! Encode.xs + Further patches from NI-XS. Encode::utf8->decode() now checks the + value of utf8 flag of the argument. As a result, the fix to + lib/Encode/MIME/Header.pm is no longer neccessary but since it did + no harm (even speedwise) I'll leave it unreverted. +! ucm/cp949.ucm ucm/cp950.ucm + U+20AC EURO SIGN + U+00AE REGISTERED SIGN + were missing as a result of 1.78. Discovered by Moriyama-san. 
+ Moriyama-san has also developed a test script that compares + (en|de)coded results to the corresponding Win32 API result and + all cp9?? maps are now verified. + Message-Id: <20021021025220.3AED.MSYK@mtg.biglobe.ne.jp> + +1.78 2002/10/20 15:44:00 +! lib/Encode/MIME/Header.pm + fixed so that it works with new Encode::utf8 +! Encode.pm Encode.xs + Encode::utf8 is now in Encode.xs by Nick In-XS. This allows + :encoding(UTF-8) to handle partial chars at end of buffers + correctly. + Message-Id: <20021020134935.2079.3@bactrian.ni-s.u-net.com> +! lib/Encode/Supported.pod + More nitpickings applied. ++ t/rt.pl MANIFEST +! t/CJKT.t + Moriyama-san has discovered a serious bug in t/CJKT.t; its roundtrip + tests were completely useless. To redeem that and get the peace of + mind again, I wrote t/rt.pl to test ALL '|0' ENTRIES in all + ucm/*.ucm. Since this script takes too long to finish (30 seconds on + PIII-800MHz, FreeBSD), it is deliberately excluded from 'make test' + but you can easily run that by either renaming it or: + perl -Mblib t/rt.pl + Message-Id: <20021019065420.0C48.MSYK@mtg.biglobe.ne.jp> +! ucm/cp936.ucm ucm/cp949.ucm ucm/cp950.ucm + Other CJKT cp9?? also updated according to the URI below; + http://www.microsoft.com/typography/unicode/cscp.htm ++ bin/ucmsort MANIFEST + ucmsort is a crude utility that sorts CHARMAP entries in UCM files + to proper order. Intended for hardcore developers only. +! ucm/cp932.ucm JP/JP.pm AUTHORS + CP932 mapping which was based upon the mapping file at unicode.org + was found obsolete by MORIYAMA Masayuki <msyk@mtg.biglobe.ne.jp>. He + has also supplied the patch so he was added to AUTHORS. +! lib/Encode/Supported.pod + ISO-8859-11 != TIS 620 + == TIS 620 + \xA0 ( ) + Message-Id: + + +1.77 2002/10/06 03:27:02 +! t/jperl.t + * Modified to accommodate the upcoming patch by Inaba-san that + will fix tr/// needing eval qq{} + Message-Id: <9F78A19C-D6C3-11D6-BAC6-0003939A104C@dan.co.jp> +!
encoding.pm + * pod fixes/enhancements to reflect the changes above +! lib/Encode/Alias.pm + "Encode::TW is correct, Encode::Alias not." - /Autrijus/ + Message-Id: <20021001015648.GB18710@not.autrijus.org> + +1.76 2002/08/25 15:09:51 +! t/big5-eten.utf + To reflect ucm change by Autrijus. t/big5-eten.enc was regenerated + but naturally identical to previous version -- dankogai +! ucm/big5-eten.ucm + Codepoint fixes -- autrijus + Message-Id: <20020805040236.GC5220@not.autrijus.org> += * + copied everything under perl-5.8.0/ext/Encode to make sure Encode + is in sync w/ perl core +! t/CJKT.t t/guess.t + Change 17175 by jhi@alpha on 2002/06/10 23:24:42 + Now that binmode(FH) does implicit ":bytes" revisit + the failing tests. The worrisome one is the Digest::MD5 + test-- how will it fare in CRLF lands now? +! t/CJKT.t t/guess.t + From: Radu Greab + Date: Mon, 10 Jun 2002 00:40:34 +0300 + Message-Id: <200206092140.g59LeYn15745@ix.netsoft.ro> + Fixes for en_US.UTF-8 failures, all but ext/PerlIO/t/fallback.t + ones which I cannot figure out. +! lib/Encode/Alias.pm + Subject: [Encode PATCH] spurious warning + From: Nicholas Clark + Date: Sun, 2 Jun 2002 20:26:22 +0100 + Message-ID: <20020602192619.GA320@Bagpuss.unfortu.net> + +1.75 2002/06/01 18:07:49 +! lib/Encode/Alias.pm t/Alias.t lib/Encode/Supported.pod TW/TW.pm + glibc compliance cited by Autrijus. + http://www.li18nux.org/docs/html/CodesetAliasTable-V10.html +! bin/enc2xs bin/piconv + Subject: Re: forewarning: usedevel and versiononly + Message-Id: <20020529081515.D570.H.M.BRAND@hccnet.nl> + +1.74 2002/05/28 18:33:15 ++ ucm/null.ucm ucm/ctrl.ucm +! Makefile.PL bin/enc2xs lib/Encode/Supported.pod + "null" and "ascii-ctrl" encodings added upon the request of Autrijus + Subject: Re: unicode -> &# notation + Message-ID: <20020518193704.GB40272@not.autrijus.org> + +1.73 2002/05/28 17:26:18 +! 
*/Makefile.PL Makefile.PL bin/enc2xs Encode/Makefile_PL.e2x AUTHORS + Chris Nandor has fixed Encode so that it works w/ MacPerl -- + at least w/ PPC (68k need static linking which does not work due to + 64k limit). pudge is added to AUTHORS (I'm surprised he was not + there in the list). Encode/Makefile_PL.e2x was additionally fixed + by dankogai to reflect changes in other Makefile.PL + Message-Id: +! t/mime-header.t + Subject: Change 16746: -Mutf8 cleanup. + Message-Id: <200205222345.g4MNj7e10597@smtp3.ActiveState.com> + +1.72 2002/05/20 15:49:56 +! Makefile.PL + Subject: [PATCH] Encode should be in perl-core library path + Message-Id: <86r8k7h738.wl@mail.edge.co.jp> + Message-Id: <20020520161201.A11019@alpha.hut.fi> +! lib/Encode/MIME/Header.pm + Subject: [PATCH] Encode::MIME::Header + Message-Id: <86sn4nh7a8.wl@mail.edge.co.jp> +! Encode/Makefile_PL.e2x + Subject: [PATCH] Make Makefile_PL.e2x happy on MSWin32 + Message-Id: <20020519201031.GA1603@not.autrijus.org> +! CN/Makefile.PL Byte/Makefile.PL JP/Makefile.PL TW/Makefile.PL + Symbol/Makefile.PL KR/Makefile.PL EBCDIC/Makefile.PL Makefile.PL + AUTHORS + @16628 and @16652 from Vadim. Vadim was added to AUTHORS. + Subject: [PATCH] good day for WinCE port of perl. + Message-ID: <001301c1fc68$e808e560$a95cc3d9@vad> +! Encode.xs +! Unicode/Unicode.xs + Even more linting by Robin via @16532 +! Encode.xs + Even more typecast by Sarathy in @16460 + +1.71 2002/05/07 16:22:42 +! Encode.xs + even more typecasts by Robin + Message-Id: <200205071513.QAA05846@tempest.npl.co.uk> +! bin/enc2xs + Fixed a very strange bug that was causing bogus ucm -> C table + generation; it was revealed by a UCM file that Andreas was + working on. This is the king of the weirdest bugs I've encountered + in the course of Encode maintenance. + Message-Id: <6C04F0FA-61D4-11D6-B164-00039301D480@dan.co.jp> + +1.70 2002/05/06 10:26:48 +! encoding.pm + Made more 'module-safe' with conjunction w/ 'no encoding'. + Message-Id: +!
lib/Encode/Encoding.pm + 'require Encode' because ->Define uses Encode::define_encoding(); + problem and solution addressed by Miyagawa-kun + Message-Id: <86znzdfvuh.wl@mail.edge.co.jp> +! t/Unicode.t + Cuts the frill to make djgpp happier, as suggested by Laszlo + Message-Id: <20020506105819.H17012@libra.eth.ericsson.se> +! bin/enc2xs + enc2xs no longer overwrites files w/ -M option, as suggested by Andreas + Message-Id: + +1.69 2002/05/04 16:41:18 +! lib/Encode/MIME/Header + Floating-point coerced for UNICOS (in integer arithmetics it folds + line one character too early). Verification by Mark is pending. + Message-Id: +! Unicode/Unicode.pm + more doc patch from Elizabeth + Message-Id: <4.2.0.58.20020503210946.02f4ed30@mickey.dijkmat.nl> +! Encode/Makefile_PL.e2x + More platform-independent patch from Benjamin + Message-Id: <3CD31BE0.69F79B06@earthlink.net> +! lib/Encode/Guess AUTHORS + split regex fix by Graham Barr. Adds him to AUTHORS. + Message-Id: <20020504085419.E95940@valueclick.com> +! Encode/Makefile_PL.e2x + enc2xs script discovery made smarter and more sensible, first cited + by Miyagawa-kun and further suggestions by Rafael and Andreas +! Encode.pm lib/Encode/Guess.pm t/fallback.t t/guess.t t/mime-header.t + "The EBCDIC remapping of the low 256 bites again" #16372 by jhi + +1.68 2002/05/03 12:20:13 +! lib/Encode/Alias.pm lib/Encode/Supported.pod t/Alias.t AUTHORS + UCS-4 added to aliases of UTF-32 by Elizabeth Mattijsen. Alias.t + and Supported.pod modified to reflect the change. Elizabeth added + to Authors. And H.M. is also added for forwarding her patch among + other contributions (I was rather surprised to find his name was not + there yet!) + Message-Id: <20020503114901.D639.H.M.BRAND@hccnet.nl> + +1.67 2002/05/02 07:33:09 +! Encode.xs + Error message now consistent w/ perlqq (\N{U+} -> \x{}) + done in perl@16308 but Philip linted me further. Now the error + messages are macronized as ERR_ENCODE_NOMAP and ERR_DECODE_NOMAP +! 
lib/Encode/Guess.pm + Sanity check for happier -w by Autrijus + +1.66 2002/05/01 05:41:06 +! Encode.xs t/fallback.t + WARN_ON_ERR no longer assumes RETURN_ON_ERR so you can issue a warning + while fallback is in effect. This even came with a welcome side-effect + of cleaner code with less nesting! Thank you, NI-XS. t/fallback.t is + also modified to test this. + And of course, the corresponding variables to UV[Xx]f are appropriately + cast. This should've concluded NI-XS homework. +! Encode.pm + encode(undef) does warn again! Repented upon suggestion by NI-XS. + Document for unless vs. '' added + Message-Id: <20020430171547.3322.13@bactrian.elixent.com> + +1.65 2002/04/30 16:13:37 +! Encode.pm + encode(undef) no longer warns for C. Suggested by Paul. + Message-Id: +! lib/Encode/Supported.pod + Encode::MIME::Header and Encode::Guess mentioned + Updated for Encode::HanExtra 0.05 and Encode::JIS2K +! lib/Encode/Guess.pm + POD fix by Miyagawa-kun + Message-Id: <86k7qqx8p7.wl@mail.edge.co.jp> + +1.64 2002/04/29 06:54:06 +! ucm/euc-jp.ucm + Now decodes euc-jisx0213 also. CAVEAT: encode("euc-jp"...) and + encode("euc-jisx0213") are still DIFFERENT. + Message-Id: +! Encode.xs + A few white spaces corrected by NI-XS via PerlIO integration to + Mainline + Subject: Change 16247: Integrate perlio; +! Encode.pm + Document fixes by Andreas + Message-Id: + +1.63 2002/04/27 18:59:50 +! lib/Encode/Encoding.pm +! Encoding.pm Unicode/Unicode.pm lib/Encode/Guess.pm lib/Encode/CN/HZ.pm +! lib/Encode/JP/JIS7.pm lib/Encode/MIME/Header.pm lib/Encode/KR/2022_KR.pm + Make use of the Encode::Encoding base class! + And other cleanups in Encode.xs upon NI-XS suggestions + Message-Id: <20020427160718.1290.15@bactrian.ni-s.u-net.com> + +1.62 2002/04/27 11:17:39 +! Encode.pm + encodings() now just checks %ExtModule instead of eval{require} + all of them for ":all" to conserve more memory. +! Encode.xs + more "%x" -> "%" UVxf stuff. +! Encode.pm + s/=over2/=over 2/g # oops.
+ +1.61 2002/04/26 03:02:04 +! t/mime-header.t + Now does decent tests besides use_ok() +! lib/Encode/Guess.pm t/guess.t + UI streamlined, document added +! Unicode/Unicode.xs + various signed/unsigned mismatch nits (#16173) + http://public.activestate.com/cgi-bin/perlbrowse?patch=16173 +! Encode.pm + POD: utf8-flag-related caveats added. A few sections completely + rewritten. +! Encode.xs +! AUTHORS + Thou shalt not assume %d works, either! + Robin Baker added to AUTHORS for this + Message-Id: <200204251132.MAA28237@tempest.npl.co.uk> +! t/CJKT.t + "Change 16144 by gsar@onru on 2002/04/24 18:59:05" + +1.60 2002/04/24 20:06:52 +! Encode.xs + "Thou shalt not assume %x works." -- jhi + Message-Id: <20020424210618.E24347@alpha.hut.fi> +! CN/Makefile.PL JP/Makefile.PL KR/Makefile.PL TW/Makefile.PL To make + low-memory build machines happy, now *.c is created for each *.ucm + (no table aggregation). You can still override this by setting + $ENV{AGGREGATE_TABLES}. + Message-Id: <00B1B3E4-579F-11D6-A441-00039301D480@dan.co.jp> ++ lib/Encode/Guess.pm ++ lib/Encode/JP/JIS7.pm + Encoding-autodetect (mainly for Japanese encoding) added. In a + course of development, JIS7.pm was improved. ++ lib/Encode/HTML/Header.pm ++ lib/Encode/Config.pm + MIME B/Q Header Encoding Added! +! Encode.pm Encode.xs t/fallback.t + new fallbacks; XMLCREF and HTMLCREF upon Bart's request. + Message-Id: <20020424130709.GA14211@tanglefoot> + +1.59 $ 2002/04/22 23:54:22 +! Encode.pm Encode.xs + needs_lines() and perlio_ok() are added to Internal encodings such + as utf8 so XML::SAX is happy. FB_* stub xsubs are now prototyped. + +1.58 2002/04/22 23:54:22 +! TW/TW.pm + s/MacChineseSimp/MacChineseTrad/ # ... oops. +! bin/ucm2text +! t/*.t +- t/*.euc t/*.ref ++ t/*.enc t/*.utf + Now all CJKT encodings go thru round-trip test via t/CJKT.t. + t/(CN|TW).t by Autrijus are renamed at-(cn|tw).t + t/(JP|KR).t are aggregated to t/CJKT.t + test data are all remade via bin/ucm2text. + And .... 
They are no longer skipped for -Uuseperlio ! + +1.57 2002/04/22 20:27:30 +! t/JP.t t/KR.t t/perlio.t + unless (find PerlIO::Layer 'perlio') ... line is back again. + t/JP.t and t/KR.t were supposed to work but maybe '>:utf8' lines + need PerlIO. Sigh.... +! Encode.xs Unicode/Unicode.pm lib/Encode/JP/JIS7.pm t/perlio.t + ->perlio_ok now does eval{ require PerlIO::encoding } there so + it correctly returns 1 when PerlIO::encoding is yet loaded. +! Encode.xs + perl-current patch #16072 reflected + +1.56 2002/04/22 09:48:07 +! Encode.pm encoding.pm t/perlio.t t/jperl.t + New PerlIO::encoding 0.04 compliance met + +1.55 2002/04/22 03:43:05 +! Encode.pm Encode.xs Unicode/Unicode.pm + needs_lines() defined so Encode::Encoding is no longer needed + for perlio + +1.54 2002/04/22 02:50:01 +! Encode.pm! Encode.xs! Unicode/Unicode.pm t/perlio.t +! lib/Encode/Encoding.pm lib/Encode/CN/HZ.pm + now perlio_ok is true by default if PerlIO::encoding->VERSION is + 0.03 or larger. POD in Encode::Encoding revised to reflect this. + Encode::XS and Encode::Unicode now has perlio_ok() method. +! lib/Encode/Supported.pod + s/UP-UX/HP-UX/ by jhi +! AUTHORS Byte/Byte.pm CN/CN.pm Encode.pm JP/JP.pm KR/KR.pm README +! Symbol/Symbol.pm TW/TW.pm Unicode/Unicode.pm bin/enc2xs bin/piconv +! bin/ucmlint encoding.pm lib/Encode/Alias.pm lib/Encode/CN/HZ.pm +! lib/Encode/Config.pm lib/Encode/Encoder.pm lib/Encode/Encoding.pm +! lib/Encode/KR/2022_KR.pm lib/Encode/PerlIO.pod +! lib/Encode/Supported.pod + Huge document fixes by Philip. +! AUTHORS +! t/JP.t + s/compare\(/compare_text\(/o by Sarathy. Adds him to AUTHORS + http://public.activestate.com/cgi-bin/perlbrowse?patch=16049 +! t/perlio.t + binmode() after "<:encoding" to make Win32 happy, by Mattia. + Mattia added to AUTHORS file + Message-Id: <3CC3150F.5798.22A05AE@localhost> + +1.52 2002/04/20 23:43:47 +! t/perlio.t + TODO: is now SKIP:, as NI-XS requested. Also adds more + eraborate failure analysis added. +! 
bin/enc2xs + A note on how to make sure of round-trip safety added to POD + section (so Autrijus is happier) +! ucm/big5-hkscs.ucm ucm/big5-eten.ucm t/TW.pm + big5-(eten|hkscs) is round-trip safe again! + Message-Id: +! encoding.pm + Typo fixes by Andreas +! Encode.pm Encode.xs Unicode/Unicode.xs Encode/Encoding.pm +! lib/Encode/JP/JIS7.pm lib/Encode/KR/2022_KR.pm t/perlio.t + PerlIO coordination patches from NI-XS. + Message-Id: <2769E572-54A1-11D6-B7E2-00039301D480@dan.co.jp> + +1.51 2002/04/20 09:58:23 +! t/TW.t + Updated test suite by Autrijus so "make test" is happy again + Message-Id: <20020420082104.GA25037@not.autrijus.org> ++ ucm/big5-eten.ucm +! ucm/big5-hkscs.ucm lib/Encode/Alias.pm +- ucm/big5.ucm + TW/TW.pm TW/Makefile.PL + Updates by Autrijus. 'big5' is no longer a canonical name but an + alias to 'big5-eten'. big5-hkscs is now in 2001 edition. + Message-Id: <20020419195346.GA19597@not.autrijus.org> +! Encode.xs + Fix by NI-XS that fallback may cause SEGV w/ Perl/TK + Message-Id: <20020419184509.1924.1@bactrian.ni-s.u-net.com> +! Encode.pm + PerlIO detection a little bit smarter; no longer uses eval qq{} + but eval {}. + +1.50 2002/04/19 06:13:02 +! ! Encode.pm Encode.xs Encode/encoding.h ++ t/fallback.pm + New Fallback API implemented and documented. See "perldoc Encode" + for details +! lib/Encode/JP/JIS7.pm Encode.pm ++ lib/Encode/PerlIO.pod t/perlio.t + API compliance met. However, it still does not work unless perlio + implements line buffer. See BUGS section in perldoc Encode::PerlIO. + As a sensible workaround, perlio_ok() added to Encode. +! encoding.pm +! lib/Encode/Supported.pod + Doc fixes from jhi + Message-Id: <20020418174647.J8466@alpha.hut.fi> +! CN/CN.pm + Doc fixes from Autrijus + Message-Id: <20020418144131.GA10987@not.autrijus.org> +! Encode.pm + perlqq mode documented +! t/JP.t ++ t/jisx0201.euc t/jisx0201.ref +!
t/jisx0208.euc t/jisx0208.ref + t/JP.t tests more rigorously and with other encodings + t/jisx0201.* added to test JIS7 encodings. jisx0208 is now PURELY + in jis0208 (used to contain jisx0201 part). +! Encode/Makefile_PL.e2x + The resulting Makefile.PL that "enc2xs -M" creates now auto-discovers + enc2xs and encode.h rather than hard-coded. This makes the resulting + module fully CPANizable. +! encoding.pm t/JP.t t/KR.t + PerlIO detection simplified (checks %INC instead of eval{}) +! Encode.xs Encode/encode.h ++ Unicode/Makefile.PL Unicode/Unicode.pm Unicode/Unicode.xs +- lib/Encode/Unicode.pm + (en|de)code_xs relocated to where it belongs. Source reindented + to my taste +! bin/enc2xs + Additional (U8 *) cast added as suggested by jhi + Message-Id: <20020417165916.A28599@alpha.hut.fi> + +1.42 Date: 2002/04/17 +- lib/Encode/XS.pm + no-op module; Thought of adding a pod there but enc2xs has + one so gone. +! encoding.pm +! t/JP.pm +! t/KR.pm + correct mechanism to detect PerlIO::encoding layer installed. +! Encode.xs + PerlIO Layer detached. + +1.41 2002/04/16 23:35:00 +! encoding.pm + binmode(STDIN|STDOUT ...) done iff PerlIO is available +! t/*.t + Cleaned up PerlIO skip conditions to prepare for the upcoming + Encode - PerlIO forking. +! Encode.pm + exported functions are now prototyped. +! lib/Encode/CN/HZ.pm +! bin/enc2xs +! Encode.xs + fallback implemented # was /* FIXME */ + affected programs revised to fit (only HZ was using the try-catch + approach which needed to be fixed for API-compliance). +! Encode/Config.pm +! Encode/KR/2022_KR.pm +! Encode/KR/KR.pm + can find =head1 NAME now, jhi + Message-Id: <20020416083059.V30639@alpha.hut.fi> +! encoding.pm + s/\{h\}/{$h}/g ;) +! Encode.xs + now compiles with fewer warnings with the pickiest compilers. + Suggested by Craig, fixed by Dan. + ! Encode/Makefile_PL.e2x +! bin/enc2xs + A bug that fails to find *.e2x in certain conditions fixed + +1.40 2002/04/14 22:27:14 ++ Encode/ConfigLocal_PM.e2x +!
lib/Encode/Config.pm +! bin/enc2xs + "enc2xs -C" now generates/updates Encode::ConfigLocal. + ConfigLocal_PM.e2x is a skeleton thereof. +! lib/Encode/Config.pm +! CN/CN.pm + "use Encode::CN::HZ;" was missing. +! t/Unicode.t +! t/unibench.t + More rigorous tests added to test XS, especially on memory allocation. +! Encode.xs +! lib/Encode/Unicode.pm + NI-S implemented an XS version -- merged + Message-Id: <20020414154857.2066.4@bactrian.ni-s.u-net.com> +! encoding.pm +! t/jperl.t + Source filter option added. With this option on, you can write + perl 5.8-savvy scripts (such as UTF-8 identifiers) in legacy + encodings. t/jperl.t enhanced to test this feature. +! t/Unicode.t + ok() gotcha addressed by Benjamin fixed. Though I didn't exactly + apply his suggestion, this degree of nitting is enough to add him + to AUTHORS list. + Message-Id: <3CB93223.291E5E2E@earthlink.net> +! JP/JP.pm ++ lib/Encode/JP/JIS7.pm +- lib/Encode/JP/JIS.pm +- lib/Encode/JP/2022_JP.pm +- lib/Encode/JP/2022_JP1.pm + 7bit-jis, iso-2022-jp and iso-2022-jp1 are all aggregated to + JIS7.pm for better maintainability and performance +! encoding.pm + Added caveat for non-ascii identifiers. +! encoding.pm + fixes by jhi, the original author of this pragmatic module. + Message-Id: <20020413231527.V1826@alpha.hut.fi> + +1.34 2002/04/12 20:23:05 (Unreleased) +! Encode.pm +! t/Unicode.t + EBCDIC fixes addressed by jhi. + Message-Id: <20020412161844.D9383@alpha.hut.fi> +! lib/Encode/Encoder.pm + POD fix by Miyagawa-kun + Message-Id: <86bscqq4hu.wl@mail.edge.co.jp> + +1.33 2002/04/10 22:28:40 +! AUTHORS + Philip's mail address corrected. +! AUTHORS +! t/Encoder.t +! lib/Encode/Encoder.pm + s/ = shift;/ = @_;/ # trivial but a common idiomatic typo :) + This adds Miyagawa-kun to AUTHORS. + * encoding() no longer exported by default but on demand + * t/Encoder.t updated to test all these + Message-Id: <86hemjpdn4.wl@mail.edge.co.jp> +! lib/Encode/Unicode.pm +!
lib/Encode/Supported.pm + Further doc fixes by Anton + +1.32 2002/04/09 20:06:15 ++ bin/ucmlint ++ t/bogus.ucm +- ucm/macDevanaga.ucm Unicode Character Map +- ucm/macGujarati.ucm Unicode Character Map +- ucm/macGurmukhi.ucm Unicode Character Map + A utility to check integrity of .ucm files. t/bogus.ucm is a + ucm that is deliberately bogus. unused Indic mappings are removed + for the time being. +! Encode.pm + resolve_alias() added as suggested by jhi. Same as + find_encoding("alias")->name. For convenience. This one is + defined in Encode.pm instead of Alias.pm. + Message-Id: <20020409215846.H17022@alpha.hut.fi> +! Encode.xs + Memory allocation bug detected during the devel of ucmlint -- fixed. + Message-Id: +! lib/Encode/Unicode.pm + valid_ucs2(0) is false but must be true. + 3 patches from NI-S as follows. This also has fixed the incident + Andy has reported. +! lib/Encode/Alias.pm + find_alias() recursion prevention +! t/Aliases.t + Checks for the patch above +! t/Encode/Unicode.pm + An extra "F" that causes valid_ucs2() return a bogus value fixed + Message-Id: <20020409133927.17803.1@bactrian.elixent.com> + Message-Id: + 2 Small Patches from jhi as follows: +! Encode.pm + Encode->encodings() lists in case-insensitive order (as it was) +! bin/piconv + -l option prints available encodings to STDOUT instead of STDERR +! lib/Encode/Aliases.pm + s/defintion/definition/ + Message-Id: <200204082306.CAA21033@alpha.hut.fi> +! AUTHORS +! lib/Encode/Supported.pod +! lib/Encode/Unicode.pm + POD revise by Philip Newton. This adds Philip to AUTHORS list. + Thank you for the exact quote of Douglas Adams :) + Message-Id: <22s3bu4gpvhhsses64nj3afuu0lo927rv3@4ax.com> + +1.31 2002/04/08 18:08:07 +! lib/Encode/Encoder.pm ++ t/Encoder.t + Encode::Encoder, once just a placeholder of an idea, is now much more + practical. See t/Encode.t to find how practical it can be. ++ lib/Encode/Config.pm +! Encode.pm + my false laziness at Encode.pm is fixed.
Now %ExtModules are set + in Encode::Config and they are all literally, not programmatically + set. My false laziness was resulting in many encodings missing from + %ExtModules. +! lib/Encode/Unicode.pm +! t/Unicode.t + BOM for 32LE was bogus as noted by Anton. t/Unicode.t is fixed + so that it does not rely on Encode::Unicode for BOM values + Message-Id: + +1.30 2002/04/08 02:34:51 ++ lib/Encode/Encoder.pm + Object Oriented Encoder. I reckon something like this is in need. +! Encode.pm +! t/Unicode.pm +! lib/Encode/Supported.pod + * autoloading bug that prevented upper-case canonicals such as UTF-16 + is fixed. Now even UTF/UCS are autoloaded! + * encodings() is now more intuitive. + * t/Unicode.t fixed to explicitly use Unicode.pm -- BOM values are + stored therein. + * Obligatory fixes to the POD. +! lib/Encode/Supported.pod + Patch from Anton applied. + Message-Id: <66641479.20020408033300@motor.ru> +! Encode.pm +! lib/Encode/Unicode.pm + Cosmetic changes: "bless $obj, $class" => "bless $obj => class" + +1.28 2002/04/07 18:58:42 +! MANIFEST ++ t/Unicode.t ++ t/grow.t + Just a MANIFEST for those missing files. + +1.26 Date: 2002/04/07 15:22:04 +! JP/Makefile.PL +! t/Aliases.PL + Schwern's patches against Makefile.PL has zapped jis*.ucm. Restored. + And t/Aliases.t fixed to make sure they all exist. + +1.25 2002/04/07 15:01:25 (Unreleased) +! Encode.pm +! lib/Encode/Unicode.pm + More POD fixes.... +! Encode.pm +- lib/Encode/UTF_EBCDIC.pm +- lib/Encode/Internal.pm +- lib/Encode/utf8.pm + Integrated into Encode.pm as closures. That way "one package, one file" + rule is preserved yet less files to require. +! encoding.pm + commented out binmode(STDERR ... +! Makefile.PL +! Byte/Makefile.PL +! CN/Makefile.PL +! EBCDIC/Makefile.PL +! JP/Makefile.PL +! KR/Makefile.PL +! Symbol/Makefile.PL +! TW/Makefile.PL +! Encode/Makefile_PL.e2x + Schwern's MM-compliance patch merged + Message-Id: <20020406082609.GA28758@blackrider> +! Encode.pm +!
lib/Encode/Unicode.pm ++ lib/Encode/UTF_EBCDIC.pm ++ t/Unicode.t +- lib/Encode/10646_1.pm +- lib/Encode/ucs2_le.pm + (UCS-2|UTF-(16|32))(LE|BE)? implementation and cleanups. Instead of + per-module based (en|de)code, I saved a number of .pm by + reorganizing it as per-object base (Well, this is what Encode::XS + does under the hood). See Encode::Unicode for details. + The original Unicode.pm is now correctly renamed to UTF_EBCDIC.pm. + This module is used only on EBCDIC environments. + +1.21 2002/04/05 14:46:34 (Not Released) +! JP/JP.pm +! Encode.pm ++ ucm/jis0201.ucm ++ ucm/jis0208.ucm ++ ucm/jis0212.ucm + Are back to make Perl/Tk happy Smile, NI-S. +! t/Alias.pm +! lib/Encode/Alias.pm +! lib/Encode/Supported.pm +! lib/Encode/10646_1.pm +! lib/Encode/ucs2_le.pm + UCS-16BE is now canonical for UCS-2/ISO-10646-1. + Leftover implicit aliases in ucs2_le.pm removed. Tests and documents + updated to reflect changes. + Message-Id: <20020405114024.1290.17@bactrian.ni-s.u-net.com> +! lib/Encode/Alias.pm +! lib/Encode/Supported.pm + Anton's revision committed. Added Dan's own fixes as well. + Message-Id: <159103166906.20020405161134@motor.ru> +! lib/Encode/Alias.pm + 134c134 + < qr/^UCS2-le$/i => '"UCS-2"', ); + --- + > qr/^UCS2-LE$/i => '"UTF-16LE"'); + Sigh. Thank you, Anton. + Message-Id: <14567692196.20020405062020@motor.ru> + Message-Id: <69FEC0B4-483E-11D6-A045-00039301D480@dan.co.jp> + +1.20 2002/04/04 19:50:52 ++ bin/unidump + the last minute addition. Just give it a try. Docs remain to be done. + Not installed by default. +! lib/Encode/Supported.pod + Enhanced Greatly. +! t/Alias.t +! lib/Encode/Alias.pm +! lib/Encode/utf8.pm +! lib/Encode/10464_1.pm +! lib/Encode/ucs2_le.pm + Canonical name for 'UCS-2le" is now "UTF-16LE". UCS-2 left + unchanged but UTF-16BE is added as an alias. Implicit aliases + move to Encode::Alias so init_alias() works more as expected. + Also, 'utf8' is now canonical with 'UTF-8' being an alias.
+ Though pedantically wrong, This should make perl mongers happier. + t/Alias.t is enhanced to test all these. + Message-Id: <9C39BD58-47AF-11D6-9D82-00039301D480@dan.co.jp> +! Byte/Makefile.PL + Now all .ucm are stacked in byte_t; They all share ascii part so 50% + of the codepoints are common. CJKT left as is because the saving is + not significant. +! Byte/Makefile.PL +! CN/Makefile.PL +! EBCDIC/Makefile.PL +! Encode.xs +! Encode/Makefile_PL.e2x +! JP/Makefile.PL +! KR/Makefile.PL +! Makefile.PL +! Symbol/Makefile.PL +! TW/Makefile.PL +! bin/enc2xs +! AUTHORS + All occurrences of _def.h replaced with .exh so djgpp works happily + ever after! To credit this amazing discovery, Laszlo is now in + AUTHORS list + Message-Id: <20020403181424.GA8778@freemail.hu> + Message-Id: +! Makefile.PL +! */Makefile.PL +! Encode/Makefile_PL.skel + bin/enc2xs + No more @INC fiddling! Uses $ENV{PERL_CORE} instead + Message-Id: <20020401222744.GX2000@blackrider>, et al. +! t/encoding.t + Two more tests added by jhi + Message-Id: <200204020000.DAA25121@alpha.hut.fi> ++ t/grow.t +! Encode.xs + The showstopper fixed -- Memory reallocation bug was causing + Encode::XS to fall into infinite loop on certain conditions. + t/grow.t tests that. + Message-Id: <9572CAC4-463C-11D6-ABA5-00039301D480@dan.co.jp>, et al ++ bin/txt2ucm +! */Makefile.PL +! */*.ucm +! */XX.pm +! lib/Encode/Supported.pod + Vendor encodings rebuilt out of original map files at unicode.org. + Indic languages such as MacDevanagari remain unsupported due to the + shortcoming of encengine capabilities (they need algorithmic + conversion and I have no knowledge on that!). Pods fixed for added + encodings. + Oh, macJapan.ucm renamed to macJapanese.ucm. + macROMnn is macRomanian and macRUMnn is macRumanian. + txt2ucm is a crude script that is used to convert them. +! bin/enc2xs + Unicode Compound Characters (used extensively on Mac) supported +!
bin/piconv + Typo fixes and improvements by jhi + Message-Id: <200204010201.FAA03564@alpha.hut.fi>, et al. + +1.11 2002/03/31 22:12:13 ++ t/encoding.t ++ t/jperl.t +! MANIFEST + Missing files from the MANIFEST fixed. + Message-Id: <20020401010156.H10509@alpha.hut.fi> + Version incremented just to make CPAN happy. + +1.10 2002/03/31 21:32:42 +! Makefile.PL +! README + INSTALL_UCM option added to Makefile.PL so you can install *.ucm + if you want. This should make Autrijus happy. Also, piconv + is added to default install. ++ Encode/*.e2x +! bin/enc2xs + Here-documented files that enc2xs generates are now exported + to *.e2x. Much cleaner and easier to debug. +! encoding.pm + encoding enhanced so you can make it act more like such + (now prehistoric) "localized" variations of perl like Jperl. ++ t/jperl.t + Further test for encoding.pm. Written in euc-jp ++ encoding.pm ++ t/encoding.t + Taken over from jhi. + Message-Id: <20020330174618.B10154@alpha.hut.fi> +- Encode/*.ucm ++ ucm/*.ucm +! Makefile.PL +! */Makefile.PL + *.ucm relocated to ucm/ so MakeMaker will not install'em by default. +- ucm2table ++ bin/ucm2table + *** +! AUTHORS +! Byte/Byte.pm +! Encode.pm +! Encode/macIceland.ucm +! lib/Encode/Alias.pm +! lib/Encode/Supported.pod + MacIceland fixes and Pod Typo fixes. This adds Andreas to AUTHORS. + Message-Id: + +1.01 2002/03/29 20:59:39 +! Makefile.PL +! README + s/USE_SCRIPTS/MORE_SCRIPTS/ +! Makefile.PL + installs enc2xs by default for external Encode:: modules in CPAN, + such as Encode::HanExtra +! t/*.t + More sensible perl core detection via $ENV{PERL_CORE} + suggested by Spider + Message-Id: <200203291007.FAA07329@Orb.Nashua.NH.US> +! bin/enc2xs + Perl core detection via $^X =~ m/\bminiperl$/o + Message-Id: + +1.00 Wed Mar 29 2002 +! * + The version of all files is updated to 1.00 via "ci -f -l1.00", + commemorating version 1.00. All files, including *.ucm are now + under version control.
+- encode.h ++ Encode/encode.h + encode.h moved to Encode/ so it will be installed for the later + use by enc2xs +! enc2xs + h2xs-like feature added via "h2xs -M Name *.(enc|ucm)" +! Makefile.PL +! */Makefile.PL +- compile ++ bin/enc2xs + compile renamed to enc2xs. + Affected Makefle.PL updated +- lib/CN/2022_CN.pm + "Punt it. HanExtra can take care of that later." -- Autrijus + Message-Id: <20020328154338.GA7351@not.autrijus.org> +! Encode/johab.ucm +! Encode/euc-kr.ucm +! Encode/ksc5601.ucm +! lib/Encode/CJKConstants.pm +! lib/Encode/KR/2022_KR.pm + Table patches for Euro Signs, 2022-KR fixups by Jungshik + Message-Id: +! README +! Makefile.PL ++ bin/piconv + bin/ added for example scripts. They are not installed by default. + to install them, "perl Makefile.PL USE_SCRIPTS". + piconv is iconv reinvented in perl. in addition to all features + of iconv, it also adds perlish features. See L for more + details. +! lib/Encode/Alias.pm + qr/^ replaced with qr/\b so it directly matches locale names + such as en_US.US-ASCII +! AUTHORS +! t/Aliases.t + Patch by MJD to fix the following problem applied. + Subject: [PATCH 5.7.3 Encode] + Aliases.t not properly skipped when Encode extension not built + Message-Id: <20020328091850.18677.qmail@plover.com> +! lib/Encode/KR/2022_KR.pm +! lib/Encode/CJKConstants.pm + Another patch from Jungshik to make iso-2022-kr actually work + Message-Id: +! Encode/Encode/euc-kr.ucm ++ Encode/Encode/johab.ucm +! Encode/Encode/ksc5601.ucm +! Encode/KR/KR.pm +! Encode/KR/Makefile.PL +! Encode/lib/Encode/Alias.pm +! t/Alias.t + Johab support and complete revision of Korean Encoding by Jungshik + Message-Id: ++ Encode.pm + Revised to make up with now-dropped Encode::Details. +- lib/Encode/Details.pod + Dropped. Besides being obsolete, the topics are now covered in + respective pods now. +! AUTHORS +! t/Alias.t + KR/KR.pm + lib/Encode/Alias.pm + Korean aliases fixed thanks to Jungshik Shin + /ks[-_ ]?c[-_ ]?5601-1987$/i => cp936 + Message-Id: +! 
*.pm + =head1 NAME added to all modules to make buildtoc happy + Message-Id: <20020327041151.A10618@alpha.hut.fi> +- lib/Encode/CJKguide.pod + Too controversial and dropped from the dist. Will be available + separately on the web. +! Encode/*.ucm + RCS tags added so table debugging gets easier (should that be + needed! I hope they all stay 1.00!) ++ lib/Encode/CJKguide.pod + A detailed guide to mainly, but not limited to, CJK multibyte + encodings. +- Encode/roman8.ucm ++ Encode/hp-roman8.ucm +! Byte/Makefile.PL +! Encode/Supported.pod + All occurrences of "roman8" replaced with "hp-roman8" to avoid + confusion +! Encode/Supported.pod +! Encode/mac*.ucm +! t/Alias.t + Mac Encodings now comply with the Inside Macintosh +! t/Alias.t + Test for '-raw' conventions added. +! Encode/Alias.pm + aliased gb2312 -> euc-cn, ksc5601 -> euc-kr +! Encode/gb12345.ucm +! Encode/gb2312.ucm +! Encode/ksc5601.ucm + "-raw" appended to canonical names. + File names stay unchanged thanks to UCM format. +! lib/Encode/CN/HZ.pm + Patch from Autrijus to fix gb2312 -> gb2312-raw + code linting + Message-Id: <20020326035210.GA2091@not.autrijus.org> + +0.99 Tue Mar 26 2002 +- lib/Encode/JP/Const.pm ++ lib/Encode/CJKConstants.pm ++ lib/Encode/CN/2022_CN.pm ++ lib/Encode/KR/2022_KR.pm ++ t/KR.t ++ t/gb2312.euc ++ t/gb2312.ref ++ t/ksc5601.euc ++ t/ksc5601.ref ++ t/table.euc ++ t/table.ref ++ ucm2table + * Support for ISO-2022-KR and ISO-2022-CN added. + * t/KR.t added! + * more t/*.{euc,ref} added, which was autogenerated from ucm2table + * ucm2table autogenerates character table out of UCM files. +- engine.c ++ encengine.c +- lib/Encode/Supports.pod ++ lib/Encode/Supported.pod + Names reverted due to popular demand. + 8.3 rule applies only when there is a conflict. + Message-Id: <20020325095924.GD44120@not.autrijus.org> +! */Makefile.PL +- Encode/*.enc ++ Encode/*.ucm +- lib/Tcl* +- lib/Encode/Format/Enc.pod +- t/Tcl.t + * Character tables is now 100% ucm.
+ * All files under Encode/ is now 8.3-compliant + * some of missing encodings added (i.e. gsm0338 and nextstep) + * Vendor mappings aggregated with appropriate national std in + Makefile.PL, resulting smaller *.so especially for CJK. + Following is result on Dan's FreeBSD box. + Now Then + --------------------------------------------------------------- + blib/arch/auto/Encode/Byte/Byte.so 157,279 171,042 + blib/arch/auto/Encode/CN/CN.so 1,634,476 1,626,685 + blib/arch/auto/Encode/EBCDIC/EBCDIC.so 18,476 18,476 + blib/arch/auto/Encode/Encode.so 27,791 27,791 + blib/arch/auto/Encode/JP/JP.so 1,408,056 1,832,811 + blib/arch/auto/Encode/KR/KR.so 1,156,518 1,329,587 + blib/arch/auto/Encode/Symbol/Symbol.so 23,940 20,990 + blib/arch/auto/Encode/TW/TW.so* 948,761 1,316,437 + --------------------------------------------------------------- + Total 5,375,297 6,343,819 + Saving 968,522 + * As a result of ucm-transition, Encode::Tcl dropped because + Encode::Tcl demands *.enc. + Encode::Tcl will be supplied in a separate tarball with *.enc. + Message-Id: +!compile +-encengine.c ++encode.c +!Encode.pm +-lib/Encode/Supported.pod ++lib/Encode/Supports.pod +-lib/Encode/iso10646_1.pm ++lib/Encode/10646_1.pm +-lib/Encode/EncFormat.pod ++lib/Encode/Format/Enc.pod + Files renamed 8.3 filename compliance. Affected modules/scripts revised. +- lib/Encode/JP/Constants.pm ++ lib/Encode/JP/Consts.pm +! lib/Encode/JP/JIS.pm +! lib/Encode/JP/H2Z.pm + Version nit problem and 8.3 rule fix. + > Package namespace installed latest in CPAN file + > Encode::JP::Constants 0.92 1.02 J/JH/JHI/perl-5.7.3.tar.gz + was noted by jhi then Dan discovers "Constants.pm" does not comply 8.3 + rule. Contants.pm renamed to Consts.pm and affected modules are fixed + accordingly. In addition, legacy "use vars qw()..." are replaced with + "our"; + Message-Id: <20020325011248.D1561@alpha.hut.fi> + Message-Id: <41023D51-3FB5-11D6-8347-00039301D480@dan.co.jp> +! 
JP/JP.pm +- lib/Encode/JP/ISO_2022_JP.pm +- lib/Encode/JP/ISO_2022_JP_1.pm ++ lib/Encode/JP/2022_JP.pm ++ lib/Encode/JP/2022_JP1.pm + 01234567.012 + 8.3 naming conflict for vanilla fat addressed by jhi + Message-Id: <20020324201931.V22596@alpha.hut.fi> + +! Encode.xs + Typecast fix addressed by jhi + Message-Id: <20020324185540.T22596@alpha.hut.fi> + +0.98 Mon Mar 25 2002 +! lib/Encode/Supported.pod + Further pod fixes ++ lib/Encode/JP/ISO_2022_JP_1.pm +! lib/Encode/JP/ISO_2022_JP.pm +! lib/Encode/JP/JIS.pm +! JP/JP.pm + Now Encode::JP is more strict on the difference between ISO-2022-JP + and ISO-2022-JP-1. See JP/JP.pm for details. I hope this move + makes Anton happier :) FYI the previous version implements + ISO-2022-JP as ISO-2022-JP-1 since it had X0212 support. +! lib/Encode/Supported.pod + Further pod fixes +! Encode.xs + Avoid core-dump in Encode with PERLIO=mmap by NI-S + Message-Id: <20020324104139.1326.7@bactrian.ni-s.u-net.com> +! CN/CN.pm +! JP/JP.pm +! KR/KR.pm +! TW/TW.pm +! lib/Encode/Suppoted.pod + pod fixes to replace F to L, + as suggested by Autrijius in: + Message-Id: <20020324083943.GA14901@not.autrijus.org> +! lib/Encode/Suppoted.pod + fixes and enhancements by Anton + Message-Id: <10632060120.20020324103753@motor.ru> +! lib/Encode/Alias.pm + > define_alias( qr/^GB[- ]?(\d+)$/i => '"gb$1"' ); + added. Suggested by Anton then deobfuscated by Autrijius + Message-Id: <20020324064455.GA3667@not.autrijus.org> +! compile + Further fix by Nicholas Clark + Message-Id: <20020323145840.GD304@Bagpuss.unfortu.net> +- lib/EncodeFormat.pod ++ lib/Encode/EncFormat.pod +! MANIFEST + File renamed as suggested by Autrijius +! Encode.pm +! lib/Encode/Details.pod +! lib/Encode/Supported.pod Sun Mar 24 13:29:35 2002 +! Encode.pm Sun Mar 24 13:43:47 2002 + pod fixes by Autrijius. + Message-Id: <20020324062804.GA3595@not.autrijus.org> + Message-Id: <20020324075627.GB11986@not.autrijus.org> +! t/Alias.t +! lib/Encode/Alias.pm +! 
Encode.pm + now more EBCDIC conscious; + %ExtModules on EBCDIC system excludes CJK so that you don't + have to worry about the matched alias resulting cloaking. + t/Alias.t also revised to reflect changes. Verified by jhi + Message-Id: <20020324022929.D22596@alpha.hut.fi> + +0.97 Sun Mar 24 2002 +! CN/CN.pm +! KR/KR.pm +! TW/TW.pm + EBCDIC detection mechanism installed as in JP/JP.pm + Message-Id: <20020323211847.G19148@alpha.hut.fi> +! Byte/Makefile.PL +! CN/Makefile.PL +! EBCDIC/Makefile.PL +! JP/Makefile.PL +! KR/Makefile.PL +! Symbol/Makefile.PL +! TW/Makefile.PL + Now all table files used by compile are postfixed '_t' to avoid + namespace collisions in case insensitive file systems once and for all! + inspired by: + Message-ID: <58290227735.20020323195659@familiehaase.de> +! t/Aliases.t + Since the Encode::JP is unsupported under EBCDIC we + cannot run this test (aliases as such should work fine) -- jhi + Message-Id: <20020323202119.D19148@alpha.hut.fi> +! Byte/Makefile.PL + duplicate occurrence of ascii.ucm and 8859-1.ucm + causes MacOS X dyld to cloak +! t/CN.t +! t/Encode.t +! t/JP.t +! t/TW.t +! t/Tcl.t + < chdir 't' if -d 't'; + --- + > if (! -d 'blib' and -d 't'){ chdir 't' }; + When you are "make test"-ing on Encode/ directory, you must not + change $ENV{PWD}. t/JP.t has been fixed before but others somehow + remain unchanged. Also the situation detection was made simpler + in t/JP.t, which was originally; + > chdir 't' if -d 't' and $ENV{PWD} !~ m,/Encode[^/]*$,o; +! Encode.pm + "Use of uninitialized value in string eq at Encode.pm line 96." +! Symbol/Makefile.PL +! EBCDIC/Makefile.PL +! AUTHOR + -- Problem on case insensitive file systems + "coexist of ebcdic.c <> EBCDIC.c on Cygwin not possible" + Message-ID: <88254111953.20020323095503@familiehaase.de> +! compile +! AUTHOR + "So I think it's a bug in gcc, not perl. But it still needs to be + worked around."
+ Message-Id: <20020323145840.GD304@Bagpuss.unfortu.net> + Message-Id: <20020323170509.C96475@plum.flirble.org> + +0.96 Sat Mar 23 2002 +! TW/TW.pm +! lib/Encode/Encoding.pm +! lib/Encode/Alias.pm +! lib/Encode/Supported.pod +! KR/KR.pm + Pod Fixes by Michael G Schwern via jhi + Message-ID: <20020322073908.GB10539@blackrider> +! Makefile.PL +! Encode.pm + "...I think we should include ISO 8859-1 as well." -- NI-S + Message-Id: <20020322120230.1332.8@bactrian.elixent.com> +! JP/JP.pm +! CN/CN.pm +! KR/KR.pm +! TW/TW.pm +! lib/Encode/Alias.pm + alias definitions relocated to Encode::Alias so module autoloading + works for aliases also. +! Encode.pm + encodings() now accepts args to check ExtModules. ++ Byte/Byte.pm ++ Byte/Makefile.PL ++ EBCDIC/EBCDIC.pm ++ EBCDIC/Makefile.PL ++ Symbol/Makefile.PL ++ Symbol/Symbol.pm +! Encode.pm +! Encode.xs + Latin and single byte encodings are reorganized so they are + demand-loaded like Encode::XX. Now only ascii is compiled into + Encode itself. +! lib/Encode/Alias.pm + for my $k (keys %hash){ delete $hash{$k}; } + is deprecated; fixed. + +0.95 Fri Mar 22 2002 + In this update, pod rewrites and alias fixes are the main issues ++ lib/Encode/Supported.pod + Describes supported encodings +! Makefile.PL + streamlined compiled-in encodings. +! lib/Encode/Description.pod -> lib/Encode/Details.pod + Renamed. ++ Encode/ibm-125?.ucm + Added from icu distribution with any occurrence of + "IBM-125?" changed to "cp125?". Filenames remain unchanged to pay + some respect to icu staff, however. ++ lib/Encode/Alias.pm +! Encode.pm + Alias definitions in Encode.pm relocated. +! AUTHORS +! Encode.xs + packWARN patch from Paul Marquess via jhi + Message-Id: <20020321010101.O28978@alpha.hut.fi> + Paul added to AUTHORS as a result. +! t/CJKalias.t -> t/Aliases.t + Renamed. Checks even more aliases and alias overloading +! Encode.pm +! CN/CN.pm + duplicate alias for ujis => euc-jp removed (Encode::JP has one) + gbk => cp936 relocated to CN.pm +!
t/CJKalias.t + Test::More with plans (by jhi) + +0.94 Thu Mar 21 2002 ++ lib/Encode/Description.pod +! lib/Encode/Encoding.pm + Now the pod in Encode.pm is abridged as programming references. + lib/Encode/Description.pod contains the original, detailed description + and Encode::Encoding explains how to write your own module to + add new encodings. So far, lib/Encode/Description.pod contains + the whole pod once in Encode.pm. This is intentional. +! Encode.pm + Pod revisions by Anton Tagunov + Message-Id: <517178431.20020320174824@motor.ru> +! lib/Encode/Tcl.pm + all occurrences of Encode::Tcl::Extended removed including pod +! t/CJKalias.t + test now checks $encoding->name only; $encoding->{name} is + no longer checked to find the canonical name. +! lib/Encode/JP/JIS.pm +! lib/Encode/JP/ISO_2022_JP.pm + ->name() added to be more compliant with API +! CN/CN.pm +! JP/JP.pm +! KR/KR.pm +! TW/TW.pm +! t/CJKalias.t + Patch by Autrijus to add aliases to TW and fixes to POD + Message-Id: <20020320090619.GA24774@not.autrijus.org> +! AUTHORS + SADAHIRO Tomoyuki added as should. My apologies. + +0.93 Wed Mar 20 2002 +* First release to be uploaded to CPAN. For prehistoric changes, + please see Changes file of perl distribution as well as + perl-unicode@perl.org archive, available at: + http://archive.develooper.com/perl-unicode@perl.org/ + + Changes Since 0.92 includes; ++ Changes ++ AUTHORS +! Encode.pm +! README + + Mention to perl-unicode@perl.org added +! JP/JP.pm + + Encoding aliases added so you can feed locale names + and MIME Charset="" directly. + - Mention to JISX0212 removed because it's fixed +! CN/CN.pm +! KR/KR.pm + + Encoding aliases added. Note TW is left untouched because + euc-tw is not implemented in TW but in Encode::HanExtra. + Autrijus, you may fix Encode::HanExtra.
++ t/CJKalias.t + + to test encode aliases added diff --git a/EBCDIC/EBCDIC.pm b/EBCDIC/EBCDIC.pm new file mode 100644 index 0000000..8024c13 --- /dev/null +++ b/EBCDIC/EBCDIC.pm @@ -0,0 +1,45 @@ +package Encode::EBCDIC; +use strict; +use warnings; +use Encode; +our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; + +use XSLoader; +XSLoader::load( __PACKAGE__, $VERSION ); + +1; +__END__ + +=head1 NAME + +Encode::EBCDIC - EBCDIC Encodings + +=head1 SYNOPSIS + + use Encode qw/encode decode/; + $posix_bc = encode("posix-bc", $utf8); # loads Encode::EBCDIC implicitly + $utf8 = decode("posix-bc", $posix_bc); # ditto + +=head1 ABSTRACT + +This module implements various EBCDIC-Based encodings. Encodings +supported are as follows. + + Canonical Alias Description + -------------------------------------------------------------------- + cp37 + cp500 + cp875 + cp1026 + cp1047 + posix-bc + +=head1 DESCRIPTION + +To find how to use this module in detail, see L<Encode>. + +=head1 SEE ALSO + +L<Encode>, L<perlebcdic> + +=cut diff --git a/EBCDIC/Makefile.PL b/EBCDIC/Makefile.PL new file mode 100644 index 0000000..e9f59a6 --- /dev/null +++ b/EBCDIC/Makefile.PL @@ -0,0 +1,157 @@ +use 5.7.2; +use strict; +use ExtUtils::MakeMaker; + +my $name = 'EBCDIC'; +my %tables = ( + ebcdic_t => + ['posix-bc.ucm', + qw(cp037.ucm cp1026.ucm cp1047.ucm cp500.ucm cp875.ucm), + ], + ); + +WriteMakefile( + INC => "-I../Encode", + NAME => 'Encode::'.$name, + VERSION_FROM => "$name.pm", + OBJECT => '$(O_FILES)', + 'dist' => { + COMPRESS => 'gzip -9f', + SUFFIX => 'gz', + DIST_DEFAULT => 'all tardist', + }, + MAN3PODS => {}, + # OS 390 winges about line numbers > 64K ???
+ XSOPT => '-nolinenumbers', + ); + +package MY; + +# MakeMaker hook: records the object files, C sources, header and clean-up +# files for every table in %tables, then writes $name.xs, which #includes +# each table's .h and pulls its .exh into the BOOT section. +sub post_initialize +{ + my ($self) = @_; + my %o; + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) + { + $o{$e.$x} = 1; + } + $o{"$name$x"} = 1; + $self->{'O_FILES'} = [sort keys %o]; + my @files = ("$name.xs"); + $self->{'C'} = ["$name.c"]; + $self->{SOURCE} .= " $name.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/; + $self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')]; + my %xs; + foreach my $table (sort keys %tables) { + push (@{$self->{'C'}},"$table.c"); + # Do NOT add $table.h etc. to H_FILES unless we own up as to how they + # get built. + foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) { + push (@files,$table.$ext); + } + $self->{SOURCE} .= " $table.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/; + } + $self->{'XS'} = { "$name.xs" => "$name.c" }; + $self->{'clean'}{'FILES'} .= join(' ',@files); + open(XS,">$name.xs") || die "Cannot open $name.xs:$!"; + print XS <<'END'; +#define PERL_NO_GET_CONTEXT +#include <EXTERN.h> +#include <perl.h> +#include <XSUB.h> +#include "encode.h" +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.h"\n]; + } + print XS <<"END"; + +static void +Encode_XSEncoding(pTHX_ encode_t *enc) +{ + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *iv = newSViv(PTR2IV(enc)); + SV *sv = sv_bless(newRV_noinc(iv),stash); + int i = 0; + /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's + constness, in the hope that perl won't mess with it.
*/ + assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0); + SvFLAGS(iv) |= SVp_POK; + SvPVX(iv) = (char*) enc->name[0]; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) + { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding",G_DISCARD); + SvREFCNT_dec(sv); +} + +MODULE = Encode::$name PACKAGE = Encode::$name +PROTOTYPES: DISABLE +BOOT: +{ +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.exh"\n]; + } + print XS "}\n"; + close(XS); + return "# Built $name.xs\n\n"; +} + +# MakeMaker hook: returns the extra Makefile rules that build each $table.c +# from its .ucm files with enc2xs, and writes the matching $table.fnm file +# list that the enc2xs command line reads via -f. +sub postamble +{ + my $self = shift; + my $dir = $self->catdir($self->updir,'ucm'); + my $str = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n"; + $str .= "$name.c : $name.xs "; + foreach my $table (sort keys %tables) + { + $str .= " $table.c"; + } + $str .= "\n\n"; + $str .= "$name\$(OBJ_EXT) : $name.c\n\n"; + + my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs'); + foreach my $table (sort keys %tables) + { + my $numlines = 1; + my $lengthsofar = length($str); + my $continuator = ''; + $str .= "$table.c : $enc2xs Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : ''; + $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform; + my $ucopts = '-"Q" -"O"'; + $str .= + qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n}; + open (FILELIST, ">$table.fnm") + || die "Could not open $table.fnm: $!"; + foreach my $file (@{$tables{$table}}) + { + print FILELIST $self->catfile($dir,$file) .
"\n"; + } + close(FILELIST); + } + return $str; +} + diff --git a/Encode.pm b/Encode.pm new file mode 100644 index 0000000..f90f929 --- /dev/null +++ b/Encode.pm @@ -0,0 +1,1093 @@ +# +# $Id: Encode.pm,v 2.97 2018/02/21 12:14:24 dankogai Exp $ +# +package Encode; +use strict; +use warnings; +use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; +our $VERSION; +BEGIN { + $VERSION = sprintf "%d.%02d", q$Revision: 2.97 $ =~ /(\d+)/g; + require XSLoader; + XSLoader::load( __PACKAGE__, $VERSION ); +} + +use Exporter 5.57 'import'; + +our @CARP_NOT = qw(Encode::Encoder); + +# Public, encouraged API is exported by default + +our @EXPORT = qw( + decode decode_utf8 encode encode_utf8 str2bytes bytes2str + encodings find_encoding find_mime_encoding clone_encoding +); +our @FB_FLAGS = qw( + DIE_ON_ERR WARN_ON_ERR RETURN_ON_ERR LEAVE_SRC + PERLQQ HTMLCREF XMLCREF STOP_AT_PARTIAL +); +our @FB_CONSTS = qw( + FB_DEFAULT FB_CROAK FB_QUIET FB_WARN + FB_PERLQQ FB_HTMLCREF FB_XMLCREF +); +our @EXPORT_OK = ( + qw( + _utf8_off _utf8_on define_encoding from_to is_16bit is_8bit + is_utf8 perlio_ok resolve_alias utf8_downgrade utf8_upgrade + ), + @FB_FLAGS, @FB_CONSTS, +); + +our %EXPORT_TAGS = ( + all => [ @EXPORT, @EXPORT_OK ], + default => [ @EXPORT ], + fallbacks => [ @FB_CONSTS ], + fallback_all => [ @FB_CONSTS, @FB_FLAGS ], +); + +# Documentation moved after __END__ for speed - NI-S + +# "A" is code point 193 in EBCDIC code pages (65 in ASCII). +our $ON_EBCDIC = ( ord("A") == 193 ); + +use Encode::Alias (); +use Encode::MIME::Name; + +use Storable; + +# Make a %Encoding package variable to allow a certain amount of cheating +our %Encoding; +our %ExtModule; +require Encode::Config; +# See +# https://bugzilla.redhat.com/show_bug.cgi?id=435505#c2 +# to find why sig handlers inside eval{} are disabled.
+eval { + local $SIG{__DIE__}; + local $SIG{__WARN__}; + local @INC = @INC; + pop @INC if $INC[-1] eq '.'; + require Encode::ConfigLocal; +}; + +# Returns the names of the known encodings, sorted case-insensitively. +# With ":all" as the first argument after the invocant, every name in +# %Encoding and %ExtModule is listed; otherwise %Encoding plus the +# %ExtModule entries provided by the modules named in @_ ("Encode::" is +# prepended to names without "::"). Internal, Unicode and Guess are +# never listed. +sub encodings { + my %enc; + my $arg = $_[1] || ''; + if ( $arg eq ":all" ) { + %enc = ( %Encoding, %ExtModule ); + } + else { + %enc = %Encoding; + for my $mod ( map { m/::/ ? $_ : "Encode::$_" } @_ ) { + DEBUG and warn $mod; + for my $enc ( keys %ExtModule ) { + $ExtModule{$enc} eq $mod and $enc{$enc} = $mod; + } + } + } + return sort { lc $a cmp lc $b } + grep { !/^(?:Internal|Unicode|Guess)$/o } keys %enc; +} + +# Takes an encoding object or name and returns the object's perlio_ok() +# result, or 0 when the name cannot be resolved or the object has no such +# method. +sub perlio_ok { + my $obj = ref( $_[0] ) ? $_[0] : find_encoding( $_[0] ); + # find_encoding() returns undef for an unknown name; guard the method call. + defined $obj and $obj->can("perlio_ok") and return $obj->perlio_ok(); + return 0; # safety net +} + +# Registers $obj in %Encoding under $name, aliases the lower-cased name and +# every further name passed, adds the object's class to the CARP_NOT lists +# of Encode and Encode::Encoding, and returns $obj. +sub define_encoding { + my $obj = shift; + my $name = shift; + $Encoding{$name} = $obj; + my $lc = lc($name); + define_alias( $lc => $obj ) unless $lc eq $name; + while (@_) { + my $alias = shift; + define_alias( $alias, $obj ); + } + my $class = ref($obj); + push @Encode::CARP_NOT, $class unless grep { $_ eq $class } @Encode::CARP_NOT; + push @Encode::Encoding::CARP_NOT, $class unless grep { $_ eq $class } @Encode::Encoding::CARP_NOT; + return $obj; +} + +# Class method resolving $name to an encoding object. Lookup order: an +# object that can('renew') is returned as is, then %Encoding (name as given, +# then lower-cased), then aliases, then -- unless $skip_external is true -- +# the module %ExtModule lists for the name is loaded and %Encoding is +# checked again. Returns nothing when $name is undef or the lookup fails. +sub getEncoding { + my ( $class, $name, $skip_external ) = @_; + + defined($name) or return; + + $name =~ s/\s+//g; # https://rt.cpan.org/Ticket/Display.html?id=65796 + + ref($name) && $name->can('renew') and return $name; + exists $Encoding{$name} and return $Encoding{$name}; + my $lc = lc $name; + exists $Encoding{$lc} and return $Encoding{$lc}; + + my $oc = $class->find_alias($name); + defined($oc) and return $oc; + $lc ne $name and $oc = $class->find_alias($lc); + defined($oc) and return $oc; + + unless ($skip_external) { + if ( my $mod = $ExtModule{$name} || $ExtModule{$lc} ) { + $mod =~ s,::,/,g; + $mod .= '.pm'; + eval { require $mod; }; + exists $Encoding{$name} and return $Encoding{$name}; + } + } + return; +} + +# HACK: These two functions must be defined in Encode and because of +# cyclic
dependency between Encode and Encode::Alias, Exporter does not work +sub find_alias { + goto &Encode::Alias::find_alias; +} +sub define_alias { + goto &Encode::Alias::define_alias; +} + +sub find_encoding($;$) { + my ( $name, $skip_external ) = @_; + return __PACKAGE__->getEncoding( $name, $skip_external ); +} + +sub find_mime_encoding($;$) { + my ( $mime_name, $skip_external ) = @_; + my $name = Encode::MIME::Name::get_encode_name( $mime_name ); + return find_encoding( $name, $skip_external ); +} + +sub resolve_alias($) { + my $obj = find_encoding(shift); + defined $obj and return $obj->name; + return; +} + +sub clone_encoding($) { + my $obj = find_encoding(shift); + ref $obj or return; + return Storable::dclone($obj); +} + +sub encode($$;$) { + my ( $name, $string, $check ) = @_; + return undef unless defined $string; + $string .= ''; # stringify; + $check ||= 0; + unless ( defined $name ) { + require Carp; + Carp::croak("Encoding name should not be undef"); + } + my $enc = find_encoding($name); + unless ( defined $enc ) { + require Carp; + Carp::croak("Unknown encoding '$name'"); + } + # For Unicode, warnings need to be caught and re-issued at this level + # so that callers can disable utf8 warnings lexically. 
+ my $octets; + if ( ref($enc) eq 'Encode::Unicode' ) { + my $warn = ''; + { + local $SIG{__WARN__} = sub { $warn = shift }; + $octets = $enc->encode( $string, $check ); + } + warnings::warnif('utf8', $warn) if length $warn; + } + else { + $octets = $enc->encode( $string, $check ); + } + $_[1] = $string if $check and !ref $check and !( $check & LEAVE_SRC ); + return $octets; +} +*str2bytes = \&encode; + +sub decode($$;$) { + my ( $name, $octets, $check ) = @_; + return undef unless defined $octets; + $octets .= ''; + $check ||= 0; + my $enc = find_encoding($name); + unless ( defined $enc ) { + require Carp; + Carp::croak("Unknown encoding '$name'"); + } + # For Unicode, warnings need to be caught and re-issued at this level + # so that callers can disable utf8 warnings lexically. + my $string; + if ( ref($enc) eq 'Encode::Unicode' ) { + my $warn = ''; + { + local $SIG{__WARN__} = sub { $warn = shift }; + $string = $enc->decode( $octets, $check ); + } + warnings::warnif('utf8', $warn) if length $warn; + } + else { + $string = $enc->decode( $octets, $check ); + } + $_[1] = $octets if $check and !ref $check and !( $check & LEAVE_SRC ); + return $string; +} +*bytes2str = \&decode; + +sub from_to($$$;$) { + my ( $string, $from, $to, $check ) = @_; + return undef unless defined $string; + $check ||= 0; + my $f = find_encoding($from); + unless ( defined $f ) { + require Carp; + Carp::croak("Unknown encoding '$from'"); + } + my $t = find_encoding($to); + unless ( defined $t ) { + require Carp; + Carp::croak("Unknown encoding '$to'"); + } + + # For Unicode, warnings need to be caught and re-issued at this level + # so that callers can disable utf8 warnings lexically. 
+ my $uni; + if ( ref($f) eq 'Encode::Unicode' ) { + my $warn = ''; + { + local $SIG{__WARN__} = sub { $warn = shift }; + $uni = $f->decode($string); + } + warnings::warnif('utf8', $warn) if length $warn; + } + else { + $uni = $f->decode($string); + } + + if ( ref($t) eq 'Encode::Unicode' ) { + my $warn = ''; + { + local $SIG{__WARN__} = sub { $warn = shift }; + $_[0] = $string = $t->encode( $uni, $check ); + } + warnings::warnif('utf8', $warn) if length $warn; + } + else { + $_[0] = $string = $t->encode( $uni, $check ); + } + + return undef if ( $check && length($uni) ); + return defined( $_[0] ) ? length($string) : undef; +} + +sub encode_utf8($) { + my ($str) = @_; + return undef unless defined $str; + utf8::encode($str); + return $str; +} + +my $utf8enc; + +sub decode_utf8($;$) { + my ( $octets, $check ) = @_; + return undef unless defined $octets; + $octets .= ''; + $check ||= 0; + $utf8enc ||= find_encoding('utf8'); + my $string = $utf8enc->decode( $octets, $check ); + $_[0] = $octets if $check and !ref $check and !( $check & LEAVE_SRC ); + return $string; +} + +onBOOT; + +if ($ON_EBCDIC) { + package Encode::UTF_EBCDIC; + use parent 'Encode::Encoding'; + my $obj = bless { Name => "UTF_EBCDIC" } => "Encode::UTF_EBCDIC"; + Encode::define_encoding($obj, 'Unicode'); + sub decode { + my ( undef, $str, $chk ) = @_; + my $res = ''; + for ( my $i = 0 ; $i < length($str) ; $i++ ) { + $res .= + chr( + utf8::unicode_to_native( ord( substr( $str, $i, 1 ) ) ) + ); + } + $_[1] = '' if $chk; + return $res; + } + sub encode { + my ( undef, $str, $chk ) = @_; + my $res = ''; + for ( my $i = 0 ; $i < length($str) ; $i++ ) { + $res .= + chr( + utf8::native_to_unicode( ord( substr( $str, $i, 1 ) ) ) + ); + } + $_[1] = '' if $chk; + return $res; + } +} else { + package Encode::Internal; + use parent 'Encode::Encoding'; + my $obj = bless { Name => "Internal" } => "Encode::Internal"; + Encode::define_encoding($obj, 'Unicode'); + sub decode { + my ( undef, $str, $chk ) = @_; + 
utf8::upgrade($str); + $_[1] = '' if $chk; + return $str; + } + *encode = \&decode; +} + +{ + # https://rt.cpan.org/Public/Bug/Display.html?id=103253 + package Encode::XS; + use parent 'Encode::Encoding'; +} + +{ + package Encode::utf8; + use parent 'Encode::Encoding'; + my %obj = ( + 'utf8' => { Name => 'utf8' }, + 'utf-8-strict' => { Name => 'utf-8-strict', strict_utf8 => 1 } + ); + for ( keys %obj ) { + bless $obj{$_} => __PACKAGE__; + Encode::define_encoding( $obj{$_} => $_ ); + } + sub cat_decode { + # ($obj, $dst, $src, $pos, $trm, $chk) + # currently ignores $chk + my ( undef, undef, undef, $pos, $trm ) = @_; + my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ]; + use bytes; + if ( ( my $npos = index( $$rsrc, $trm, $pos ) ) >= 0 ) { + $$rdst .= + substr( $$rsrc, $pos, $npos - $pos + length($trm) ); + $$rpos = $npos + length($trm); + return 1; + } + $$rdst .= substr( $$rsrc, $pos ); + $$rpos = length($$rsrc); + return ''; + } +} + +1; + +__END__ + +=head1 NAME + +Encode - character encodings in Perl + +=head1 SYNOPSIS + + use Encode qw(decode encode); + $characters = decode('UTF-8', $octets, Encode::FB_CROAK); + $octets = encode('UTF-8', $characters, Encode::FB_CROAK); + +=head2 Table of Contents + +Encode consists of a collection of modules whose details are too extensive +to fit in one document. This one itself explains the top-level APIs +and general topics at a glance. For other topics and more details, +see the documentation for these modules: + +=over 2 + +=item L - Alias definitions to encodings + +=item L - Encode Implementation Base Class + +=item L - List of Supported Encodings + +=item L - Simplified Chinese Encodings + +=item L - Japanese Encodings + +=item L - Korean Encodings + +=item L - Traditional Chinese Encodings + +=back + +=head1 DESCRIPTION + +The C module provides the interface between Perl strings +and the rest of the system. Perl strings are sequences of +I. 
+ +The repertoire of characters that Perl can represent is a superset of those +defined by the Unicode Consortium. On most platforms the ordinal +values of a character as returned by C)> is the I for that character. The exceptions are platforms where +the legacy encoding is some variant of EBCDIC rather than a superset +of ASCII; see L. + +During recent history, data is moved around a computer in 8-bit chunks, +often called "bytes" but also known as "octets" in standards documents. +Perl is widely used to manipulate data of many types: not only strings of +characters representing human or computer languages, but also "binary" +data, being the machine's representation of numbers, pixels in an image, or +just about anything. + +When Perl is processing "binary data", the programmer wants Perl to +process "sequences of bytes". This is not a problem for Perl: because a +byte has 256 possible values, it easily fits in Perl's much larger +"logical character". + +This document mostly explains the I. L and L +explain the I. + +=head2 TERMINOLOGY + +=head3 character + +A character in the range 0 .. 2**32-1 (or more); +what Perl's strings are made of. + +=head3 byte + +A character in the range 0..255; +a special case of a Perl character. + +=head3 octet + +8 bits of data, with ordinal values 0..255; +term for bytes passed to or from a non-Perl context, such as a disk file, +standard I/O stream, database, command-line argument, environment variable, +socket etc. + +=head1 THE PERL ENCODING API + +=head2 Basic methods + +=head3 encode + + $octets = encode(ENCODING, STRING[, CHECK]) + +Encodes the scalar value I from Perl's internal form into +I and returns a sequence of octets. I can be either a +canonical name or an alias. For encoding names and aliases, see +L. For CHECK, see L. + +B: the input scalar I might be modified in-place depending +on what is set in CHECK. See L if you want your inputs to be +left unchanged. 
+ +For example, to convert a string from Perl's internal format into +ISO-8859-1, also known as Latin1: + + $octets = encode("iso-8859-1", $string); + +B<CAVEAT>: When you run C<$octets = encode("UTF-8", $string)>, then +$octets I<might not be equal to> $string. Though both contain the +same data, the UTF8 flag for $octets is I<always> off. When you +encode anything, the UTF8 flag on the result is always off, even when it +contains a completely valid UTF-8 string. See L</"The UTF8 flag"> below. + +If the $string is C<undef>, then C<undef> is returned. + +C<str2bytes> may be used as an alias for C<encode>. + +=head3 decode + + $string = decode(ENCODING, OCTETS[, CHECK]) + +This function returns the string that results from decoding the scalar +value I<OCTETS>, assumed to be a sequence of octets in I<ENCODING>, into +Perl's internal form. As with encode(), +I<ENCODING> can be either a canonical name or an alias. For encoding names +and aliases, see L</"Defining Aliases">; for I<CHECK>, see L</"Handling Malformed Data">. + +B<CAVEAT>: the input scalar I<OCTETS> might be modified in-place depending +on what is set in CHECK. See L</LEAVE_SRC> if you want your inputs to be +left unchanged. + +For example, to convert ISO-8859-1 data into a string in Perl's +internal format: + + $string = decode("iso-8859-1", $octets); + +B<CAVEAT>: When you run C<$string = decode("UTF-8", $octets)>, then $string +I<might not be equal to> $octets. Though both contain the same data, the +UTF8 flag for $string is on. See L</"The UTF8 flag"> +below. + +If $octets is C<undef>, then C<undef> is returned. + +C<bytes2str> may be used as an alias for C<decode>. + +=head3 find_encoding + + [$obj =] find_encoding(ENCODING) + +Returns the I<encoding object> corresponding to I<ENCODING>. Returns +C<undef> if no matching I<ENCODING> is found. The returned object is +what does the actual encoding or decoding. + + $string = decode($name, $bytes); + +is in fact + + $string = do { + $obj = find_encoding($name); + croak qq(encoding "$name" not found) unless ref $obj; + $obj->decode($bytes); + }; + +with more error checking. + +You can therefore save time by reusing this object as follows: + + my $enc = find_encoding("iso-8859-1"); + while(<>) { + my $string = $enc->decode($_); + ... 
# now do something with $string; + } + +Besides L</decode> and L</encode>, other methods are +available as well. For instance, C<name()> returns the canonical +name of the encoding object. + + find_encoding("latin1")->name; # iso-8859-1 + +See L<Encode::Encoding> for details. + +=head3 find_mime_encoding + + [$obj =] find_mime_encoding(MIME_ENCODING) + +Returns the I<encoding object> corresponding to I<MIME_ENCODING>. Acts +the same as C<find_encoding()>, but the C<mime_name()> of the returned object must +match I<MIME_ENCODING>. So, as opposed to C<find_encoding()>, +canonical names and aliases are not used when searching for the object. + + find_mime_encoding("utf8"); # returns undef because "utf8" is not valid I<MIME_ENCODING> + find_mime_encoding("utf-8"); # returns encode object "utf-8-strict" + find_mime_encoding("UTF-8"); # same as "utf-8" because I<MIME_ENCODING> is case insensitive + find_mime_encoding("utf-8-strict"); # returns undef because "utf-8-strict" is not valid I<MIME_ENCODING> + +=head3 from_to + + [$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK]) + +Converts I<in-place> data between two encodings. The data in $octets +must be encoded as octets and I<not> as characters in Perl's internal +format. For example, to convert ISO-8859-1 data into Microsoft's CP1250 +encoding: + + from_to($octets, "iso-8859-1", "cp1250"); + +and to convert it back: + + from_to($octets, "cp1250", "iso-8859-1"); + +Because the conversion happens in place, the data to be +converted cannot be a string constant: it must be a scalar variable. + +C<from_to()> returns the length of the converted string in octets on success, +and C<undef> on error. + +B<CAVEAT>: The following operations may look the same, but are not: + + from_to($data, "iso-8859-1", "UTF-8"); #1 + $data = decode("iso-8859-1", $data); #2 + +Both #1 and #2 make $data consist of a completely valid UTF-8 string, +but only #2 turns the UTF8 flag on. #1 is equivalent to: + + $data = encode("UTF-8", decode("iso-8859-1", $data)); + +See L</"The UTF8 flag"> below. + +Also note that: + + from_to($octets, $from, $to, $check); + +is equivalent to: + + $octets = encode($to, decode($from, $octets), $check); + +Yes, it does I<not> respect the $check during decoding. 
It is +deliberately done that way. If you need minute control, use C +followed by C as follows: + + $octets = encode($to, decode($from, $octets, $check_from), $check_to); + +=head3 encode_utf8 + + $octets = encode_utf8($string); + +Equivalent to C<$octets = encode("utf8", $string)>. The characters in +$string are encoded in Perl's internal format, and the result is returned +as a sequence of octets. Because all possible characters in Perl have a +(loose, not strict) utf8 representation, this function cannot fail. + +B: do not use this function for data exchange as it can produce +not strict utf8 $octets! For strictly valid UTF-8 output use +C<$octets = encode("UTF-8", $string)>. + +=head3 decode_utf8 + + $string = decode_utf8($octets [, CHECK]); + +Equivalent to C<$string = decode("utf8", $octets [, CHECK])>. +The sequence of octets represented by $octets is decoded +from (loose, not strict) utf8 into a sequence of logical characters. +Because not all sequences of octets are valid not strict utf8, +it is quite possible for this function to fail. +For CHECK, see L. + +B: do not use this function for data exchange as it can produce +$string with not strict utf8 representation! For strictly valid UTF-8 +$string representation use C<$string = decode("UTF-8", $octets [, CHECK])>. + +B: the input I<$octets> might be modified in-place depending on +what is set in CHECK. See L if you want your inputs to be +left unchanged. + +=head2 Listing available encodings + + use Encode; + @list = Encode->encodings(); + +Returns a list of canonical names of available encodings that have already +been loaded. To get a list of all available encodings including those that +have not yet been loaded, say: + + @all_encodings = Encode->encodings(":all"); + +Or you can give the name of a specific module: + + @with_jp = Encode->encodings("Encode::JP"); + +When "C<::>" is not in the name, "C" is assumed. 
+ + @ebcdic = Encode->encodings("EBCDIC"); + +To find out in detail which encodings are supported by this package, +see L. + +=head2 Defining Aliases + +To add a new alias to a given encoding, use: + + use Encode; + use Encode::Alias; + define_alias(NEWNAME => ENCODING); + +After that, I can be used as an alias for I. +I may be either the name of an encoding or an +I. + +Before you do that, first make sure the alias is nonexistent using +C, which returns the canonical name thereof. +For example: + + Encode::resolve_alias("latin1") eq "iso-8859-1" # true + Encode::resolve_alias("iso-8859-12") # false; nonexistent + Encode::resolve_alias($name) eq $name # true if $name is canonical + +C does not need C; it can be +imported via C. + +See L for details. + +=head2 Finding IANA Character Set Registry names + +The canonical name of a given encoding does not necessarily agree with +IANA Character Set Registry, commonly seen as C<< Content-Type: +text/plain; charset=I >>. For most cases, the canonical name +works, but sometimes it does not, most notably with "utf-8-strict". + +As of C version 2.21, a new method C is therefore added. + + use Encode; + my $enc = find_encoding("UTF-8"); + warn $enc->name; # utf-8-strict + warn $enc->mime_name; # UTF-8 + +See also: L + +=head1 Encoding via PerlIO + +If your perl supports C (which is the default), you can use a +C layer to decode and encode directly via a filehandle. 
The +following two examples are fully identical in functionality: + + ### Version 1 via PerlIO + open(INPUT, "< :encoding(shiftjis)", $infile) + || die "Can't open < $infile for reading: $!"; + open(OUTPUT, "> :encoding(euc-jp)", $outfile) + || die "Can't open > $outfile for writing: $!"; + while (<INPUT>) { # auto decodes $_ + print OUTPUT; # auto encodes $_ + } + close(INPUT) || die "can't close $infile: $!"; + close(OUTPUT) || die "can't close $outfile: $!"; + + ### Version 2 via from_to() + open(INPUT, "< :raw", $infile) + || die "Can't open < $infile for reading: $!"; + open(OUTPUT, "> :raw", $outfile) + || die "Can't open > $outfile for writing: $!"; + + while (<INPUT>) { + from_to($_, "shiftjis", "euc-jp", 1); # switch encoding + print OUTPUT; # emit raw (but properly encoded) data + } + close(INPUT) || die "can't close $infile: $!"; + close(OUTPUT) || die "can't close $outfile: $!"; + +In the first version above, you let the appropriate encoding layer +handle the conversion. In the second, you explicitly translate +from one encoding to the other. + +Unfortunately, it may be that encodings are not C<PerlIO>-savvy. You can check +to see whether your encoding is supported by C<PerlIO> by invoking the +C<perlio_ok> method on it: + + Encode::perlio_ok("hz"); # false + find_encoding("euc-cn")->perlio_ok; # true wherever PerlIO is available + + use Encode qw(perlio_ok); # imported upon request + perlio_ok("euc-jp") + +Fortunately, all encodings that come with C<Encode> core are C<PerlIO>-savvy +except for C<hz> and C<ISO-2022-kr>. For the gory details, see +L<Encode::Encoding> and L<Encode::PerlIO>. + +=head1 Handling Malformed Data + +The optional I<CHECK> argument tells C<Encode> what to do when +encountering malformed data. Without I<CHECK>, C<Encode::FB_DEFAULT> +(== 0) is assumed. + +As of version 2.12, C<Encode> supports coderef values for C<CHECK>; +see below. + +B<NOTE:> Not all encodings support this feature. +Some encodings ignore the I<CHECK> argument. For example, +L<Encode::Unicode> ignores I<CHECK> and it always croaks on error. 
+ +=head2 List of I values + +=head3 FB_DEFAULT + + I = Encode::FB_DEFAULT ( == 0) + +If I is 0, encoding and decoding replace any malformed character +with a I. When you encode, I is used. +When you decode, the Unicode REPLACEMENT CHARACTER, code point U+FFFD, is +used. If the data is supposed to be UTF-8, an optional lexical warning of +warning category C<"utf8"> is given. + +=head3 FB_CROAK + + I = Encode::FB_CROAK ( == 1) + +If I is 1, methods immediately die with an error +message. Therefore, when I is 1, you should trap +exceptions with C, unless you really want to let it C. + +=head3 FB_QUIET + + I = Encode::FB_QUIET + +If I is set to C, encoding and decoding immediately +return the portion of the data that has been processed so far when an +error occurs. The data argument is overwritten with everything +after that point; that is, the unprocessed portion of the data. This is +handy when you have to call C repeatedly in the case where your +source data may contain partial multi-byte character sequences, +(that is, you are reading with a fixed-width buffer). Here's some sample +code to do exactly that: + + my($buffer, $string) = ("", ""); + while (read($fh, $buffer, 256, length($buffer))) { + $string .= decode($encoding, $buffer, Encode::FB_QUIET); + # $buffer now contains the unprocessed partial character + } + +=head3 FB_WARN + + I = Encode::FB_WARN + +This is the same as C above, except that instead of being silent +on errors, it issues a warning. This is handy for when you are debugging. + +=head3 FB_PERLQQ FB_HTMLCREF FB_XMLCREF + +=over 2 + +=item perlqq mode (I = Encode::FB_PERLQQ) + +=item HTML charref mode (I = Encode::FB_HTMLCREF) + +=item XML charref mode (I = Encode::FB_XMLCREF) + +=back + +For encodings that are implemented by the C module, C C<==> +C puts C and C into C fallback mode. + +When you decode, C<\xI> is inserted for a malformed character, where +I is the hex representation of the octet that could not be decoded to +utf8. 
When you encode, C<\x{I<HHHH>}> will be inserted, where I<HHHH> is +the Unicode code point (in any number of hex digits) of the character that +cannot be found in the character repertoire of the encoding. + +The HTML/XML character reference modes are about the same. In place of +C<\x{I<HHHH>}>, HTML uses C<&#I<NNN>;> where I<NNN> is a decimal number, and +XML uses C<&#xI<HHHH>;> where I<HHHH> is the hexadecimal number. + +In C<Encode> 2.10 or later, C<LEAVE_SRC> is also implied. + +=head3 The bitmask + +These modes are all actually set via a bitmask. Here is how the C<FB_I<XXX>> +constants are laid out. You can import the C<FB_I<XXX>> constants via +C<use Encode qw(:fallbacks)>, and you can import the generic bitmask +constants via C<use Encode qw(:fallback_all)>. + +                      FB_DEFAULT FB_CROAK FB_QUIET FB_WARN  FB_PERLQQ + DIE_ON_ERR    0x0001               X + WARN_ON_ERR   0x0002                                 X + RETURN_ON_ERR 0x0004                        X        X + LEAVE_SRC     0x0008                                           X + PERLQQ        0x0100                                           X + HTMLCREF      0x0200 + XMLCREF       0x0400 + +=head3 LEAVE_SRC + + Encode::LEAVE_SRC + +If the C<Encode::LEAVE_SRC> bit is I<not> set but I<CHECK> is set, then the +source string to encode() or decode() will be overwritten in place. +If you're not interested in this, then bitwise-OR it with the bitmask. + +=head2 coderef for CHECK + +As of C<Encode> 2.12, C<CHECK> can also be a code reference which takes the +ordinal value of the unmapped character as an argument and returns +octets that represent the fallback character. For instance: + + $ascii = encode("ascii", $utf8, sub{ sprintf "<U+%04X>", shift }); + +Acts like C<FB_PERLQQ> but U+I<XXXX> is used instead of C<\x{I<XXXX>}>. + +Fallback for C<decode> must return decoded string (sequence of characters) +and takes a list of ordinal values as its arguments. So for +example if you wish to decode octets as UTF-8, and use ISO-8859-15 as +a fallback for bytes that are not valid UTF-8, you could write + + $str = decode 'UTF-8', $octets, sub { + my $tmp = join '', map chr, @_; + return decode 'ISO-8859-15', $tmp; + }; + +=head1 Defining Encodings + +To define a new encoding, use: + + use Encode qw(define_encoding); + define_encoding($object, CANONICAL_NAME [, alias...]); + +I<CANONICAL_NAME> will be associated with I<$object>. 
The object +should provide the interface described in L<Encode::Encoding>. +If more than two arguments are provided, additional +arguments are considered aliases for I<$object>. + +See L<Encode::Encoding> for details. + +=head1 The UTF8 flag + +Before the introduction of Unicode support in Perl, the C<eq> operator +just compared the strings represented by two scalars. Beginning with +Perl 5.8, C<eq> compares two strings with simultaneous consideration of +I<the UTF8 flag>. To explain why we made it so, I quote from page 402 of +I<Programming Perl, 3rd ed.> + +=over 2 + +=item Goal #1: + +Old byte-oriented programs should not spontaneously break on the old +byte-oriented data they used to work on. + +=item Goal #2: + +Old byte-oriented programs should magically start working on the new +character-oriented data when appropriate. + +=item Goal #3: + +Programs should run just as fast in the new character-oriented mode +as in the old byte-oriented mode. + +=item Goal #4: + +Perl should remain one language, rather than forking into a +byte-oriented Perl and a character-oriented Perl. + +=back + +When I<Programming Perl, 3rd ed.> was written, not even Perl 5.6.0 had been +born yet; many features documented in the book remained unimplemented for a +long time. Perl 5.8 corrected much of this, and the introduction of the +UTF8 flag is one of them. You can think of there being two fundamentally +different kinds of strings and string-operations in Perl: one a +byte-oriented mode for when the internal UTF8 flag is off, and the other a +character-oriented mode for when the internal UTF8 flag is on. + +This UTF8 flag is not visible in Perl scripts, exactly for the same reason +you cannot (or rather, you I<don't have to>) see whether a scalar contains +a string, an integer, or a floating-point number. But you can still peek +and poke these if you will. See the next section. + +=head2 Messing with Perl's Internals + +The following API uses parts of Perl's internals in the current +implementation. As such, they are efficient but may change in a future +release. 
+ +=head3 is_utf8 + + is_utf8(STRING [, CHECK]) + +[INTERNAL] Tests whether the UTF8 flag is turned on in the I. +If I is true, also checks whether I contains well-formed +UTF-8. Returns true if successful, false otherwise. + +Typically only necessary for debugging and testing. Don't use this flag as +a marker to distinguish character and binary data, that should be decided +for each variable when you write your code. + +B: If I has UTF8 flag set, it does B mean that +I is UTF-8 encoded and vice-versa. + +As of Perl 5.8.1, L also has the C function. + +=head3 _utf8_on + + _utf8_on(STRING) + +[INTERNAL] Turns the I's internal UTF8 flag B. The I +is I checked for containing only well-formed UTF-8. Do not use this +unless you I that the STRING holds only +well-formed UTF-8. Returns the previous state of the UTF8 flag (so please +don't treat the return value as indicating success or failure), or C +if I is not a string. + +B: For security reasons, this function does not work on tainted values. + +=head3 _utf8_off + + _utf8_off(STRING) + +[INTERNAL] Turns the I's internal UTF8 flag B. Do not use +frivolously. Returns the previous state of the UTF8 flag, or C if +I is not a string. Do not treat the return value as indicative of +success or failure, because that isn't what it means: it is only the +previous setting. + +B: For security reasons, this function does not work on tainted values. + +=head1 UTF-8 vs. utf8 vs. UTF8 + + ....We now view strings not as sequences of bytes, but as sequences + of numbers in the range 0 .. 2**32-1 (or in the case of 64-bit + computers, 0 .. 2**64-1) -- Programming Perl, 3rd ed. + +That has historically been Perl's notion of UTF-8, as that is how UTF-8 was +first conceived by Ken Thompson when he invented it. However, thanks to +later revisions to the applicable standards, official UTF-8 is now rather +stricter than that. For example, its range is much narrower (0 .. 
0x10_FFFF +to cover only 21 bits instead of 32 or 64 bits) and some sequences +are not allowed, like those used in surrogate pairs, the 32 non-character +code points 0xFDD0 .. 0xFDEF, the last two code points in I<any> plane +(0xI<XX>_FFFE and 0xI<XX>_FFFF), all non-shortest encodings, etc. + +The former default in which Perl would always use a loose interpretation of +UTF-8 has now been overruled: + + From: Larry Wall <larry@wall.org> + Date: December 04, 2004 11:51:58 JST + To: perl-unicode@perl.org + Subject: Re: Make Encode.pm support the real UTF-8 + Message-Id: <20041204025158.GA28754@wall.org> + + On Fri, Dec 03, 2004 at 10:12:12PM +0000, Tim Bunce wrote: + : I've no problem with 'utf8' being perl's unrestricted uft8 encoding, + : but "UTF-8" is the name of the standard and should give the + : corresponding behaviour. + + For what it's worth, that's how I've always kept them straight in my + head. + + Also for what it's worth, Perl 6 will mostly default to strict but + make it easy to switch back to lax. + + Larry + +Got that? As of Perl 5.8.7, B<"UTF-8"> means UTF-8 in its current +sense, which is conservative and strict and security-conscious, whereas +B<"utf8"> means UTF-8 in its former sense, which was liberal and loose and +lax. C<Encode> version 2.10 or later thus groks this subtle but critically +important distinction between C<"UTF-8"> and C<"utf8">. + + encode("utf8", "\x{FFFF_FFFF}", 1); # okay + encode("UTF-8", "\x{FFFF_FFFF}", 1); # croaks + +In the C<Encode> module, C<"UTF-8"> is actually an alias for the canonical name +C<"utf-8-strict">. That hyphen between the C<"UTF"> and the C<"8"> is +critical; without it, C<Encode> goes "liberal" and (perhaps overly-)permissive: + + find_encoding("UTF-8")->name # is 'utf-8-strict' + find_encoding("utf-8")->name # ditto. names are case insensitive + find_encoding("utf_8")->name # ditto. "_" are treated as "-" + find_encoding("UTF8")->name # is 'utf8'. + +Perl's internal UTF8 flag is called "UTF8", without a hyphen. 
It indicates +whether a string is internally encoded as "utf8", also without a hyphen. + +=head1 SEE ALSO + +L, +L, +L, +L, +L, +L, +L, L, L, L +L, +the Perl Unicode Mailing List L + +=head1 MAINTAINER + +This project was originated by the late Nick Ing-Simmons and later +maintained by Dan Kogai I<< >>. See AUTHORS +for a full list of people involved. For any questions, send mail to +I<< >> so that we can all share. + +While Dan Kogai retains the copyright as a maintainer, credit +should go to all those involved. See AUTHORS for a list of those +who submitted code to the project. + +=head1 COPYRIGHT + +Copyright 2002-2014 Dan Kogai I<< >>. + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut diff --git a/Encode.xs b/Encode.xs new file mode 100644 index 0000000..774c2b1 --- /dev/null +++ b/Encode.xs @@ -0,0 +1,1160 @@ +/* + $Id: Encode.xs,v 2.43 2018/02/21 12:14:33 dankogai Exp dankogai $ + */ + +#define PERL_NO_GET_CONTEXT +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" +#include "encode.h" +#include "def_t.h" + +# define PERLIO_MODNAME "PerlIO::encoding" +# define PERLIO_FILENAME "PerlIO/encoding.pm" + +/* set 1 or more to profile. t/encoding.t dumps core because of + Perl_warner and PerlIO don't work well */ +#define ENCODE_XS_PROFILE 0 + +/* set 0 to disable floating point to calculate buffer size for + encode_method(). 1 is recommended. 
2 restores NI-S original */ +#define ENCODE_XS_USEFP 1 + +#define UNIMPLEMENTED(x,y) static y x (SV *sv, char *encoding) { \ + Perl_croak_nocontext("panic_unimplemented"); \ + PERL_UNUSED_VAR(sv); \ + PERL_UNUSED_VAR(encoding); \ + return (y)0; /* fool picky compilers */ \ + } +/**/ + +UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) +UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) + +#ifndef SvIV_nomg +#define SvIV_nomg SvIV +#endif + +#ifndef UTF8_DISALLOW_ILLEGAL_INTERCHANGE +# define UTF8_DISALLOW_ILLEGAL_INTERCHANGE 0 +# define UTF8_ALLOW_NON_STRICT (UTF8_ALLOW_FE_FF|UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF) +#else +# define UTF8_ALLOW_NON_STRICT 0 +#endif + +static void +Encode_XSEncoding(pTHX_ encode_t * enc) +{ + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *iv = newSViv(PTR2IV(enc)); + SV *sv = sv_bless(newRV_noinc(iv),stash); + int i = 0; + /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's + constness, in the hope that perl won't mess with it. */ + assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0); + SvFLAGS(iv) |= SVp_POK; + SvPVX(iv) = (char*) enc->name[0]; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name, strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding", G_DISCARD); + SvREFCNT_dec(sv); +} + +static void +call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) +{ + /* Exists for breakpointing */ + PERL_UNUSED_VAR(routine); + PERL_UNUSED_VAR(done); + PERL_UNUSED_VAR(dest); + PERL_UNUSED_VAR(orig); +} + +static void +utf8_safe_downgrade(pTHX_ SV ** src, U8 ** s, STRLEN * slen, bool modify) +{ + if (!modify) { + SV *tmp = sv_2mortal(newSVpvn((char *)*s, *slen)); + SvUTF8_on(tmp); + if (SvTAINTED(*src)) + SvTAINTED_on(tmp); + *src = tmp; + *s = (U8 *)SvPVX(*src); + } + if (*slen) { + if (!utf8_to_bytes(*s, slen)) + croak("Wide character"); + SvCUR_set(*src, *slen); + } + SvUTF8_off(*src); +} + +static void +utf8_safe_upgrade(pTHX_ SV ** 
src, U8 ** s, STRLEN * slen, bool modify) +{ + if (!modify) { + SV *tmp = sv_2mortal(newSVpvn((char *)*s, *slen)); + if (SvTAINTED(*src)) + SvTAINTED_on(tmp); + *src = tmp; + } + sv_utf8_upgrade_nomg(*src); + *s = (U8 *)SvPV_nomg(*src, *slen); +} + +#define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" +#define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" +#define ERR_DECODE_STR_NOMAP "%s \"%s\" does not map to Unicode" + +static SV * +do_fallback_cb(pTHX_ UV ch, SV *fallback_cb) +{ + dSP; + int argc; + SV *retval; + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(sv_2mortal(newSVuv(ch))); + PUTBACK; + argc = call_sv(fallback_cb, G_SCALAR); + SPAGAIN; + if (argc != 1){ + croak("fallback sub must return scalar!"); + } + retval = POPs; + SvREFCNT_inc(retval); + PUTBACK; + FREETMPS; + LEAVE; + return retval; +} + +static SV * +do_bytes_fallback_cb(pTHX_ U8 *s, STRLEN slen, SV *fallback_cb) +{ + dSP; + int argc; + STRLEN i; + SV *retval; + ENTER; + SAVETMPS; + PUSHMARK(sp); + for (i=0; i *offset){ /* safeguard against slen overflow */ + slen -= *offset; + }else{ + slen = 0; + } + tlen = slen; + } + + if (slen == 0){ + SvCUR_set(dst, 0); + SvPOK_only(dst); + goto ENCODE_END; + } + + while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check, + trm, trmlen)) ) + { + SvCUR_set(dst, dlen+ddone); + SvPOK_only(dst); + + if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL || + code == ENCODE_FOUND_TERM) { + break; + } + switch (code) { + case ENCODE_NOSPACE: + { + STRLEN more = 0; /* make sure you initialize! */ + STRLEN sleft; + sdone += slen; + ddone += dlen; + sleft = tlen - sdone; +#if ENCODE_XS_PROFILE >= 2 + Perl_warn(aTHX_ + "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n", + more, sdone, sleft, SvLEN(dst)); +#endif + if (sdone != 0) { /* has src ever been processed ? 
*/ +#if ENCODE_XS_USEFP == 2 + more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone + - SvLEN(dst); +#elif ENCODE_XS_USEFP + more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft); +#else + /* safe until SvLEN(dst) == MAX_INT/16 */ + more = (16*SvLEN(dst)+1)/sdone/16 * sleft; +#endif + } + more += UTF8_MAXLEN; /* insurance policy */ + d = (U8 *) SvGROW(dst, SvLEN(dst) + more); + /* dst need to grow need MORE bytes! */ + if (ddone >= SvLEN(dst)) { + Perl_croak(aTHX_ "Destination couldn't be grown."); + } + dlen = SvLEN(dst)-ddone-1; + d += ddone; + s += slen; + slen = tlen-sdone; + continue; + } + + case ENCODE_NOREP: + /* encoding */ + if (dir == enc->f_utf8) { + STRLEN clen; + UV ch = + utf8n_to_uvuni(s+slen, (tlen-sdone-slen), + &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY); + /* if non-representable multibyte prefix at end of current buffer - break*/ + if (clen > tlen - sdone - slen) break; + if (check & ENCODE_DIE_ON_ERR) { + Perl_croak(aTHX_ ERR_ENCODE_NOMAP, + (UV)ch, enc->name[0]); + return &PL_sv_undef; /* never reaches but be safe */ + } + if (check & ENCODE_WARN_ON_ERR){ + Perl_warner(aTHX_ packWARN(WARN_UTF8), + ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]); + } + if (check & ENCODE_RETURN_ON_ERR){ + goto ENCODE_SET_SRC; + } + if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ + STRLEN sublen; + char *substr; + SV* subchar = + (fallback_cb != &PL_sv_undef) + ? do_fallback_cb(aTHX_ ch, fallback_cb) + : newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04" UVxf "}" : + check & ENCODE_HTMLCREF ? 
"&#%" UVuf ";" : + "&#x%" UVxf ";", (UV)ch); + substr = SvPV(subchar, sublen); + if (SvUTF8(subchar) && sublen && !utf8_to_bytes((U8 *)substr, &sublen)) { /* make sure no decoded string gets in */ + SvREFCNT_dec(subchar); + croak("Wide character"); + } + sdone += slen + clen; + ddone += dlen + sublen; + sv_catpvn(dst, substr, sublen); + SvREFCNT_dec(subchar); + } else { + /* fallback char */ + sdone += slen + clen; + ddone += dlen + enc->replen; + sv_catpvn(dst, (char*)enc->rep, enc->replen); + } + } + /* decoding */ + else { + if (check & ENCODE_DIE_ON_ERR){ + Perl_croak(aTHX_ ERR_DECODE_NOMAP, + enc->name[0], (UV)s[slen]); + return &PL_sv_undef; /* never reaches but be safe */ + } + if (check & ENCODE_WARN_ON_ERR){ + Perl_warner( + aTHX_ packWARN(WARN_UTF8), + ERR_DECODE_NOMAP, + enc->name[0], (UV)s[slen]); + } + if (check & ENCODE_RETURN_ON_ERR){ + goto ENCODE_SET_SRC; + } + if (check & + (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ + STRLEN sublen; + char *substr; + SV* subchar = + (fallback_cb != &PL_sv_undef) + ? do_fallback_cb(aTHX_ (UV)s[slen], fallback_cb) + : newSVpvf("\\x%02" UVXf, (UV)s[slen]); + substr = SvPVutf8(subchar, sublen); + sdone += slen + 1; + ddone += dlen + sublen; + sv_catpvn(dst, substr, sublen); + SvREFCNT_dec(subchar); + } else { + sdone += slen + 1; + ddone += dlen + strlen(FBCHAR_UTF8); + sv_catpvn(dst, FBCHAR_UTF8, strlen(FBCHAR_UTF8)); + } + } + /* settle variables when fallback */ + d = (U8 *)SvEND(dst); + dlen = SvLEN(dst) - ddone - 1; + s = (U8*)SvPVX(src) + sdone; + slen = tlen - sdone; + break; + + default: + Perl_croak(aTHX_ "Unexpected code %d converting %s %s", + code, (dir == enc->f_utf8) ? 
"to" : "from", + enc->name[0]); + return &PL_sv_undef; + } + } /* End of looping through the string */ + ENCODE_SET_SRC: + if (check && !(check & ENCODE_LEAVE_SRC)){ + sdone = SvCUR(src) - (slen+sdone); + if (sdone) { + sv_setpvn(src, (char*)s+slen, sdone); + } + SvCUR_set(src, sdone); + SvSETMAGIC(src); + } + /* warn("check = 0x%X, code = 0x%d\n", check, code); */ + + SvCUR_set(dst, dlen+ddone); + SvPOK_only(dst); + +#if ENCODE_XS_PROFILE + if (SvCUR(dst) > SvCUR(src)){ + Perl_warn(aTHX_ + "SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n", + SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst), + (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0); + } +#endif + + if (offset) + *offset += sdone + slen; + + ENCODE_END: + *SvEND(dst) = '\0'; + if (retcode) *retcode = code; + return dst; +} + +static bool +strict_utf8(pTHX_ SV* sv) +{ + HV* hv; + SV** svp; + sv = SvRV(sv); + if (!sv || SvTYPE(sv) != SVt_PVHV) + return 0; + hv = (HV*)sv; + svp = hv_fetch(hv, "strict_utf8", 11, 0); + if (!svp) + return 0; + return SvTRUE(*svp); +} + +/* Modern perls have the capability to do this more efficiently and portably */ +#ifdef utf8n_to_uvchr_msgs +# define CAN_USE_BASE_PERL +#endif + +#ifndef CAN_USE_BASE_PERL + +/* + * https://github.com/dankogai/p5-encode/pull/56#issuecomment-231959126 + */ +#ifndef UNICODE_IS_NONCHAR +#define UNICODE_IS_NONCHAR(c) ((c >= 0xFDD0 && c <= 0xFDEF) || (c & 0xFFFE) == 0xFFFE) +#endif + +#ifndef UNICODE_IS_SUPER +#define UNICODE_IS_SUPER(c) (c > PERL_UNICODE_MAX) +#endif + +#define UNICODE_IS_STRICT(c) (!UNICODE_IS_SURROGATE(c) && !UNICODE_IS_NONCHAR(c) && !UNICODE_IS_SUPER(c)) + +#ifndef UTF_ACCUMULATION_OVERFLOW_MASK +#ifndef CHARBITS +#define CHARBITS CHAR_BIT +#endif +#define UTF_ACCUMULATION_OVERFLOW_MASK (((UV) UTF_CONTINUATION_MASK) << ((sizeof(UV) * CHARBITS) - UTF_ACCUMULATION_SHIFT)) +#endif + +/* + * Convert non strict utf8 sequence of len >= 2 to unicode codepoint + */ +static UV +convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen) 
+{ + UV uv; + U8 *ptr = s; + bool overflowed = 0; + + uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(UTF8SKIP(s)); + + len--; + s++; + + while (len--) { + if (!UTF8_IS_CONTINUATION(*s)) { + *rlen = s-ptr; + return 0; + } + if (uv & UTF_ACCUMULATION_OVERFLOW_MASK) + overflowed = 1; + uv = UTF8_ACCUMULATE(uv, *s); + s++; + } + + *rlen = s-ptr; + + if (overflowed || *rlen > (STRLEN)UNISKIP(uv)) { + return 0; + } + + return uv; +} + +#endif /* CAN_USE_BASE_PERL */ + +static U8* +process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, + bool encode, bool strict, bool stop_at_partial) +{ + /* Copies the purportedly UTF-8 encoded string starting at 's' and ending + * at 'e' - 1 to 'dst', checking as it goes along that the string actually + * is valid UTF-8. There are two levels of strictness checking. If + * 'strict' is FALSE, the string is checked for being well-formed UTF-8, as + * extended by Perl. Additionally, if 'strict' is TRUE, above-Unicode code + * points, surrogates, and non-character code points are checked for. When + * invalid input is encountered, some action is taken, exactly what depends + * on the flags in 'check_sv'. 'encode' gives if this is from an encode + * operation (if TRUE), or a decode one. This function returns the + * position in 's' of the start of the next character beyond where it got + * to. If there were no problems, that will be 'e'. If 'stop_at_partial' + * is TRUE, if the final character before 'e' is incomplete, but valid as + * far as is available, no action will be taken on that partial character, + * and the return value will point to its first byte */ + + UV uv; + STRLEN ulen; + SV *fallback_cb; + int check; + U8 *d; + STRLEN dlen; + char esc[UTF8_MAXLEN * 6 + 1]; + STRLEN i; + const U32 flags = (strict) + ? 
UTF8_DISALLOW_ILLEGAL_INTERCHANGE + : UTF8_ALLOW_NON_STRICT; + + if (SvROK(check_sv)) { + /* croak("UTF-8 decoder doesn't support callback CHECK"); */ + fallback_cb = check_sv; + check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as perlqq */ + } + else { + fallback_cb = &PL_sv_undef; + check = SvIV_nomg(check_sv); + } + + SvPOK_only(dst); + SvCUR_set(dst,0); + + dlen = (s && e && s < e) ? e-s+1 : 1; + d = (U8 *) SvGROW(dst, dlen); + + stop_at_partial = stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL); + + while (s < e) { + +#ifdef CAN_USE_BASE_PERL /* Use the much faster, portable implementation if + available */ + + /* If there were no errors, this will be 'e'; otherwise it will point + * to the first byte of the erroneous input */ + const U8* e_or_where_failed; + bool valid = is_utf8_string_loc_flags(s, e - s, &e_or_where_failed, flags); + STRLEN len = e_or_where_failed - s; + + /* Copy as far as was successful */ + Move(s, d, len, U8); + d += len; + s = (U8 *) e_or_where_failed; + + /* Are done if it was valid, or we are accepting partial characters and + * the only error is that the final bytes form a partial character */ + if ( LIKELY(valid) + || ( stop_at_partial + && is_utf8_valid_partial_char_flags(s, e, flags))) + { + break; + } + + /* Here, was not valid. If is 'strict', and is legal extended UTF-8, + * we know it is a code point whose value we can calculate, just not + * one accepted under strict. Otherwise, it is malformed in some way. + * In either case, the system function can calculate either the code + * point, or the best substitution for it */ + uv = utf8n_to_uvchr(s, e - s, &ulen, UTF8_ALLOW_ANY); + +#else /* Use code for earlier perls */ + + ((void)sizeof(flags)); /* Avoid compiler warning */ + + if (UTF8_IS_INVARIANT(*s)) { + *d++ = *s++; + continue; + } + + uv = 0; + ulen = 1; + if (! UTF8_IS_CONTINUATION(*s)) { + /* Not an invariant nor a continuation; must be a start byte. 
(We + * can't test for UTF8_IS_START as that excludes things like \xC0 + * which are start bytes, but always lead to overlongs */ + + U8 skip = UTF8SKIP(s); + if ((s + skip) > e) { + /* just calculate ulen, in pathological cases can be smaller then e-s */ + if (e-s >= 2) + convert_utf8_multi_seq(s, e-s, &ulen); + else + ulen = 1; + + if (stop_at_partial && ulen == (STRLEN)(e-s)) + break; + + goto malformed_byte; + } + + uv = convert_utf8_multi_seq(s, skip, &ulen); + if (uv == 0) + goto malformed_byte; + else if (strict && !UNICODE_IS_STRICT(uv)) + goto malformed; + + + /* Whole char is good */ + memcpy(d, s, skip); + d += skip; + s += skip; + continue; + } + + /* If we get here there is something wrong with alleged UTF-8 */ + /* uv is used only when encoding */ + malformed_byte: + if (uv == 0) + uv = (UV)*s; + if (encode || ulen == 0) + ulen = 1; + + malformed: + +#endif /* The two versions for processing come back together here, for the + * error handling code. + * + * Here, we are looping through the input and found an error. + * 'uv' is the code point in error if calculable, or the REPLACEMENT + * CHARACTER if not. + * 'ulen' is how many bytes of input this iteration of the loop + * consumes */ + + if (!encode && (check & (ENCODE_DIE_ON_ERR|ENCODE_WARN_ON_ERR|ENCODE_PERLQQ))) + for (i=0; i0?slen:1)); /* newSV() abhors 0 -- inaba */ + s = process_utf8(aTHX_ dst, s, e, check_sv, 0, strict_utf8(aTHX_ obj), renewed); + + /* Clear out translated part of source unless asked not to */ + if (modify) { + slen = e-s; + if (slen) { + sv_setpvn(src, (char*)s, slen); + } + SvCUR_set(src, slen); + SvSETMAGIC(src); + } + SvUTF8_on(dst); + if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ + ST(0) = dst; + XSRETURN(1); + +void +Method_encode(obj,src,check_sv = &PL_sv_no) +SV * obj +SV * src +SV * check_sv +PREINIT: + STRLEN slen; + U8 *s; + U8 *e; + SV *dst; + int check; + bool modify; +INIT: + SvGETMAGIC(src); + SvGETMAGIC(check_sv); + check = SvROK(check_sv) ? 
ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); + modify = (check && !(check & ENCODE_LEAVE_SRC)); +PPCODE: + if (!SvOK(src)) + XSRETURN_UNDEF; + s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); + e = s+slen; + dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */ + if (SvUTF8(src)) { + /* Already encoded */ + if (strict_utf8(aTHX_ obj)) { + s = process_utf8(aTHX_ dst, s, e, check_sv, 1, 1, 0); + } + else { + /* trust it and just copy the octets */ + sv_setpvn(dst,(char *)s,(e-s)); + s = e; + } + } + else { + /* Native bytes - can always encode */ + U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */ + while (s < e) { +#ifdef append_utf8_from_native_byte + append_utf8_from_native_byte(*s, &d); + s++; +#else + UV uv = NATIVE_TO_UNI((UV) *s); + s++; /* Above expansion of NATIVE_TO_UNI() is safer this way. */ + if (UNI_IS_INVARIANT(uv)) + *d++ = (U8)UTF_TO_NATIVE(uv); + else { + *d++ = (U8)UTF8_EIGHT_BIT_HI(uv); + *d++ = (U8)UTF8_EIGHT_BIT_LO(uv); + } +#endif + } + SvCUR_set(dst, d- (U8 *)SvPVX(dst)); + *SvEND(dst) = '\0'; + } + + /* Clear out translated part of source unless asked not to */ + if (modify) { + slen = e-s; + if (slen) { + sv_setpvn(src, (char*)s, slen); + } + SvCUR_set(src, slen); + SvSETMAGIC(src); + } + SvPOK_only(dst); + SvUTF8_off(dst); + if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ + ST(0) = dst; + XSRETURN(1); + +MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ + +PROTOTYPES: DISABLE + +SV * +Method_renew(obj) +SV * obj +CODE: + PERL_UNUSED_VAR(obj); + RETVAL = newSVsv(obj); +OUTPUT: + RETVAL + +int +Method_renewed(obj) +SV * obj +CODE: + RETVAL = 0; + PERL_UNUSED_VAR(obj); +OUTPUT: + RETVAL + +SV * +Method_name(obj) +SV * obj +PREINIT: + encode_t *enc; +INIT: + enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); +CODE: + RETVAL = newSVpvn(enc->name[0], strlen(enc->name[0])); +OUTPUT: + RETVAL + +bool +Method_cat_decode(obj, dst, src, off, term, check_sv = 
&PL_sv_no) +SV * obj +SV * dst +SV * src +SV * off +SV * term +SV * check_sv +PREINIT: + int check; + SV *fallback_cb; + bool modify; + encode_t *enc; + STRLEN offset; + int code = 0; + U8 *s; + STRLEN slen; + SV *tmp; +INIT: + SvGETMAGIC(src); + SvGETMAGIC(check_sv); + check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); + fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef; + modify = (check && !(check & ENCODE_LEAVE_SRC)); + enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + offset = (STRLEN)SvIV(off); +CODE: + if (!SvOK(src)) + XSRETURN_NO; + s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); + if (SvUTF8(src)) + utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify); + tmp = encode_method(aTHX_ enc, enc->t_utf8, src, s, slen, check, + &offset, term, &code, fallback_cb); + sv_catsv(dst, tmp); + SvREFCNT_dec(tmp); + SvIV_set(off, (IV)offset); + RETVAL = (code == ENCODE_FOUND_TERM); +OUTPUT: + RETVAL + +SV * +Method_decode(obj,src,check_sv = &PL_sv_no) +SV * obj +SV * src +SV * check_sv +PREINIT: + int check; + SV *fallback_cb; + bool modify; + encode_t *enc; + U8 *s; + STRLEN slen; +INIT: + SvGETMAGIC(src); + SvGETMAGIC(check_sv); + check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); + fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef; + modify = (check && !(check & ENCODE_LEAVE_SRC)); + enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); +CODE: + if (!SvOK(src)) + XSRETURN_UNDEF; + s = modify ? 
(U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); + if (SvUTF8(src)) + utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify); + RETVAL = encode_method(aTHX_ enc, enc->t_utf8, src, s, slen, check, + NULL, Nullsv, NULL, fallback_cb); + SvUTF8_on(RETVAL); +OUTPUT: + RETVAL + +SV * +Method_encode(obj,src,check_sv = &PL_sv_no) +SV * obj +SV * src +SV * check_sv +PREINIT: + int check; + SV *fallback_cb; + bool modify; + encode_t *enc; + U8 *s; + STRLEN slen; +INIT: + SvGETMAGIC(src); + SvGETMAGIC(check_sv); + check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); + fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef; + modify = (check && !(check & ENCODE_LEAVE_SRC)); + enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); +CODE: + if (!SvOK(src)) + XSRETURN_UNDEF; + s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); + if (!SvUTF8(src)) + utf8_safe_upgrade(aTHX_ &src, &s, &slen, modify); + RETVAL = encode_method(aTHX_ enc, enc->f_utf8, src, s, slen, check, + NULL, Nullsv, NULL, fallback_cb); +OUTPUT: + RETVAL + +bool +Method_needs_lines(obj) +SV * obj +CODE: + PERL_UNUSED_VAR(obj); + RETVAL = FALSE; +OUTPUT: + RETVAL + +bool +Method_perlio_ok(obj) +SV * obj +PREINIT: + SV *sv; +CODE: + PERL_UNUSED_VAR(obj); + sv = eval_pv("require PerlIO::encoding", 0); + RETVAL = SvTRUE(sv); +OUTPUT: + RETVAL + +SV * +Method_mime_name(obj) +SV * obj +PREINIT: + encode_t *enc; +INIT: + enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); +CODE: + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0])))); + PUTBACK; + call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR); + SPAGAIN; + RETVAL = newSVsv(POPs); + PUTBACK; + FREETMPS; + LEAVE; +OUTPUT: + RETVAL + +MODULE = Encode PACKAGE = Encode + +PROTOTYPES: ENABLE + +I32 +_bytes_to_utf8(sv, ...) +SV * sv +PREINIT: + SV * encoding; +INIT: + encoding = items == 2 ? 
ST(1) : Nullsv; +CODE: + if (encoding) + RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); + else { + STRLEN len; + U8* s = (U8*)SvPV(sv, len); + U8* converted; + + converted = bytes_to_utf8(s, &len); /* This allocs */ + sv_setpvn(sv, (char *)converted, len); + SvUTF8_on(sv); /* XXX Should we? */ + Safefree(converted); /* ... so free it */ + RETVAL = len; + } +OUTPUT: + RETVAL + +I32 +_utf8_to_bytes(sv, ...) +SV * sv +PREINIT: + SV * to; + SV * check; +INIT: + to = items > 1 ? ST(1) : Nullsv; + check = items > 2 ? ST(2) : Nullsv; +CODE: + if (to) { + RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); + } else { + STRLEN len; + U8 *s = (U8*)SvPV(sv, len); + + RETVAL = 0; + if (SvTRUE(check)) { + /* Must do things the slow way */ + U8 *dest; + /* We need a copy to pass to check() */ + U8 *src = s; + U8 *send = s + len; + U8 *d0; + + New(83, dest, len, U8); /* I think */ + d0 = dest; + + while (s < send) { + if (*s < 0x80){ + *dest++ = *s++; + } else { + STRLEN ulen; + UV uv = *s++; + + /* Have to do it all ourselves because of error routine, + aargh. */ + if (!(uv & 0x40)){ goto failure; } + if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } + else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } + else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } + else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } + else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } + else if (!(uv & 0x01)) { ulen = 7; uv = 0; } + else { ulen = 13; uv = 0; } + + /* Note change to utf8.c variable naming, for variety */ + while (ulen--) { + if ((*s & 0xc0) != 0x80){ + goto failure; + } else { + uv = (uv << 6) | (*s++ & 0x3f); + } + } + if (uv > 256) { + failure: + call_failure(check, s, dest, src); + /* Now what happens? */ + } + *dest++ = (U8)uv; + } + } + RETVAL = dest - d0; + sv_usepvn(sv, (char *)dest, RETVAL); + SvUTF8_off(sv); + } else { + RETVAL = (utf8_to_bytes(s, &len) ? 
len : 0); + } + } +OUTPUT: + RETVAL + +bool +is_utf8(sv, check = 0) +SV * sv +int check +PREINIT: + char *str; + STRLEN len; +CODE: + SvGETMAGIC(sv); /* SvGETMAGIC() can modify SvOK flag */ + str = SvOK(sv) ? SvPV_nomg(sv, len) : NULL; /* SvPV() can modify SvUTF8 flag */ + RETVAL = SvUTF8(sv) ? TRUE : FALSE; + if (RETVAL && check && (!str || !is_utf8_string((U8 *)str, len))) + RETVAL = FALSE; +OUTPUT: + RETVAL + +SV * +_utf8_on(sv) +SV * sv +CODE: + SvGETMAGIC(sv); + if (!SvTAINTED(sv) && SvPOKp(sv)) { + if (SvTHINKFIRST(sv)) sv_force_normal(sv); + RETVAL = boolSV(SvUTF8(sv)); + SvUTF8_on(sv); + SvSETMAGIC(sv); + } else { + RETVAL = &PL_sv_undef; + } +OUTPUT: + RETVAL + +SV * +_utf8_off(sv) +SV * sv +CODE: + SvGETMAGIC(sv); + if (!SvTAINTED(sv) && SvPOKp(sv)) { + if (SvTHINKFIRST(sv)) sv_force_normal(sv); + RETVAL = boolSV(SvUTF8(sv)); + SvUTF8_off(sv); + SvSETMAGIC(sv); + } else { + RETVAL = &PL_sv_undef; + } +OUTPUT: + RETVAL + +void +onBOOT() +CODE: +{ +#include "def_t.exh" +} + +BOOT: +{ + HV *stash = gv_stashpvn("Encode", strlen("Encode"), GV_ADD); + newCONSTSUB(stash, "DIE_ON_ERR", newSViv(ENCODE_DIE_ON_ERR)); + newCONSTSUB(stash, "WARN_ON_ERR", newSViv(ENCODE_WARN_ON_ERR)); + newCONSTSUB(stash, "RETURN_ON_ERR", newSViv(ENCODE_RETURN_ON_ERR)); + newCONSTSUB(stash, "LEAVE_SRC", newSViv(ENCODE_LEAVE_SRC)); + newCONSTSUB(stash, "PERLQQ", newSViv(ENCODE_PERLQQ)); + newCONSTSUB(stash, "HTMLCREF", newSViv(ENCODE_HTMLCREF)); + newCONSTSUB(stash, "XMLCREF", newSViv(ENCODE_XMLCREF)); + newCONSTSUB(stash, "STOP_AT_PARTIAL", newSViv(ENCODE_STOP_AT_PARTIAL)); + newCONSTSUB(stash, "FB_DEFAULT", newSViv(ENCODE_FB_DEFAULT)); + newCONSTSUB(stash, "FB_CROAK", newSViv(ENCODE_FB_CROAK)); + newCONSTSUB(stash, "FB_QUIET", newSViv(ENCODE_FB_QUIET)); + newCONSTSUB(stash, "FB_WARN", newSViv(ENCODE_FB_WARN)); + newCONSTSUB(stash, "FB_PERLQQ", newSViv(ENCODE_FB_PERLQQ)); + newCONSTSUB(stash, "FB_HTMLCREF", newSViv(ENCODE_FB_HTMLCREF)); + newCONSTSUB(stash, "FB_XMLCREF", 
newSViv(ENCODE_FB_XMLCREF)); +} diff --git a/Encode/Changes.e2x b/Encode/Changes.e2x new file mode 100644 index 0000000..5c67c55 --- /dev/null +++ b/Encode/Changes.e2x @@ -0,0 +1,7 @@ +# +# $Id: Changes.e2x,v 2.0 2004/05/16 20:55:15 dankogai Exp $ +# Revision history for Perl extension Encode::$_Name_. +# + +0.01 $_Now_ + Autogenerated by enc2xs version $_Version_. diff --git a/Encode/ConfigLocal_PM.e2x b/Encode/ConfigLocal_PM.e2x new file mode 100644 index 0000000..e203dfd --- /dev/null +++ b/Encode/ConfigLocal_PM.e2x @@ -0,0 +1,13 @@ +# +# Local demand-load module list +# +# You should not edit this file by hand! use "enc2xs -C" +# +package Encode::ConfigLocal; +our $VERSION = $_LocalVer_; + +use strict; + +$_ModLines_ + +1; diff --git a/Encode/Makefile_PL.e2x b/Encode/Makefile_PL.e2x new file mode 100644 index 0000000..c17a509 --- /dev/null +++ b/Encode/Makefile_PL.e2x @@ -0,0 +1,190 @@ +# +# This file is auto-generated by: +# enc2xs version $_Version_ +# $_Now_ +# +use 5.7.2; +use strict; +use ExtUtils::MakeMaker; +use Config; + +# Please edit the following to the taste! +my $name = '$_Name_'; +my %tables = ( + $_Name__t => [ $_TableFiles_ ], + ); + +#### DO NOT EDIT BEYOND THIS POINT! 
+require File::Spec; +my ($enc2xs, $encode_h) = (); +my @path_ext = (''); +@path_ext = split(';', $ENV{PATHEXT}) if $^O eq 'MSWin32'; +PATHLOOP: +for my $d (@Config{qw/bin sitebin vendorbin/}, + (split /$Config{path_sep}/o, $ENV{PATH})){ + for my $f (qw/enc2xs enc2xs5.7.3/){ + my $path = File::Spec->catfile($d, $f); + for my $ext (@path_ext) { + my $bin = "$path$ext"; + -r "$bin" and $enc2xs = $bin and last PATHLOOP; + } + } +} +$enc2xs or die "enc2xs not found!"; +print "enc2xs is $enc2xs\n"; +my %encode_h = (); +for my $d (@INC){ + my $dir = File::Spec->catfile($d, "Encode"); + my $file = File::Spec->catfile($dir, "encode.h"); + -f $file and $encode_h{$dir} = -M $file; +} +%encode_h or die "encode.h not found!"; +# find the latest one +($encode_h) = sort {$encode_h{$b} <=> $encode_h{$a}} keys %encode_h; +print "encode.h is at $encode_h\n"; + +WriteMakefile( + INC => "-I$encode_h", +#### END_OF_HEADER -- DO NOT EDIT THIS LINE BY HAND! #### + NAME => 'Encode::'.$name, + VERSION_FROM => "$name.pm", + OBJECT => '$(O_FILES)', + 'dist' => { + COMPRESS => 'gzip -9f', + SUFFIX => 'gz', + DIST_DEFAULT => 'all tardist', + }, + MAN3PODS => {}, + PREREQ_PM => { + 'Encode' => "1.41", + }, + # OS 390 winges about line numbers > 64K ??? + XSOPT => '-nolinenumbers', + ); + +package MY; + +sub post_initialize +{ + my ($self) = @_; + my %o; + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) + { + $o{$e.$x} = 1; + } + $o{"$name$x"} = 1; + $self->{'O_FILES'} = [sort keys %o]; + my @files = ("$name.xs"); + $self->{'C'} = ["$name.c"]; + # The next two lines to make MacPerl Happy -- dankogai via pudge + $self->{SOURCE} .= " $name.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/; + # $self->{'H'} = [$self->catfile($self->updir,'encode.h')]; + my %xs; + foreach my $table (sort keys %tables) { + push (@{$self->{'C'}},"$table.c"); + # Do NOT add $table.h etc. to H_FILES unless we own up as to how they + # get built. 
+ foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) { + push (@files,$table.$ext); + } + } + $self->{'XS'} = { "$name.xs" => "$name.c" }; + $self->{'clean'}{'FILES'} .= join(' ',@files); + open(XS,">$name.xs") || die "Cannot open $name.xs:$!"; + print XS <<'END'; +#include +#include +#include +#include "encode.h" +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.h"\n]; + } + print XS <<"END"; + +static void +Encode_XSEncoding(pTHX_ encode_t *enc) +{ + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *iv = newSViv(PTR2IV(enc)); + SV *sv = sv_bless(newRV_noinc(iv),stash); + int i = 0; + /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's + constness, in the hope that perl won't mess with it. */ + assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0); + SvFLAGS(iv) |= SVp_POK; + SvPVX(iv) = (char*) enc->name[0]; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) + { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding",G_DISCARD); + SvREFCNT_dec(sv); +} + +MODULE = Encode::$name PACKAGE = Encode::$name +PROTOTYPES: DISABLE +BOOT: +{ +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.exh"\n]; + } + print XS "}\n"; + close(XS); + return "# Built $name.xs\n\n"; +} + +sub postamble +{ + my $self = shift; + my $dir = "."; # $self->catdir('Encode'); + my $str = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n"; + $str .= "$name.c : $name.xs "; + foreach my $table (sort keys %tables) + { + $str .= " $table.c"; + } + $str .= "\n\n"; + $str .= "$name\$(OBJ_EXT) : $name.c\n\n"; + + foreach my $table (sort keys %tables) + { + my $numlines = 1; + my $lengthsofar = length($str); + my $continuator = ''; + $str .= "$table.c : Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' 
'.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : ''; + my $ucopts = '-"Q"'; + $str .= + qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n}; + open (FILELIST, ">$table.fnm") + || die "Could not open $table.fnm: $!"; + foreach my $file (@{$tables{$table}}) + { + print FILELIST $self->catfile($dir,$file) . "\n"; + } + close(FILELIST); + } + return $str; +} + diff --git a/Encode/README.e2x b/Encode/README.e2x new file mode 100644 index 0000000..28a31a6 --- /dev/null +++ b/Encode/README.e2x @@ -0,0 +1,31 @@ +Encode::$_Name_ version 0.1 +======== + +NAME + Encode::$_Name_ - + +SYNOPSIS + use Encode::$_Name_; + # +ABSTRACT + +INSTALLATION + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + +This module requires perl version 5.7.3 or later. + +COPYRIGHT AND LICENCE + +Copyright (C) 2002 Your Name + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + diff --git a/Encode/_PM.e2x b/Encode/_PM.e2x new file mode 100644 index 0000000..eb59cd1 --- /dev/null +++ b/Encode/_PM.e2x @@ -0,0 +1,23 @@ +package Encode::$_Name_; +our $VERSION = "0.01"; + +use Encode; +use XSLoader; +XSLoader::load(__PACKAGE__,$VERSION); + +1; +__END__ + +=head1 NAME + +Encode::$_Name_ - New Encoding + +=head1 SYNOPSIS + +You got to fill this in! + +=head1 SEE ALSO + +L<Encode> + +=cut diff --git a/Encode/_T.e2x b/Encode/_T.e2x new file mode 100644 index 0000000..7b9a67e --- /dev/null +++ b/Encode/_T.e2x @@ -0,0 +1,9 @@ +use strict; +# Adjust the number here! +use Test::More tests => 2; + +BEGIN { + use_ok('Encode'); + use_ok('Encode::$_Name_'); +} +# Add more test here! 
diff --git a/Encode/encode.h b/Encode/encode.h new file mode 100644 index 0000000..df5554f --- /dev/null +++ b/Encode/encode.h @@ -0,0 +1,115 @@ +#ifndef ENCODE_H +#define ENCODE_H + +#ifndef H_PERL +/* check whether we're "in perl" so that we can do data parts without + getting extern references to the code parts +*/ +typedef unsigned char U8; +#endif + +typedef struct encpage_s encpage_t; + +struct encpage_s +{ + /* fields ordered to pack nicely on 32-bit machines */ + const U8 *const seq; /* Packed output sequences we generate + if we match */ + const encpage_t *const next; /* Page to go to if we match */ + const U8 min; /* Min value of octet to match this entry */ + const U8 max; /* Max value of octet to match this entry */ + const U8 dlen; /* destination length - + size of entries in seq */ + const U8 slen; /* source length - + number of source octets needed */ +}; + +/* + At any point in a translation there is a page pointer which points + at an array of the above structures. + + Basic operation : + get octet from source stream. + if (octet >= min && octet <= max) { + if slen is 0 then we cannot represent this character. + if we have less than slen octets (including this one) then + we have a partial character. + otherwise + copy dlen octets from seq + dlen*(octet-min) to output + (dlen may be zero if we don't know yet.) + load page pointer with next to continue. + (if slen is one this is the end of a character) + get next octet. + } + else { + increment the page pointer to look at next slot in the array + } + + arrays SHALL be constructed so there is an entry which matches + ..0xFF at the end, and either maps it or indicates no + representation. + + if MSB of slen is set then mapping is an approximate "FALLBACK" entry. 
+ +*/ + + +typedef struct encode_s encode_t; +struct encode_s +{ + const encpage_t *const t_utf8; /* Starting table for translation from + the encoding to UTF-8 form */ + const encpage_t *const f_utf8; /* Starting table for translation + from UTF-8 to the encoding */ + const U8 *const rep; /* Replacement character in this + encoding e.g. "?" */ + int replen; /* Number of octets in rep */ + U8 min_el; /* Minimum octets to represent a + character */ + U8 max_el; /* Maximum octets to represent a + character */ + const char *const name[2]; /* name(s) of this encoding */ +}; + +#ifdef H_PERL +/* See comment at top of file for deviousness */ + +extern int do_encode(const encpage_t *enc, const U8 *src, STRLEN *slen, + U8 *dst, STRLEN dlen, STRLEN *dout, int approx, + const U8 *term, STRLEN tlen); + +extern void Encode_DefineEncoding(encode_t *enc); + +#endif /* H_PERL */ + +#define ENCODE_NOSPACE 1 +#define ENCODE_PARTIAL 2 +#define ENCODE_NOREP 3 +#define ENCODE_FALLBACK 4 +#define ENCODE_FOUND_TERM 5 + +/* Use the perl core value if available; it is portable to EBCDIC */ +#ifdef REPLACEMENT_CHARACTER_UTF8 +# define FBCHAR_UTF8 REPLACEMENT_CHARACTER_UTF8 +#else +# define FBCHAR_UTF8 "\xEF\xBF\xBD" +#endif + +#define ENCODE_DIE_ON_ERR 0x0001 /* croaks immediately */ +#define ENCODE_WARN_ON_ERR 0x0002 /* warn on error; may proceed */ +#define ENCODE_RETURN_ON_ERR 0x0004 /* immediately returns on NOREP */ +#define ENCODE_LEAVE_SRC 0x0008 /* $src updated unless set */ +#define ENCODE_PERLQQ 0x0100 /* perlqq fallback string */ +#define ENCODE_HTMLCREF 0x0200 /* HTML character ref. fb mode */ +#define ENCODE_XMLCREF 0x0400 /* XML character ref. 
fb mode */ +#define ENCODE_STOP_AT_PARTIAL 0x0800 /* stop at partial explicitly */ + +#define ENCODE_FB_DEFAULT 0x0000 +#define ENCODE_FB_CROAK 0x0001 +#define ENCODE_FB_QUIET ENCODE_RETURN_ON_ERR +#define ENCODE_FB_WARN (ENCODE_RETURN_ON_ERR|ENCODE_WARN_ON_ERR) +#define ENCODE_FB_PERLQQ (ENCODE_PERLQQ|ENCODE_LEAVE_SRC) +#define ENCODE_FB_HTMLCREF (ENCODE_HTMLCREF|ENCODE_LEAVE_SRC) +#define ENCODE_FB_XMLCREF (ENCODE_XMLCREF|ENCODE_LEAVE_SRC) + +#endif /* ENCODE_H */ diff --git a/JP/JP.pm b/JP/JP.pm new file mode 100644 index 0000000..4251170 --- /dev/null +++ b/JP/JP.pm @@ -0,0 +1,95 @@ +package Encode::JP; +BEGIN { + if ( ord("A") == 193 ) { + die "Encode::JP not supported on EBCDIC\n"; + } +} +use strict; +use warnings; +use Encode; +our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; + +use XSLoader; +XSLoader::load( __PACKAGE__, $VERSION ); + +use Encode::JP::JIS7; + +1; +__END__ + +=head1 NAME + +Encode::JP - Japanese Encodings + +=head1 SYNOPSIS + + use Encode qw/encode decode/; + $euc_jp = encode("euc-jp", $utf8); # loads Encode::JP implicitly + $utf8 = decode("euc-jp", $euc_jp); # ditto + +=head1 ABSTRACT + +This module implements Japanese charset encodings. Encodings +supported are as follows. + + Canonical Alias Description + -------------------------------------------------------------------- + euc-jp /\beuc.*jp$/i EUC (Extended Unix Character) + /\bjp.*euc/i + /\bujis$/i + shiftjis /\bshift.*jis$/i Shift JIS (aka MS Kanji) + /\bsjis$/i + 7bit-jis /\bjis$/i 7bit JIS + iso-2022-jp ISO-2022-JP [RFC1468] + = 7bit JIS with all Halfwidth Kana + converted to Fullwidth + iso-2022-jp-1 ISO-2022-JP-1 [RFC2237] + = ISO-2022-JP with JIS X 0212-1990 + support. 
See below + MacJapanese Shift JIS + Apple vendor mappings + cp932 /\bwindows-31j$/i Code Page 932 + = Shift JIS + MS/IBM vendor mappings + jis0201-raw JIS0201, raw format + jis0208-raw JIS0208, raw format + jis0212-raw JIS0212, raw format + -------------------------------------------------------------------- + +=head1 DESCRIPTION + +To find out how to use this module in detail, see L<Encode>. + +=head1 Note on ISO-2022-JP(-1)? + +ISO-2022-JP-1 (RFC2237) is a superset of ISO-2022-JP (RFC1468) which +adds support for JIS X 0212-1990. That means you can use the same +code to decode to utf8 but not vice versa. + + $utf8 = decode('iso-2022-jp-1', $stream); + +and + + $utf8 = decode('iso-2022-jp', $stream); + +yield the same result but + + $with_0212 = encode('iso-2022-jp-1', $utf8); + +is now different from + + $without_0212 = encode('iso-2022-jp', $utf8 ); + +In the latter case, characters that map to 0212 are first converted +to U+3013 (0xA2AE in EUC-JP; a white square also known as 'Tofu' or +'geta mark') then fed to the decoding engine. U+FFFD is not used, +in order to preserve text layout as much as possible. + +=head1 BUGS + +The ASCII region (0x00-0x7f) is preserved for all encodings, even +though this conflicts with mappings by the Unicode Consortium. + +=head1 SEE ALSO + +L<Encode> + +=cut diff --git a/JP/Makefile.PL b/JP/Makefile.PL new file mode 100644 index 0000000..da1807b --- /dev/null +++ b/JP/Makefile.PL @@ -0,0 +1,176 @@ +use 5.7.2; +use strict; +use ExtUtils::MakeMaker; +use strict; + +my %tables = ( + euc_jp_t => ['euc-jp.ucm'], + sjis_t => ['shiftjis.ucm', + 'macJapanese.ucm', + 'cp932.ucm'], + raw_t => [ + qw(jis0201.ucm jis0208.ucm jis0212.ucm) + ], + ); + +unless ($ENV{AGGREGATE_TABLES}){ + my @ucm; + for my $k (keys %tables){ + push @ucm, @{$tables{$k}}; + } + %tables = (); + my $seq = 0; + for my $ucm (sort @ucm){ + # 8.3 compliance !
+ my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++); + $tables{$t} = [ $ucm ]; + } +} + +my $name = 'JP'; + +WriteMakefile( + INC => "-I../Encode", + NAME => 'Encode::'.$name, + VERSION_FROM => "$name.pm", + OBJECT => '$(O_FILES)', + 'dist' => { + COMPRESS => 'gzip -9f', + SUFFIX => 'gz', + DIST_DEFAULT => 'all tardist', + }, + MAN3PODS => {}, + # OS 390 winges about line numbers > 64K ??? + XSOPT => '-nolinenumbers', + ); + +package MY; + +sub post_initialize +{ + my ($self) = @_; + my %o; + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) + { + $o{$e.$x} = 1; + } + $o{"$name$x"} = 1; + $self->{'O_FILES'} = [sort keys %o]; + my @files = ("$name.xs"); + $self->{'C'} = ["$name.c"]; + $self->{SOURCE} .= " $name.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/; + $self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')]; + my %xs; + foreach my $table (sort keys %tables) { + push (@{$self->{'C'}},"$table.c"); + # Do NOT add $table.h etc. to H_FILES unless we own up as to how they + # get built. + foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) { + push (@files,$table.$ext); + } + $self->{SOURCE} .= " $table.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/; + } + $self->{'XS'} = { "$name.xs" => "$name.c" }; + $self->{'clean'}{'FILES'} .= join(' ',@files); + open(XS,">$name.xs") || die "Cannot open $name.xs:$!"; + print XS <<'END'; +#define PERL_NO_GET_CONTEXT +#include <EXTERN.h> +#include <perl.h> +#include <XSUB.h> +#include "encode.h" +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.h"\n]; + } + print XS <<"END"; + +static void +Encode_XSEncoding(pTHX_ encode_t *enc) +{ + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *iv = newSViv(PTR2IV(enc)); + SV *sv = sv_bless(newRV_noinc(iv),stash); + int i = 0; + /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's + constness, in the hope that perl won't mess with it.
*/ + assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0); + SvFLAGS(iv) |= SVp_POK; + SvPVX(iv) = (char*) enc->name[0]; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) + { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding",G_DISCARD); + SvREFCNT_dec(sv); +} + +MODULE = Encode::$name PACKAGE = Encode::$name +PROTOTYPES: DISABLE +BOOT: +{ +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.exh"\n]; + } + print XS "}\n"; + close(XS); + return "# Built $name.xs\n\n"; +} + +sub postamble +{ + my $self = shift; + my $dir = $self->catdir($self->updir,'ucm'); + my $str = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n"; + $str .= "$name.c : $name.xs "; + foreach my $table (sort keys %tables) + { + $str .= " $table.c"; + } + $str .= "\n\n"; + $str .= "$name\$(OBJ_EXT) : $name.c\n\n"; + + my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs'); + foreach my $table (sort keys %tables) + { + my $numlines = 1; + my $lengthsofar = length($str); + my $continuator = ''; + $str .= "$table.c : $enc2xs Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : ''; + $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform; + my $ucopts = '-"Q"'; + $str .= + qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n}; + open (FILELIST, ">$table.fnm") + || die "Could not open $table.fnm: $!"; + foreach my $file (@{$tables{$table}}) + { + print FILELIST $self->catfile($dir,$file) .
"\n"; + } + close(FILELIST); + } + return $str; +} + diff --git a/KR/KR.pm b/KR/KR.pm new file mode 100644 index 0000000..cf2c117 --- /dev/null +++ b/KR/KR.pm @@ -0,0 +1,69 @@ +package Encode::KR; +BEGIN { + if ( ord("A") == 193 ) { + die "Encode::KR not supported on EBCDIC\n"; + } +} +use strict; +use warnings; +use Encode; +our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +use XSLoader; +XSLoader::load( __PACKAGE__, $VERSION ); + +use Encode::KR::2022_KR; + +1; +__END__ + +=head1 NAME + +Encode::KR - Korean Encodings + +=head1 SYNOPSIS + + use Encode qw/encode decode/; + $euc_kr = encode("euc-kr", $utf8); # loads Encode::KR implicitly + $utf8 = decode("euc-kr", $euc_kr); # ditto + +=head1 DESCRIPTION + +This module implements Korean charset encodings. Encodings supported +are as follows. + + + Canonical Alias Description + -------------------------------------------------------------------- + euc-kr /\beuc.*kr$/i EUC (Extended Unix Character) + /\bkr.*euc$/i + ksc5601-raw Korean standard code set (as is) + cp949 /(?:x-)?uhc$/i + /(?:x-)?windows-949$/i + /\bks_c_5601-1987$/i + Code Page 949 (EUC-KR + 8,822 + additional Hangul syllables) + MacKorean EUC-KR + Apple Vendor Mappings + johab JOHAB A supplementary encoding defined in + Annex 3 of KS X 1001:1998 + iso-2022-kr iso-2022-kr [RFC1557] + -------------------------------------------------------------------- + +To find out how to use this module in detail, see L<Encode>. + +=head1 BUGS + +When you see C<charset=ks_c_5601-1987> on mails and web pages, they really +mean "cp949" encodings. To fix that, the following aliases are set: + + qr/(?:x-)?uhc$/i => '"cp949"' + qr/(?:x-)?windows-949$/i => '"cp949"' + qr/ks_c_5601-1987$/i => '"cp949"' + +The ASCII region (0x00-0x7f) is preserved for all encodings, even +though this conflicts with mappings by the Unicode Consortium.
+ +=head1 SEE ALSO + +L<Encode> + +=cut diff --git a/KR/Makefile.PL b/KR/Makefile.PL new file mode 100644 index 0000000..85c056b --- /dev/null +++ b/KR/Makefile.PL @@ -0,0 +1,174 @@ +use 5.7.2; +use strict; +use ExtUtils::MakeMaker; +use strict; + +my %tables = (euc_kr_t => ['euc-kr.ucm', + 'macKorean.ucm', + 'cp949.ucm', + ], + '5601_t' => ['ksc5601.ucm'], + johab_t => ['johab.ucm'], + ); + +unless ($ENV{AGGREGATE_TABLES}){ + my @ucm; + for my $k (keys %tables){ + push @ucm, @{$tables{$k}}; + } + %tables = (); + my $seq = 0; + for my $ucm (sort @ucm){ + # 8.3 compliance ! + my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++); + $tables{$t} = [ $ucm ]; + } +} + +my $name = 'KR'; + +WriteMakefile( + INC => "-I../Encode", + NAME => 'Encode::'.$name, + VERSION_FROM => "$name.pm", + OBJECT => '$(O_FILES)', + 'dist' => { + COMPRESS => 'gzip -9f', + SUFFIX => 'gz', + DIST_DEFAULT => 'all tardist', + }, + MAN3PODS => {}, + # OS 390 winges about line numbers > 64K ??? + XSOPT => '-nolinenumbers', + ); + +package MY; + +sub post_initialize +{ + my ($self) = @_; + my %o; + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) + { + $o{$e.$x} = 1; + } + $o{"$name$x"} = 1; + $self->{'O_FILES'} = [sort keys %o]; + my @files = ("$name.xs"); + $self->{'C'} = ["$name.c"]; + $self->{SOURCE} .= " $name.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/; + $self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')]; + my %xs; + foreach my $table (sort keys %tables) { + push (@{$self->{'C'}},"$table.c"); + # Do NOT add $table.h etc. to H_FILES unless we own up as to how they + # get built.
+ foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) { + push (@files,$table.$ext); + } + $self->{SOURCE} .= " $table.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/; + } + $self->{'XS'} = { "$name.xs" => "$name.c" }; + $self->{'clean'}{'FILES'} .= join(' ',@files); + open(XS,">$name.xs") || die "Cannot open $name.xs:$!"; + print XS <<'END'; +#define PERL_NO_GET_CONTEXT +#include <EXTERN.h> +#include <perl.h> +#include <XSUB.h> +#include "encode.h" +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.h"\n]; + } + print XS <<"END"; + +static void +Encode_XSEncoding(pTHX_ encode_t *enc) +{ + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *iv = newSViv(PTR2IV(enc)); + SV *sv = sv_bless(newRV_noinc(iv),stash); + int i = 0; + /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's + constness, in the hope that perl won't mess with it. */ + assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0); + SvFLAGS(iv) |= SVp_POK; + SvPVX(iv) = (char*) enc->name[0]; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) + { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding",G_DISCARD); + SvREFCNT_dec(sv); +} + +MODULE = Encode::$name PACKAGE = Encode::$name +PROTOTYPES: DISABLE +BOOT: +{ +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.exh"\n]; + } + print XS "}\n"; + close(XS); + return "# Built $name.xs\n\n"; +} + +sub postamble +{ + my $self = shift; + my $dir = $self->catdir($self->updir,'ucm'); + my $str = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n"; + $str .= "$name.c : $name.xs "; + foreach my $table (sort keys %tables) + { + $str .= " $table.c"; + } + $str .= "\n\n"; + $str .= "$name\$(OBJ_EXT) : $name.c\n\n"; + + my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs'); + foreach my $table (sort keys %tables) + { + my $numlines = 1; + my $lengthsofar = length($str); + my
$continuator = ''; + $str .= "$table.c : $enc2xs Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : ''; + $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform; + my $ucopts = '-"Q"'; + $str .= + qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n}; + open (FILELIST, ">$table.fnm") + || die "Could not open $table.fnm: $!"; + foreach my $file (@{$tables{$table}}) + { + print FILELIST $self->catfile($dir,$file) . "\n"; + } + close(FILELIST); + } + return $str; +} + diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..48030db --- /dev/null +++ b/MANIFEST @@ -0,0 +1,224 @@ +AUTHORS List of authors +Byte/Byte.pm Encode extension +Byte/Makefile.PL Encode extension +CN/CN.pm Encode extension +CN/Makefile.PL Encode extension +Changes Change Log +EBCDIC/EBCDIC.pm Encode extension +EBCDIC/Makefile.PL Encode extension +Encode.pm Mother of all Encode extensions +Encode.xs Encode extension +Encode/Changes.e2x Skeleton file for enc2xs +Encode/ConfigLocal_PM.e2x Skeleton file for enc2xs +Encode/Makefile_PL.e2x Skeleton file for enc2xs +Encode/README.e2x Skeleton file for enc2xs +Encode/_PM.e2x Skeleton file for enc2xs +Encode/_T.e2x Skeleton file for enc2xs +Encode/encode.h Encode extension header file +JP/JP.pm Encode extension +JP/Makefile.PL Encode extension +KR/KR.pm Encode extension +KR/Makefile.PL Encode extension +MANIFEST Encode extension +Makefile.PL Encode extension makefile writer +README Encode extension +Symbol/Makefile.PL Encode extension +Symbol/Symbol.pm Encode extension +TW/Makefile.PL Encode extension +TW/TW.pm Encode extension +Unicode/Makefile.PL Encode extension +Unicode/Unicode.pm Encode extension +Unicode/Unicode.xs Encode extension +bin/enc2xs Encode module 
generator +bin/encguess Guess the encoding of file(s) +bin/piconv iconv by perl +bin/ucm2table Table Generator for testing +bin/ucmlint A UCM Lint utility +bin/ucmsort Sorts UCM lines +bin/unidump Unicode Dump like hexdump(1) +encengine.c Encode extension +encoding.pm Perl Pragmactic Module +lib/Encode/Alias.pm Encode extension +lib/Encode/CJKConstants.pm Encode extension +lib/Encode/CN/HZ.pm Encode extension +lib/Encode/Config.pm Encode configuration module +lib/Encode/Encoder.pm OO Encoder +lib/Encode/Encoding.pm Encode extension +lib/Encode/GSM0338.pm Encode extension +lib/Encode/Guess.pm Encode Extension +lib/Encode/JP/H2Z.pm Encode extension +lib/Encode/JP/JIS7.pm Encode extension +lib/Encode/KR/2022_KR.pm Encode extension +lib/Encode/MIME/Header.pm Encode extension +lib/Encode/MIME/Header/ISO_2022_JP.pm Encode extension +lib/Encode/MIME/Name.pm Encode extension +lib/Encode/PerlIO.pod Documents for Encode & PerlIO +lib/Encode/Supported.pod Documents for supported encodings +lib/Encode/Unicode/UTF7.pm Encode Extension +t/Aliases.t test script +t/CJKT.t test script +t/Encode.t test script +t/Encoder.t test script +t/Mod_EUCJP.pm module that t/enc_module.enc uses +t/Unicode.t test script +t/at-cn.t test script +t/at-tw.t test script +t/big5-eten.enc test data +t/big5-eten.utf test data +t/big5-hkscs.enc test data +t/big5-hkscs.utf test data +t/cow.t test script +t/decode.t test script +t/enc_data.t test script for encoding.pm vs. 
DATA fh +t/enc_eucjp.t test script +t/enc_module.enc test data for t/enc_module.t +t/enc_module.t test script +t/enc_utf8.t test script +t/encoding.t test script +t/encoding-locale.t test script +t/fallback.t test script +t/from_to.t test script +t/gb2312.enc test data +t/gb2312.utf test data +t/grow.t test script +t/gsm0338.t test script +t/guess.t test script +t/isa.t test script +t/jis7-fallback.t test script +t/jisx0201.enc test data +t/jisx0201.utf test data +t/jisx0208.enc test data +t/jisx0208.utf test data +t/jisx0212.enc test data +t/jisx0212.utf test data +t/jperl.t test script +t/ksc5601.enc test data +t/ksc5601.utf test data +t/magic.t test script +t/mime-header.t test script +t/mime-name.t test script +t/mime_header_iso2022jp.t test script +t/perlio.t test script +t/piconv.t test script +t/rt.pl even more test script +t/rt65541.t test script +t/rt76824.t test script +t/rt85489.t test script +t/rt86327.t test script +t/rt113164.t test script +t/taint.t test script +t/truncated_utf8.t test script +t/undef.t test script +t/unibench.pl benchmark script +t/use-Encode-Alias.t test script +t/utf8messages.t test script +t/utf8ref.t test script +t/utf8strict.t test script +t/utf8warnings.t test script +t/whatwg-aliases.t test script +t/whatwg-aliases.json test data +ucm/8859-1.ucm Unicode Character Map +ucm/8859-10.ucm Unicode Character Map +ucm/8859-11.ucm Unicode Character Map +ucm/8859-13.ucm Unicode Character Map +ucm/8859-14.ucm Unicode Character Map +ucm/8859-15.ucm Unicode Character Map +ucm/8859-16.ucm Unicode Character Map +ucm/8859-2.ucm Unicode Character Map +ucm/8859-3.ucm Unicode Character Map +ucm/8859-4.ucm Unicode Character Map +ucm/8859-5.ucm Unicode Character Map +ucm/8859-6.ucm Unicode Character Map +ucm/8859-7.ucm Unicode Character Map +ucm/8859-8.ucm Unicode Character Map +ucm/8859-9.ucm Unicode Character Map +ucm/adobeStdenc.ucm Unicode Character Map +ucm/adobeSymbol.ucm Unicode Character Map +ucm/adobeZdingbat.ucm Unicode Character Map 
+ucm/ascii.ucm Unicode Character Map +ucm/big5-eten.ucm Unicode Character Map +ucm/big5-hkscs.ucm Unicode Character Map +ucm/cp037.ucm Unicode Character Map +ucm/cp1006.ucm Unicode Character Map +ucm/cp1026.ucm Unicode Character Map +ucm/cp1047.ucm Unicode Character Map +ucm/cp1250.ucm Unicode Character Map +ucm/cp1251.ucm Unicode Character Map +ucm/cp1252.ucm Unicode Character Map +ucm/cp1253.ucm Unicode Character Map +ucm/cp1254.ucm Unicode Character Map +ucm/cp1255.ucm Unicode Character Map +ucm/cp1256.ucm Unicode Character Map +ucm/cp1257.ucm Unicode Character Map +ucm/cp1258.ucm Unicode Character Map +ucm/cp424.ucm Unicode Character Map +ucm/cp437.ucm Unicode Character Map +ucm/cp500.ucm Unicode Character Map +ucm/cp737.ucm Unicode Character Map +ucm/cp775.ucm Unicode Character Map +ucm/cp850.ucm Unicode Character Map +ucm/cp852.ucm Unicode Character Map +ucm/cp855.ucm Unicode Character Map +ucm/cp856.ucm Unicode Character Map +ucm/cp857.ucm Unicode Character Map +ucm/cp858.ucm Unicode Character Map +ucm/cp860.ucm Unicode Character Map +ucm/cp861.ucm Unicode Character Map +ucm/cp862.ucm Unicode Character Map +ucm/cp863.ucm Unicode Character Map +ucm/cp864.ucm Unicode Character Map +ucm/cp865.ucm Unicode Character Map +ucm/cp866.ucm Unicode Character Map +ucm/cp869.ucm Unicode Character Map +ucm/cp874.ucm Unicode Character Map +ucm/cp875.ucm Unicode Character Map +ucm/cp932.ucm Unicode Character Map +ucm/cp936.ucm Unicode Character Map +ucm/cp949.ucm Unicode Character Map +ucm/cp950.ucm Unicode Character Map +ucm/ctrl.ucm Unicode Character Map +ucm/dingbats.ucm Unicode Character Map +ucm/euc-cn.ucm Unicode Character Map +ucm/euc-jp.ucm Unicode Character Map +ucm/euc-kr.ucm Unicode Character Map +ucm/gb12345.ucm Unicode Character Map +ucm/gb2312.ucm Unicode Character Map +ucm/hp-roman8.ucm Unicode Character Map +ucm/ir-165.ucm Unicode Character Map +ucm/jis0201.ucm Unicode Character Map +ucm/jis0208.ucm Unicode Character Map +ucm/jis0212.ucm Unicode Character 
Map +ucm/johab.ucm Unicode Character Map +ucm/koi8-f.ucm Unicode Character Map +ucm/koi8-r.ucm Unicode Character Map +ucm/koi8-u.ucm Unicode Character Map +ucm/ksc5601.ucm Unicode Character Map +ucm/macArabic.ucm Unicode Character Map +ucm/macCentEuro.ucm Unicode Character Map +ucm/macChinsimp.ucm Unicode Character Map +ucm/macChintrad.ucm Unicode Character Map +ucm/macCroatian.ucm Unicode Character Map +ucm/macCyrillic.ucm Unicode Character Map +ucm/macDingbats.ucm Unicode Character Map +ucm/macFarsi.ucm Unicode Character Map +ucm/macGreek.ucm Unicode Character Map +ucm/macHebrew.ucm Unicode Character Map +ucm/macIceland.ucm Unicode Character Map +ucm/macJapanese.ucm Unicode Character Map +ucm/macKorean.ucm Unicode Character Map +ucm/macROMnn.ucm Unicode Character Map +ucm/macRUMnn.ucm Unicode Character Map +ucm/macRoman.ucm Unicode Character Map +ucm/macSami.ucm Unicode Character Map +ucm/macSymbol.ucm Unicode Character Map +ucm/macThai.ucm Unicode Character Map +ucm/macTurkish.ucm Unicode Character Map +ucm/macUkraine.ucm Unicode Character Map +ucm/nextstep.ucm Unicode Character Map +ucm/null.ucm Unicode Character Map +ucm/posix-bc.ucm Unicode Character Map +ucm/shiftjis.ucm Unicode Character Map +ucm/symbol.ucm Unicode Character Map +ucm/viscii.ucm Unicode Character Map +META.yml Module YAML meta-data (added by MakeMaker) +META.json Module JSON meta-data (added by MakeMaker) diff --git a/META.json b/META.json new file mode 100644 index 0000000..ce7a73c --- /dev/null +++ b/META.json @@ -0,0 +1,135 @@ +{ + "abstract" : "character encodings in Perl", + "author" : [ + "Dan Kogai " + ], + "dynamic_config" : 1, + "generated_by" : "ExtUtils::MakeMaker version 7.1002, CPAN::Meta::Converter version 2.150005", + "license" : [ + "perl_5" + ], + "meta-spec" : { + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : "2" + }, + "name" : "Encode", + "no_index" : { + "directory" : [ + "t", + "inc" + ] + }, + "prereqs" : { + "build" : { + "requires" : { + 
"ExtUtils::MakeMaker" : "0", + "Test::More" : "0.81_01" + } + }, + "configure" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "runtime" : { + "requires" : { + "Exporter" : "5.57", + "Storable" : "0", + "parent" : "0.221" + } + } + }, + "release_status" : "stable", + "resources" : { + "repository" : { + "url" : "https://github.com/dankogai/p5-encode" + } + }, + "version" : "2.97", + "x_contributors" : [ + "Alex Davies ", + "Alex Kapranoff ", + "Alex Vandiver ", + "Andreas J. Koenig ", + "Andrew Pennebaker ", + "Andy Grundman ", + "Anton Tagunov ", + "Autrijus Tang ", + "Benjamin Goldberg ", + "Bjoern Hoehrmann ", + "Bjoern Jacke ", + "bulk88 ", + "Craig A. Berry ", + "Curtis Jewell ", + "Dan Kogai ", + "Dave Evans ", + "David Golden ", + "David Steinbrunner ", + "Deng Liu ", + "Dominic Dunlop ", + "drry", + "Elizabeth Mattijsen ", + "Flavio Poletti ", + "Gerrit P. Haase ", + "Gisle Aas ", + "Graham Barr ", + "Graham Knop ", + "Graham Ollis ", + "Gurusamy Sarathy ", + "H.Merijn Brand ", + "Hugo van der Sanden ", + "chansen ", + "Chris Nandor ", + "Inaba Hiroto ", + "Jarkko Hietaniemi ", + "Jesse Vincent ", + "Jungshik Shin ", + "Karen Etheridge ", + "Karl Williamson ", + "Kenichi Ishigaki ", + "KONNO Hiroharu ", + "Laszlo Molnar ", + "Makamaka ", + "Mark-Jason Dominus ", + "Masahiro Iuchi ", + "MATSUNO Tokuhiro ", + "Mattia Barbon ", + "Michael G Schwern ", + "Michael LaGrasta ", + "Miron Cuperman ", + "Moritz Lenz ", + "MORIYAMA Masayuki ", + "Nick Ing-Simmons ", + "Nicholas Clark ", + "Olivier Mengué ", + "otsune", + "Pali ", + "Paul Marquess ", + "Peter Prymmer ", + "Peter Rabbitson ", + "Philip Newton ", + "Piotr Fusik ", + "Rafael Garcia-Suarez ", + "Randy Stauner ", + "Reini Urban ", + "Robin Barker ", + "SADAHIRO Tomoyuki ", + "Simon Cozens ", + "Slaven Rezic ", + "Spider Boardman ", + "Steve Hay ", + "Steve Peters ", + "SUGAWARA Hajime ", + "SUZUKI Norio ", + "szr8 ", + "Tatsuhiko Miyagawa ", + "Tels ", + "Tony Cook ", + "Vadim Konovalov ", + 
"Victor ", + "Ville Skyttä ", + "Vincent van Dam ", + "Yitzchak Scott-Thoennes " + ], + "x_serialization_backend" : "JSON::PP version 2.27300_01" +} diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..dd08665 --- /dev/null +++ b/META.yml @@ -0,0 +1,112 @@ +--- +abstract: 'character encodings in Perl' +author: + - 'Dan Kogai ' +build_requires: + ExtUtils::MakeMaker: '0' + Test::More: 0.81_01 +configure_requires: + ExtUtils::MakeMaker: '0' +dynamic_config: 1 +generated_by: 'ExtUtils::MakeMaker version 7.1002, CPAN::Meta::Converter version 2.150005' +license: perl +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: '1.4' +name: Encode +no_index: + directory: + - t + - inc +requires: + Exporter: '5.57' + Storable: '0' + parent: '0.221' +resources: + repository: https://github.com/dankogai/p5-encode +version: '2.97' +x_contributors: + - 'Alex Davies ' + - 'Alex Kapranoff ' + - 'Alex Vandiver ' + - 'Andreas J. Koenig ' + - 'Andrew Pennebaker ' + - 'Andy Grundman ' + - 'Anton Tagunov ' + - 'Autrijus Tang ' + - 'Benjamin Goldberg ' + - 'Bjoern Hoehrmann ' + - 'Bjoern Jacke ' + - 'bulk88 ' + - 'Craig A. Berry ' + - 'Curtis Jewell ' + - 'Dan Kogai ' + - 'Dave Evans ' + - 'David Golden ' + - 'David Steinbrunner ' + - 'Deng Liu ' + - 'Dominic Dunlop ' + - drry + - 'Elizabeth Mattijsen ' + - 'Flavio Poletti ' + - 'Gerrit P. 
Haase ' + - 'Gisle Aas ' + - 'Graham Barr ' + - 'Graham Knop ' + - 'Graham Ollis ' + - 'Gurusamy Sarathy ' + - 'H.Merijn Brand ' + - 'Hugo van der Sanden ' + - 'chansen ' + - 'Chris Nandor ' + - 'Inaba Hiroto ' + - 'Jarkko Hietaniemi ' + - 'Jesse Vincent ' + - 'Jungshik Shin ' + - 'Karen Etheridge ' + - 'Karl Williamson ' + - 'Kenichi Ishigaki ' + - 'KONNO Hiroharu ' + - 'Laszlo Molnar ' + - 'Makamaka ' + - 'Mark-Jason Dominus ' + - 'Masahiro Iuchi ' + - 'MATSUNO Tokuhiro ' + - 'Mattia Barbon ' + - 'Michael G Schwern ' + - 'Michael LaGrasta ' + - 'Miron Cuperman ' + - 'Moritz Lenz ' + - 'MORIYAMA Masayuki ' + - 'Nick Ing-Simmons ' + - 'Nicholas Clark ' + - 'Olivier Mengué ' + - otsune + - 'Pali ' + - 'Paul Marquess ' + - 'Peter Prymmer ' + - 'Peter Rabbitson ' + - 'Philip Newton ' + - 'Piotr Fusik ' + - 'Rafael Garcia-Suarez ' + - 'Randy Stauner ' + - 'Reini Urban ' + - 'Robin Barker ' + - 'SADAHIRO Tomoyuki ' + - 'Simon Cozens ' + - 'Slaven Rezic ' + - 'Spider Boardman ' + - 'Steve Hay ' + - 'Steve Peters ' + - 'SUGAWARA Hajime ' + - 'SUZUKI Norio ' + - 'szr8 ' + - 'Tatsuhiko Miyagawa ' + - 'Tels ' + - 'Tony Cook ' + - 'Vadim Konovalov ' + - 'Victor ' + - 'Ville Skyttä ' + - 'Vincent van Dam ' + - 'Yitzchak Scott-Thoennes ' +x_serialization_backend: 'CPAN::Meta::YAML version 0.018' diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..8c20d20 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,251 @@ +# +# $Id: Makefile.PL,v 2.22 2017/10/06 22:21:53 dankogai Exp $ +# +use 5.007003; +use strict; +use warnings; +use utf8; +use ExtUtils::MakeMaker; +use File::Spec; +use Config; + +# Just for sure :) +my %ARGV = map { my @r = split /=/,$_; defined $r[1] or $r[1]=1; @r } @ARGV; +$ARGV{DEBUG} and warn "$_ => $ARGV{$_}\n" for sort keys %ARGV; +$ENV{PERL_CORE} ||= $ARGV{PERL_CORE} if $ARGV{PERL_CORE}; +# similar strictness as in core +my $ccflags = $Config{ccflags}; +if (!$ENV{PERL_CORE}) { + if (my $gccver = $Config{gccversion}) { + $gccver =~ s/\.//g; 
$gccver =~ s/ .*//; + $gccver .= "0" while length $gccver < 3; + $gccver = 0+$gccver; + $ccflags .= ' -Werror=declaration-after-statement' if $gccver > 412; + $ccflags .= ' -Wpointer-sign' if !$Config{d_cplusplus} and $gccver > 400; + $ccflags .= ' -fpermissive' if $Config{d_cplusplus}; + } +} + +my %tables = + ( + def_t => [ + 'ascii.ucm', + '8859-1.ucm', + 'cp1252.ucm', + 'null.ucm', + 'ctrl.ucm', + ] + ); + +my @exe_files = qw(bin/enc2xs + bin/piconv + bin/encguess + ); +my @more_exe_files = qw( + unidump + ); +my @pmlibdirs = qw(lib Encode); + +$ARGV{MORE_SCRIPTS} and push @exe_files, @more_exe_files; +$ARGV{INSTALL_UCM} and push @pmlibdirs, "ucm"; + +WriteMakefile( + NAME => "Encode", + EXE_FILES => \@exe_files, + VERSION_FROM => 'Encode.pm', + ABSTRACT_FROM=> 'Encode.pm', + AUTHOR => 'Dan Kogai ', + OBJECT => '$(O_FILES)', + 'dist' => { + COMPRESS => 'gzip -9f', + SUFFIX => 'gz', + DIST_DEFAULT => 'all tardist', + }, + CCFLAGS => $ccflags, + INC => '-I' . File::Spec->catfile( '.', 'Encode' ), + LICENSE => 'perl', + PREREQ_PM => { + Exporter => '5.57', # use Exporter 'import'; + parent => '0.221', # version bundled with 5.10.1 + Storable => '0', # bundled with Perl 5.7.3 + }, + TEST_REQUIRES => { + 'Test::More' => '0.81_01', + }, + PMLIBDIRS => \@pmlibdirs, + INSTALLDIRS => ($] < 5.011 ? 'perl' : 'site'), + META_MERGE => { + resources => { + repository => 'https://github.com/dankogai/p5-encode', + }, + x_contributors => [ + 'Alex Davies ', + 'Alex Kapranoff ', + 'Alex Vandiver ', + 'Andreas J. Koenig ', + 'Andrew Pennebaker ', + 'Andy Grundman ', + 'Anton Tagunov ', + 'Autrijus Tang ', + 'Benjamin Goldberg ', + 'Bjoern Hoehrmann ', + 'Bjoern Jacke ', + 'bulk88 ', + 'Craig A. Berry ', + 'Curtis Jewell ', + 'Dan Kogai ', + 'Dave Evans ', + 'David Golden ', + 'David Steinbrunner ', + 'Deng Liu ', + 'Dominic Dunlop ', + 'drry', + 'Elizabeth Mattijsen ', + 'Flavio Poletti ', + 'Gerrit P. 
Haase ', + 'Gisle Aas ', + 'Graham Barr ', + 'Graham Knop ', + 'Graham Ollis ', + 'Gurusamy Sarathy ', + 'H.Merijn Brand ', + 'Hugo van der Sanden ', + 'chansen ', + 'Chris Nandor ', + 'Inaba Hiroto ', + 'Jarkko Hietaniemi ', + 'Jesse Vincent ', + 'Jungshik Shin ', + 'Karen Etheridge ', + 'Karl Williamson ', + 'Kenichi Ishigaki ', + 'KONNO Hiroharu ', + 'Laszlo Molnar ', + 'Makamaka ', + 'Mark-Jason Dominus ', + 'Masahiro Iuchi ', + 'MATSUNO Tokuhiro ', + 'Mattia Barbon ', + 'Michael G Schwern ', + 'Michael LaGrasta ', + 'Miron Cuperman ', + 'Moritz Lenz ', + 'MORIYAMA Masayuki ', + 'Nick Ing-Simmons ', + 'Nicholas Clark ', + 'Olivier Mengué ', + 'otsune', + 'Pali ', + 'Paul Marquess ', + 'Peter Prymmer ', + 'Peter Rabbitson ', + 'Philip Newton ', + 'Piotr Fusik ', + 'Rafael Garcia-Suarez ', + 'Randy Stauner ', + 'Reini Urban ', + 'Robin Barker ', + 'SADAHIRO Tomoyuki ', + 'Simon Cozens ', + 'Slaven Rezic ', + 'Spider Boardman ', + 'Steve Hay ', + 'Steve Peters ', + 'SUGAWARA Hajime ', + 'SUZUKI Norio ', + 'szr8 ', + 'Tatsuhiko Miyagawa ', + 'Tels ', + 'Tony Cook ', + 'Vadim Konovalov ', + 'Victor ', + 'Ville Skyttä ', + 'Vincent van Dam ', + 'Yitzchak Scott-Thoennes ', + ], + }, +); + +package MY; + + +sub post_initialize +{ + my ($self) = @_; + my %o; + # Find existing O_FILES + foreach my $f (@{$self->{'O_FILES'}}) + { + $o{$f} = 1; + } + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) + { + $o{$e.$x} = 1; + } + # Trick case-blind filesystems. 
+ delete $o{'encode'.$x}; + $o{'Encode'.$x} = 1; + # Reset the variable + $self->{'O_FILES'} = [sort keys %o]; + my @files; + foreach my $table (sort keys %tables) + { + foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) + { + push (@files,$table.$ext); + } + $self->{SOURCE} .= " $table.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/; +} +$self->{'clean'}{'FILES'} .= join(' ',@files); +return ''; +} + +sub postamble +{ + my $self = shift; + my $dir = $self->catdir($self->curdir,'ucm'); + my $str = "# Encode\$(OBJ_EXT) does not depend on .c files directly\n"; + $str .= "# (except Encode.c), but on .h and .exh files written by enc2xs\n"; + $str .= $^O eq 'MacOS' ? 'Encode.c.{$(MACPERL_BUILD_EXT_STATIC)}.o :' : 'Encode$(OBJ_EXT) :'; + $str .= ' Encode.c'; + foreach my $table (sort keys %tables) + { + $str .= " $table.c"; + } + $str .= "\n\n"; + foreach my $table (sort keys %tables) + { + my $numlines = 1; + my $lengthsofar = length($str); + my $continuator = ''; + my $enc2xs = $self->catfile('bin', 'enc2xs'); + $str .= "$table.c : $enc2xs Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : ''; + $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform; + my $ucopts = '-"Q" -"O"'; + $str .= + qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n}; + open (FILELIST, ">$table.fnm") + || die "Could not open $table.fnm: $!"; + foreach my $file (@{$tables{$table}}) + { + print FILELIST $self->catfile($dir,$file) .
"\n"; + } + close(FILELIST); + } + return $str; +} diff --git a/README b/README new file mode 100644 index 0000000..0951ffa --- /dev/null +++ b/README @@ -0,0 +1,50 @@ +NAME + Encode - character encodings + +SYNOPSIS + use Encode; + +DESCRIPTION + The "Encode" module provides the interfaces between Perl's + strings and the rest of the system. Perl strings are + sequences of characters. + + See "perldoc Encode" for the rest of the story + +INSTALLATION + +To install this module, type the following: + + perl Makefile.PL + make + make test + make install + +To install scripts under bin/ directories also, + + perl Makefile.PL MORE_SCRIPTS + make && make test && make install + +By default, only enc2xs, piconv and encguess are installed. + +To install *.ucm files also, say + + perl Makefile.PL INSTALL_UCM + make && make test && make install + +By default, *.ucm are not installed. + +DEPENDENCIES + +This module requires perl5.7.3 or later. + +MAINTAINER + +This project was originated by Nick Ing-Simmons and later maintained by +Dan Kogai. See AUTHORS for the full list of people +involved. + +QUESTIONS? + +If you have any questions which "perldoc Encode" does not answer, please +feel free to ask at perl-unicode@perl.org. diff --git a/Symbol/Makefile.PL b/Symbol/Makefile.PL new file mode 100644 index 0000000..3c01be2 --- /dev/null +++ b/Symbol/Makefile.PL @@ -0,0 +1,162 @@ +use 5.7.2; +use strict; +use ExtUtils::MakeMaker; + +my $name = 'Symbol'; +my %tables = ( + symbol_t => [qw( + symbol.ucm + dingbats.ucm + adobeSymbol.ucm + adobeZdingbat.ucm + macSymbol.ucm + macDingbats.ucm + ) + ], + ); + +WriteMakefile( + INC => "-I../Encode", + NAME => 'Encode::'.$name, + VERSION_FROM => "$name.pm", + OBJECT => '$(O_FILES)', + 'dist' => { + COMPRESS => 'gzip -9f', + SUFFIX => 'gz', + DIST_DEFAULT => 'all tardist', + }, + MAN3PODS => {}, + # OS 390 winges about line numbers > 64K ???
+ XSOPT => '-nolinenumbers', + ); + +package MY; + +sub post_initialize +{ + my ($self) = @_; + my %o; + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) + { + $o{$e.$x} = 1; + } + $o{"$name$x"} = 1; + $self->{'O_FILES'} = [sort keys %o]; + my @files = ("$name.xs"); + $self->{'C'} = ["$name.c"]; + $self->{SOURCE} .= " $name.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/; + $self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')]; + my %xs; + foreach my $table (sort keys %tables) { + push (@{$self->{'C'}},"$table.c"); + # Do NOT add $table.h etc. to H_FILES unless we own up as to how they + # get built. + foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) { + push (@files,$table.$ext); + } + $self->{SOURCE} .= " $table.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/; + } + $self->{'XS'} = { "$name.xs" => "$name.c" }; + $self->{'clean'}{'FILES'} .= join(' ',@files); + open(XS,">$name.xs") || die "Cannot open $name.xs:$!"; + print XS <<'END'; +#define PERL_NO_GET_CONTEXT +#include +#include +#include +#include "encode.h" +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.h"\n]; + } + print XS <<"END"; + +static void +Encode_XSEncoding(pTHX_ encode_t *enc) +{ + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *iv = newSViv(PTR2IV(enc)); + SV *sv = sv_bless(newRV_noinc(iv),stash); + int i = 0; + /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's + constness, in the hope that perl won't mess with it. 
*/ + assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0); + SvFLAGS(iv) |= SVp_POK; + SvPVX(iv) = (char*) enc->name[0]; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) + { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding",G_DISCARD); + SvREFCNT_dec(sv); +} + +MODULE = Encode::$name PACKAGE = Encode::$name +PROTOTYPES: DISABLE +BOOT: +{ +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.exh"\n]; + } + print XS "}\n"; + close(XS); + return "# Built $name.xs\n\n"; +} + +sub postamble +{ + my $self = shift; + my $dir = $self->catdir($self->updir,'ucm'); + my $str = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n"; + $str .= "$name.c : $name.xs "; + foreach my $table (sort keys %tables) + { + $str .= " $table.c"; + } + $str .= "\n\n"; + $str .= "$name\$(OBJ_EXT) : $name.c\n\n"; + + my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs'); + foreach my $table (sort keys %tables) + { + my $numlines = 1; + my $lengthsofar = length($str); + my $continuator = ''; + $str .= "$table.c : $enc2xs Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : ''; + $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform; + my $ucopts = '-"Q" -"O"'; + $str .= + qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n}; + open (FILELIST, ">$table.fnm") + || die "Could not open $table.fnm: $!"; + foreach my $file (@{$tables{$table}}) + { + print FILELIST $self->catfile($dir,$file) . 
"\n"; + } + close(FILELIST); + } + return $str; +} + diff --git a/Symbol/Symbol.pm b/Symbol/Symbol.pm new file mode 100644 index 0000000..77031aa --- /dev/null +++ b/Symbol/Symbol.pm @@ -0,0 +1,44 @@ +package Encode::Symbol; +use strict; +use warnings; +use Encode; +our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; + +use XSLoader; +XSLoader::load( __PACKAGE__, $VERSION ); + +1; +__END__ + +=head1 NAME + +Encode::Symbol - Symbol Encodings + +=head1 SYNOPSIS + + use Encode qw/encode decode/; + $symbol = encode("symbol", $utf8); # loads Encode::Symbol implicitly + $utf8 = decode("symbol", $symbol); # ditto + +=head1 ABSTRACT + +This module implements symbol and dingbats encodings. Encodings +supported are as follows. + + Canonical Alias Description + -------------------------------------------------------------------- + symbol + dingbats + AdobeZDingbat + AdobeSymbol + MacDingbats + +=head1 DESCRIPTION + +To find out how to use this module in detail, see L<Encode>. + +=head1 SEE ALSO + +L<Encode> + +=cut diff --git a/TW/Makefile.PL b/TW/Makefile.PL new file mode 100644 index 0000000..a9f7254 --- /dev/null +++ b/TW/Makefile.PL @@ -0,0 +1,172 @@ +use 5.7.2; +use strict; +use ExtUtils::MakeMaker; +use strict; + +my %tables = (big5_t => ['big5-eten.ucm', + 'big5-hkscs.ucm', + 'macChintrad.ucm', + 'cp950.ucm'], + ); + +unless ($ENV{AGGREGATE_TABLES}){ + my @ucm; + for my $k (keys %tables){ + push @ucm, @{$tables{$k}}; + } + %tables = (); + my $seq = 0; + for my $ucm (sort @ucm){ + # 8.3 compliance ! + my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++); + $tables{$t} = [ $ucm ]; + } +} + +my $name = 'TW'; + +WriteMakefile( + INC => "-I../Encode", + NAME => 'Encode::'.$name, + VERSION_FROM => "$name.pm", + OBJECT => '$(O_FILES)', + 'dist' => { + COMPRESS => 'gzip -9f', + SUFFIX => 'gz', + DIST_DEFAULT => 'all tardist', + }, + MAN3PODS => {}, + # OS 390 winges about line numbers > 64K ??? 
+ XSOPT => '-nolinenumbers', + ); + +package MY; + +sub post_initialize +{ + my ($self) = @_; + my %o; + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) + { + $o{$e.$x} = 1; + } + $o{"$name$x"} = 1; + $self->{'O_FILES'} = [sort keys %o]; + my @files = ("$name.xs"); + $self->{'C'} = ["$name.c"]; + $self->{SOURCE} .= " $name.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/; + $self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')]; + my %xs; + foreach my $table (sort keys %tables) { + push (@{$self->{'C'}},"$table.c"); + # Do NOT add $table.h etc. to H_FILES unless we own up as to how they + # get built. + foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) { + push (@files,$table.$ext); + } + $self->{SOURCE} .= " $table.c" + if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/; + } + $self->{'XS'} = { "$name.xs" => "$name.c" }; + $self->{'clean'}{'FILES'} .= join(' ',@files); + open(XS,">$name.xs") || die "Cannot open $name.xs:$!"; + print XS <<'END'; +#define PERL_NO_GET_CONTEXT +#include +#include +#include +#include "encode.h" +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.h"\n]; + } + print XS <<"END"; + +static void +Encode_XSEncoding(pTHX_ encode_t *enc) +{ + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *iv = newSViv(PTR2IV(enc)); + SV *sv = sv_bless(newRV_noinc(iv),stash); + int i = 0; + /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's + constness, in the hope that perl won't mess with it. 
*/ + assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0); + SvFLAGS(iv) |= SVp_POK; + SvPVX(iv) = (char*) enc->name[0]; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) + { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding",G_DISCARD); + SvREFCNT_dec(sv); +} + +MODULE = Encode::$name PACKAGE = Encode::$name +PROTOTYPES: DISABLE +BOOT: +{ +END + foreach my $table (sort keys %tables) { + print XS qq[#include "${table}.exh"\n]; + } + print XS "}\n"; + close(XS); + return "# Built $name.xs\n\n"; +} + +sub postamble +{ + my $self = shift; + my $dir = $self->catdir($self->updir,'ucm'); + my $str = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n"; + $str .= "$name.c : $name.xs "; + foreach my $table (sort keys %tables) + { + $str .= " $table.c"; + } + $str .= "\n\n"; + $str .= "$name\$(OBJ_EXT) : $name.c\n\n"; + + my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs'); + foreach my $table (sort keys %tables) + { + my $numlines = 1; + my $lengthsofar = length($str); + my $continuator = ''; + $str .= "$table.c : $enc2xs Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : ''; + $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform; + my $ucopts = '-"Q"'; + $str .= + qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n}; + open (FILELIST, ">$table.fnm") + || die "Could not open $table.fnm: $!"; + foreach my $file (@{$tables{$table}}) + { + print FILELIST $self->catfile($dir,$file) . 
"\n"; + } + close(FILELIST); + } + return $str; +} + diff --git a/TW/TW.pm b/TW/TW.pm new file mode 100644 index 0000000..c30499b --- /dev/null +++ b/TW/TW.pm @@ -0,0 +1,75 @@ +package Encode::TW; +BEGIN { + if ( ord("A") == 193 ) { + die "Encode::TW not supported on EBCDIC\n"; + } +} +use strict; +use warnings; +use Encode; +our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +use XSLoader; +XSLoader::load( __PACKAGE__, $VERSION ); + +1; +__END__ + +=head1 NAME + +Encode::TW - Taiwan-based Chinese Encodings + +=head1 SYNOPSIS + + use Encode qw/encode decode/; + $big5 = encode("big5", $utf8); # loads Encode::TW implicitly + $utf8 = decode("big5", $big5); # ditto + +=head1 DESCRIPTION + +This module implements traditional Chinese charset encodings as used +in Taiwan and Hong Kong. +Encodings supported are as follows. + + Canonical Alias Description + -------------------------------------------------------------------- + big5-eten /\bbig-?5$/i Big5 encoding (with ETen extensions) + /\bbig5-?et(en)?$/i + /\btca-?big5$/i + big5-hkscs /\bbig5-?hk(scs)?$/i + /\bhk(scs)?-?big5$/i + Big5 + Cantonese characters in Hong Kong + MacChineseTrad Big5 + Apple Vendor Mappings + cp950 Code Page 950 + = Big5 + Microsoft vendor mappings + -------------------------------------------------------------------- + +To find out how to use this module in detail, see L<Encode>. + +=head1 NOTES + +Due to size concerns, C<EUC-TW> (Extended Unix Character), C<CCCII> +(Chinese Character Code for Information Interchange), C<BIG5PLUS> +(CMEX's Big5+) and C<BIG5EXT> (CMEX's Big5e) are distributed separately +on CPAN, under the name L<Encode::HanExtra>. That module also contains +extra China-based encodings. + +=head1 BUGS + +Since the original C<big5> encoding (1984) is not supported anywhere +(glibc and DOS-based systems use C<big5> to mean C<big5-eten>; Microsoft +uses C<big5> to mean C<cp950>), a conscious decision was made to alias +C<big5> to C<big5-eten>, which is the de facto superset of the original +big5. 
+ +The C<CNS11643> encoding files are not complete. 
For common C<CNS11643> +manipulation, please use C<EUC-TW> in L<Encode::HanExtra>, which contains +planes 1-7. + +The ASCII region (0x00-0x7f) is preserved for all encodings, even +though this conflicts with mappings by the Unicode Consortium. + +=head1 SEE ALSO + +L<Encode> + +=cut diff --git a/Unicode/Makefile.PL b/Unicode/Makefile.PL new file mode 100644 index 0000000..b28d16b --- /dev/null +++ b/Unicode/Makefile.PL @@ -0,0 +1,11 @@ +use 5.7.2; +use strict; +use ExtUtils::MakeMaker; + +WriteMakefile( + INC => "-I../Encode", + NAME => 'Encode::Unicode', + VERSION_FROM => "Unicode.pm", + MAN3PODS => {}, + ); + diff --git a/Unicode/Unicode.pm b/Unicode/Unicode.pm new file mode 100644 index 0000000..2a8b477 --- /dev/null +++ b/Unicode/Unicode.pm @@ -0,0 +1,272 @@ +package Encode::Unicode; + +use strict; +use warnings; + +our $VERSION = do { my @r = ( q$Revision: 2.17 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; + +use XSLoader; +XSLoader::load( __PACKAGE__, $VERSION ); + +# +# Object Generator 8 transcoders all at once! +# + +use Encode (); + +our %BOM_Unknown = map { $_ => 1 } qw(UTF-16 UTF-32); + +for my $name ( + qw(UTF-16 UTF-16BE UTF-16LE + UTF-32 UTF-32BE UTF-32LE + UCS-2BE UCS-2LE) + ) +{ + my ( $size, $endian, $ucs2, $mask ); + $name =~ /^(\w+)-(\d+)(\w*)$/o; + if ( $ucs2 = ( $1 eq 'UCS' ) ) { + $size = 2; + } + else { + $size = $2 / 8; + } + $endian = ( $3 eq 'BE' ) ? 'n' : ( $3 eq 'LE' ) ? 'v' : ''; + $size == 4 and $endian = uc($endian); + + my $obj = bless { + Name => $name, + size => $size, + endian => $endian, + ucs2 => $ucs2, + } => __PACKAGE__; + Encode::define_encoding($obj, $name); +} + +use parent qw(Encode::Encoding); + +sub renew { + my $self = shift; + $BOM_Unknown{ $self->name } or return $self; + my $clone = bless {%$self} => ref($self); + $clone->{renewed}++; # so the caller knows it is renewed. 
+ return $clone; +} + +1; +__END__ + +=head1 NAME + +Encode::Unicode -- Various Unicode Transformation Formats + +=cut + +=head1 SYNOPSIS + + use Encode qw/encode decode/; + $ucs2 = encode("UCS-2BE", $utf8); + $utf8 = decode("UCS-2BE", $ucs2); + +=head1 ABSTRACT + +This module implements all Character Encoding Schemes of Unicode that +are officially documented by Unicode Consortium (except, of course, +for UTF-8, which is a native format in perl). + +=over 4 + +=item L<http://www.unicode.org/glossary/> says: + +I<Character Encoding Scheme> A character encoding form plus byte +serialization. There are Seven character encoding schemes in Unicode: +UTF-8, UTF-16, UTF-16BE, UTF-16LE, UTF-32 (UCS-4), UTF-32BE (UCS-4BE) and +UTF-32LE (UCS-4LE), and UTF-7. + +Since UTF-7 is a 7-bit (re)encoded version of UTF-16BE, it is not part of +Unicode's Character Encoding Scheme. It is separately implemented in +Encode::Unicode::UTF7. For details see L<Encode::Unicode::UTF7>. + +=item Quick Reference + + Decodes from ord(N) Encodes chr(N) to... + octet/char BOM S.P d800-dfff ord > 0xffff \x{1abcd} == + ---------------+-----------------+------------------------------ + UCS-2BE 2 N N is bogus Not Available + UCS-2LE 2 N N bogus Not Available + UTF-16 2/4 Y Y is S.P S.P BE/LE + UTF-16BE 2/4 N Y S.P S.P 0xd82a,0xdfcd + UTF-16LE 2/4 N Y S.P S.P 0x2ad8,0xcddf + UTF-32 4 Y - is bogus As is BE/LE + UTF-32BE 4 N - bogus As is 0x0001abcd + UTF-32LE 4 N - bogus As is 0xcdab0100 + UTF-8 1-4 - - bogus >= 4 octets \xf0\x9a\xaf\x8d + ---------------+-----------------+------------------------------ + +=back + +=head1 Size, Endianness, and BOM + +You can categorize these CES by 3 criteria: size of each character, +endianness, and Byte Order Mark. + +=head2 by size + +UCS-2 is a fixed-length encoding with each character taking 16 bits. +It B<does not> support I<surrogate pairs>. When a surrogate pair +is encountered during decode(), its place is filled with \x{FFFD} +if I<CHECK> is 0, or the routine croaks if I<CHECK> is 1. 
When a +character whose ord value is larger than 0xFFFF is encountered, +its place is filled with \x{FFFD} if I<CHECK> is 0, or the routine +croaks if I<CHECK> is 1. + +UTF-16 is almost the same as UCS-2 but it supports I<surrogate pairs>. +When it encounters a high surrogate (0xD800-0xDBFF), it fetches the +following low surrogate (0xDC00-0xDFFF) and C<desurrogate>s them to +form a character. Bogus surrogates result in death. When \x{10000} +or above is encountered during encode(), it C<ensurrogate>s them and +pushes the surrogate pair to the output stream. + +UTF-32 (UCS-4) is a fixed-length encoding with each character taking 32 bits. +Since it is 32-bit, there is no need for I<surrogate pairs>. + +=head2 by endianness + +The first (and now failed) goal of Unicode was to map all character +repertoires into a fixed-length integer so that programmers are happy. +Since each character is either a I<short> or I<long> in C, you have to +pay attention to the endianness of each platform when you pass data +to one another. + +Anything marked as BE is Big Endian (or network byte order) and LE is +Little Endian (aka VAX byte order). For anything not marked either +BE or LE, a character called Byte Order Mark (BOM) indicating the +endianness is prepended to the string. + +CAVEAT: Though BOM in utf8 (\xEF\xBB\xBF) is valid, it is meaningless +and as of this writing the Encode suite just leaves it as is (\x{FeFF}). + +=over 4 + +=item BOM as integer when fetched in network byte order + + 16 32 bits/char + ------------------------- + BE 0xFeFF 0x0000FeFF + LE 0xFFFe 0xFFFe0000 + ------------------------- + +=back + +This module handles the BOM as follows. + +=over 4 + +=item * + +When BE or LE is explicitly stated as the name of encoding, BOM is +simply treated as a normal character (ZERO WIDTH NO-BREAK SPACE). + +=item * + +When BE or LE is omitted during decode(), it checks if BOM is at the +beginning of the string; if one is found, the endianness is set to +what the BOM says. 
+ +=item * + +Default Byte Order + +When no BOM is found, Encode 2.76 and below croaked. 
Since Encode +2.77, it falls back to BE according to RFC2781 and the Unicode +Standard version 8.0. + +=item * + +When BE or LE is omitted during encode(), it returns a BE-encoded +string with BOM prepended. So when you want to encode a whole text +file, make sure you encode() the whole text at once, not line by line +or each line, not file, will have a BOM prepended. + +=item * + +C<UCS-2> is an exception. Unlike others, this is an alias of UCS-2BE. +UCS-2 is already registered by IANA and others that way. + +=back + +=head1 Surrogate Pairs + +To say the least, surrogate pairs were the biggest mistake of the +Unicode Consortium. But according to the late Douglas Adams in I<The +Hitchhiker's Guide to the Galaxy> Trilogy, C<In the beginning the +Universe was created. This has made a lot of people very angry and +been widely regarded as a bad move>. Their mistake was not of this +magnitude so let's forgive them. + +(I don't dare make any comparison with Unicode Consortium and the +Vogons here ;) Or, comparing Encode to Babel Fish is completely +appropriate -- if you can only stick this into your ear :) + +Surrogate pairs were born when the Unicode Consortium finally +admitted that 16 bits were not big enough to hold all the world's +character repertoires. But they already made UCS-2 16-bit. What +do we do? + +Back then, the range 0xD800-0xDFFF was not allocated. Let's split +that range in half and use the first half to represent the C<upper +half of a character> and the second half to represent the C<lower +half of a character>. That way, you can represent 1024 * 1024 = +1048576 more characters. Now we can store character ranges up to +\x{10ffff} even with 16-bit encodings. This pair of half-characters is +now called a I<surrogate pair> and UTF-16 is the name of the encoding +that embraces them. 
+ +Here is a formula to ensurrogate a Unicode character \x{10000} and +above; + + $hi = ($uni - 0x10000) / 0x400 + 0xD800; + $lo = ($uni - 0x10000) % 0x400 + 0xDC00; + +And to desurrogate; + + $uni = 0x10000 + ($hi - 0xD800) * 0x400 + ($lo - 0xDC00); + +Note this move has made \x{D800}-\x{DFFF} into a forbidden zone but +perl does not prohibit the use of characters within this range. 
To perl, +every one of \x{0000_0000} up to \x{ffff_ffff} (*) is I. + + (*) or \x{ffff_ffff_ffff_ffff} if your perl is compiled with 64-bit + integer support! + +=head1 Error Checking + +Unlike most encodings which accept various ways to handle errors, +Unicode encodings simply croaks. + + % perl -MEncode -e'$_ = "\xfe\xff\xd8\xd9\xda\xdb\0\n"' \ + -e'Encode::from_to($_, "utf16","shift_jis", 0); print' + UTF-16:Malformed LO surrogate d8d9 at /path/to/Encode.pm line 184. + % perl -MEncode -e'$a = "BOM missing"' \ + -e' Encode::from_to($a, "utf16", "shift_jis", 0); print' + UTF-16:Unrecognised BOM 424f at /path/to/Encode.pm line 184. + +Unlike other encodings where mappings are not one-to-one against +Unicode, UTFs are supposed to map 100% against one another. So Encode +is more strict on UTFs. + +Consider that "division by zero" of Encode :) + +=head1 SEE ALSO + +L, L, L, +L, + +RFC 2781 L, + +The whole Unicode standard L + +Ch. 15, pp. 403 of C +by Larry Wall, Tom Christiansen, Jon Orwant; +O'Reilly & Associates; ISBN 0-596-00027-8 + +=cut diff --git a/Unicode/Unicode.xs b/Unicode/Unicode.xs new file mode 100644 index 0000000..b459786 --- /dev/null +++ b/Unicode/Unicode.xs @@ -0,0 +1,471 @@ +/* + $Id: Unicode.xs,v 2.17 2018/02/08 00:26:15 dankogai Exp $ + */ + +#define PERL_NO_GET_CONTEXT +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" +#include "../Encode/encode.h" + +#define FBCHAR 0xFFFd +#define BOM_BE 0xFeFF +#define BOM16LE 0xFFFe +#define BOM32LE 0xFFFe0000 +#define issurrogate(x) (0xD800 <= (x) && (x) <= 0xDFFF ) +#define isHiSurrogate(x) (0xD800 <= (x) && (x) < 0xDC00 ) +#define isLoSurrogate(x) (0xDC00 <= (x) && (x) <= 0xDFFF ) +#define invalid_ucs2(x) ( issurrogate(x) || 0xFFFF < (x) ) + +/* For pre-5.14 source compatibility */ +#ifndef UNICODE_WARN_ILLEGAL_INTERCHANGE +# define UNICODE_WARN_ILLEGAL_INTERCHANGE 0 +# define UTF8_DISALLOW_SURROGATE 0 +# define UTF8_WARN_SURROGATE 0 +# define UTF8_DISALLOW_FE_FF 0 +# define UTF8_WARN_FE_FF 0 +# 
define UTF8_WARN_NONCHAR 0 +#endif + +#define PERLIO_BUFSIZ 1024 /* XXX value comes from PerlIOEncode_get_base */ + +/* Avoid wasting too much space in the result buffer */ +/* static void */ +/* shrink_buffer(SV *result) */ +/* { */ +/* if (SvLEN(result) > 42 + SvCUR(result)) { */ +/* char *buf; */ +/* STRLEN len = 1 + SvCUR(result); /\* include the NUL byte *\/ */ +/* New(0, buf, len, char); */ +/* Copy(SvPVX(result), buf, len, char); */ +/* Safefree(SvPVX(result)); */ +/* SvPV_set(result, buf); */ +/* SvLEN_set(result, len); */ +/* } */ +/* } */ + +#define shrink_buffer(result) { \ + if (SvLEN(result) > 42 + SvCUR(result)) { \ + char *newpv; \ + STRLEN newlen = 1 + SvCUR(result); /* include the NUL byte */ \ + New(0, newpv, newlen, char); \ + Copy(SvPVX(result), newpv, newlen, char); \ + Safefree(SvPVX(result)); \ + SvPV_set(result, newpv); \ + SvLEN_set(result, newlen); \ + } \ +} + +static UV +enc_unpack(pTHX_ U8 **sp, U8 *e, STRLEN size, U8 endian) +{ + U8 *s = *sp; + UV v = 0; + if (s+size > e) { + croak("Partial character %c",(char) endian); + } + switch(endian) { + case 'N': + v = *s++; + v = (v << 8) | *s++; + case 'n': + v = (v << 8) | *s++; + v = (v << 8) | *s++; + break; + case 'V': + case 'v': + v |= *s++; + v |= (*s++ << 8); + if (endian == 'v') + break; + v |= (*s++ << 16); + v |= ((UV)*s++ << 24); + break; + default: + croak("Unknown endian %c",(char) endian); + break; + } + *sp = s; + return v; +} + +static void +enc_pack(pTHX_ SV *result, STRLEN size, U8 endian, UV value) +{ + U8 *d = (U8 *) SvPV_nolen(result); + + switch(endian) { + case 'v': + case 'V': + d += SvCUR(result); + SvCUR_set(result,SvCUR(result)+size); + while (size--) { + *d++ = (U8)(value & 0xFF); + value >>= 8; + } + break; + case 'n': + case 'N': + SvCUR_set(result,SvCUR(result)+size); + d += SvCUR(result); + while (size--) { + *--d = (U8)(value & 0xFF); + value >>= 8; + } + break; + default: + croak("Unknown endian %c",(char) endian); + break; + } +} + +MODULE = Encode::Unicode 
PACKAGE = Encode::Unicode + +PROTOTYPES: DISABLE + +#define attr(k, l) (hv_exists((HV *)SvRV(obj),k,l) ? \ + *hv_fetch((HV *)SvRV(obj),k,l,0) : &PL_sv_undef) + +void +decode(obj, str, check = 0) +SV * obj +SV * str +IV check +CODE: +{ + SV *sve = attr("endian", 6); + U8 endian = *((U8 *)SvPV_nolen(sve)); + SV *svs = attr("size", 4); + int size = SvIV(svs); + int ucs2 = -1; /* only needed in the event of surrogate pairs */ + SV *result = newSVpvn("",0); + STRLEN usize = (size > 0 ? size : 1); /* protect against rogue size<=0 */ + STRLEN ulen; + STRLEN resultbuflen; + U8 *resultbuf; + U8 *s; + U8 *e; + bool modify = (check && !(check & ENCODE_LEAVE_SRC)); + bool temp_result; + + SvGETMAGIC(str); + if (!SvOK(str)) + XSRETURN_UNDEF; + s = modify ? (U8 *)SvPV_force_nomg(str, ulen) : (U8 *)SvPV_nomg(str, ulen); + if (SvUTF8(str)) { + if (!modify) { + SV *tmp = sv_2mortal(newSVpvn((char *)s, ulen)); + SvUTF8_on(tmp); + if (SvTAINTED(str)) + SvTAINTED_on(tmp); + str = tmp; + s = (U8 *)SvPVX(str); + } + if (ulen) { + if (!utf8_to_bytes(s, &ulen)) + croak("Wide character"); + SvCUR_set(str, ulen); + } + SvUTF8_off(str); + } + e = s+ulen; + + /* Optimise for the common case of being called from PerlIOEncode_fill() + with a standard length buffer. In this case the result SV's buffer is + only used temporarily, so we can afford to allocate the maximum needed + and not care about unused space. */ + temp_result = (ulen == PERLIO_BUFSIZ); + + ST(0) = sv_2mortal(result); + SvUTF8_on(result); + + if (!endian && s+size <= e) { + SV *sv; + UV bom; + endian = (size == 4) ? 'N' : 'n'; + bom = enc_unpack(aTHX_ &s,e,size,endian); + if (bom != BOM_BE) { + if (bom == BOM16LE) { + endian = 'v'; + } + else if (bom == BOM32LE) { + endian = 'V'; + } + else { + /* No BOM found, use big-endian fallback as specified in + * RFC2781 and the Unicode Standard version 8.0: + * + * The UTF-16 encoding scheme may or may not begin with + * a BOM. 
However, when there is no BOM, and in the + * absence of a higher-level protocol, the byte order + * of the UTF-16 encoding scheme is big-endian. + * + * If the first two octets of the text is not 0xFE + * followed by 0xFF, and is not 0xFF followed by 0xFE, + * then the text SHOULD be interpreted as big-endian. + */ + s -= size; + } + } +#if 1 + /* Update endian for next sequence */ + sv = attr("renewed", 7); + if (SvTRUE(sv)) { + (void)hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0); + } +#endif + } + + if (temp_result) { + resultbuflen = 1 + ulen/usize * UTF8_MAXLEN; + } else { + /* Preallocate the buffer to the minimum possible space required. */ + resultbuflen = ulen/usize + UTF8_MAXLEN + 1; + } + resultbuf = (U8 *) SvGROW(result, resultbuflen); + + while (s < e && s+size <= e) { + UV ord = enc_unpack(aTHX_ &s,e,size,endian); + U8 *d; + if (issurrogate(ord)) { + if (ucs2 == -1) { + SV *sv = attr("ucs2", 4); + ucs2 = SvTRUE(sv); + } + if (ucs2 || size == 4) { + if (check) { + croak("%" SVf ":no surrogates allowed %" UVxf, + *hv_fetch((HV *)SvRV(obj),"Name",4,0), + ord); + } + ord = FBCHAR; + } + else { + UV lo; + if (!isHiSurrogate(ord)) { + if (check) { + croak("%" SVf ":Malformed HI surrogate %" UVxf, + *hv_fetch((HV *)SvRV(obj),"Name",4,0), + ord); + } + else { + ord = FBCHAR; + } + } + else if (s+size > e) { + if (check) { + if (check & ENCODE_STOP_AT_PARTIAL) { + s -= size; + break; + } + else { + croak("%" SVf ":Malformed HI surrogate %" UVxf, + *hv_fetch((HV *)SvRV(obj),"Name",4,0), + ord); + } + } + else { + ord = FBCHAR; + } + } + else { + lo = enc_unpack(aTHX_ &s,e,size,endian); + if (!isLoSurrogate(lo)) { + if (check) { + croak("%" SVf ":Malformed LO surrogate %" UVxf, + *hv_fetch((HV *)SvRV(obj),"Name",4,0), + ord); + } + else { + s -= size; + ord = FBCHAR; + } + } + else { + ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00); + } + } + } + } + + if ((ord & 0xFFFE) == 0xFFFE || (ord >= 0xFDD0 && ord <= 0xFDEF)) { + if (check) { 
+ croak("%" SVf ":Unicode character %" UVxf " is illegal", + *hv_fetch((HV *)SvRV(obj),"Name",4,0), + ord); + } else { + ord = FBCHAR; + } + } + + if (resultbuflen < SvCUR(result) + UTF8_MAXLEN + 1) { + /* Do not allocate >8Mb more than the minimum needed. + This prevents allocating too much in the rogue case of a large + input consisting initially of long sequence uft8-byte unicode + chars followed by single utf8-byte chars. */ + /* +1 + fixes Unicode.xs!decode_xs n-byte heap-overflow + */ + STRLEN remaining = (e - s)/usize + 1; /* +1 to avoid the leak */ + STRLEN max_alloc = remaining + (8*1024*1024); + STRLEN est_alloc = remaining * UTF8_MAXLEN; + STRLEN newlen = SvLEN(result) + /* min(max_alloc, est_alloc) */ + (est_alloc > max_alloc ? max_alloc : est_alloc); + resultbuf = (U8 *) SvGROW(result, newlen); + resultbuflen = SvLEN(result); + } + + d = uvchr_to_utf8_flags(resultbuf+SvCUR(result), ord, + UNICODE_WARN_ILLEGAL_INTERCHANGE); + SvCUR_set(result, d - (U8 *)SvPVX(result)); + } + + if (s < e) { + /* unlikely to happen because it's fixed-length -- dankogai */ + if (check & ENCODE_WARN_ON_ERR) { + Perl_warner(aTHX_ packWARN(WARN_UTF8),"%" SVf ":Partial character", + *hv_fetch((HV *)SvRV(obj),"Name",4,0)); + } + } + if (check && !(check & ENCODE_LEAVE_SRC)) { + if (s < e) { + Move(s,SvPVX(str),e-s,U8); + SvCUR_set(str,(e-s)); + } + else { + SvCUR_set(str,0); + } + *SvEND(str) = '\0'; + SvSETMAGIC(str); + } + + if (!temp_result) shrink_buffer(result); + if (SvTAINTED(str)) SvTAINTED_on(result); /* propagate taintedness */ + XSRETURN(1); +} + +void +encode(obj, utf8, check = 0) +SV * obj +SV * utf8 +IV check +CODE: +{ + SV *sve = attr("endian", 6); + U8 endian = *((U8 *)SvPV_nolen(sve)); + SV *svs = attr("size", 4); + const int size = SvIV(svs); + int ucs2 = -1; /* only needed if there is invalid_ucs2 input */ + const STRLEN usize = (size > 0 ? 
size : 1); + SV *result = newSVpvn("", 0); + STRLEN ulen; + U8 *s; + U8 *e; + bool modify = (check && !(check & ENCODE_LEAVE_SRC)); + bool temp_result; + + SvGETMAGIC(utf8); + if (!SvOK(utf8)) + XSRETURN_UNDEF; + s = modify ? (U8 *)SvPV_force_nomg(utf8, ulen) : (U8 *)SvPV_nomg(utf8, ulen); + if (!SvUTF8(utf8)) { + if (!modify) { + SV *tmp = sv_2mortal(newSVpvn((char *)s, ulen)); + if (SvTAINTED(utf8)) + SvTAINTED_on(tmp); + utf8 = tmp; + } + sv_utf8_upgrade_nomg(utf8); + s = (U8 *)SvPV_nomg(utf8, ulen); + } + e = s+ulen; + + /* Optimise for the common case of being called from PerlIOEncode_flush() + with a standard length buffer. In this case the result SV's buffer is + only used temporarily, so we can afford to allocate the maximum needed + and not care about unused space. */ + temp_result = (ulen == PERLIO_BUFSIZ); + + ST(0) = sv_2mortal(result); + + /* Preallocate the result buffer to the maximum possible size. + ie. assume each UTF8 byte is 1 character. + Then shrink the result's buffer if necesary at the end. */ + SvGROW(result, ((ulen+1) * usize)); + + if (!endian) { + SV *sv; + endian = (size == 4) ? 
'N' : 'n'; + enc_pack(aTHX_ result,size,endian,BOM_BE); +#if 1 + /* Update endian for next sequence */ + sv = attr("renewed", 7); + if (SvTRUE(sv)) { + (void)hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0); + } +#endif + } + while (s < e && s+UTF8SKIP(s) <= e) { + STRLEN len; + UV ord = utf8n_to_uvchr(s, e-s, &len, (UTF8_DISALLOW_SURROGATE + |UTF8_WARN_SURROGATE + |UTF8_DISALLOW_FE_FF + |UTF8_WARN_FE_FF + |UTF8_WARN_NONCHAR)); + s += len; + if (size != 4 && invalid_ucs2(ord)) { + if (!issurrogate(ord)) { + if (ucs2 == -1) { + SV *sv = attr("ucs2", 4); + ucs2 = SvTRUE(sv); + } + if (ucs2 || ord > 0x10FFFF) { + if (check) { + croak("%" SVf ":code point \"\\x{%" UVxf "}\" too high", + *hv_fetch((HV *)SvRV(obj),"Name",4,0),ord); + } + enc_pack(aTHX_ result,size,endian,FBCHAR); + } else { + UV hi = ((ord - 0x10000) >> 10) + 0xD800; + UV lo = ((ord - 0x10000) & 0x3FF) + 0xDC00; + enc_pack(aTHX_ result,size,endian,hi); + enc_pack(aTHX_ result,size,endian,lo); + } + } + else { + /* not supposed to happen */ + enc_pack(aTHX_ result,size,endian,FBCHAR); + } + } + else { + enc_pack(aTHX_ result,size,endian,ord); + } + } + if (s < e) { + /* UTF-8 partial char happens often on PerlIO. + Since this is okay and normal, we do not warn. 
+ But this is critical when you choose to LEAVE_SRC + in which case we die */ + if (check & (ENCODE_DIE_ON_ERR|ENCODE_LEAVE_SRC)) { + Perl_croak(aTHX_ "%" SVf ":partial character is not allowed " + "when CHECK = 0x%" UVuf, + *hv_fetch((HV *)SvRV(obj),"Name",4,0), check); + } + } + if (check && !(check & ENCODE_LEAVE_SRC)) { + if (s < e) { + Move(s,SvPVX(utf8),e-s,U8); + SvCUR_set(utf8,(e-s)); + } + else { + SvCUR_set(utf8,0); + } + *SvEND(utf8) = '\0'; + SvSETMAGIC(utf8); + } + + if (!temp_result) shrink_buffer(result); + if (SvTAINTED(utf8)) SvTAINTED_on(result); /* propagate taintedness */ + + XSRETURN(1); +} diff --git a/bin/enc2xs b/bin/enc2xs new file mode 100755 index 0000000..619b64b --- /dev/null +++ b/bin/enc2xs @@ -0,0 +1,1492 @@ +#!./perl +BEGIN { + # @INC poking no longer needed w/ new MakeMaker and Makefile.PL's + # with $ENV{PERL_CORE} set + # In case we need it in future... + require Config; import Config; + pop @INC if $INC[-1] eq '.'; +} +use strict; +use warnings; +use Getopt::Std; +use Config; +my @orig_ARGV = @ARGV; +our $VERSION = do { my @r = (q$Revision: 2.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; + +# These may get re-ordered. +# RAW is a do_now as inserted by &enter +# AGG is an aggregated do_now, as built up by &process + +use constant { + RAW_NEXT => 0, + RAW_IN_LEN => 1, + RAW_OUT_BYTES => 2, + RAW_FALLBACK => 3, + + AGG_MIN_IN => 0, + AGG_MAX_IN => 1, + AGG_OUT_BYTES => 2, + AGG_NEXT => 3, + AGG_IN_LEN => 4, + AGG_OUT_LEN => 5, + AGG_FALLBACK => 6, +}; + +# (See the algorithm in encengine.c - we're building structures for it) + +# There are two sorts of structures. +# "do_now" (an array, two variants of what needs storing) is whatever we need +# to do now we've read an input byte. +# It's housed in a "do_next" (which is how we got to it), and in turn points +# to a "do_next" which contains all the "do_now"s for the next input byte. + +# There will be a "do_next" which is the start state. 
+# For a single byte encoding it's the only "do_next" - each "do_now" points +# back to it, and each "do_now" will cause bytes. There is no state. + +# For a multi-byte encoding where all characters in the input are the same +# length, then there will be a tree of "do_now"->"do_next"->"do_now" +# branching out from the start state, one step for each input byte. +# The leaf "do_now"s will all be at the same distance from the start state, +# only the leaf "do_now"s cause output bytes, and they in turn point back to +# the start state. + +# For an encoding where there are variable length input byte sequences, you +# will encounter a leaf "do_now" sooner for the shorter input sequences, but +# as before the leaves will point back to the start state. + +# The system will cope with escape encodings (imagine them as a mostly +# self-contained tree for each escape state, and cross links between trees +# at the state-switching characters) but so far no input format defines these. + +# The system will also cope with having output "leaves" in the middle of +# the bifurcating branches, not just at the extremities, but again no +# input format does this yet. + +# There are two variants of the "do_now" structure. The first, smaller variant +# is generated by &enter as the input file is read. There is one structure +# for each input byte. Say we are mapping a single byte encoding to a +# single byte encoding, with "ABCD" going "abcd". There will be +# 4 "do_now"s, {"A" => [...,"a",...], "B" => [...,"b",...], "C"=>..., "D"=>...} + +# &process then walks the tree, building aggregate "do_now" structures for +# adjacent bytes where possible. The aggregate is for a contiguous range of +# bytes which each produce the same length of output, each move to the +# same next state, and each have the same fallback flag. +# So our 4 RAW "do_now"s above become replaced by a single structure +# containing: +# ["A", "D", "abcd", 1, ...] 
# ie, for an input byte $_ in "A".."D", output 1 byte, found as
# substr ("abcd", (ord $_ - ord "A") * 1, 1)
# which maps very nicely into pointer arithmetic in C for encengine.c

# The encode_* helpers turn one table entry into the byte string that
# represents it.  Every helper is called as func($ch, $page); the arguments
# are read straight from @_ (no copy) because these run once per table slot.

# encode_U($code_point)
# Returns the UTF-8 byte string for a Unicode code point.
sub encode_U {
    # The table holds Unicode values but chr() works in the native
    # character set, so translate first, then let perl produce the bytes.
    my $char = chr( utf8::unicode_to_native( $_[0] ) );
    utf8::encode($char);
    return $char;
}

# encode_S($ch, $page)
# Single byte encoding: the page is ignored.
sub encode_S {
    return chr( $_[0] );
}

# encode_D($ch, $page)
# Double byte encoding, most significant (page) byte first.
sub encode_D {
    return chr( $_[1] ) . chr( $_[0] );
}

# encode_M($ch, $page)
# Multi-byte encoding: one byte when the page is 0, otherwise two bytes
# with the page byte first.
sub encode_M {
    return $_[1]
        ? chr( $_[1] ) . chr( $_[0] )
        : chr( $_[0] );
}

# Dispatch table from the type letter of a .enc file to its encoder.
my %encode_types = (
    U => \&encode_U,
    S => \&encode_S,
    D => \&encode_D,
    M => \&encode_M,
);

# Win32 does not expand globs on command line.  The expansion is kept in a
# string eval so that glob() is only compiled when it is really needed.
if ( $^O eq 'MSWin32' and !$ENV{PERL_CORE} ) {
    eval q{@ARGV = map(glob($_),@ARGV)};
    @ARGV = @orig_ARGV unless @ARGV;
}

my %opt;
# Command line switches, as far as can be told:
#  -Q to disable the duplicate codepoint test
#  -S make mapping errors fatal
#  -q to remove comments written to output files
#  -O to enable the (brute force) substring optimiser
#  -o to specify the output file name (else it's the first arg)
#  -f to give a file with a list of input files (else use the args)
#  -n to name the encoding (else use the basename of the input file.
#Getopt::Long::Configure("bundling");
#GetOptions(\%opt, qw(C M=s S Q q O o=s f=s n=s v));
getopts('CM:SQqOo:f:n:v',\%opt);

# -M and -C are self-contained modes: both helpers exit when they are done.
make_makefile_pl($opt{M}, @ARGV)     if $opt{M};
make_configlocal_pm($opt{C}, @ARGV)  if $opt{C};
$opt{v} = $ENV{ENC2XS_VERBOSE} unless $opt{v};

# verbose(LIST) - print LIST to STDERR, but only in verbose (-v) mode.
sub verbose {
    $opt{v} and print STDERR @_;
}

# verbosef(FORMAT, LIST) - printf to STDERR, but only in verbose (-v) mode.
sub verbosef {
    $opt{v} and printf STDERR @_;
}


# ($cpp, $static, $sized) = compiler_info($declaration)
#
# Report how the generated tables have to be declared for the compiler and
# compile options perl itself was built with (taken from %Config):
#
# $declaration - true when emitting a declaration rather than a definition.
#
# $cpp    - the compiler is a C++ compiler
# $static - the arrays may be declared static
# $sized  - the array declarations should carry their size

sub compiler_info {
    my ($declaration) = @_;

    my $flags = $Config{ccflags};
    $flags .= " " . $Config{ccwarnflags} if defined $Config{ccwarnflags};

    my $cxx_compat  = $flags =~ /\Q-Wc++-compat/;
    my $is_pedantic = $flags =~ /-pedantic/;
    my $cpp         = ($Config{d_cplusplus} || '') eq 'define';

    # The encpage_t tables contain recursive and mutually recursive
    # references.  C++ and some restrictive cc options reject those for
    # static/const tables, so in these cases the tables are made
    # non-static (moving them from the text to the data segment).
    my $needs_extern = $cpp
        || ($cxx_compat && $is_pedantic)
        || ($^O eq 'MacOS' && $declaration);
    my $static = !$needs_extern;

    # -Wc++-compat on its own warns about a sized array declaration, so the
    # size is simply left out then.  Together with -pedantic the question
    # does not arise because $static is already false.
    my $sized = $declaration && !($cxx_compat && !$is_pedantic);

    return ($cpp, $static, $sized);
}


# This really should go first, else the die here causes empty (non-erroneous)
# output files to be written.
# ---------------------------------------------------------------------------
# Main program.
#
# 1. Collect the input files (from -f LISTFILE or from the command line).
# 2. Open the output file; its suffix selects the output mode
#    (.c/.xs => compiled C tables plus .exh/.h companions, .enc, .ucm, .pet).
# 3. Compile every input file into %encoding.
# 4. Write the output in the selected mode.
# ---------------------------------------------------------------------------
my @encfiles;
if (exists $opt{f}) {
    # -f is followed by name of file containing list of filenames
    my $flist = $opt{f};
    open(FLIST, '<', $flist) || die "Cannot open $flist:$!";
    chomp(@encfiles = <FLIST>);
    close(FLIST);
} else {
    @encfiles = @ARGV;
}

my $cname = $opt{o} ? $opt{o} : shift(@ARGV);
unless ($cname) { #debugging a win32 nmake error-only. works via cmdline
    print "\nARGV:";
    print "$_ " for @ARGV;
    print "\nopt:";
    print " $_ => ",defined $opt{$_}?$opt{$_}:"undef","\n" for keys %opt;
}
chmod(0666,$cname) if -f $cname && !-w $cname;
open(C,">", $cname) || die "Cannot open $cname:$!";

my $dname = $cname;
my $hname = $cname;

my ($doC,$doEnc,$doUcm,$doPet);

if ($cname =~ /\.(c|xs)$/i) # VMS may have upcased filenames with DECC$ARGV_PARSE_STYLE defined
{
    $doC = 1;
    # Companion files: NAME.exh (boot code to #include) and NAME.h.
    # Make an existing read-only companion writable before reopening it;
    # the test is on the companion itself, not on $cname.
    $dname =~ s/(\.[^\.]*)?$/.exh/;
    chmod(0666,$dname) if -f $dname && !-w $dname;
    open(D,">", $dname) || die "Cannot open $dname:$!";
    $hname =~ s/(\.[^\.]*)?$/.h/;
    chmod(0666,$hname) if -f $hname && !-w $hname;
    open(H,">", $hname) || die "Cannot open $hname:$!";

    foreach my $fh (\*C,\*D,\*H)
    {
        print $fh <<"END" unless $opt{'q'};
/*
 !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
 This file was autogenerated by:
 $^X $0 @orig_ARGV
 enc2xs VERSION $VERSION
*/
END
    }

    if ($cname =~ /(\w+)\.xs$/)
    {
        print C "#define PERL_NO_GET_CONTEXT\n";
        print C "#include <EXTERN.h>\n";
        print C "#include <perl.h>\n";
        print C "#include <XSUB.h>\n";
    }
    print C "#include \"encode.h\"\n\n";

}
elsif ($cname =~ /\.enc$/)
{
    $doEnc = 1;
}
elsif ($cname =~ /\.ucm$/)
{
    $doUcm = 1;
}
elsif ($cname =~ /\.pet$/)
{
    $doPet = 1;
}

# Shared state, filled by the compile_*/process/addstrings/outbigstring subs.
my %encoding;        # name => [$e2u, $u2e, $rep, $min_el, $max_el]
my %strings;         # set of all output byte strings
my $string_acc;      # the one big string all output strings live in
my %strings_in_acc;  # output string => offset in $string_acc

my $saved = 0;       # bytes saved by spotting duplicates
my $subsave = 0;     # bytes saved by overlapping substrings (-O)
my $strings = 0;     # final size of the string table

# Sort comparator: names that contain "-<digits>" are ordered by that number
# first (so iso-8859-2 sorts before iso-8859-10), then by plain string order.
sub cmp_name
{
    if ($a =~ /^.*-(\d+)/)
    {
        my $an = $1;
        if ($b =~ /^.*-(\d+)/)
        {
            my $r = $an <=> $1;
            return $r if $r;
        }
    }
    return $a cmp $b;
}


foreach my $enc (sort cmp_name @encfiles)
{
    my ($name,$sfx) = $enc =~ /^.*?([\w-]+)\.(enc|ucm)$/;
    $name = $opt{'n'} if exists $opt{'n'};
    # Three-argument open: the file name can never be taken for a mode.
    if (open(E, '<', $enc))
    {
        # Anything that is not a .enc file is parsed as UCM.
        if (defined $sfx && $sfx eq 'enc')
        {
            compile_enc(\*E,lc($name));
        }
        else
        {
            compile_ucm(\*E,lc($name));
        }
    }
    else
    {
        warn "Cannot open $enc for $name:$!";
    }
}

if ($doC)
{
    verbose "Writing compiled form\n";
    foreach my $name (sort cmp_name keys %encoding)
    {
        my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
        process($name.'_utf8',$e2u);
        addstrings(\*C,$e2u);

        process('utf8_'.$name,$u2e);
        addstrings(\*C,$u2e);
    }
    outbigstring(\*C,"enctable");
    foreach my $name (sort cmp_name keys %encoding)
    {
        my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
        outtable(\*C,$e2u, "enctable");
        outtable(\*C,$u2e, "enctable");

        # push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep));
    }
    my ($cpp) = compiler_info(0);
    my $ext  = $cpp ? 'extern "C"' : "extern";
    my $exta = $cpp ? 'extern "C"' : "static";
    my $extb = $cpp ? 'extern "C"' : "";
    foreach my $enc (sort cmp_name keys %encoding)
    {
        # my ($e2u,$u2e,$rep,$min_el,$max_el,$rsym) = @{$encoding{$enc}};
        my ($e2u,$u2e,$rep,$min_el,$max_el) = @{$encoding{$enc}};
        #my @info = ($e2u->{Cname},$u2e->{Cname},$rsym,length($rep),$min_el,$max_el);

        # Length of the replacement character in bytes: one per "\xHH"
        # escape.  The hex digits need the '+' quantifier; without it the
        # match stops inside the escape and \G can never match again, so a
        # multi-byte replacement would always be counted as one byte.
        my $replen = 0;
        $replen++ while($rep =~ /\G\\x[0-9A-Fa-f]+/g);
        my $sym = "${enc}_encoding";
        $sym =~ s/\W+/_/g;
        my @info = ($e2u->{Cname},$u2e->{Cname},"${sym}_rep_character",$replen,
                    $min_el,$max_el);
        print C "${exta} const U8 ${sym}_rep_character[] = \"$rep\";\n";
        print C "${exta} const char ${sym}_enc_name[] = \"$enc\";\n\n";
        print C "${extb} const encode_t $sym = \n";
        # This is to make null encoding work -- dankogai
        $_ ||= 1 for @info;
        # end of null tweak -- dankogai
        print C " {",join(',',@info,"{${sym}_enc_name,(const char *)0}"),"};\n\n";
    }

    foreach my $enc (sort cmp_name keys %encoding)
    {
        my $sym = "${enc}_encoding";
        $sym =~ s/\W+/_/g;
        print H "${ext} encode_t $sym;\n";
        print D " Encode_XSEncoding(aTHX_ &$sym);\n";
    }

    if ($cname =~ /(\w+)\.xs$/)
    {
        my $mod = $1;
        print C <<'END';

static void
Encode_XSEncoding(pTHX_ encode_t *enc)
{
 dSP;
 HV *stash = gv_stashpv("Encode::XS", TRUE);
 SV *iv = newSViv(PTR2IV(enc));
 SV *sv = sv_bless(newRV_noinc(iv),stash);
 int i = 0;
 /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
 constness, in the hope that perl won't mess with it. */
 assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
 SvFLAGS(iv) |= SVp_POK;
 SvPVX(iv) = (char*) enc->name[0];
 PUSHMARK(sp);
 XPUSHs(sv);
 while (enc->name[i])
  {
   const char *name = enc->name[i++];
   XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
  }
 PUTBACK;
 call_pv("Encode::define_encoding",G_DISCARD);
 SvREFCNT_dec(sv);
}

END

        print C "\nMODULE = Encode::$mod\tPACKAGE = Encode::$mod\n\n";
        print C "BOOT:\n{\n";
        print C "#include \"$dname\"\n";
        print C "}\n";
    }
    # Close in void context is bad, m'kay
    close(D) or warn "Error closing '$dname': $!";
    close(H) or warn "Error closing '$hname': $!";

    # Statistics.  The totals are zero when no table produced any output
    # string, so guard the divisions.
    my $dup_total     = $strings + $saved;
    my $sub_total     = $strings + $subsave;
    my $perc_saved    = $dup_total ? $saved   / $dup_total * 100 : 0;
    my $perc_subsaved = $sub_total ? $subsave / $sub_total * 100 : 0;
    verbosef "%d bytes in string tables\n",$strings;
    verbosef "%d bytes (%.3g%%) saved spotting duplicates\n",
        $saved, $perc_saved if $saved;
    verbosef "%d bytes (%.3g%%) saved using substrings\n",
        $subsave, $perc_subsaved if $subsave;
}
elsif ($doEnc)
{
    foreach my $name (sort cmp_name keys %encoding)
    {
        my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
        output_enc(\*C,$name,$e2u);
    }
}
elsif ($doUcm)
{
    foreach my $name (sort cmp_name keys %encoding)
    {
        my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
        output_ucm(\*C,$name,$u2e,$erep,$min_el,$max_el);
    }
}

# writing half meg files and then not checking to see if you just filled the
# disk is bad, m'kay
close(C) or die "Error closing '$cname': $!";

# End of the main program.

# compile_ucm($fh, $name)
#
# Read a Unicode Character Mapping (.ucm) file from the open handle $fh and
# store the result in $encoding{$name} as
#   [$e2u, $u2e, $subchar, $min_bytes, $max_bytes]
# where $e2u/$u2e are the raw "do_next" trees built by &enter.  The name
# given by <code_set_name> in the file replaces $name unless -n was used.
# Dies on a malformed CHARMAP line, or when only some of the entries carry
# a fallback flag.
sub compile_ucm
{
    my ($fh,$name) = @_;
    my $e2u = {};
    my $u2e = {};
    my $cs;
    my %attr;

    # Header section: "<key> value" lines up to the CHARMAP marker.
    while (<$fh>)
    {
        s/#.*$//;
        last if /^\s*CHARMAP\s*$/i;
        if (/^\s*<(\w+)>\s+"?([^"]*)"?\s*$/i) # " # Grrr
        {
            $attr{$1} = $2;
        }
    }
    if (!defined($cs = $attr{'code_set_name'}))
    {
        warn "No <code_set_name> in $name\n";
    }
    else
    {
        $name = $cs unless exists $opt{'n'};
    }
    my $erep;
    my $max_el;
    my $min_el;
    if (exists $attr{'subchar'})
    {
        # The substitution character is kept in its textual "\xHH" form;
        # it is written verbatim into a C string literal later on.
        #my @byte;
        #$attr{'subchar'} =~ /^\s*/cg;
        #push(@byte,$1) while $attr{'subchar'} =~ /\G\\x([0-9a-f]+)/icg;
        #$erep = join('',map(chr(hex($_)),@byte));
        $erep = $attr{'subchar'};
        $erep =~ s/^\s+//; $erep =~ s/\s+$//;
    }
    print "Reading $name ($cs)\n"
        unless defined $ENV{MAKEFLAGS}
           and $ENV{MAKEFLAGS} =~ /\b(s|silent|quiet)\b/;
    my $nfb = 0;    # entries without a fallback flag
    my $hfb = 0;    # entries with a fallback flag

    # CHARMAP section: "<UXXXX> \xHH... |flag" lines up to END CHARMAP.
    while (<$fh>)
    {
        s/#.*$//;
        last if /^\s*END\s+CHARMAP\s*$/i;
        next if /^\s*$/;
        my (@uni, @byte);
        my ($uni, $byte, $fb) = m/^(\S+)\s+(\S+)\s+(\S+)\s+/o
            or die "Bad line: $_";
        while ($uni =~ m/\G<([U0-9a-fA-F\+]+)>/g){
            push @uni, map { substr($_, 1) } split(/\+/, $1);
        }
        while ($byte =~ m/\G\\x([0-9a-fA-F]+)/g){
            push @byte, $1;
        }
        if (@uni)
        {
            my $uch = join('', map { encode_U(hex($_)) } @uni );
            my $ech = join('',map(chr(hex($_)),@byte));
            my $el  = length($ech);
            $max_el = $el if (!defined($max_el) || $el > $max_el);
            $min_el = $el if (!defined($min_el) || $el < $min_el);
            if (length($fb))
            {
                $fb = substr($fb,1);    # drop the leading '|'
                $hfb++;
            }
            else
            {
                $nfb++;
                $fb = '0';
            }
            # $fb is fallback flag
            # 0 - round trip safe
            # 1 - fallback for unicode -> enc
            # 2 - skip sub-char mapping
            # 3 - fallback enc -> unicode
            enter($u2e,$uch,$ech,$u2e,$fb+0) if ($fb =~ /[01]/);
            enter($e2u,$ech,$uch,$e2u,$fb+0) if ($fb =~ /[03]/);
        }
        else
        {
            warn $_;
        }
    }
    if ($nfb && $hfb)
    {
        die "$nfb entries without fallback, $hfb entries with\n";
    }
    $encoding{$name} = [$e2u,$u2e,$erep,$min_el,$max_el];
}



# compile_enc($fh, $name)
#
# Read a Tcl encoding (.enc) file from the open handle $fh and store the
# result in $encoding{$name} (same layout as compile_ucm).  The file has a
# type line (S, D, M; E is ignored), a "default symbol pages" line, and then
# per page a page number line followed by 16 lines of 16 four-digit hex
# values.  Dies on a badly sized table line, when no line was read, or (with
# -S) when the same code point is mapped twice.
sub compile_enc
{
    my ($fh,$name) = @_;
    my $e2u = {};
    my $u2e = {};

    my $type;
    while ($type = <$fh>)
    {
        last if $type !~ /^\s*#/;
    }
    chomp($type);
    return if $type eq 'E';
    # Do the hash lookup once, rather than once per function call. 4% speedup.
    my $type_func = $encode_types{$type};
    my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>));
    warn "$type encoded $name\n";
    my $rep = '';
    # Save a defined test by setting these to defined values.
    my $min_el = ~0; # A very big integer
    my $max_el = 0;  # Anything must be longer than 0
    {
        my $v = hex($def);
        $rep = $type_func->($v & 0xFF, ($v >> 8) & 0xffe);
    }
    my $errors;
    my $seen;
    # use -Q to silence the seen test. Makefile.PL uses this by default.
    $seen = {} unless $opt{Q};
    do
    {
        my $line = <$fh>;
        chomp($line);
        my $page = hex($line);
        my $ch = 0;
        my $i = 16;
        do
        {
            # So why is it 1% faster to leave the my here?
            my $line = <$fh>;
            $line =~ s/\r\n$/\n/;
            die "$.:${line}Line should be exactly 65 characters long including
 newline (".length($line).")" unless length ($line) == 65;
            # Split line into groups of 4 hex digits, convert groups to ints
            # This takes 65.35
            # map {hex $_} $line =~ /(....)/g
            # This takes 63.75 (2.5% less time)
            # unpack "n*", pack "H*", $line
            # There's an implicit loop in map. Loops are bad, m'kay. Ops are bad, m'kay
            # Doing it as while ($line =~ /(....)/g) took 74.63
            foreach my $val (unpack "n*", pack "H*", $line)
            {
                # U+FFFD marks an unmapped slot.  The slot still occupies
                # a byte value, so $ch is advanced in the continue block.
                next if $val == 0xFFFD;
                my $ech = $type_func->($ch,$page);
                if ($val || (!$ch && !$page))
                {
                    my $el = length($ech);
                    $max_el = $el if $el > $max_el;
                    $min_el = $el if $el < $min_el;
                    my $uch = encode_U($val);
                    if ($seen) {
                        # We're doing the test.
                        # We don't need to read this quickly, so storing it as a scalar,
                        # rather than 3 (anon array, plus the 2 scalars it holds) saves
                        # RAM and may make us faster on low RAM systems. [see __END__]
                        if (exists $seen->{$uch})
                        {
                            warn sprintf("U%04X is %02X%02X and %04X\n",
                                         $val,$page,$ch,$seen->{$uch});
                            $errors++;
                        }
                        else
                        {
                            $seen->{$uch} = $page << 8 | $ch;
                        }
                    }
                    # Passing 2 extra args each time is 3.6% slower!
                    # Even with having to add $fallback ||= 0 later
                    enter_fb0($e2u,$ech,$uch);
                    enter_fb0($u2e,$uch,$ech);
                }
                else
                {
                    # No character at this position
                    # enter($e2u,$ech,undef,$e2u);
                }
            }
            continue
            {
                $ch++;
            }
        } while --$i;
    } while --$pages;
    die "\$min_el=$min_el, \$max_el=$max_el - seems we read no lines"
        if $min_el > $max_el;
    die "$errors mapping conflicts\n" if ($errors && $opt{'S'});
    $encoding{$name} = [$e2u,$u2e,$rep,$min_el,$max_el];
}

# enter($current, $inbytes, $outbytes, $next, $fallback)
#
# Add one mapping to the raw tree rooted at $current: reading $inbytes
# produces $outbytes and moves to state $next (default: $current).
# An empty $inbytes is ignored.
sub enter {
    my ($current,$inbytes,$outbytes,$next,$fallback) = @_;
    # state we shift to after this (multibyte) input character defaults to same
    # as current state.
    $next ||= $current;
    # Making sure it is defined seems to be faster than {no warnings;} in
    # &process, or passing it in as 0 explicitly.
    # XXX $fallback ||= 0;

    # Start at the beginning and work forwards through the string to zero.
    # effectively we are removing 1 character from the front each time
    # but we don't actually edit the string. [this alone seems to be 14% speedup]
    # Hence -$pos is the length of the remaining string.
    my $pos = -length $inbytes;
    # Nothing to enter; without this guard ++$pos would never reach zero.
    return if $pos == 0;
    while (1) {
        my $byte = substr $inbytes, $pos, 1;
        # RAW_NEXT => 0,
        # RAW_IN_LEN => 1,
        # RAW_OUT_BYTES => 2,
        # RAW_FALLBACK => 3,
        # to unicode an array would seem to be better, because the pages are dense.
        # from unicode can be very sparse, favouring a hash.
        # hash using the bytes (all length 1) as keys rather than ord value,
        # as it's easier to sort these in &process.

        # It's faster to always add $fallback even if it's undef, rather than
        # choosing between 3 and 4 element array. (hence why we set it defined
        # above)
        my $do_now = $current->{Raw}{$byte} ||= [{},-$pos,'',$fallback];
        # When $pos was -1 we were at the last input character.
        unless (++$pos) {
            $do_now->[RAW_OUT_BYTES] = $outbytes;
            $do_now->[RAW_NEXT] = $next;
            return;
        }
        # Tail recursion. The intermediate state may not have a name yet.
        $current = $do_now->[RAW_NEXT];
    }
}

# This is purely for optimisation. It's just &enter hard coded for $fallback
# of 0, using only a 3 entry array ref to save memory for every entry.
sub enter_fb0 {
    my ($current,$inbytes,$outbytes,$next) = @_;
    $next ||= $current;

    my $pos = -length $inbytes;
    # Same guard as in &enter: an empty input string has nothing to enter.
    return if $pos == 0;
    while (1) {
        my $byte = substr $inbytes, $pos, 1;
        my $do_now = $current->{Raw}{$byte} ||= [{},-$pos,''];
        unless (++$pos) {
            $do_now->[RAW_OUT_BYTES] = $outbytes;
            $do_now->[RAW_NEXT] = $next;
            return;
        }
        $current = $do_now->[RAW_NEXT];
    }
}

# process($name, $page)
#
# Walk the raw tree below $page, give every state a C identifier (Cname)
# derived from $name, and build $page->{Entries}: the aggregated "do_now"
# records for runs of adjacent input bytes with the same next state, input
# length, output length and fallback flag.
sub process
{
    my ($name,$page) = @_;
    $name =~ s/\W+/_/g;
    $page->{Cname} = $name;
    my $raw = $page->{Raw};
    my ($l, $agg_max_in, $agg_next, $agg_in_len, $agg_out_len, $agg_fallback);
    my @ent;
    $agg_max_in = 0;
    foreach my $key (sort keys %$raw) {
        # RAW_NEXT => 0,
        # RAW_IN_LEN => 1,
        # RAW_OUT_BYTES => 2,
        # RAW_FALLBACK => 3,
        my ($next, $in_len, $out_bytes, $fallback) = @{$raw->{$key}};
        # Now we are converting from raw to aggregate, switch from 1 byte strings
        # to numbers
        my $byte = ord $key;
        $fallback ||= 0;
        if ($l &&
            # If this == fails, we're going to reset $agg_max_in below anyway.
            $byte == ++$agg_max_in &&
            # References in numeric context give the pointer as an int.
            $agg_next == $next &&
            $agg_in_len == $in_len &&
            $agg_out_len == length($out_bytes) &&
            $agg_fallback == $fallback
            # && length($l->[AGG_OUT_BYTES]) < 16
           ) {
            # we can aggregate this byte onto the end.
            $l->[AGG_MAX_IN] = $byte;
            $l->[AGG_OUT_BYTES] .= $out_bytes;
        } else {
            # AGG_MIN_IN => 0,
            # AGG_MAX_IN => 1,
            # AGG_OUT_BYTES => 2,
            # AGG_NEXT => 3,
            # AGG_IN_LEN => 4,
            # AGG_OUT_LEN => 5,
            # AGG_FALLBACK => 6,
            # Reset the last thing we saw, plus set 5 lexicals to save some derefs.
            # (only gains .6% on euc-jp -- is it worth it?)
            push @ent, $l = [$byte, $agg_max_in = $byte, $out_bytes, $agg_next = $next,
                             $agg_in_len = $in_len, $agg_out_len = length($out_bytes),
                             $agg_fallback = $fallback];
        }
        if (exists $next->{Cname}) {
            # Already named: a link to another table needs a forward declaration.
            $next->{'Forward'} = 1 if $next != $page;
        } else {
            process(sprintf("%s_%02x",$name,$byte),$next);
        }
    }
    # encengine.c rules say that last entry must be for 255
    if ($agg_max_in < 255) {
        push @ent, [1+$agg_max_in, 255,undef,$page,0,0];
    }
    $page->{'Entries'} = \@ent;
}


# addstrings($fh, $page)
#
# Register every output string of $page (and, recursively, of all pages
# reachable from it) in %strings, and print a forward declaration to $fh for
# each page flagged 'Forward'.
sub addstrings
{
    my ($fh,$page) = @_;
    my $name = $page->{'Cname'};
    # String tables
    foreach my $entry (@{$page->{'Entries'}})
    {
        next unless $entry->[AGG_OUT_LEN];
        $strings{$entry->[AGG_OUT_BYTES]} = undef;
    }
    if ($page->{'Forward'})
    {
        my ($cpp, $static, $sized) = compiler_info(1);
        my $count = $sized ? scalar(@{$page->{'Entries'}}) : '';
        if ($static) {
            # we cannot ask Config for d_plusplus since we can override CC=g++-6 on the cmdline
            print $fh "#ifdef __cplusplus\n"; # -fpermissive since g++-6
            print $fh "extern encpage_t $name\[$count];\n";
            print $fh "#else\n";
            print $fh "static const encpage_t $name\[$count];\n";
            print $fh "#endif\n";
        } else {
            print $fh "extern encpage_t $name\[$count];\n";
        }
    }
    # Mark before recursing: the tables link back to their start state.
    $page->{'DoneStrings'} = 1;
    foreach my $entry (@{$page->{'Entries'}})
    {
        my $target = $entry->[AGG_NEXT];
        addstrings($fh,$target) unless $target->{'DoneStrings'};
    }
}

# outbigstring($fh, $name)
#
# Pack all strings registered in %strings into one accumulator string,
# remember every string's offset in %strings_in_acc, and print the
# accumulator to $fh as the C array "const U8 $name[]".
sub outbigstring
{
    my ($fh,$name) = @_;

    $string_acc = '';

    # Make the big string in the string accumulator. Longest first, on the hope
    # that this makes it more likely that we find the short strings later on.
    # Not sure if it helps sorting strings of the same length lexically.
    foreach my $s (sort {length $b <=> length $a || $a cmp $b} keys %strings) {
        my $index = index $string_acc, $s;
        if ($index >= 0) {
            $saved += length($s);
            $strings_in_acc{$s} = $index;
        } else {
        OPTIMISER: {
                if ($opt{'O'}) {
                    my $sublength = length $s;
                    while (--$sublength > 0) {
                        # progressively lop characters off the end, to see if the start of
                        # the new string overlaps the end of the accumulator.
                        if (substr ($string_acc, -$sublength)
                            eq substr ($s, 0, $sublength)) {
                            $subsave += $sublength;
                            $strings_in_acc{$s} = length ($string_acc) - $sublength;
                            # append the last bit on the end.
                            $string_acc .= substr ($s, $sublength);
                            last OPTIMISER;
                        }
                        # or if the end of the new string overlaps the start of the
                        # accumulator
                        next unless substr ($string_acc, 0, $sublength)
                            eq substr ($s, -$sublength);
                        # well, the last $sublength characters of the accumulator match.
                        # so as we're prepending to the accumulator, need to shift all our
                        # existing offsets forwards
                        $_ += $sublength foreach values %strings_in_acc;
                        $subsave += $sublength;
                        $strings_in_acc{$s} = 0;
                        # append the first bit on the start.
                        $string_acc = substr ($s, 0, -$sublength) . $string_acc;
                        last OPTIMISER;
                    }
                }
                # Optimiser (if it ran) found nothing, so just going have to tack the
                # whole thing on the end.
                $strings_in_acc{$s} = length $string_acc;
                $string_acc .= $s;
            };
        }
    }

    $strings = length $string_acc;
    my ($cpp) = compiler_info(0);
    my $var = $cpp ? '' : 'static';
    my $definition = "\n$var const U8 $name\[$strings] = { " .
        join(',',unpack "C*",$string_acc);
    # We have a single long line. Split it at convenient commas.
    print $fh $1, "\n" while $definition =~ /\G(.{74,77},)/gcs;
    print $fh substr ($definition, pos $definition), " };\n";
}

# findstring($name, $s)
#
# Returns the C expression "$name + offset" that addresses the string $s
# inside the big string table.  Dies when $s was never registered.
sub findstring {
    my ($name,$s) = @_;
    my $offset = $strings_in_acc{$s};
    die "Can't find string " . join (',',unpack "C*",$s) . " in accumulator"
        unless defined $offset;
    "$name + $offset";
}

# outtable($fh, $page, $bigname)
#
# Print the encpage_t array for $page to $fh, after (recursively) printing
# the arrays of all pages it refers to.  $bigname is the name of the big
# string table that the entries point into.
sub outtable
{
    my ($fh,$page,$bigname) = @_;
    my $name = $page->{'Cname'};
    # Mark before recursing: the tables link back to their start state.
    $page->{'Done'} = 1;
    foreach my $entry (@{$page->{'Entries'}})
    {
        my $target = $entry->[AGG_NEXT];
        outtable($fh,$target,$bigname) unless $target->{'Done'};
    }
    my ($cpp, $static) = compiler_info(0);
    my $count = scalar(@{$page->{'Entries'}});
    if ($static) {
        print $fh "#ifdef __cplusplus\n"; # -fpermissive since g++-6
        print $fh "encpage_t $name\[$count] = {\n";
        print $fh "#else\n";
        print $fh "static const encpage_t $name\[$count] = {\n";
        print $fh "#endif\n";
    } else {
        print $fh "\nencpage_t $name\[$count] = {\n";
    }
    foreach my $entry (@{$page->{'Entries'}})
    {
        my ($sc,$ec,$out,$t,$end,$l,$fb) = @$entry;
        # $end |= 0x80 if $fb; # what the heck was on your mind, Nick? -- Dan
        print $fh "{";
        if ($l)
        {
            # plain print: the string is data, not a printf format
            print $fh findstring($bigname,$out);
        }
        else
        {
            print $fh "0";
        }
        print $fh ",",$t->{Cname};
        printf $fh ",0x%02x,0x%02x,$l,$end},\n",$sc,$ec;
    }
    print $fh "};\n";
}

# output_enc($fh, $name, $page)
#
# Writing .enc files was never adapted to the current table structure;
# this always dies.
sub output_enc
{
    my ($fh,$name,$page) = @_;
    die "Changed - fix me for new structure";
}

# decode_U($s) - placeholder; returns its argument unchanged.
sub decode_U
{
    my $s = shift;
    return $s;
}

# Unicode character names below U+10000, indexed by code point
# (filled by char_names).
my @uname;

# char_names()
#
# Load perl's own unicore/Name.pl table into @uname.  Dies when the table
# cannot be loaded.
sub char_names
{
    my $table = do "unicore/Name.pl";
    die "char_names: unicore/Name.pl: $!\n" unless defined $table;
    pos($table) = 0;
    # Each record is "start<TAB>end<TAB>name"; an empty end means one char.
    while ($table =~ /\G([0-9a-f]+)\t([0-9a-f]*)\t(.*?)\s*\n/igc)
    {
        my $name  = $3;
        my $first = hex($1);
        last if $first >= 0x10000;
        my $last = length($2) ? hex($2) : $first;
        for my $i ($first .. $last)
        {
            $uname[$i] = $name;
#           print sprintf("U%04X $name\n",$i);
        }
    }
}

# output_ucm_page($cmap, $start, $page, $pre)
#
# Collect the CHARMAP lines for the UTF-8 keyed table $page (and its
# sub-pages) in @$cmap.  $start is the start state; $pre carries the code
# point bits gathered from the bytes read so far.
sub output_ucm_page
{
    my ($cmap,$start,$page,$pre) = @_;
    # warn sprintf("Page %x\n",$pre);
    my $raw = $page->{Raw};
    foreach my $key (sort keys %$raw) {
        # RAW_NEXT => 0,
        # RAW_IN_LEN => 1,
        # RAW_OUT_BYTES => 2,
        # RAW_FALLBACK => 3,
        my ($next, $in_len, $out_bytes, $fallback) = @{$raw->{$key}};
        my $u = ord $key;
        $fallback ||= 0;

        if ($next != $start && $next != $page) {
            output_ucm_page($cmap,$start,$next,(($pre|($u &0x3F)) << 6)&0xFFFF);
        } elsif (length $out_bytes) {
            if ($pre) {
                $u = $pre|($u &0x3f);
            }
            # "<UXXXX> " is exactly the 8 character prefix that output_ucm
            # skips when it sorts the lines.
            my $s = sprintf "<U%04X> ",$u;
            #foreach my $c (split(//,$out_bytes)) {
            #    $s .= sprintf "\\x%02X",ord($c);
            #}
            # 9.5% faster changing that loop to this:
            $s .= sprintf +("\\x%02X" x length $out_bytes), unpack "C*", $out_bytes;
            $s .= sprintf " |%d # %s\n",($fallback ? 1 : 0),$uname[$u];
            push(@$cmap,$s);
        } else {
            warn join(',',$u, @{$raw->{$key}},$start,$page);
        }
    }
}

# output_ucm($fh, $name, $u2e, $rep, $min_el, $max_el)
#
# Write the encoding as a .ucm file to $fh: the header fields that
# compile_ucm reads back (<code_set_name>, <subchar>) plus the byte length
# limits, followed by the CHARMAP section.
sub output_ucm
{
    my ($fh,$name,$h,$rep,$min_el,$max_el) = @_;
    print $fh "# $0 @orig_ARGV\n" unless $opt{'q'};
    print $fh "<code_set_name> \"$name\"\n";
    char_names();
    if (defined $min_el)
    {
        print $fh "<mb_cur_min> $min_el\n";
    }
    if (defined $max_el)
    {
        print $fh "<mb_cur_max> $max_el\n";
    }
    if (defined $rep)
    {
        print $fh "<subchar> ";
        foreach my $c (split(//,$rep))
        {
            printf $fh "\\x%02X",ord($c);
        }
        print $fh "\n";
    }
    my @cmap;
    output_ucm_page(\@cmap,$h,$h,0);
    print $fh "#\nCHARMAP\n";
    # Order by the byte sequence, i.e. by what follows the "<UXXXX> " prefix.
    foreach my $line (sort { substr($a,8) cmp substr($b,8) } @cmap)
    {
        print $fh $line;
    }
    print $fh "END CHARMAP\n";
}

# Package variables that the .e2x templates refer to as $_Name_ and so on;
# _print_expand substitutes them with a string eval, so they cannot be
# lexicals.
use vars qw(
    $_Enc2xs
    $_Version
    $_Inc
    $_E2X
    $_Name
    $_TableFiles
    $_Now
);

# find_e2x()
#
# Search @INC (without the current directory) for directories that contain
# *.e2x template files.  Sets $_E2X to the directory whose first found
# template has the smallest mtime and returns it.
sub find_e2x{
    eval { require File::Find; };
    my (@inc, %e2x_dir);
    for my $inc (@INC){
        push @inc, $inc unless $inc eq '.'; #skip current dir
    }
    File::Find::find(
        sub {
            my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
                $atime,$mtime,$ctime,$blksize,$blocks)
                = lstat($_) or return;
            -f _ or return;
            if (/^.*\.e2x$/o){
                no warnings 'once';
                $e2x_dir{$File::Find::dir} ||= $mtime;
            }
            return;
        }, @inc);
    warn join("\n", keys %e2x_dir), "\n";
    for my $d (sort {$e2x_dir{$a} <=> $e2x_dir{$b}} keys %e2x_dir){
        $_E2X = $d;
        # warn "$_E2X => ", scalar localtime($e2x_dir{$d});
        return $_E2X;
    }
}

# make_makefile_pl($name, @table_files)
#
# Implements "enc2xs -M Name files...": expand the templates into
# Makefile.PL, Name.pm, t/Name.t, README and Changes, then exit.
sub make_makefile_pl
{
    eval { require Encode } or die "You need to install Encode to use enc2xs -M\nerror: $@\n";
    # our used for variable expansion
    $_Enc2xs = $0;
    $_Version = $VERSION;
    $_E2X = find_e2x();
    $_Name = shift;
    $_TableFiles = join(",", map {qq('$_')} @_);
    $_Now = scalar localtime();

    eval { require File::Spec; };
    _print_expand(File::Spec->catfile($_E2X,"Makefile_PL.e2x"),"Makefile.PL");
    _print_expand(File::Spec->catfile($_E2X,"_PM.e2x"), "$_Name.pm");
    _print_expand(File::Spec->catfile($_E2X,"_T.e2x"), "t/$_Name.t");
    _print_expand(File::Spec->catfile($_E2X,"README.e2x"), "README");
    _print_expand(File::Spec->catfile($_E2X,"Changes.e2x"), "Changes");
    exit;
}

use vars qw(
    $_ModLines
    $_LocalVer
);

# make_configlocal_pm()
#
# Implements "enc2xs -C": load every Encode::* module found in @INC, note
# which module provides each encoding that is neither built in nor listed
# in %Encode::Config::ExtModule, write that list to Encode/ConfigLocal.pm
# (overwriting an existing file), then exit.
sub make_configlocal_pm {
    eval { require Encode } or die "Unable to require Encode: $@\n";
    eval { require File::Spec; };

    # our used for variable expansion
    my %in_core = map { $_ => 1 } (
        'ascii', 'iso-8859-1', 'utf8',
        'ascii-ctrl', 'null', 'utf-8-strict'
    );
    my %LocalMod = ();
    # check @enc;
    use File::Find ();
    my $wanted = sub{
        -f $_ or return;
        $File::Find::name =~ /\A\./ and return;
        $File::Find::name =~ /\.pm\z/ or return;
        $File::Find::name =~ m/\bEncode\b/ or return;
        my $mod = $File::Find::name;
        $mod =~ s/.*\bEncode\b/Encode/o;
        $mod =~ s/\.pm\z//o;
        $mod =~ s,/,::,og;
        eval qq{ require $mod; } or return;
        warn qq{ require $mod;\n};
        for my $enc ( Encode->encodings() ) {
            no warnings;
            $in_core{$enc} and next;
            $Encode::Config::ExtModule{$enc} and next;
            $LocalMod{$enc} ||= $mod;
        }
    };
    File::Find::find({wanted => $wanted}, @INC);
    $_ModLines = "";
    for my $enc ( sort keys %LocalMod ) {
        $_ModLines .=
            qq(\$Encode::ExtModule{'$enc'} = "$LocalMod{$enc}";\n);
    }
    warn $_ModLines if $_ModLines;
    $_LocalVer = _mkversion();
    $_E2X = find_e2x();
    $_Inc = $INC{"Encode.pm"};
    $_Inc =~ s/\.pm$//o;
    _print_expand( File::Spec->catfile( $_E2X, "ConfigLocal_PM.e2x" ),
                   File::Spec->catfile( $_Inc, "ConfigLocal.pm" ), 1 );
    exit;
}

# _mkversion() - version stamp for ConfigLocal.pm: the current epoch time.
sub _mkversion{
    # v-string is now deprecated; use time() instead;
    #my ($ss,$mm,$hh,$dd,$mo,$yyyy) = localtime();
    #$yyyy += 1900, $mo +=1;
    #return sprintf("v%04d.%04d.%04d", $yyyy, $mo*100+$dd, $hh*100+$mm);
    return time();
}

# _print_expand($src, $dst, $clobber)
#
# Copy the template $src to $dst.  Up to the "#### END_OF_HEADER" line every
# "$_Name_" style placeholder is replaced by the value of the package
# variable of that name; the marker line is dropped and the rest is copied
# as is.  An existing $dst is only overwritten when $clobber is true.  Dies
# when a file cannot be opened or written, or when the target directory
# cannot be created.
sub _print_expand{
    eval { require File::Basename } or die "File::Basename needed. Are you on miniperl?;\nerror: $@\n";
    File::Basename->import();
    my ($src, $dst, $clobber) = @_;
    if (!$clobber and -e $dst){
        warn "$dst exists. skipping\n";
        return;
    }
    warn "Generating $dst...\n";
    open my $in, '<', $src or die "$src : $!";
    if ((my $d = dirname($dst)) ne '.'){
        -d $d or mkdir $d, 0755 or die "mkdir $d : $!";
    }
    open my $out, ">", $dst or die "$!";
    my $asis = 0;
    while (<$in>){
        if (/^#### END_OF_HEADER/){
            $asis = 1; next;
        }
        # /ee: evaluate the captured variable name to get its value
        s/(\$_[A-Z][A-Za-z0-9]+)_/$1/gee unless $asis;
        print $out $_;
    }
    # Buffered write errors only show up at close time.
    close $out or die "$dst : $!";
}
__END__

=head1 NAME

enc2xs -- Perl Encode Module Generator

=head1 SYNOPSIS

  enc2xs -[options]
  enc2xs -M ModName mapfiles...
  enc2xs -C

=head1 DESCRIPTION

F<enc2xs> builds a Perl extension for use by Encode from either
Unicode Character Mapping files (.ucm) or Tcl Encoding Files (.enc).
Besides being used internally during the build process of the Encode
module, you can use F<enc2xs> to add your own encoding to perl.
No knowledge of XS is necessary.

=head1 Quick Guide

If you want to know as little about Perl as possible but need to
add a new encoding, just read this chapter and forget the rest.

=over 4

=item 0.Z<>

Have a .ucm file ready.
You can get it from somewhere or you can write +your own from scratch or you can grab one from the Encode distribution +and customize it. For the UCM format, see the next Chapter. In the +example below, I'll call my theoretical encoding myascii, defined +in I. C<$> is a shell prompt. + + $ ls -F + my.ucm + +=item 1.Z<> + +Issue a command as follows; + + $ enc2xs -M My my.ucm + generating Makefile.PL + generating My.pm + generating README + generating Changes + +Now take a look at your current directory. It should look like this. + + $ ls -F + Makefile.PL My.pm my.ucm t/ + +The following files were created. + + Makefile.PL - MakeMaker script + My.pm - Encode submodule + t/My.t - test file + +=over 4 + +=item 1.1.Z<> + +If you want *.ucm installed together with the modules, do as follows; + + $ mkdir Encode + $ mv *.ucm Encode + $ enc2xs -M My Encode/*ucm + +=back + +=item 2.Z<> + +Edit the files generated. You don't have to if you have no time AND no +intention to give it to someone else. But it is a good idea to edit +the pod and to add more tests. + +=item 3.Z<> + +Now issue a command all Perl Mongers love: + + $ perl Makefile.PL + Writing Makefile for Encode::My + +=item 4.Z<> + +Now all you have to do is make. + + $ make + cp My.pm blib/lib/Encode/My.pm + /usr/local/bin/perl /usr/local/bin/enc2xs -Q -O \ + -o encode_t.c -f encode_t.fnm + Reading myascii (myascii) + Writing compiled form + 128 bytes in string tables + 384 bytes (75%) saved spotting duplicates + 1 bytes (0.775%) saved using substrings + .... + chmod 644 blib/arch/auto/Encode/My/My.bs + $ + +The time it takes varies depending on how fast your machine is and +how large your encoding is. Unless you are working on something big +like euc-tw, it won't take too long. + +=item 5.Z<> + +You can "make install" already but you should test first. 
+ + $ make test + PERL_DL_NONLAZY=1 /usr/local/bin/perl -Iblib/arch -Iblib/lib \ + -e 'use Test::Harness qw(&runtests $verbose); \ + $verbose=0; runtests @ARGV;' t/*.t + t/My....ok + All tests successful. + Files=1, Tests=2, 0 wallclock secs + ( 0.09 cusr + 0.01 csys = 0.09 CPU) + +=item 6.Z<> + +If you are content with the test result, just "make install" + +=item 7.Z<> + +If you want to add your encoding to Encode's demand-loading list +(so you don't have to "use Encode::YourEncoding"), run + + enc2xs -C + +to update Encode::ConfigLocal, a module that controls local settings. +After that, "use Encode;" is enough to load your encodings on demand. + +=back + +=head1 The Unicode Character Map + +Encode uses the Unicode Character Map (UCM) format for source character +mappings. This format is used by IBM's ICU package and was adopted +by Nick Ing-Simmons for use with the Encode module. Since UCM is +more flexible than Tcl's Encoding Map and far more user-friendly, +this is the recommended format for Encode now. + +A UCM file looks like this. + + # + # Comments + # + <code_set_name> "US-ascii" # Required + <code_set_alias> "ascii" # Optional + <mb_cur_min> 1 # Required; usually 1 + <mb_cur_max> 1 # Max. # of bytes/char + <subchar> \x3F # Substitution char + # + CHARMAP + <U0000> \x00 |0 # <control> + <U0001> \x01 |0 # <control> + <U0002> \x02 |0 # <control> + .... + <U007C> \x7C |0 # VERTICAL LINE + <U007D> \x7D |0 # RIGHT CURLY BRACKET + <U007E> \x7E |0 # TILDE + <U007F> \x7F |0 # <control> + END CHARMAP + +=over 4 + +=item * + +Anything that follows C<#> is treated as a comment. + +=item * + +The header section continues until a line containing the word +CHARMAP. This section has a form of I<E<lt>keywordE<gt> value>, one +pair per line. Strings used as values must be quoted. Barewords are +treated as numbers. I<\xXX> represents a byte. + +Most of the keywords are self-explanatory. I<subchar> means +substitution character, not subcharacter. When you encode a Unicode +sequence to this encoding but no matching character is found, the byte +sequence defined here will be used. For most cases, the value here is +\x3F; in ASCII, this is a question mark.
+ +=item * + +CHARMAP starts the character map section. Each line has a form as +follows: + + <UXXXX> \xXX.. |0 # comment + ^ ^ ^ + | | +- Fallback flag + | +-------- Encoded byte sequence + +-------------- Unicode Character ID in hex + +The format is roughly the same as a header section except for the +fallback flag: | followed by 0..3. The meaning of the possible +values is as follows: + +=over 4 + +=item |0 + +Round trip safe. A character decoded to Unicode encodes back to the +same byte sequence. Most characters have this flag. + +=item |1 + +Fallback for unicode -> encoding. When seen, enc2xs adds this +character for the encode map only. + +=item |2 + +Skip sub-char mapping should there be no code point. + +=item |3 + +Fallback for encoding -> unicode. When seen, enc2xs adds this +character for the decode map only. + +=back + +=item * + +And finally, END CHARMAP ends the section. + +=back + +When you are manually creating a UCM file, you should copy ascii.ucm +or an existing encoding which is close to yours, rather than write +your own from scratch. + +When you do so, make sure you leave at least B<U0000> to B<U0020> as +is, unless your environment is EBCDIC. + +B<CAVEAT>: not all features in UCM are implemented. For example, +icu:state is not used. Because of that, you need to write a perl +module if you want to support algorithmic encodings, notably +the ISO-2022 series. Such modules include L<Encode::JP::2022_JP>, +L<Encode::KR::2022_KR>, and L<Encode::CN::HZ>. + +=head2 Coping with duplicate mappings + +When you create a map, you SHOULD make your mappings round-trip safe. +That is, C<encode('your-encoding', decode('your-encoding', $data)) eq +$data> stands for all characters that are marked as C<|0>. Here is +how to make sure: + +=over 4 + +=item * + +Sort your map in Unicode order. + +=item * + +When you have a duplicate entry, mark either one with '|1' or '|3'. + +=item * + +And make sure the '|1' or '|3' entry FOLLOWS the '|0' entry. + +=back + +Here is an example from big5-eten.
+ + <U2550> \xF9\xF9 |0 + <U2550> \xA2\xA4 |3 + +Internally Encoding -> Unicode and Unicode -> Encoding Map looks like +this; + + E to U U to E + -------------------------------------- + \xF9\xF9 => U2550 U2550 => \xF9\xF9 + \xA2\xA4 => U2550 + +So it is round-trip safe for \xF9\xF9. But if the line above is upside +down, here is what happens. + + E to U U to E + -------------------------------------- + \xA2\xA4 => U2550 U2550 => \xF9\xF9 + (\xF9\xF9 => U2550 is now overwritten!) + +The Encode package comes with F<ucmlint>, a crude but sufficient +utility to check the integrity of a UCM file. Check under the +Encode/bin directory for this. + +When in doubt, you can use F<ucmsort>, yet another utility under +Encode/bin directory. + +=head1 Bookmarks + +=over 4 + +=item * + +ICU Home Page +L + +=item * + +ICU Character Mapping Tables +L + +=item * + +ICU:Conversion Data +L + +=back + +=head1 SEE ALSO + +L, +L, +L + +=cut + +# -Q to disable the duplicate codepoint test +# -S make mapping errors fatal +# -q to remove comments written to output files +# -O to enable the (brute force) substring optimiser +# -o to specify the output file name (else it's the first arg) +# -f to give a file with a list of input files (else use the args) +# -n to name the encoding (else use the basename of the input file). + +With %seen holding array refs: + + 865.66 real 28.80 user 8.79 sys + 7904 maximum resident set size + 1356 average shared memory size + 18566 average unshared data size + 229 average unshared stack size + 46080 page reclaims + 33373 page faults + +With %seen holding simple scalars: + + 342.16 real 27.11 user 3.54 sys + 8388 maximum resident set size + 1394 average shared memory size + 14969 average unshared data size + 236 average unshared stack size + 28159 page reclaims + 9839 page faults + +Yes, 5 minutes is faster than 15. Above is for CP936 in CN. Only difference is +how %seen is storing things it has seen.
So it is pathologically bad on a 16M +RAM machine, but it's going to help even on modern machines. +Swapping is bad, m'kay :-) diff --git a/bin/encguess b/bin/encguess new file mode 100755 index 0000000..0f344ea --- /dev/null +++ b/bin/encguess @@ -0,0 +1,146 @@ +#!./perl +use 5.008001; +BEGIN { pop @INC if $INC[-1] eq '.' } +use strict; +use warnings; +use Encode; +use Getopt::Std; +use Carp; +use Encode::Guess; +$Getopt::Std::STANDARD_HELP_VERSION = 1; + +my %opt; +getopts( "huSs:", \%opt ); +my @suspect_list; +list_valid_suspects() and exit if $opt{S}; +@suspect_list = split /:,/, $opt{s} if $opt{s}; +HELP_MESSAGE() if $opt{h}; +HELP_MESSAGE() unless @ARGV; +do_guess($_) for @ARGV; + +sub read_file { + my $filename = shift; + local $/; + open my $fh, '<:raw', $filename or croak "$filename:$!"; + my $content = <$fh>; + close $fh; + return $content; +} + +sub do_guess { + my $filename = shift; + my $data = read_file($filename); + my $enc = guess_encoding( $data, @suspect_list ); + if ( !ref($enc) && $opt{u} ) { + return 1; + } + print "$filename\t"; + if ( ref($enc) ) { + print $enc->mime_name(); + } + else { + print "unknown"; + } + print "\n"; + return 1; +} + +sub list_valid_suspects { + print join( "\n", Encode->encodings(":all") ); + print "\n"; + return 1; +} + +sub HELP_MESSAGE { + exec 'pod2usage', $0 or die "pod2usage: $!" +} +__END__ +=head1 NAME + +encguess - guess character encodings of files + +=head1 VERSION + +$Id: encguess,v 0.2 2016/08/04 03:15:58 dankogai Exp $ + +=head1 SYNOPSIS + + encguess [switches] filename... + +=head2 SWITCHES + +=over 2 + +=item -h + +show this message and exit.
+ +=item -s + +specify a list of "suspect encoding types" to test, +separated by either C<:> or C<,> + +=item -S + +output a list of all acceptable encoding types that can be used with +the -s param + +=item -u + +suppress display of unidentified types + +=back + +=head2 EXAMPLES: + +=over 2 + +=item * + +Guess encoding of a file named C<test.txt>, using only the default +suspect types. + + encguess test.txt + +=item * + +Guess the encoding type of a file named C<test.txt>, using the suspect +types C<euc-jp,shiftjis,7bit-jis>. + + encguess -s euc-jp,shiftjis,7bit-jis test.txt + encguess -s euc-jp:shiftjis:7bit-jis test.txt + +=item * + +Guess the encoding type of several files, do not display results for +unidentified files. + + encguess -us euc-jp,shiftjis,7bit-jis test*.txt + +=back + +=head1 DESCRIPTION + +The encoding identification is done by checking one encoding type at a +time until all but the right type are eliminated. The set of encoding +types to try is defined by the -s parameter and defaults to ascii, +utf8 and UTF-16/32 with BOM. This can be overridden by passing one or +more encoding types via the -s parameter. If you need to pass in +multiple suspect encoding types, use a quoted string with a space +separating each value. + +=head1 SEE ALSO + +L<Encode::Guess>, L<Encode::Detect> + +=head1 LICENSE AND COPYRIGHT + +Copyright 2015 Michael LaGrasta and Dan Kogai. + +This program is free software; you can redistribute it and/or modify it +under the terms of the Artistic License (2.0). You may obtain a +copy of the full license at: + +L<http://www.perlfoundation.org/artistic_license_2_0> + +=cut diff --git a/bin/piconv b/bin/piconv new file mode 100755 index 0000000..2218d16 --- /dev/null +++ b/bin/piconv @@ -0,0 +1,319 @@ +#!./perl +# $Id: piconv,v 2.8 2016/08/04 03:15:58 dankogai Exp $ +# +BEGIN { pop @INC if $INC[-1] eq '.'
} +use 5.8.0; +use strict; +use Encode ; +use Encode::Alias; +my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio); + +use File::Basename; +my $name = basename($0); + +use Getopt::Long qw(:config no_ignore_case); + +my %Opt; + +help() + unless + GetOptions(\%Opt, + 'from|f=s', + 'to|t=s', + 'list|l', + 'string|s=s', + 'check|C=i', + 'c', + 'perlqq|p', + 'htmlcref', + 'xmlcref', + 'debug|D', + 'scheme|S=s', + 'resolve|r=s', + 'help', + ); + +$Opt{help} and help(); +$Opt{list} and list_encodings(); +my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG}; +defined $Opt{resolve} and resolve_encoding($Opt{resolve}); +$Opt{from} || $Opt{to} || help(); +my $from = $Opt{from} || $locale or help("from_encoding unspecified"); +my $to = $Opt{to} || $locale or help("to_encoding unspecified"); +$Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit; +my $scheme = do { + if (defined $Opt{scheme}) { + if (!exists $Scheme{$Opt{scheme}}) { + warn "Unknown scheme '$Opt{scheme}', fallback to 'from_to'.\n"; + 'from_to'; + } else { + $Opt{scheme}; + } + } else { + 'from_to'; + } +}; + +$Opt{check} ||= $Opt{c}; +$Opt{perlqq} and $Opt{check} = Encode::PERLQQ; +$Opt{htmlcref} and $Opt{check} = Encode::HTMLCREF; +$Opt{xmlcref} and $Opt{check} = Encode::XMLCREF; + +my $efrom = Encode->getEncoding($from) || die "Unknown encoding '$from'"; +my $eto = Encode->getEncoding($to) || die "Unknown encoding '$to'"; + +my $cfrom = $efrom->name; +my $cto = $eto->name; + +if ($Opt{debug}){ + print <<"EOT"; +Scheme: $scheme +From: $from => $cfrom +To: $to => $cto +EOT +} + +my %use_bom = + map { $_ => 1 } qw/UTF-16 UTF-16BE UTF-16LE UTF-32 UTF-32BE UTF-32LE/; + +# we do not use <> (or ARGV) for the sake of binmode() +@ARGV or push @ARGV, \*STDIN; + +unless ( $scheme eq 'perlio' ) { + binmode STDOUT; + my $need2slurp = $use_bom{ $eto } || $use_bom{ $efrom }; + for my $argv (@ARGV) { + my $ifh = ref $argv ? 
$argv : undef; + $ifh or open $ifh, "<", $argv or warn "Can't open $argv: $!" and next; + $ifh or open $ifh, "<", $argv or next; + binmode $ifh; + if ( $scheme eq 'from_to' ) { # default + if ($need2slurp){ + local $/; + $_ = <$ifh>; + Encode::from_to( $_, $from, $to, $Opt{check} ); + print; + }else{ + while (<$ifh>) { + Encode::from_to( $_, $from, $to, $Opt{check} ); + print; + } + } + } + elsif ( $scheme eq 'decode_encode' ) { # step-by-step + if ($need2slurp){ + local $/; + $_ = <$ifh>; + my $decoded = decode( $from, $_, $Opt{check} ); + my $encoded = encode( $to, $decoded ); + print $encoded; + }else{ + while (<$ifh>) { + my $decoded = decode( $from, $_, $Opt{check} ); + my $encoded = encode( $to, $decoded ); + print $encoded; + } + } + } + else { # won't reach + die "$name: unknown scheme: $scheme"; + } + } +} +else { + + # NI-S favorite + binmode STDOUT => "raw:encoding($to)"; + for my $argv (@ARGV) { + my $ifh = ref $argv ? $argv : undef; + $ifh or open $ifh, "<", $argv or warn "Can't open $argv: $!" and next; + $ifh or open $ifh, "<", $argv or next; + binmode $ifh => "raw:encoding($from)"; + print while (<$ifh>); + } +} + +sub list_encodings { + print join( "\n", Encode->encodings(":all") ), "\n"; + exit 0; +} + +sub resolve_encoding { + if ( my $alias = Encode::resolve_alias( $_[0] ) ) { + print $alias, "\n"; + exit 0; + } + else { + warn "$name: $_[0] is not known to Encode\n"; + exit 1; + } +} + +sub help { + my $message = shift; + $message and print STDERR "$name error: $message\n"; + print STDERR <<"EOT"; +$name [-f from_encoding] [-t to_encoding] + [-p|--perlqq|--htmlcref|--xmlcref] [-C N|-c] [-D] [-S scheme] + [-s string|file...] 
+$name -l +$name -r encoding_alias +$name -h +Common options: + -l,--list + lists all available encodings + -r,--resolve encoding_alias + resolve encoding to its (Encode) canonical name + -f,--from from_encoding + when omitted, the current locale will be used + -t,--to to_encoding + when omitted, the current locale will be used + -s,--string string + "string" will be the input instead of STDIN or files +The following are mainly of interest to Encode hackers: + -C N | -c check the validity of the input + -D,--debug show debug information + -S,--scheme scheme use the scheme for conversion +Those are handy when you can only see ASCII characters: + -p,--perlqq transliterate characters missing in encoding to \\x{HHHH} + where HHHH is the hexadecimal Unicode code point + --htmlcref transliterate characters missing in encoding to &#NNN; + where NNN is the decimal Unicode code point + --xmlcref transliterate characters missing in encoding to &#xHHHH; + where HHHH is the hexadecimal Unicode code point + +EOT + exit; +} + +__END__ + +=head1 NAME + +piconv -- iconv(1), reinvented in perl + +=head1 SYNOPSIS + + piconv [-f from_encoding] [-t to_encoding] + [-p|--perlqq|--htmlcref|--xmlcref] [-C N|-c] [-D] [-S scheme] + [-s string|file...] + piconv -l + piconv -r encoding_alias + piconv -h + +=head1 DESCRIPTION + +B is perl version of B, a character encoding converter +widely available for various Unixen today. This script was primarily +a technology demonstrator for Perl 5.8.0, but you can use piconv in the +place of iconv for virtually any case. + +piconv converts the character encoding of either STDIN or files +specified in the argument and prints out to STDOUT. + +Here is the list of options. Some options can be in short format (-f) +or long (--from) one. + +=over 4 + +=item -f,--from I + +Specifies the encoding you are converting from. Unlike B, +this option can be omitted. In such cases, the current locale is used. 
+ +=item -t,--to I + +Specifies the encoding you are converting to. Unlike B, +this option can be omitted. In such cases, the current locale is used. + +Therefore, when both -f and -t are omitted, B just acts +like B. + +=item -s,--string I + +uses I instead of file for the source of text. + +=item -l,--list + +Lists all available encodings, one per line, in case-insensitive +order. Note that only the canonical names are listed; many aliases +exist. For example, the names are case-insensitive, and many standard +and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850" +instead of "cp850", or "winlatin1" for "cp1252". See L +for a full discussion. + +=item -r,--resolve I + +Resolve I to Encode canonical encoding name. + +=item -C,--check I + +Check the validity of the stream if I = 1. When I = -1, something +interesting happens when it encounters an invalid character. + +=item -c + +Same as C<-C 1>. + +=item -p,--perlqq + +Transliterate characters missing in encoding to \x{HHHH} where HHHH is the +hexadecimal Unicode code point. + +=item --htmlcref + +Transliterate characters missing in encoding to &#NNN; where NNN is the +decimal Unicode code point. + +=item --xmlcref + +Transliterate characters missing in encoding to &#xHHHH; where HHHH is the +hexadecimal Unicode code point. + +=item -h,--help + +Show usage. + +=item -D,--debug + +Invokes debugging mode. Primarily for Encode hackers. + +=item -S,--scheme I + +Selects which scheme is to be used for conversion. Available schemes +are as follows: + +=over 4 + +=item from_to + +Uses Encode::from_to for conversion. This is the default. + +=item decode_encode + +Input strings are decode()d then encode()d. A straight two-step +implementation. + +=item perlio + +The new perlIO layer is used. NI-S' favorite. + +You should use this option if you are using UTF-16 and others which +linefeed is not $/. + +=back + +Like the I<-D> option, this is also for Encode hackers. 
+ +=back + +=head1 SEE ALSO + +L +L +L +L +L +L + +=cut diff --git a/bin/ucm2table b/bin/ucm2table new file mode 100755 index 0000000..66e63fc --- /dev/null +++ b/bin/ucm2table @@ -0,0 +1,45 @@ +#!/usr/bin/perl +# $Id: ucm2table,v 2.1 2006/05/03 18:24:10 dankogai Exp $ +# + +use 5.006; +use strict; +use Getopt::Std; +my %Opt; +getopts("aeu", \%Opt); +my %Chartab; + +my $Hex = '[0-9A-Fa-f]'; +while(<>){ + chomp; + my ($uni, $enc, $fb) = + /^\s+(\S+)\s+\|(\d)/o or next; + $fb eq '0' or next; + my @byte = (); + my $ord = 0; + while($enc =~ /\G\\x($Hex+)/iog){ + my $byte = hex($1); + push @byte, $byte; + $ord <<= 8; $ord += $byte; + }; + # print join('', @byte), " => $ord \n"; + if ($Opt{u}){ + $Chartab{$ord} = pack("U", hex($uni)); + }else{ + $Chartab{$ord} = pack("C*", @byte); + } +} + +my $start = $Opt{a} ? 0x20 : 0xa0; + +for (my $x = $start; $x <= 0xffff; $x += 32) { + my $line = ''; + for my $i (0..31){ + my $num = $x+$i; $num eq 0x7f and next; # skip delete + my $char = $Chartab{$num}; + $line .= !$char ? " " : + ($num < 0x7f ) ? " $char" : $char ; + } + $line =~ /^\s+$/o and next; + printf "0x%04x: $line\n", $x; +} diff --git a/bin/ucmlint b/bin/ucmlint new file mode 100644 index 0000000..a31a7a2 --- /dev/null +++ b/bin/ucmlint @@ -0,0 +1,203 @@ +#!/usr/local/bin/perl +# +# $Id: ucmlint,v 2.4 2017/06/10 17:23:50 dankogai Exp $ +# + +BEGIN { pop @INC if $INC[-1] eq '.' } +use strict; +our $VERSION = do { my @r = (q$Revision: 2.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; + +use Getopt::Std; +our %Opt; +getopts("Dehfv", \%Opt); + +if ($Opt{e}){ + eval { require Encode } or die "can't load Encode : $@"; +} + +$Opt{h} and help(); +@ARGV or help(); + +sub help{ + print <<""; +$0 -[Dehfv] [ucm files ...] 
+ -D debug mode on + -e test with Encode module also (requires perl 5.7.3 or higher) + -h shows this message + -f forces roundtrip check even for |[123] + -v verbose mode + +} + +$| = 1; +my (%Hdr, %U2E, %E2U, %Fallback); +my $in_charmap = 0; +my $nerror = 0; +my $nwarning = 0; + +sub nit($;$){ + my ($msg, $level) = @_; + my $lstr; + if ($level == 2){ + $lstr = 'notice'; + }elsif ($level == 1){ + $lstr = 'warning'; $nwarning++; + }else{ + $lstr = 'error'; $nerror++; + } + print "$ARGV:$lstr in line $.: $msg\n"; +} + +for $ARGV (@ARGV){ + open UCM, $ARGV or die "$ARGV:$!"; + %Hdr = %U2E = %E2U = %Fallback = (); + $in_charmap = $nerror = $nwarning = 0; + $. = 0; + while(){ + chomp; + s/\s*#.*$//o; /^$/ and next; + if ($_ eq "CHARMAP"){ + $in_charmap = 1; + for my $must (qw/code_set_name mb_cur_min mb_cur_max/){ + exists $Hdr{$must} or nit "<$must> nonexistent"; + } + $Hdr{mb_cur_min} > $Hdr{mb_cur_max} + and nit sprintf("mb_cur_min(%d) > mb_cur_max(%d)", + $Hdr{mb_cur_min},$Hdr{mb_cur_max}); + $in_charmap = 1; + next; + } + unless ($in_charmap){ + my($hkey, $hvalue) = /^<(\S+)>\s+[\"\']?([^\"\']+)/o or next; + $Opt{D} and warn "$hkey => $hvalue"; + if ($hkey eq "code_set_name"){ # name check + exists $Hdr{code_set_name} + and nit "Duplicate : $hkey"; + } + if ($hkey eq "code_set_alias"){ # alias check + $hvalue eq $Hdr{code_set_name} + and nit qq(alias "$hvalue" is already in ); + } + $Hdr{$hkey} = $hvalue; + }else{ + my $name = $Hdr{code_set_name}; + my($unistr, $encstr, $fb) = /^(\S+)\s+(\S+)\s(\S+)/o or next; + $Opt{v} and nit $_, 2; + my $uni = uniparse($unistr); + my $enc = encparse($encstr); + $fb =~ /^\|([0123])$/ or nit "malformed fallback: $fb"; + $fb = $1; + $Opt{f} and $fb = 0; + unless ($fb == 3){ # check uni -> enc + if (exists $U2E{$uni}){ + nit "dupe encode map: U$uni => $U2E{$uni} and $enc", 1; + }else{ + $U2E{$uni} = $enc; + $Fallback{$uni}{$enc} = 1 if $fb == 1; + if ($Opt{e}) { + my $e = hex2enc($enc); + my $u = hex2uni($uni); + my $eu = 
Encode::encode($name, $u); + $e eq $eu + or nit qq(encode('$name', $uni) != $enc); + } + } + } + unless ($fb == 1){ # check enc -> uni + if (exists $E2U{$enc}){ + nit "dupe decode map: $enc => U$E2U{$enc} and U$uni", 1; + }else{ + $E2U{$enc} = $uni; + $Fallback{$enc}{$uni} = 1 if $fb == 3; + if ($Opt{e}) { + my $e = hex2enc($enc); + my $u = hex2uni($uni); + $Opt{D} and warn "$uni, $enc"; + my $de = Encode::decode($name, $e); + $de eq $u + or nit qq(decode('$name', $enc) != $uni); + } + } + } + # warn "$uni, $enc, $fb"; + } + } + $in_charmap or nit "Where is CHARMAP?"; + checkRT(); + printf ("$ARGV: %s error%s found\n", + ($nerror == 0 ? 'no' : $nerror), + ($nerror > 1 ? 's' : '')); +} + +exit; + +sub hex2enc{ + pack("C*", map {hex($_)} split(",", shift)); +} +sub hex2uni{ + join("", map { chr(hex($_)) } split(",", shift)); +} + +sub checkRT{ + for my $uni (keys %E2U){ + my $enc = $U2E{$uni} or next; # okay + $E2U{$U2E{$uni}} eq $uni or $Fallback{$uni}{$enc} or + nit "RT failure: U$uni => $enc =>U$E2U{$U2E{$uni}}"; + } + for my $enc (keys %E2U){ + my $uni = $E2U{$enc} or next; # okay + $U2E{$E2U{$enc}} eq $enc or $Fallback{$enc}{$uni} or + nit "RT failure: $enc => U$uni => $U2E{$E2U{$enc}}"; + } +} + + +sub uniparse{ + my $str = shift; + my @u; + push @u, $1 while($str =~ /\G/ig); + for my $u (@u){ + $u =~ /^([0-9A-Za-z]+)$/o + or nit "malformed Unicode character: $u"; + } + return join(',', @u); +} + +sub encparse{ + my $str = shift; + my @e; + for my $e (split /\\x/io, $str){ + $e or next; # first \x + $e =~ /^([0-9A-Za-z]{1,2})$/io + or nit "Hex $e in $str is bogus"; + push @e, $1; + } + return join(',', @e); +} + + + +__END__ + +A UCM file looks like this. + + # + # Comments + # + "US-ascii" # Required + "ascii" # Optional + 1 # Required; usually 1 + 1 # Max. # of bytes/char + \x3F # Substitution char + # + CHARMAP + \x00 |0 # + \x01 |0 # + \x02 |0 # + .... 
+ \x7C |0 # VERTICAL LINE + \x7D |0 # RIGHT CURLY BRACKET + \x7E |0 # TILDE + \x7F |0 # + END CHARMAP + diff --git a/bin/ucmsort b/bin/ucmsort new file mode 100755 index 0000000..3e037dc --- /dev/null +++ b/bin/ucmsort @@ -0,0 +1,35 @@ +#!/usr/local/bin/perl +# +# $Id: ucmsort,v 2.2 2006/05/03 18:24:10 dankogai Exp $ +# +use strict; +my @lines; +my ($head, $tail); +while (<>){ + unless (m/^.*//o; + push @lines,[ $u, @words ]; +} + +print $head; +for (sort { + hex($a->[0]) <=> hex($b->[0]) # Unicode descending order + or $a->[2] cmp $b->[2] # fallback descending order + or $a->[1] cmp $b->[1] # Encoding descending order + } + @lines) { + my $u = shift @$_; + print join(" " => "", @$_), "\n"; +} +print $tail; +__END__ diff --git a/bin/unidump b/bin/unidump new file mode 100755 index 0000000..f190827 --- /dev/null +++ b/bin/unidump @@ -0,0 +1,273 @@ +#!./perl + +BEGIN { pop @INC if $INC[-1] eq '.' } +use strict; +use Encode; +use Getopt::Std; +my %Opt; getopts("ChH:e:f:t:s:pPv", \%Opt); +$Opt{p} ||= $Opt{P}; +$Opt{e} ||= 'utf8'; +$Opt{f} ||= $Opt{e}; +$Opt{t} ||= $Opt{e}; +$Opt{h} and help(); + +my ($linebuf, $outbuf); +my $CPL = $Opt{p} ? 64 : 8; +my $linenum; +my $linesperheading = $Opt{H}; +my $nchars; +our $PrevChunk; + +$Opt{h} and help(); +$Opt{p} and do_perl($Opt{s}); +do_dump($Opt{s}); +exit; + +# + +sub do_perl{ + my $string = shift; + $Opt{P} and print "#!$^X -w\nprint\n"; + unless ($string){ + while(<>){ + use utf8; + $linebuf .= Encode::decode($Opt{f}, $_); + while($linebuf){ + my $chr = render_p(substr($linebuf, 0, 1, '')); + length($outbuf) + length($chr) > $CPL and print_P(); + $outbuf .= $chr; + } + } + $outbuf and print print_P(";"); + }else{ + while($string){ + my $chr = render_p(substr($string, 0, 1, '')); + length($outbuf) + length($chr) > $CPL and print_P(); + $outbuf .= $chr; + } + } + $outbuf and print print_P(";"); + exit; +} + +sub render_p{ + my ($chr, $format) = @_; + our %S2pstr; + $S2pstr{$chr} and return $S2pstr{$chr}; # \t\n... 
+ $chr =~ /[\x20-\x7e]/ and return $chr; # ascii, printable; + my $fmt = ($chr =~ /[\x00-\x1f\x7F]/) ? + q(\x%x) : q(\x{%x}); + return sprintf $fmt, ord($chr); +} + +sub print_P{ + my $end = shift; + $outbuf or return; + print '"', encode($Opt{t}, $outbuf), '"'; + my $tail = $Opt{P} ? $end ? "$end" : "," : ''; + print $tail, "\n"; + $outbuf = ''; +} + +sub do_dump{ + my $string = shift; + !$Opt{p} and exists $Opt{H} and print_H(); + unless ($string){ + while(<>){ + use utf8; + $linebuf .= Encode::decode($Opt{f}, $_); + while (length($linebuf) > $CPL){ + my $chunk = substr($linebuf, 0, $CPL, ''); + print_C($chunk, $linenum++); + $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S(); + } + } + $linebuf and print_C($linebuf); + }else{ + while ($string){ + my $chunk = substr($string, 0, $CPL, ''); + print_C($chunk, $linenum++); + $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S(); + } + } + exit; +} + +sub print_S{ + print "--------+------------------------------------------------"; + if ($Opt{C}){ + print "-+-----------------"; + } + print "\n"; +} +sub print_H{ + print " Offset 0 1 2 3 4 5 6 7"; + if ($Opt{C}){ + print " | 0 1 2 3 4 5 6 7"; + } + print "\n"; + print_S; +} + +sub print_C{ + my ($chunk, $linenum) = @_; + if (!$Opt{v} and $chunk eq $PrevChunk){ + printf "%08x *\n", $linenum*8; return; + } + $PrevChunk = $chunk; + my $end = length($chunk) - 1; + my (@ord, @chr); + for my $i (0..$end){ + use utf8; + my $chr = substr($chunk,$i,1); + my $ord = ord($chr); + my $fmt = $ord <= 0xffff ? 
" %04x" : " %05x"; + push @ord, (sprintf $fmt, $ord); + $Opt{C} and push @chr, render_c($chr); + } + if (++$end < 7){ + for my $i ($end..7){ + push @ord, (" " x 6); + } + } + my $line = sprintf "%08x %s", $linenum*8, join('', @ord); + $Opt{C} and $line .= sprintf " | %s", join('', @chr); + print encode($Opt{t}, $line), "\n"; +} + +sub render_c{ + my ($chr, $format) = @_; + our (%S2str, $IsFullWidth); + $chr =~ /[\p{IsControl}\s]/o and return $S2str{$chr} || " "; + $chr =~ $IsFullWidth and return $chr; # as is + return " " . $chr; +} + +sub help{ + my $message = shift; + use File::Basename; + my $name = basename($0); + $message and print STDERR "$name error: $message\n"; + print STDERR <<"EOT"; +Usage: + $name -[options...] [files...] + $name -[options...] -s "string" + $name -h + -h prints this message. +Inherited from hexdump; + -C Canonical unidump mode + -v prints the duplicate line as is. Without this option, + single "*" will be printed instead. +For unidump only + -p prints in perl literals that you can copy and paste directly + to your perl script. + -P prints in perl executable format! + -u prints a bunch of "Uxxxx,". Handy when you want to pass your + characters in mailing lists. +IO Options: + -e io_encoding same as "-f io_encoding -t io_encoding" + -f from_encoding convert the source stream from this encoding + -t to_encoding print to STDOUT in this encoding + -s string "string" will be converted instead of STDIN. + -H nline prints separater for each nlines of output. + 0 means only the table headding be printed. 
+EOT + exit; +} + +BEGIN{ + our %S2pstr= ( + "\\" => '\\\\', + "\0" => '\0', + "\t" => '\t', + "\n" => '\n', + "\r" => '\r', + "\v" => '\v', + "\a" => '\a', + "\e" => '\e', + "\"" => qq(\\\"), + "\'" => qq(\\\'), + '$' => '\$', + "@" => '\@', + "%" => '\%', + ); + + our %S2str = ( + qq(\x00) => q(\0), # NULL + qq(\x01) => q(^A), # START OF HEADING + qq(\x02) => q(^B), # START OF TEXT + qq(\x03) => q(^C), # END OF TEXT + qq(\x04) => q(^D), # END OF TRANSMISSION + qq(\x05) => q(^E), # ENQUIRY + qq(\x06) => q(^F), # ACKNOWLEDGE + qq(\x07) => q(\a), # BELL + qq(\x08) => q(^H), # BACKSPACE + qq(\x09) => q(\t), # HORIZONTAL TABULATION + qq(\x0A) => q(\n), # LINE FEED + qq(\x0B) => q(\v), # VERTICAL TABULATION + qq(\x0C) => q(^L), # FORM FEED + qq(\x0D) => q(\r), # CARRIAGE RETURN + qq(\x0E) => q(^N), # SHIFT OUT + qq(\x0F) => q(^O), # SHIFT IN + qq(\x10) => q(^P), # DATA LINK ESCAPE + qq(\x11) => q(^Q), # DEVICE CONTROL ONE + qq(\x12) => q(^R), # DEVICE CONTROL TWO + qq(\x13) => q(^S), # DEVICE CONTROL THREE + qq(\x14) => q(^T), # DEVICE CONTROL FOUR + qq(\x15) => q(^U), # NEGATIVE ACKNOWLEDGE + qq(\x16) => q(^V), # SYNCHRONOUS IDLE + qq(\x17) => q(^W), # END OF TRANSMISSION BLOCK + qq(\x18) => q(^X), # CANCEL + qq(\x19) => q(^Y), # END OF MEDIUM + qq(\x1A) => q(^Z), # SUBSTITUTE + qq(\x1B) => q(\e), # ESCAPE (\c[) + qq(\x1C) => "^\\", # FILE SEPARATOR + qq(\x1D) => "^\]", # GROUP SEPARATOR + qq(\x1E) => q(^^), # RECORD SEPARATOR + qq(\x1F) => q(^_), # UNIT SEPARATOR + ); + # + # Generated out of lib/unicore/EastAsianWidth.txt + # will it work ? 
+ # + our $IsFullWidth = + qr/^[ + \x{1100}-\x{1159} + \x{115F}-\x{115F} + \x{2329}-\x{232A} + \x{2E80}-\x{2E99} + \x{2E9B}-\x{2EF3} + \x{2F00}-\x{2FD5} + \x{2FF0}-\x{2FFB} + \x{3000}-\x{303E} + \x{3041}-\x{3096} + \x{3099}-\x{30FF} + \x{3105}-\x{312C} + \x{3131}-\x{318E} + \x{3190}-\x{31B7} + \x{31F0}-\x{321C} + \x{3220}-\x{3243} + \x{3251}-\x{327B} + \x{327F}-\x{32CB} + \x{32D0}-\x{32FE} + \x{3300}-\x{3376} + \x{337B}-\x{33DD} + \x{3400}-\x{4DB5} + \x{4E00}-\x{9FA5} + \x{33E0}-\x{33FE} + \x{A000}-\x{A48C} + \x{AC00}-\x{D7A3} + \x{A490}-\x{A4C6} + \x{F900}-\x{FA2D} + \x{FA30}-\x{FA6A} + \x{FE30}-\x{FE46} + \x{FE49}-\x{FE52} + \x{FE54}-\x{FE66} + \x{FE68}-\x{FE6B} + \x{FF01}-\x{FF60} + \x{FFE0}-\x{FFE6} + \x{20000}-\x{2A6D6} + ]$/xo; +} + +__END__ diff --git a/encengine.c b/encengine.c new file mode 100644 index 0000000..67613a8 --- /dev/null +++ b/encengine.c @@ -0,0 +1,158 @@ +/* +Data structures for encoding transformations. + +Perl works internally in either a native 'byte' encoding or +in UTF-8 encoded Unicode. We have no immediate need for a "wchar_t" +representation. When we do we can use utf8_to_uv(). + +Most character encodings are either simple byte mappings or +variable length multi-byte encodings. UTF-8 can be viewed as a +rather extreme case of the latter. + +So to solve an important part of perl's encode needs we need to solve the +"multi-byte -> multi-byte" case. The simple byte forms are then just degenerate +case. (Where one of multi-bytes will usually be UTF-8.) + +The other type of encoding is a shift encoding where a prefix sequence +determines what subsequent bytes mean. Such encodings have state. + +We also need to handle case where a character in one encoding has to be +represented as multiple characters in the other. e.g. letter+diacritic. 
+ +The process can be considered as pseudo perl: + +my $dst = ''; +while (length($src)) + { + my $size = $count($src); + my $in_seq = substr($src,0,$size,''); + my $out_seq = $s2d_hash{$in_seq}; + if (defined $out_seq) + { + $dst .= $out_seq; + } + else + { + # an error condition + } + } +return $dst; + +That has the following components: + &src_count - a "rule" for how many bytes make up the next character in the + source. + %s2d_hash - a mapping from input sequences to output sequences + +The problem with that scheme is that it does not allow the output +character repertoire to affect the characters considered from the +input. + +So we use a "trie" representation which can also be considered +a state machine: + +my $dst = ''; +my $seq = \@s2d_seq; +my $next = \@s2d_next; +while (length($src)) + { + my $byte = $substr($src,0,1,''); + my $out_seq = $seq->[$byte]; + if (defined $out_seq) + { + $dst .= $out_seq; + } + else + { + # an error condition + } + ($next,$seq) = @$next->[$byte] if $next; + } +return $dst; + +There is now a pair of data structures to represent everything. +It is valid for output sequence at a particular point to +be defined but zero length, that just means "don't know yet". +For the single byte case there is no 'next' so new tables will be the same as +the original tables. For a multi-byte case a prefix byte will flip to the tables +for the next page (adding nothing to the output), then the tables for the page +will provide the actual output and set tables back to original base page. + +This scheme can also handle shift encodings. 
+ +A slight enhancement to the scheme also allows for look-ahead - if +we add a flag to re-add the removed byte to the source we could handle + a" -> U+00E4 (LATIN SMALL LETTER A WITH DIAERESIS) + ab -> a (and take b back please) + +*/ + +#define PERL_NO_GET_CONTEXT +#include +#include +#include "encode.h" + +int +do_encode(const encpage_t * enc, const U8 * src, STRLEN * slen, U8 * dst, + STRLEN dlen, STRLEN * dout, int approx, const U8 *term, STRLEN tlen) +{ + const U8 *s = src; + const U8 *send = s + *slen; + const U8 *last = s; + U8 *d = dst; + U8 *dend = d + dlen, *dlast = d; + int code = 0; + while (s < send) { + const encpage_t *e = enc; + U8 byte = *s; + while (byte > e->max) + e++; + if (byte >= e->min && e->slen && (approx || !(e->slen & 0x80))) { + const U8 *cend = s + (e->slen & 0x7f); + if (cend <= send) { + STRLEN n; + if ((n = e->dlen)) { + const U8 *out = e->seq + n * (byte - e->min); + U8 *oend = d + n; + if (dst) { + if (oend <= dend) { + while (d < oend) + *d++ = *out++; + } + else { + /* Out of space */ + code = ENCODE_NOSPACE; + break; + } + } + else + d = oend; + } + enc = e->next; + s++; + if (s == cend) { + if (approx && (e->slen & 0x80)) + code = ENCODE_FALLBACK; + last = s; + if (term && (STRLEN)(d-dlast) == tlen && memEQ(dlast, term, tlen)) { + code = ENCODE_FOUND_TERM; + break; + } + dlast = d; + } + } + else { + /* partial source character */ + code = ENCODE_PARTIAL; + break; + } + } + else { + /* Cannot represent */ + code = ENCODE_NOREP; + break; + } + } + *slen = last - src; + *dout = d - dst; + return code; +} diff --git a/encoding.pm b/encoding.pm new file mode 100644 index 0000000..c3f324d --- /dev/null +++ b/encoding.pm @@ -0,0 +1,727 @@ +# $Id: encoding.pm,v 2.22 2018/02/11 05:32:03 dankogai Exp $ +package encoding; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.22 $ =~ /(\d+)/g; + +use Encode; +use strict; +use warnings; +use Config; + +use constant { + DEBUG => !!$ENV{PERL_ENCODE_DEBUG}, + HAS_PERLIO => eval { require 
PerlIO::encoding; PerlIO::encoding->VERSION(0.02) }, + PERL_5_21_7 => $^V && $^V ge v5.21.7, # lexically scoped +}; + +sub _exception { + my $name = shift; + $] > 5.008 and return 0; # 5.8.1 or higher then no + my %utfs = map { $_ => 1 } + qw(utf8 UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE + UTF-32 UTF-32BE UTF-32LE); + $utfs{$name} or return 0; # UTFs or no + require Config; + Config->import(); + our %Config; + return $Config{perl_patchlevel} ? 0 : 1 # maintperl then no +} + +sub in_locale { $^H & ( $locale::hint_bits || 0 ) } + +sub _get_locale_encoding { + my $locale_encoding; + + if ($^O eq 'MSWin32') { + my @tries = ( + # First try to get the OutputCP. This will work only if we + # are attached to a console + 'Win32.pm' => 'Win32::GetConsoleOutputCP', + 'Win32/Console.pm' => 'Win32::Console::OutputCP', + # If above failed, this means that we are a GUI app + # Let's assume that the ANSI codepage is what matters + 'Win32.pm' => 'Win32::GetACP', + ); + while (@tries) { + my $cp = eval { + require $tries[0]; + no strict 'refs'; + &{$tries[1]}() + }; + if ($cp) { + if ($cp == 65001) { # Code page for UTF-8 + $locale_encoding = 'UTF-8'; + } else { + $locale_encoding = 'cp' . $cp; + } + return $locale_encoding; + } + splice(@tries, 0, 2) + } + } + + # I18N::Langinfo isn't available everywhere + $locale_encoding = eval { + require I18N::Langinfo; + find_encoding( + I18N::Langinfo::langinfo( I18N::Langinfo::CODESET() ) + )->name + }; + return $locale_encoding if defined $locale_encoding; + + eval { + require POSIX; + # Get the current locale + # Remember that MSVCRT impl is quite different from Unixes + my $locale = POSIX::setlocale(POSIX::LC_CTYPE()); + if ( $locale =~ /^([^.]+)\.([^.@]+)(?:@.*)?$/ ) { + my $country_language; + ( $country_language, $locale_encoding ) = ( $1, $2 ); + + # Could do more heuristics based on the country and language + # since we have Locale::Country and Locale::Language available. 
+ # TODO: get a database of Language -> Encoding mappings + # (the Estonian database at http://www.eki.ee/letter/ + # would be excellent!) --jhi + if (lc($locale_encoding) eq 'euc') { + if ( $country_language =~ /^ja_JP|japan(?:ese)?$/i ) { + $locale_encoding = 'euc-jp'; + } + elsif ( $country_language =~ /^ko_KR|korean?$/i ) { + $locale_encoding = 'euc-kr'; + } + elsif ( $country_language =~ /^zh_CN|chin(?:a|ese)$/i ) { + $locale_encoding = 'euc-cn'; + } + elsif ( $country_language =~ /^zh_TW|taiwan(?:ese)?$/i ) { + $locale_encoding = 'euc-tw'; + } + else { + require Carp; + Carp::croak( + "encoding: Locale encoding '$locale_encoding' too ambiguous" + ); + } + } + } + }; + + return $locale_encoding; +} + +sub import { + + if ( ord("A") == 193 ) { + require Carp; + Carp::croak("encoding: pragma does not support EBCDIC platforms"); + } + + my $deprecate = + ($] >= 5.017 and !$Config{usecperl}) + ? "Use of the encoding pragma is deprecated" : 0; + + my $class = shift; + my $name = shift; + if (!$name){ + require Carp; + Carp::croak("encoding: no encoding specified."); + } + if ( $name eq ':_get_locale_encoding' ) { # used by lib/open.pm + my $caller = caller(); + { + no strict 'refs'; + *{"${caller}::_get_locale_encoding"} = \&_get_locale_encoding; + } + return; + } + $name = _get_locale_encoding() if $name eq ':locale'; + BEGIN { strict->unimport('hashpairs') if $] >= 5.027 and $^V =~ /c$/; } + my %arg = @_; + $name = $ENV{PERL_ENCODING} unless defined $name; + my $enc = find_encoding($name); + unless ( defined $enc ) { + require Carp; + Carp::croak("encoding: Unknown encoding '$name'"); + } + $name = $enc->name; # canonize + unless ( $arg{Filter} ) { + if ($] >= 5.025003 and !$Config{usecperl}) { + require Carp; + Carp::croak("The encoding pragma is no longer supported. Check cperl"); + } + warnings::warnif("deprecated",$deprecate) if $deprecate; + + DEBUG and warn "_exception($name) = ", _exception($name); + if (! 
_exception($name)) { + if (!PERL_5_21_7) { + ${^ENCODING} = $enc; + } + else { + # Starting with 5.21.7, this pragma uses a shadow variable + # designed explicitly for it, ${^E_NCODING}, to enforce + # lexical scope; instead of ${^ENCODING}. + $^H{'encoding'} = 1; + ${^E_NCODING} = $enc; + } + } + if (! HAS_PERLIO ) { + return 1; + } + } + else { + warnings::warnif("deprecated",$deprecate) if $deprecate; + + defined( ${^ENCODING} ) and undef ${^ENCODING}; + undef ${^E_NCODING} if PERL_5_21_7; + + # implicitly 'use utf8' + require utf8; # to fetch $utf8::hint_bits; + $^H |= $utf8::hint_bits; + + require Filter::Util::Call; + Filter::Util::Call->import; + filter_add( + sub { + my $status = filter_read(); + if ( $status > 0 ) { + $_ = $enc->decode( $_, 1 ); + DEBUG and warn $_; + } + $status; + } + ); + } + defined ${^UNICODE} and ${^UNICODE} != 0 and return 1; + for my $h (qw(STDIN STDOUT)) { + if ( $arg{$h} ) { + unless ( defined find_encoding( $arg{$h} ) ) { + require Carp; + Carp::croak( + "encoding: Unknown encoding for $h, '$arg{$h}'"); + } + binmode( $h, ":raw :encoding($arg{$h})" ); + } + else { + unless ( exists $arg{$h} ) { + no warnings 'uninitialized'; + binmode( $h, ":raw :encoding($name)" ); + } + } + } + return 1; # I doubt if we need it, though +} + +sub unimport { + no warnings; + undef ${^ENCODING}; + undef ${^E_NCODING} if PERL_5_21_7; + if (HAS_PERLIO) { + binmode( STDIN, ":raw" ); + binmode( STDOUT, ":raw" ); + } + else { + binmode(STDIN); + binmode(STDOUT); + } + if ( $INC{"Filter/Util/Call.pm"} ) { + eval { filter_del() }; + } +} + +1; +__END__ + +=pod + +=head1 NAME + +encoding - allows you to write your script in non-ASCII and non-UTF-8 + +=head1 WARNING + +This module has been deprecated since perl v5.18. See L and +L. + +=head1 SYNOPSIS + + use encoding "greek"; # Perl like Greek to you? + use encoding "euc-jp"; # Jperl! + + # or you can even do this if your shell supports your native encoding + + perl -Mencoding=latin2 -e'...' 
# Feeling centrally European? + perl -Mencoding=euc-kr -e'...' # Or Korean? + + # more control + + # A simple euc-cn => utf-8 converter + use encoding "euc-cn", STDOUT => "utf8"; while(<>){print}; + + # "no encoding;" supported + no encoding; + + # an alternate way, Filter + use encoding "euc-jp", Filter=>1; + # now you can use kanji identifiers -- in euc-jp! + + # encode based on the current locale - specialized purposes only; + # fraught with danger!! + use encoding ':locale'; + +=head1 DESCRIPTION + +This pragma is used to enable a Perl script to be written in encodings that +aren't strictly ASCII nor UTF-8. It translates all or portions of the Perl +program script from a given encoding into UTF-8, and changes the PerlIO layers +of C<STDIN> and C<STDOUT> to the encoding specified. + +This pragma dates from the days when UTF-8-enabled editors were uncommon. But +that was long ago, and the need for it is greatly diminished. That, coupled +with the fact that it doesn't work with threads, along with other problems, +(see L</BUGS>) have led to its being deprecated. It is planned to remove this +pragma in a future Perl version. New code should be written in UTF-8, and the +C<use utf8> pragma used instead (see L<perluniintro> and L<utf8> for details). +Old code should be converted to UTF-8, via something like the recipe in the +L</SYNOPSIS> (though this simple approach may require manual adjustments +afterwards). + +If UTF-8 is not an option, it is recommended that one use a simple source +filter, such as that provided by L<Filter::Encoding> on CPAN or this +pragma's own C<Filter> option (see below). + +The only legitimate use of this pragma is almost certainly just one per file, +near the top, with file scope, as the file is likely going to only be written +in one encoding. Further restrictions apply in Perls before v5.22 (see +L</Prior to Perl v5.22>). + +There are two basic modes of operation (plus turning it off): + +=over 4 + +=item C<use encoding ['I<ENCNAME>'] ;> + +Please note: This mode of operation is no longer supported as of Perl +v5.26. + +This is the normal operation.
It translates various literals encountered in +the Perl source file from the encoding I<ENCNAME> into UTF-8, and similarly +converts character code points. This is used when the script is a combination +of ASCII (for the variable names and punctuation, I<etc>), but the literal +data is in the specified encoding. + +I<ENCNAME> is optional. If omitted, the encoding specified in the environment +variable L<C<PERL_ENCODING>|perlrun/PERL_ENCODING> is used. If this isn't +set, or the resolved-to encoding is not known to C<L<Encode>>, the error +C<Unknown encoding 'I<ENCNAME>'> will be thrown. + +Starting in Perl v5.8.6 (C<Encode> version 2.0.1), I<ENCNAME> may be the +name C<:locale>. This is for very specialized applications, and is documented +in L</The C<:locale> sub-pragma> below. + +The literals that are converted are C<q//, qq//, qr//, qw///, qx//>, and +starting in v5.8.1, C<tr///>. Operations that do conversions include C<chr>, +C<ord>, C<utf8::upgrade> (but not C<utf8::downgrade>), and C<chomp>. + +Also starting in v5.8.1, the C<DATA> pseudo-filehandle is translated from the +encoding into UTF-8. + +For example, you can write code in EUC-JP as follows: + + my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji + #<-char-><-char-> # 4 octets + s/\bCamel\b/$Rakuda/; + +And with C<use encoding "euc-jp"> in effect, it is the same thing as +that code in UTF-8: + + my $Rakuda = "\x{99F1}\x{99DD}"; # two Unicode Characters + s/\bCamel\b/$Rakuda/; + +See L</EXAMPLE> below for a more complete example. + +Unless C<${^UNICODE}> (available starting in v5.8.2) exists and is non-zero, the +PerlIO layers of C<STDIN> and C<STDOUT> are set to "C<:encoding(I<ENCNAME>)>". +Therefore, + + use encoding "euc-jp"; + my $message = "Camel is the symbol of perl.\n"; + my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji + $message =~ s/\bCamel\b/$Rakuda/; + print $message; + +will print + + "\xF1\xD1\xF1\xCC is the symbol of perl.\n" + +not + + "\x{99F1}\x{99DD} is the symbol of perl.\n" + +You can override this by giving extra arguments; see below. + +Note that C<STDERR> WILL NOT be changed, regardless. + +Also note that non-STD file handles remain unaffected. Use C<use +open> or C<binmode> to change the layers of those.
+ +=item C<use encoding I<ENCNAME>, Filter=E<gt>1;> + +This operates as above, but the C<Filter> argument with a non-zero +value causes the entire script, and not just literals, to be translated from +the encoding into UTF-8. This allows identifiers in the source to be in that +encoding as well. (Problems may occur if the encoding is not a superset of +ASCII; imagine all your semi-colons being translated into something +different.) One can use this form to make + + ${"\x{4eba}"}++ + +work. (This is equivalent to C<$I<human>++>, where I<human> is a single Han +ideograph). + +This effectively means that your source code behaves as if it were written in +UTF-8 with C<'use utf8'> in effect. So even if your editor only supports +Shift_JIS, for example, you can still try examples in Chapter 15 of +C<Programming Perl, 3rd Ed.>. + +This option is significantly slower than the other one. + +=item C<no encoding;> + +Unsets the script encoding. The layers of C<STDIN>, C<STDOUT> are +reset to "C<:raw>" (the default unprocessed raw stream of bytes). + +=back + +=head1 OPTIONS + +=head2 Setting C<STDIN> and/or C<STDOUT> individually + +The encodings of C<STDIN> and C<STDOUT> are individually settable by parameters to +the pragma: + + use encoding 'euc-tw', STDIN => 'greek' ...; + +In this case, you cannot omit the first I<ENCNAME>. C<< STDIN => undef >> +turns the I/O transcoding completely off for that filehandle. + +When C<${^UNICODE}> (available starting in v5.8.2) exists and is non-zero, +these options will be completely ignored. See L<perlvar/C<${^UNICODE}>> and +L<"C<-C>" in perlrun|perlrun/-C [numberE<sol>list]> for details. + +=head2 The C<:locale> sub-pragma + +Starting in v5.8.6, the encoding name may be C<:locale>. This means that the +encoding is taken from the current locale, and not hard-coded by the pragma. +Since a script really can only be encoded in exactly one encoding, this option +is dangerous. It makes sense only if the script itself is written in ASCII, +and all the possible locales that will be in use when the script is executed +are supersets of ASCII.
That means that the script itself doesn't get +changed, but the I/O handles have the specified encoding added, and the +operations like C<chr> and C<ord> use that encoding. + +The logic of finding which locale C<:locale> uses is as follows: + +=over 4 + +=item 1. + +If the platform supports the C<langinfo(CODESET)> interface, the codeset +returned is used as the default encoding for the open pragma. + +=item 2. + +If 1. didn't work but we are under the locale pragma, the environment +variables C<LC_ALL> and C<LANG> (in that order) are matched for encodings +(the part after "C<.>", if any), and if any found, that is used +as the default encoding for the open pragma. + +=item 3. + +If 1. and 2. didn't work, the environment variables C<LC_ALL> and C<LANG> +(in that order) are matched for anything looking like UTF-8, and if +any found, C<:utf8> is used as the default encoding for the open +pragma. + +=back + +If your locale environment variables (C<LC_ALL>, C<LC_CTYPE>, C<LANG>) +contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching), +the default encoding of your C<STDIN>, C<STDOUT>, and C<STDERR>, and of +B<any subsequent file open>, is UTF-8. + +=head1 CAVEATS + +=head2 SIDE EFFECTS + +=over + +=item * + +If the C<encoding> pragma is in scope then the lengths returned are +calculated from the length of C<$/> in Unicode characters, which is not +always the same as the length of C<$/> in the native encoding. + +=item * + +Without this pragma, if strings operating under byte semantics and strings +with Unicode character data are concatenated, the new string will +be created by decoding the byte strings as I<ISO 8859-1 (Latin-1)>. + +The B<encoding> pragma changes this to use the specified encoding +instead. For example: + + use encoding 'utf8'; + my $string = chr(20000); # a Unicode string + utf8::encode($string); # now it's a UTF-8 encoded byte string + # concatenate with another Unicode string + print length($string . chr(20000)); + +Will print C<2>, because C<$string> is upgraded as UTF-8. Without +C<encoding 'utf8';>, it will print C<4> instead, since C<$string> +is three octets when interpreted as Latin-1.
+ +=back + +=head2 DO NOT MIX MULTIPLE ENCODINGS + +Notice that only literals (string or regular expression) having only +legacy code points are affected: if you mix data like this + + \x{100}\xDF + \xDF\x{100} + +the data is assumed to be in (Latin 1 and) Unicode, not in your native +encoding. In other words, this will match in "greek": + + "\xDF" =~ /\x{3af}/ + +but this will not + + "\xDF\x{100}" =~ /\x{3af}\x{100}/ + +since the C<\xDF> (ISO 8859-7 GREEK SMALL LETTER IOTA WITH TONOS) on +the left will B<not> be upgraded to C<\x{3af}> (Unicode GREEK SMALL +LETTER IOTA WITH TONOS) because of the C<\x{100}> on the left. You +should not be mixing your legacy data and Unicode in the same string. + +This pragma also affects encoding of the 0x80..0xFF code point range: +normally characters in that range are left as eight-bit bytes (unless +they are combined with characters with code points 0x100 or larger, +in which case all characters need to become UTF-8 encoded), but if +the C<encoding> pragma is present, even the 0x80..0xFF range always +gets UTF-8 encoded. + +After all, the best thing about this pragma is that you don't have to +resort to \x{....} just to spell your name in a native encoding. +So feel free to put your strings in your encoding in quotes and +regexes. + +=head2 Prior to Perl v5.22 + +The pragma was a per script, not a per block lexical. Only the last +C<use encoding> or C<no encoding> mattered, and it affected +B<the whole script>. However, the C<no encoding> pragma was supported and +C<use encoding> could appear as many times as you want in a given script +(though only the last was effective). + +Since the scope wasn't lexical, other modules' use of C<chr>, C<ord>, I<etc.> +were affected. This leads to spooky, incorrect action at a distance that is +hard to debug. + +This means you would have to be very careful of the load order: + + # called module + package Module_IN_BAR; + use encoding "bar"; + # stuff in "bar" encoding here + 1; + + # caller script + use encoding "foo"; + use Module_IN_BAR; + # surprise! use encoding "bar" is in effect.
+ +The best way to avoid this oddity is to use this pragma RIGHT AFTER +other modules are loaded. i.e. + + use Module_IN_BAR; + use encoding "foo"; + +=head2 Prior to Encode version 1.87 + +=over + +=item * + +C<STDIN> and C<STDOUT> were not set under the filter option. +And C<< STDIN=>I<ENC1> >> and C<< STDOUT=>I<ENC2> >> didn't work like +non-filter version. + +=item * + +C<use utf8> wasn't implicitly declared so you have to C<use utf8> to do + + ${"\x{4eba}"}++ + +=back + +=head2 Prior to Perl v5.8.1 + +=over + +=item "NON-EUC" doublebyte encodings + +Because perl needs to parse the script before applying this pragma, such +encodings as Shift_JIS and Big-5 that may contain C<'\'> (BACKSLASH; +C<\x5c>) in the second byte fail because the second byte may +accidentally escape the quoting character that follows. + +=item C<tr///> + +The B<encoding> pragma works by decoding string literals in +C<q//, qq//, qr//, qw///, qx//> and so forth. In perl v5.8.0, this +does not apply to C<tr///>. Therefore, + + use encoding 'euc-jp'; + #.... + $kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/; + # -------- -------- -------- -------- + +Does not work as + + $kana =~ tr/\x{3041}-\x{3093}/\x{30a1}-\x{30f3}/; + +=over + +=item Legend of characters above + + utf8 euc-jp charnames::viacode() + ----------------------------------------- + \x{3041} \xA4\xA1 HIRAGANA LETTER SMALL A + \x{3093} \xA4\xF3 HIRAGANA LETTER N + \x{30a1} \xA5\xA1 KATAKANA LETTER SMALL A + \x{30f3} \xA5\xF3 KATAKANA LETTER N + +=back + +This counterintuitive behavior has been fixed in perl v5.8.1. + +In perl v5.8.0, you can work around this as follows; + + use encoding 'euc-jp'; + # .... + eval qq{ \$kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/ }; + +Note the C<tr//> expression is surrounded by C<qq{}>. The idea behind +this is the same as the classic idiom that makes C<tr///> 'interpolate': + + tr/$from/$to/; # wrong! + eval qq{ tr/$from/$to/ }; # workaround. + +=back + +=head1 EXAMPLE - Greekperl + + use encoding "iso 8859-7"; + + # \xDF in ISO 8859-7 (Greek) is \x{3af} in Unicode.
+ + $a = "\xDF"; + $b = "\x{100}"; + + printf "%#x\n", ord($a); # will print 0x3af, not 0xdf + + $c = $a . $b; + + # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}". + + # chr() is affected, and ... + + print "mega\n" if ord(chr(0xdf)) == 0x3af; + + # ... ord() is affected by the encoding pragma ... + + print "tera\n" if ord(pack("C", 0xdf)) == 0x3af; + + # ... as are eq and cmp ... + + print "peta\n" if "\x{3af}" eq pack("C", 0xdf); + print "exa\n" if "\x{3af}" cmp pack("C", 0xdf) == 0; + + # ... but pack/unpack C are not affected, in case you still + # want to go back to your native encoding + + print "zetta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf; + +=head1 BUGS + +=over + +=item Thread safety + +C<use encoding ...> is not thread-safe (i.e., do not use in threaded +applications). + +=item Can't be used by more than one module in a single program. + +Only one encoding is allowed. If you combine modules in a program that have +different encodings, only one will be actually used. + +=item Other modules using C<STDIN> and C<STDOUT> get the encoded stream + +They may be expecting something completely different. + +=item literals in regex that are longer than 127 bytes + +For native multibyte encodings (either fixed or variable length), +the current implementation of the regular expressions may introduce +recoding errors for regular expression literals longer than 127 bytes. + +=item EBCDIC + +The encoding pragma is not supported on EBCDIC platforms. + +=item C<format> + +This pragma doesn't work well with C<format> because PerlIO does not +get along very well with it. When C<format> contains non-ASCII +characters it prints funny or gets "wide character warnings". +To understand it, try the code below. + + # Save this one in utf8 + # replace *non-ascii* with a non-ascii string + my $camel; + format STDOUT = + *non-ascii*@>>>>>>> + $camel + . + $camel = "*non-ascii*"; + binmode(STDOUT=>':encoding(utf8)'); # bang!
+ write; # funny + print $camel, "\n"; # fine + +Without binmode this happens to work but without binmode, print() +fails instead of write(). + +At any rate, the very use of C is questionable when it comes to +unicode characters since you have to consider such things as character +width (i.e. double-width for ideographs) and directions (i.e. BIDI for +Arabic and Hebrew). + +=item See also L + +=back + +=head1 HISTORY + +This pragma first appeared in Perl v5.8.0. It has been enhanced in later +releases as specified above. + +=head1 SEE ALSO + +L, L, L, L, + +Ch. 15 of C +by Larry Wall, Tom Christiansen, Jon Orwant; +O'Reilly & Associates; ISBN 0-596-00027-8 + +=cut diff --git a/lib/Encode/Alias.pm b/lib/Encode/Alias.pm new file mode 100644 index 0000000..dbfa01b --- /dev/null +++ b/lib/Encode/Alias.pm @@ -0,0 +1,395 @@ +package Encode::Alias; +use strict; +use warnings; +our $VERSION = do { my @r = ( q$Revision: 2.24 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; + +use Exporter 'import'; + +# Public, encouraged API is exported by default + +our @EXPORT = + qw ( + define_alias + find_alias +); + +our @Alias; # ordered matching list +our %Alias; # cached known aliases + +sub find_alias { + my $class = shift; + my $find = shift; + unless ( exists $Alias{$find} ) { + $Alias{$find} = undef; # Recursion guard + for ( my $i = 0 ; $i < @Alias ; $i += 2 ) { + my $alias = $Alias[$i]; + my $val = $Alias[ $i + 1 ]; + my $new; + if ( ref($alias) eq 'Regexp' && $find =~ $alias ) { + DEBUG and warn "eval $val"; + $new = eval $val; + DEBUG and $@ and warn "$val, $@"; + } + elsif ( ref($alias) eq 'CODE' ) { + DEBUG and warn "$alias", "->", "($find)"; + $new = $alias->($find); + } + elsif ( lc($find) eq lc($alias) ) { + $new = $val; + } + if ( defined($new) ) { + next if $new eq $find; # avoid (direct) recursion on bugs + DEBUG and warn "$alias, $new"; + my $enc = + ( ref($new) ) ? 
$new : Encode::find_encoding($new); + if ($enc) { + $Alias{$find} = $enc; + last; + } + } + } + + # case insensitive search when canonical is not in all lowercase + # RT ticket #7835 + unless ( $Alias{$find} ) { + my $lcfind = lc($find); + for my $name ( keys %Encode::Encoding, keys %Encode::ExtModule ) + { + $lcfind eq lc($name) or next; + $Alias{$find} = Encode::find_encoding($name); + DEBUG and warn "$find => $name"; + } + } + } + if (DEBUG) { + my $name; + if ( my $e = $Alias{$find} ) { + $name = $e->name; + } + else { + $name = ""; + } + warn "find_alias($class, $find)->name = $name"; + } + return $Alias{$find}; +} + +sub define_alias { + while (@_) { + my $alias = shift; + my $name = shift; + unshift( @Alias, $alias => $name ) # newer one has precedence + if defined $alias; + if ( ref($alias) ) { + + # clear %Alias cache to allow overrides + my @a = keys %Alias; + for my $k (@a) { + if ( ref($alias) eq 'Regexp' && $k =~ $alias ) { + DEBUG and warn "delete \$Alias\{$k\}"; + delete $Alias{$k}; + } + elsif ( ref($alias) eq 'CODE' && $alias->($k) ) { + DEBUG and warn "delete \$Alias\{$k\}"; + delete $Alias{$k}; + } + } + } + elsif (defined $alias) { + DEBUG and warn "delete \$Alias\{$alias\}"; + delete $Alias{$alias}; + } + elsif (DEBUG) { + require Carp; + Carp::croak("undef \$alias"); + } + } +} + +# HACK: Encode must be used after define_alias is declarated as Encode calls define_alias +use Encode (); + +# Allow latin-1 style names as well +# 0 1 2 3 4 5 6 7 8 9 10 +our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 ); + +# Allow winlatin1 style names as well +our %Winlatin2cp = ( + 'latin1' => 1252, + 'latin2' => 1250, + 'cyrillic' => 1251, + 'greek' => 1253, + 'turkish' => 1254, + 'hebrew' => 1255, + 'arabic' => 1256, + 'baltic' => 1257, + 'vietnamese' => 1258, +); + +init_aliases(); + +sub undef_aliases { + @Alias = (); + %Alias = (); +} + +sub init_aliases { + undef_aliases(); + + # Try all-lower-case version should all else fails + define_alias( 
qr/^(.*)$/ => '"\L$1"' ); + + # UTF/UCS stuff + define_alias( qr/^(unicode-1-1-)?UTF-?7$/i => '"UTF-7"' ); + define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' ); + define_alias( + qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"', + qr/^UCS-?4-?(BE|LE|)?$/i => 'uc("UTF-32$1")', + qr/^iso-10646-1$/i => '"UCS-2BE"' + ); + define_alias( + qr/^UTF-?(16|32)-?BE$/i => '"UTF-$1BE"', + qr/^UTF-?(16|32)-?LE$/i => '"UTF-$1LE"', + qr/^UTF-?(16|32)$/i => '"UTF-$1"', + ); + + # ASCII + define_alias( qr/^(?:US-?)ascii$/i => '"ascii"' ); + define_alias( 'C' => 'ascii' ); + define_alias( qr/\b(?:ISO[-_]?)?646(?:[-_]?US)?$/i => '"ascii"' ); + + # Allow variants of iso-8859-1 etc. + define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' ); + + # At least HP-UX has these. + define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' ); + + # More HP stuff. + define_alias( + qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i => + '"${1}8"' ); + + # The Official name of ASCII. + define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' ); + + # This is a font issue, not an encoding issue. + # (The currency symbol of the Latin 1 upper half + # has been redefined as the euro symbol.) + define_alias( qr/^(.+)\@euro$/i => '"$1"' ); + + define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i => +'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef' + ); + + define_alias( + qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish| + hebrew|arabic|baltic|vietnamese)$/ix => + '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' + ); + + # Common names for non-latin preferred MIME names + define_alias( + 'ascii' => 'US-ascii', + 'cyrillic' => 'iso-8859-5', + 'arabic' => 'iso-8859-6', + 'greek' => 'iso-8859-7', + 'hebrew' => 'iso-8859-8', + 'thai' => 'iso-8859-11', + ); + # RT #20781 + define_alias(qr/\btis-?620\b/i => '"iso-8859-11"'); + + # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN. + # And Microsoft has their own naming (again, surprisingly). 
+ # And windows-* is registered in IANA! + define_alias( + qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"' ); + + # Sometimes seen with a leading zero. + # define_alias( qr/\bcp037\b/i => '"cp37"'); + + # Mac Mappings + # predefined in *.ucm; unneeded + # define_alias( qr/\bmacIcelandic$/i => '"macIceland"'); + define_alias( qr/^(?:x[_-])?mac[_-](.*)$/i => '"mac$1"' ); + # http://rt.cpan.org/Ticket/Display.html?id=36326 + define_alias( qr/^macintosh$/i => '"MacRoman"' ); + # https://rt.cpan.org/Ticket/Display.html?id=78125 + define_alias( qr/^macce$/i => '"MacCentralEurRoman"' ); + # Ououououou. gone. They are different! + # define_alias( qr/\bmacRomanian$/i => '"macRumanian"'); + + # Standardize on the dashed versions. + define_alias( qr/\bkoi8[\s\-_]*([ru])$/i => '"koi8-$1"' ); + + unless ($Encode::ON_EBCDIC) { + + # for Encode::CN + define_alias( qr/\beuc.*cn$/i => '"euc-cn"' ); + define_alias( qr/\bcn.*euc$/i => '"euc-cn"' ); + + # define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' ) + # CP936 doesn't have vendor-addon for GBK, so they're identical. + define_alias( qr/^gbk$/i => '"cp936"' ); + + # This fixes gb2312 vs. euc-cn confusion, practically + define_alias( qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"' ); + + # for Encode::JP + define_alias( qr/\bjis$/i => '"7bit-jis"' ); + define_alias( qr/\beuc.*jp$/i => '"euc-jp"' ); + define_alias( qr/\bjp.*euc$/i => '"euc-jp"' ); + define_alias( qr/\bujis$/i => '"euc-jp"' ); + define_alias( qr/\bshift.*jis$/i => '"shiftjis"' ); + define_alias( qr/\bsjis$/i => '"shiftjis"' ); + define_alias( qr/\bwindows-31j$/i => '"cp932"' ); + + # for Encode::KR + define_alias( qr/\beuc.*kr$/i => '"euc-kr"' ); + define_alias( qr/\bkr.*euc$/i => '"euc-kr"' ); + + # This fixes ksc5601 vs. 
euc-kr confusion, practically + define_alias( qr/(?:x-)?uhc$/i => '"cp949"' ); + define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' ); + define_alias( qr/\bks_c_5601-1987$/i => '"cp949"' ); + + # for Encode::TW + define_alias( qr/\bbig-?5$/i => '"big5-eten"' ); + define_alias( qr/\bbig5-?et(?:en)?$/i => '"big5-eten"' ); + define_alias( qr/\btca[-_]?big5$/i => '"big5-eten"' ); + define_alias( qr/\bbig5-?hk(?:scs)?$/i => '"big5-hkscs"' ); + define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' ); + } + + # https://github.com/dankogai/p5-encode/issues/37 + define_alias(qr/cp65000/i => '"UTF-7"'); + define_alias(qr/cp65001/i => '"utf-8-strict"'); + + # utf8 is blessed :) + define_alias( qr/\bUTF-8$/i => '"utf-8-strict"' ); + + # At last, Map white space and _ to '-' + define_alias( qr/^([^\s_]+)[\s_]+([^\s_]*)$/i => '"$1-$2"' ); +} + +1; +__END__ + +# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8 +# TODO: HP-UX '15' encodings japanese15 korean15 roi15 +# TODO: Cyrillic encoding ISO-IR-111 (useful?) +# TODO: Armenian encoding ARMSCII-8 +# TODO: Hebrew encoding ISO-8859-8-1 +# TODO: Thai encoding TCVN +# TODO: Vietnamese encodings VPS +# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese +# ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic +# Farsi Georgian Gujarati Gurmukhi Hebrew Japanese +# Kannada Khmer Korean Laotian Malayalam Mongolian +# Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese + +=head1 NAME + +Encode::Alias - alias definitions to encodings + +=head1 SYNOPSIS + + use Encode; + use Encode::Alias; + define_alias( "newName" => ENCODING); + define_alias( qr/.../ => ENCODING); + define_alias( sub { return ENCODING if ...; } ); + +=head1 DESCRIPTION + +Allows newName to be used as an alias for ENCODING. ENCODING may be +either the name of an encoding or an encoding object (as described +in L). 
+ +Currently the first argument to define_alias() can be specified in the +following ways: + +=over 4 + +=item As a simple string. + +=item As a qr// compiled regular expression, e.g.: + + define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' ); + +In this case, if I<ENCODING> is not a reference, it is C<eval>-ed +in order to allow C<$1> etc. to be substituted. The example is one +way to alias names as used in X11 fonts to the MIME names for the +iso-8859-* family. Note the double quotes inside the single quotes. + +(or, you don't have to do this yourself because this example is predefined) + +If you are using a regex here, you have to use the quotes as shown or +it won't work. Also note that regex handling is tricky even for the +experienced. Use this feature with caution. + +=item As a code reference, e.g.: + + define_alias( sub {shift =~ /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } ); + +The same effect as the example above in a different way. The coderef +takes the alias name as an argument and returns a canonical name on +success or undef if not. Note the second argument is ignored if provided. +Use this with even more caution than the regex version. + +=back + +=head3 Changes in code reference aliasing + +As of Encode 1.87, the older form + + define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } ); + +no longer works. + +Encode up to 1.86 internally used "local $_" to implement this older +form. But consider the code below: + + use Encode; + $_ = "eeeee" ; + while (/(e)/g) { + my $utf = decode('aliased-encoding-name', $1); + print "position:",pos,"\n"; + } + +Up to and including Encode 1.86 this fails because of "local $_". + +=head2 Alias overloading + +You can override predefined aliases by simply applying define_alias(). +The new alias is always evaluated first, and when necessary, +define_alias() flushes the internal cache to make the new definition +available.
#
# $Id: CJKConstants.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $
#

# Constant tables -- substitute-character bytes, escape sequences and
# regex fragments -- that callers may import by name (nothing is exported
# by default).
package Encode::CJKConstants;

use strict;
use warnings;
our $RCSID = q$Id: CJKConstants.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $;

# Version string derived from the digits of the RCS Revision keyword.
our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };

use Carp;

require Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw();                        # no default exports
our @EXPORT_OK = qw(%CHARCODE %ESC %RE);   # the three tables defined below
our %EXPORT_TAGS = ( 'all' => [ @EXPORT_OK, @EXPORT ] );

# Regex source (single-quoted, so the backslashes reach the regex engine)
# for three JIS 0208 escape sequences.  The keys look like the years of the
# standard's revisions -- TODO confirm.  Only used to build $RE{JIS_0208}.
my %_0208 = (
    1978 => '\e\$\@',
    1983 => '\e\$B',
    1990 => '\e&\@\e\$B',
);

# Two-byte sequences used for an undefined character in each encoding.
# NOTE(review): the sample glyphs in the original trailing comments were
# destroyed by mis-encoding and are not reproduced here.
our %CHARCODE = (
    UNDEF_EUC => "\xa2\xae",        # undefined-character bytes in EUC
    UNDEF_SJIS => "\x81\xac",       # undefined-character bytes in SJIS
    UNDEF_JIS => "\xa2\xf7",        # -- used in unicode
    UNDEF_UNICODE => "\x20\x20",    # -- used in unicode
);

# Literal escape sequences (double-quoted: "\e" is the ESC character).
our %ESC = (
    GB_2312 => "\e\$A",
    JIS_0208 => "\e\$B",
    JIS_0212 => "\e\$(D",
    KSC_5601 => "\e\$(C",
    ASC => "\e\(B",
    KANA => "\e\(I",
    '2022_KR' => "\e\$)C",
);

# Regex source strings, meant to be interpolated into patterns by callers.
# Entries built as "\e" . '...' carry a literal ESC character followed by
# regex-escaped punctuation.
our %RE = (
    ASCII => '[\x00-\x7f]',
    BIN => '[\x00-\x06\x7f\xff]',
    EUC_0212 => '\x8f[\xa1-\xfe][\xa1-\xfe]',
    EUC_C => '[\xa1-\xfe][\xa1-\xfe]',
    EUC_KANA => '\x8e[\xa1-\xdf]',
    JIS_0208 => "$_0208{1978}|$_0208{1983}|$_0208{1990}",
    JIS_0212 => "\e" . '\$\(D',
    ISO_ASC => "\e" . '\([BJ]',
    JIS_KANA => "\e" . '\(I',
    '2022_KR' => "\e" . '\$\)C',
    SJIS_C => '[\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]',
    SJIS_KANA => '[\xa1-\xdf]',
    UTF8 => '[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]'
);

1;

=head1 NAME

Encode::CJKConstants.pm -- Internally used by Encode::??::ISO_2022_*

=cut
package Encode::CN::HZ;

use strict;
use warnings;
use utf8 ();
use bytes ();    # cat_decode() calls bytes::length(); load it, import nothing

use vars qw($VERSION);
$VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };

use Encode qw(:fallbacks);

use parent qw(Encode::Encoding);
__PACKAGE__->Define('hz');

# HZ is a combination of ASCII and escaped GB, so we implement it
# with the GB2312(raw) encoding here. Cf. RFCs 1842 & 1843.

# not ported for EBCDIC.  Which should be used, "~" or "\x7E"?

# HZ carries shift state ("~{" ... "~}"), so PerlIO must feed whole lines.
sub needs_lines { 1 }

# decode($obj, $octets [, $chk])
#
# Converts HZ octets to a character string.  Returns undef for undef
# input.  Decoding stops silently at the first invalid escape or byte
# pair; whatever was decoded up to that point is returned.  When $chk is
# true the caller's $octets is emptied in place.
sub decode ($$;$) {
    my ( $obj, $str, $chk ) = @_;
    return undef unless defined $str;

    my $GB       = Encode::find_encoding('gb2312-raw');
    my $ret      = substr( $str, 0, 0 );    # to propagate taintedness
    my $in_ascii = 1;                       # default mode is ASCII.

    while ( length $str ) {
        if ($in_ascii) {                    # ASCII mode
            if ( $str =~ s/^([\x00-\x7D\x7F]+)// ) {    # no '~' => ASCII
                $ret .= $1;

                # EBCDIC should need ascii2native, but not ported.
            }
            elsif ( $str =~ s/^\x7E\x7E// ) {    # escaped tilde
                $ret .= '~';
            }
            elsif ( $str =~ s/^\x7E\cJ// ) {     # '\cJ' == LF in ASCII
                1;                               # no-op: line continuation
            }
            elsif ( $str =~ s/^\x7E\x7B// ) {    # '~{'
                $in_ascii = 0;                   # to GB
            }
            else {    # encounters an invalid escape, \x80 or greater
                last;
            }
        }
        else {        # GB mode; the byte ranges are as in RFC 1843.
            no warnings 'uninitialized';
            if ( $str =~ s/^((?:[\x21-\x77][\x21-\x7E])+)// ) {
                my $prefix = $1;
                $ret .= $GB->decode( $prefix, $chk );
            }
            elsif ( $str =~ s/^\x7E\x7D// ) {    # '~}'
                $in_ascii = 1;
            }
            else {                               # invalid
                last;
            }
        }
    }
    $_[1] = '' if $chk;    # needs_lines guarantees no partial character
    return $ret;
}

# cat_decode($obj, $destination, $octets, $offset, $terminator [, $chk])
#
# Decodes $octets starting at $offset, appending the result to
# $destination, until the decoded text produces $terminator.  $offset is
# updated in place to the position reached in $octets.  Returns 1 when
# the terminator was seen and '' otherwise.
sub cat_decode {
    my ( $obj, undef, $src, $pos, $trm, $chk ) = @_;

    # References to the caller's own variables, for the in-place updates.
    my ( $rdst, $rsrc, $rpos ) = \@_[ 1 .. 3 ];

    my $GB       = Encode::find_encoding('gb2312-raw');
    my $ret      = '';
    my $in_ascii = 1;    # default mode is ASCII.

    my $ini_pos = pos($$rsrc);    # saved so the caller's pos() can be restored

    substr( $src, 0, $pos ) = '';

    my $ini_len = bytes::length($src);

    # $trm is the first of the pair '~~', then 2nd tilde is to be removed.
    # XXX: Is better C<$src =~ s/^\x7E// or die if ...>?
    $src =~ s/^\x7E// if $trm eq "\x7E";

    while ( length $src ) {
        my $now;
        if ($in_ascii) {    # ASCII mode
            # One character at a time, so it can be compared with $trm.
            if ( $src =~ s/^([\x00-\x7D\x7F])// ) {    # no '~' => ASCII
                $now = $1;
            }
            elsif ( $src =~ s/^\x7E\x7E// ) {    # escaped tilde
                $now = '~';
            }
            elsif ( $src =~ s/^\x7E\cJ// ) {     # '\cJ' == LF in ASCII
                next;
            }
            elsif ( $src =~ s/^\x7E\x7B// ) {    # '~{'
                $in_ascii = 0;                   # to GB
                next;
            }
            else {    # encounters an invalid escape, \x80 or greater
                last;
            }
        }
        else {        # GB mode; the byte ranges are as in RFC 1843.
            # The trail byte stops at \x7E, exactly as in decode(); this
            # pattern used to admit \x7F, which is outside the RFC range.
            if ( $src =~ s/^((?:[\x21-\x77][\x21-\x7E])+)// ) {
                $now = $GB->decode( $1, $chk );
            }
            elsif ( $src =~ s/^\x7E\x7D// ) {    # '~}'
                $in_ascii = 1;
                next;
            }
            else {                               # invalid
                last;
            }
        }

        next if !defined $now;

        $ret .= $now;

        if ( $now eq $trm ) {
            $$rdst .= $ret;
            $$rpos = $ini_pos + $pos + $ini_len - bytes::length($src);
            pos($$rsrc) = $ini_pos;
            return 1;
        }
    }

    $$rdst .= $ret;
    $$rpos = $ini_pos + $pos + $ini_len - bytes::length($src);
    pos($$rsrc) = $ini_pos;
    return '';    # terminator not found
}

# encode($obj, $string [, $chk])
#
# Converts a character string to HZ octets.  Returns undef for undef
# input.  ASCII runs are copied with '~' doubled; every other character
# is passed to gb2312-raw and wrapped in "~{" ... "~}".  Encoding stops
# when gb2312-raw returns undef.  When $chk is true the caller's $string
# is replaced by the unconverted remainder.
sub encode($$;$) {
    my ( $obj, $str, $chk ) = @_;
    return undef unless defined $str;

    my $GB       = Encode::find_encoding('gb2312-raw');
    my $ret      = substr( $str, 0, 0 );    # to propagate taintedness;
    my $in_ascii = 1;                       # default mode is ASCII.

    no warnings 'utf8';    # $str may be malformed UTF8 at the end of a chunk.

    while ( length $str ) {
        if ( $str =~ s/^([[:ascii:]]+)// ) {
            my $tmp = $1;
            $tmp =~ s/~/~~/g;    # escapes tildes
            if ( !$in_ascii ) {
                $ret .= "\x7E\x7D";    # '~}'
                $in_ascii = 1;
            }
            $ret .= pack 'a*', $tmp;    # remove UTF8 flag.
        }
        elsif ( $str =~ s/(.)// ) {
            my $s   = $1;
            my $tmp = $GB->encode( $s, $chk || 0 );
            last if !defined $tmp;
            if ( length $tmp == 2 ) {    # maybe a valid GB char (XXX)
                if ($in_ascii) {
                    $ret .= "\x7E\x7B";    # '~{'
                    $in_ascii = 0;
                }
                $ret .= $tmp;
            }
            elsif ( length $tmp ) {    # maybe FALLBACK in ASCII (XXX)
                if ( !$in_ascii ) {
                    $ret .= "\x7E\x7D";    # '~}'
                    $in_ascii = 1;
                }
                $ret .= $tmp;
            }
        }
        else {    # if $str is malformed UTF8 *and* if length $str != 0.
            last;
        }
    }
    $_[1] = $str if $chk;

    # The state at the end of the chunk is discarded, even if in GB mode.
    # That results in the combination of GB-OUT and GB-IN, i.e. "~}~{".
    # Perhaps it is harmless, but further investigations may be required...

    if ( !$in_ascii ) {
        $ret .= "\x7E\x7D";    # '~}'
        $in_ascii = 1;
    }
    utf8::encode($ret);    # https://rt.cpan.org/Ticket/Display.html?id=35120
    return $ret;
}

1;
__END__

=head1 NAME

Encode::CN::HZ -- internally used by Encode::CN

=cut
#
# Demand-load module list
#
package Encode::Config;
our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };

use strict;
use warnings;

# Maps a canonical encoding name to the module that provides it.  The
# names are grouped per implementing module; each group is expanded into
# name => module pairs.
our %ExtModule = (

    # Encode::Byte
    # (iso-8859-1 is in Encode.pm itself)
    (
        map { $_ => 'Encode::Byte' } qw(
          iso-8859-2  iso-8859-3  iso-8859-4  iso-8859-5  iso-8859-6
          iso-8859-7  iso-8859-8  iso-8859-9  iso-8859-10 iso-8859-11
          iso-8859-13 iso-8859-14 iso-8859-15 iso-8859-16
          koi8-f koi8-r koi8-u
          viscii
          cp424 cp437 cp737 cp775 cp850 cp852 cp855 cp856 cp857 cp858
          cp860 cp861 cp862 cp863 cp864 cp865 cp866 cp869 cp874 cp1006
          cp1250 cp1251 cp1252 cp1253 cp1254 cp1255 cp1256 cp1257 cp1258
          AdobeStandardEncoding
          MacArabic MacCentralEurRoman MacCroatian MacCyrillic MacFarsi
          MacGreek MacHebrew MacIcelandic MacRoman MacRomanian
          MacRumanian MacSami MacThai MacTurkish MacUkrainian
          nextstep hp-roman8
        )
    ),

    #'gsm0338' => 'Encode::Byte',
    'gsm0338' => 'Encode::GSM0338',

    # Encode::EBCDIC
    (
        map { $_ => 'Encode::EBCDIC' }
          qw(cp37 cp500 cp875 cp1026 cp1047 posix-bc)
    ),

    # Encode::Symbol
    (
        map { $_ => 'Encode::Symbol' } qw(
          dingbats symbol AdobeSymbol AdobeZdingbat MacDingbats MacSymbol
        )
    ),

    # Encode::Unicode
    (
        map { $_ => 'Encode::Unicode' } qw(
          UCS-2BE UCS-2LE
          UTF-16 UTF-16BE UTF-16LE
          UTF-32 UTF-32BE UTF-32LE
        )
    ),
    'UTF-7' => 'Encode::Unicode::UTF7',
);

# These entries are added only when "A" is not code point 193, i.e. the
# check the original code uses to exclude EBCDIC platforms.
if ( ord("A") != 193 ) {
    my %non_ebcdic = (
        (
            map { $_ => 'Encode::CN' } qw(
              euc-cn gb12345-raw gb2312-raw hz iso-ir-165 cp936
              MacChineseSimp
            )
        ),
        (
            map { $_ => 'Encode::JP' } qw(
              7bit-jis euc-jp iso-2022-jp iso-2022-jp-1
              jis0201-raw jis0208-raw jis0212-raw
              cp932 MacJapanese shiftjis
            )
        ),
        (
            map { $_ => 'Encode::KR' } qw(
              euc-kr iso-2022-kr johab ksc5601-raw cp949 MacKorean
            )
        ),
        (
            map { $_ => 'Encode::TW' }
              qw(big5-eten big5-hkscs cp950 MacChineseTrad)
        ),

        #'big5plus' => 'Encode::HanExtra',
        #'euc-tw'   => 'Encode::HanExtra',
        #'gb18030'  => 'Encode::HanExtra',

        ( map { $_ => 'Encode::MIME::Header' } qw(MIME-Header MIME-B MIME-Q) ),

        'MIME-Header-ISO_2022_JP' => 'Encode::MIME::Header::ISO_2022_JP',
    );
    @ExtModule{ keys %non_ebcdic } = values %non_ebcdic;
}

#
# Why not export ? to keep ConfigLocal Happy!
#
# Copy every entry into %Encode::ExtModule instead of exporting the hash.
@Encode::ExtModule{ keys %ExtModule } = values %ExtModule;

1;
__END__

=head1 NAME

Encode::Config -- internally used by Encode

=cut
#
# $Id: Encoder.pm,v 2.3 2013/09/14 07:51:59 dankogai Exp $
#
package Encode::Encoder;
use strict;
use warnings;
our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };

require Exporter;
our @ISA       = qw(Exporter);
our @EXPORT_OK = qw ( encoder );

our $AUTOLOAD;
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
use Encode qw(encode decode find_encoding from_to);
use Carp;

# new($class, $data, $encname)
# Builds the object.  A given $encname is resolved to its canonical name
# (croaks when unknown); otherwise the encoding is 'utf8' when $data has
# the UTF-8 flag set and '' when it has not.
sub new {
    my ( $class, $data, $encname ) = @_;
    if ($encname) {
        my $enc = find_encoding($encname)
          or croak __PACKAGE__, ": unknown encoding: $encname";
        $encname = $enc->name;
    }
    else {
        $encname = Encode::is_utf8($data) ? 'utf8' : '';
    }
    return bless { data => $data, encoding => $encname }, $class;
}

# Functional shortcut for Encode::Encoder->new(...); exported on demand.
sub encoder { __PACKAGE__->new(@_) }

# data([$data])
# Getter/setter for the payload; in both cases the payload is returned.
sub data {
    my ( $self, $new_data ) = @_;
    return $self->{data} unless defined $new_data;
    $self->{data} = $new_data;
    return $new_data;
}

# encoding([$encname])
# Without an argument returns the current encoding name.  With one, stores
# its canonical name (confesses when unknown) and returns the object.
sub encoding {
    my ( $self, $encname ) = @_;
    return $self->{encoding} unless $encname;
    my $enc = find_encoding($encname)
      or confess __PACKAGE__, ": unknown encoding: $encname";
    $self->{encoding} = $enc->name;
    return $self;
}

# bytes([$encname])
# Decodes the payload from $encname (default: the current encoding) and
# resets the encoding to ''.  Returns the object.
sub bytes {
    my ( $self, $encname ) = @_;
    my $source = $encname || $self->{encoding};
    my $enc    = find_encoding($source)
      or confess __PACKAGE__, ": unknown encoding: $source";
    $self->{data}     = $enc->decode( $self->{data}, 1 );
    $self->{encoding} = '';
    return $self;
}

sub DESTROY {    # defined so it won't autoload.
    warn shift if DEBUG;
}

# Any other method name is taken as an encoding name: the payload is
# converted to that encoding and the object is returned, so calls chain.
sub AUTOLOAD {
    my $self = shift;
    ref($self)
      or confess "$self is not an object";
    ( my $method = $AUTOLOAD ) =~ s/.*://;    # strip fully-qualified portion
    my $enc = find_encoding($method)
      or confess __PACKAGE__, ": unknown encoding: $method";
    my $target = $enc->name;
    warn $self->{encoding}, " => ", $target if DEBUG;
    if ( $self->{encoding} ) {

        # The payload is already encoded: transcode it in place.
        from_to( $self->{data}, $self->{encoding}, $target, 1 );
    }
    else {
        $self->{data} = $enc->encode( $self->{data}, 1 );
    }
    $self->{encoding} = $target;
    return $self;
}

use overload
  q("") => sub { $_[0]->{data} },
  q(0+) => sub { use bytes (); bytes::length( $_[0]->{data} ) },
  fallback => 1,
  ;

1;
__END__

=head1 NAME

Encode::Encoder -- Object Oriented Encoder

=head1 SYNOPSIS

  use Encode::Encoder;
  # Encode::encode("ISO-8859-1", $data);
  Encode::Encoder->new($data)->iso_8859_1; # OOP way
  # shortcut
  use Encode::Encoder qw(encoder);
  encoder($data)->iso_8859_1;
  # you can stack them!
  encoder($data)->iso_8859_1->base64;  # provided base64() is defined
  # you can use it as a decoder as well
  encoder($base64)->bytes('base64')->latin1;
  # stringified
  print encoder($data)->utf8->latin1;  # prints the string in latin1
  # numified
  encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data)

=head1 ABSTRACT

B<Encode::Encoder> allows you to use Encode in an object-oriented
style.  This is not only more intuitive than a functional approach,
but also handier when you want to stack encodings.  Suppose you want
your UTF-8 string converted to Latin1 then Base64: you can simply say

  my $base64 = encoder($utf8)->latin1->base64;

instead of

  my $latin1 = encode("latin1", $utf8);
  my $base64 = encode_base64($latin1);

or the lazier and more convoluted

  my $base64 = encode_base64(encode("latin1", $utf8));

=head1 Description

Here is how to use this module.

=over 4

=item *

There are at least two instance variables stored in a hash reference,
{data} and {encoding}.

=item *

When there is no method, it takes the method name as the name of the
encoding and encodes the instance I<data> with I<encoding>.  If successful,
the instance I<encoding> is set accordingly.

=item *

You can retrieve the result via -E<gt>data but usually you don't have to
because the stringify operator ("") is overridden to do exactly that.

=back

=head2 Predefined Methods

This module predefines the methods below:

=over 4

=item $e = Encode::Encoder-E<gt>new([$data, $encoding]);

returns an encoder object.  Its data is initialized with $data if
present, and its encoding is set to $encoding if present.

When $encoding is omitted, it defaults to utf8 if $data is already in
utf8 or "" (empty string) otherwise.

=item encoder()

is an alias of Encode::Encoder-E<gt>new().  This one is exported on demand.

=item $e-E<gt>data([$data])

When $data is present, sets the instance data to $data and returns
$data.  Otherwise, the current instance data is returned.

=item $e-E<gt>encoding([$encoding])

When $encoding is present, sets the instance encoding to $encoding and
returns the object itself.  Otherwise, the current instance encoding is
returned.

=item $e-E<gt>bytes([$encoding])

decodes instance data from $encoding, or the instance encoding if
omitted.  If the conversion is successful, the instance encoding
will be set to "".

The name I<bytes> was deliberately picked to avoid namespace tainting
-- this module may be used as a base class so method names that appear
in Encode::Encoding are avoided.

=back

=head2 Example: base64 transcoder

This module is designed to work with L<Encode::Encoding>.
To make the Base64 transcoder example above really work, you could
write a module like this:

  package Encode::Base64;
  use parent 'Encode::Encoding';
  __PACKAGE__->Define('base64');
  use MIME::Base64;
  sub encode{
      my ($obj, $data) = @_;
      return encode_base64($data);
  }
  sub decode{
      my ($obj, $data) = @_;
      return decode_base64($data);
  }
  1;
  __END__

And your caller module would be something like this:

  use Encode::Encoder;
  use Encode::Base64;

  # now you can really do the following

  encoder($data)->iso_8859_1->base64;
  encoder($base64)->bytes('base64')->latin1;

=head2 Operator Overloading

This module overloads two operators, stringify ("") and numify (0+).

Stringify dumps the data inside the object.

Numify returns the number of bytes in the instance data.

They come in handy when you want to print or find the size of data.

=head1 SEE ALSO

L<Encode>,
L<Encode::Encoding>

=cut
package Encode::Encoding;

# Base class for classes which implement encodings
use strict;
use warnings;
our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };

our @CARP_NOT = qw(Encode Encode::Encoder);

use Carp ();
use Encode ();
use Encode::MIME::Name;

use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};

# Define($class_or_obj, $canonical, @aliases)
# Registers an encoding via Encode::define_encoding().  Called on a class
# name, it first blesses a fresh { Name => $canonical } object into it.
sub Define {
    my $obj       = shift;
    my $canonical = shift;
    $obj = bless { Name => $canonical }, $obj unless ref $obj;

    # warn "$canonical => $obj\n";
    Encode::define_encoding( $obj, $canonical, @_ );
}

# Canonical name stored by Define().
sub name { return shift->{'Name'} }

# MIME name for this encoding, as looked up by Encode::MIME::Name.
sub mime_name {
    return Encode::MIME::Name::get_mime_name( shift->name );
}

# Shallow clone of the object with its 'renewed' counter incremented.
sub renew {
    my $self  = shift;
    my $clone = bless {%$self} => ref($self);
    $clone->{renewed}++;    # so the caller can see it
    DEBUG and warn $clone->{renewed};
    return $clone;
}

# Number of times this object has been renewed; 0 (not undef) if never.
sub renewed { return $_[0]->{renewed} || 0 }

*new_sequence = \&renew;

# Defaults; subclasses override needs_lines when they need line buffering.
sub needs_lines { 0 }

sub perlio_ok {
    return eval { require PerlIO::encoding } ? 1 : 0;
}

# (Temporary|legacy) methods

sub toUnicode   { shift->decode(@_) }
sub fromUnicode { shift->encode(@_) }

#
# Needs to be overloaded or just croak
#

sub encode {
    my $obj = shift;
    my $class = ref($obj) ? ref($obj) : $obj;
    Carp::croak( $class . "->encode() not defined!" );
}

sub decode {
    my $obj = shift;
    my $class = ref($obj) ? ref($obj) : $obj;

    # Name the method that is actually missing; this message used to say
    # encode(), copied from the stub above.
    Carp::croak( $class . "->decode() not defined!" );
}

# Empty on purpose; all this base class shows is a no-op destructor.
sub DESTROY { }

1;
__END__

=head1 NAME

Encode::Encoding - Encode Implementation Base Class

=head1 SYNOPSIS

  package Encode::MyEncoding;
  use parent qw(Encode::Encoding);

  __PACKAGE__->Define(qw(myCanonical myAlias));

=head1 DESCRIPTION

As mentioned in L<Encode>, encodings are (in the current
implementation at least) defined as objects. The mapping of encoding
name to object is via the C<%Encode::Encoding> hash.  Though you can
directly manipulate this hash, it is strongly encouraged to use this
base class module and add encode() and decode() methods.

=head2 Methods you should implement

You are strongly encouraged to implement methods below, at least
either encode() or decode().

=over 4

=item -E<gt>encode($string [,$check])

MUST return the octet sequence representing I<$string>.

=over 2

=item *

If I<$check> is true, it SHOULD modify I<$string> in place to remove
the converted part (i.e.  the whole string unless there is an error).
If perlio_ok() is true, SHOULD becomes MUST.

=item *

If an error occurs, it SHOULD return the octet sequence for the
fragment of string that has been converted and modify $string in-place
to remove the converted part leaving it starting with the problem
fragment.  If perlio_ok() is true, SHOULD becomes MUST.

=item *

If I<$check> is false then C<encode> MUST  make a "best effort" to
convert the string - for example, by using a replacement character.

=back

=item -E<gt>decode($octets [,$check])

MUST return the string that I<$octets> represents.

=over 2

=item *

If I<$check> is true, it SHOULD modify I<$octets> in place to remove
the converted part (i.e.  the whole sequence unless there is an
error).  If perlio_ok() is true, SHOULD becomes MUST.

=item *

If an error occurs, it SHOULD return the fragment of string that has
been converted and modify $octets in-place to remove the converted
part leaving it starting with the problem fragment.  If perlio_ok() is
true, SHOULD becomes MUST.

=item *

If I<$check> is false then C<decode> should make a "best effort" to
convert the string - for example by using Unicode's "\x{FFFD}" as a
replacement character.

=back

=back

If you want your encoding to work with L<encoding> pragma, you should
also implement the method below.

=over 4

=item -E<gt>cat_decode($destination, $octets, $offset, $terminator [,$check])

MUST decode I<$octets> with I<$offset> and concatenate it to I<$destination>.
Decoding will terminate when $terminator (a string) appears in output.
I<$offset> will be modified to the last $octets position at end of decode.
Returns true if $terminator appears in output, else returns false.

=back

=head2 Other methods defined in Encode::Encoding

You do not have to override methods shown below unless you have to.

=over 4

=item -E<gt>name

Predefined As:

  sub name  { return shift->{'Name'} }

MUST return the string representing the canonical name of the encoding.

=item -E<gt>mime_name

Predefined As:

  sub mime_name{
    return Encode::MIME::Name::get_mime_name(shift->name);
  }

MUST return the string representing the IANA charset name of the encoding.

=item -E<gt>renew

Predefined As:

  sub renew {
    my $self = shift;
    my $clone = bless { %$self } => ref($self);
    $clone->{renewed}++;
    return $clone;
  }

This method reconstructs the encoding object if necessary.  If you need
to store the state during encoding, this is where you clone your object.

PerlIO ALWAYS calls this method to make sure it has its own private
encoding object.

=item -E<gt>renewed

Predefined As:

  sub renewed { $_[0]->{renewed} || 0 }

Tells whether the object is renewed (and how many times).  Some
modules emit C<Use of uninitialized value in null operation> warning
unless the value is numeric so return 0 for false.

=item -E<gt>perlio_ok()

Predefined As:

  sub perlio_ok {
    return eval { require PerlIO::encoding } ? 1 : 0;
  }

If your encoding does not support PerlIO for some reasons, just;

 sub perlio_ok { 0 }

=item -E<gt>needs_lines()

Predefined As:

  sub needs_lines { 0 };

If your encoding can work with PerlIO but needs line buffering, you
MUST define this method so it returns true.  7bit ISO-2022 encodings
are one example that needs this.  When this method is missing, false
is assumed.

=back

=head2 Example: Encode::ROT13

  package Encode::ROT13;
  use strict;
  use parent qw(Encode::Encoding);

  __PACKAGE__->Define('rot13');

  sub encode($$;$){
      my ($obj, $str, $chk) = @_;
      $str =~ tr/A-Za-z/N-ZA-Mn-za-m/;
      $_[1] = '' if $chk; # this is what in-place edit means
      return $str;
  }

  # Jr pna or ynml yvxr guvf;
  *decode = \&encode;

  1;

=head1 Why the heck Encode API is different?

It should be noted that the I<$check> behaviour is different from the
outer public API.  The logic is that the "unchecked" case is useful
when the encoding is part of a stream which may be reporting errors
(e.g. STDERR).  In such cases, it is desirable to get everything
through somehow without causing additional errors which obscure the
original one. Also, the encoding is best placed to know what the
correct replacement character is, so if that is the desired behaviour
then letting low level code do it is the most efficient.

By contrast, if I<$check> is true, the scheme above allows the
encoding to do as much as it can and tell the layer above how much
that was. What is lacking at present is a mechanism to report what
went wrong. The most likely interface will be an additional method
call to the object, or perhaps (to avoid forcing per-stream objects
on otherwise stateless encodings) an additional parameter.

It is also highly desirable that encoding classes inherit from
C<Encode::Encoding> as a base class. This allows that class to define
additional behaviour for all encoding objects.

  package Encode::MyEncoding;
  use parent qw(Encode::Encoding);

  __PACKAGE__->Define(qw(myCanonical myAlias));

to create an object with C<< bless {Name => ...}, $class >>, and call
define_encoding.  They inherit their C<name> method from
C<Encode::Encoding>.

=head2 Compiled Encodings

For the sake of speed and efficiency, most of the encodings are now
supported via a I<compiled form>: XS modules generated from UCM
files.   Encode provides the enc2xs tool to achieve that.
Please see +L for more details. + +=head1 SEE ALSO + +L, L + +=begin future + +=over 4 + +=item Scheme 1 + +The fixup routine gets passed the remaining fragment of string being +processed. It modifies it in place to remove bytes/characters it can +understand and returns a string used to represent them. For example: + + sub fixup { + my $ch = substr($_[0],0,1,''); + return sprintf("\x{%02X}",ord($ch); + } + +This scheme is close to how the underlying C code for Encode works, +but gives the fixup routine very little context. + +=item Scheme 2 + +The fixup routine gets passed the original string, an index into +it of the problem area, and the output string so far. It appends +what it wants to the output string and returns a new index into the +original string. For example: + + sub fixup { + # my ($s,$i,$d) = @_; + my $ch = substr($_[0],$_[1],1); + $_[2] .= sprintf("\x{%02X}",ord($ch); + return $_[1]+1; + } + +This scheme gives maximal control to the fixup routine but is more +complicated to code, and may require that the internals of Encode be tweaked to +keep the original string intact. + +=item Other Schemes + +Hybrids of the above. + +Multiple return values rather than in-place modifications. + +Index into the string could be C allowing C. + +=back + +=end future + +=cut diff --git a/lib/Encode/GSM0338.pm b/lib/Encode/GSM0338.pm new file mode 100644 index 0000000..e87141e --- /dev/null +++ b/lib/Encode/GSM0338.pm @@ -0,0 +1,294 @@ +# +# $Id: GSM0338.pm,v 2.7 2017/06/10 17:23:50 dankogai Exp $ +# +package Encode::GSM0338; + +use strict; +use warnings; +use Carp; + +use vars qw($VERSION); +$VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . 
"%02d" x $#r, @r }; + +use Encode qw(:fallbacks); + +use parent qw(Encode::Encoding); +__PACKAGE__->Define('gsm0338'); + +sub needs_lines { 1 } +sub perlio_ok { 0 } + +use utf8; +our %UNI2GSM = ( + "\x{0040}" => "\x00", # COMMERCIAL AT + "\x{000A}" => "\x0A", # LINE FEED + "\x{000C}" => "\x1B\x0A", # FORM FEED + "\x{000D}" => "\x0D", # CARRIAGE RETURN + "\x{0020}" => "\x20", # SPACE + "\x{0021}" => "\x21", # EXCLAMATION MARK + "\x{0022}" => "\x22", # QUOTATION MARK + "\x{0023}" => "\x23", # NUMBER SIGN + "\x{0024}" => "\x02", # DOLLAR SIGN + "\x{0025}" => "\x25", # PERCENT SIGN + "\x{0026}" => "\x26", # AMPERSAND + "\x{0027}" => "\x27", # APOSTROPHE + "\x{0028}" => "\x28", # LEFT PARENTHESIS + "\x{0029}" => "\x29", # RIGHT PARENTHESIS + "\x{002A}" => "\x2A", # ASTERISK + "\x{002B}" => "\x2B", # PLUS SIGN + "\x{002C}" => "\x2C", # COMMA + "\x{002D}" => "\x2D", # HYPHEN-MINUS + "\x{002E}" => "\x2E", # FULL STOP + "\x{002F}" => "\x2F", # SOLIDUS + "\x{0030}" => "\x30", # DIGIT ZERO + "\x{0031}" => "\x31", # DIGIT ONE + "\x{0032}" => "\x32", # DIGIT TWO + "\x{0033}" => "\x33", # DIGIT THREE + "\x{0034}" => "\x34", # DIGIT FOUR + "\x{0035}" => "\x35", # DIGIT FIVE + "\x{0036}" => "\x36", # DIGIT SIX + "\x{0037}" => "\x37", # DIGIT SEVEN + "\x{0038}" => "\x38", # DIGIT EIGHT + "\x{0039}" => "\x39", # DIGIT NINE + "\x{003A}" => "\x3A", # COLON + "\x{003B}" => "\x3B", # SEMICOLON + "\x{003C}" => "\x3C", # LESS-THAN SIGN + "\x{003D}" => "\x3D", # EQUALS SIGN + "\x{003E}" => "\x3E", # GREATER-THAN SIGN + "\x{003F}" => "\x3F", # QUESTION MARK + "\x{0041}" => "\x41", # LATIN CAPITAL LETTER A + "\x{0042}" => "\x42", # LATIN CAPITAL LETTER B + "\x{0043}" => "\x43", # LATIN CAPITAL LETTER C + "\x{0044}" => "\x44", # LATIN CAPITAL LETTER D + "\x{0045}" => "\x45", # LATIN CAPITAL LETTER E + "\x{0046}" => "\x46", # LATIN CAPITAL LETTER F + "\x{0047}" => "\x47", # LATIN CAPITAL LETTER G + "\x{0048}" => "\x48", # LATIN CAPITAL LETTER H + "\x{0049}" => "\x49", # LATIN CAPITAL LETTER I 
+ "\x{004A}" => "\x4A", # LATIN CAPITAL LETTER J + "\x{004B}" => "\x4B", # LATIN CAPITAL LETTER K + "\x{004C}" => "\x4C", # LATIN CAPITAL LETTER L + "\x{004D}" => "\x4D", # LATIN CAPITAL LETTER M + "\x{004E}" => "\x4E", # LATIN CAPITAL LETTER N + "\x{004F}" => "\x4F", # LATIN CAPITAL LETTER O + "\x{0050}" => "\x50", # LATIN CAPITAL LETTER P + "\x{0051}" => "\x51", # LATIN CAPITAL LETTER Q + "\x{0052}" => "\x52", # LATIN CAPITAL LETTER R + "\x{0053}" => "\x53", # LATIN CAPITAL LETTER S + "\x{0054}" => "\x54", # LATIN CAPITAL LETTER T + "\x{0055}" => "\x55", # LATIN CAPITAL LETTER U + "\x{0056}" => "\x56", # LATIN CAPITAL LETTER V + "\x{0057}" => "\x57", # LATIN CAPITAL LETTER W + "\x{0058}" => "\x58", # LATIN CAPITAL LETTER X + "\x{0059}" => "\x59", # LATIN CAPITAL LETTER Y + "\x{005A}" => "\x5A", # LATIN CAPITAL LETTER Z + "\x{005F}" => "\x11", # LOW LINE + "\x{0061}" => "\x61", # LATIN SMALL LETTER A + "\x{0062}" => "\x62", # LATIN SMALL LETTER B + "\x{0063}" => "\x63", # LATIN SMALL LETTER C + "\x{0064}" => "\x64", # LATIN SMALL LETTER D + "\x{0065}" => "\x65", # LATIN SMALL LETTER E + "\x{0066}" => "\x66", # LATIN SMALL LETTER F + "\x{0067}" => "\x67", # LATIN SMALL LETTER G + "\x{0068}" => "\x68", # LATIN SMALL LETTER H + "\x{0069}" => "\x69", # LATIN SMALL LETTER I + "\x{006A}" => "\x6A", # LATIN SMALL LETTER J + "\x{006B}" => "\x6B", # LATIN SMALL LETTER K + "\x{006C}" => "\x6C", # LATIN SMALL LETTER L + "\x{006D}" => "\x6D", # LATIN SMALL LETTER M + "\x{006E}" => "\x6E", # LATIN SMALL LETTER N + "\x{006F}" => "\x6F", # LATIN SMALL LETTER O + "\x{0070}" => "\x70", # LATIN SMALL LETTER P + "\x{0071}" => "\x71", # LATIN SMALL LETTER Q + "\x{0072}" => "\x72", # LATIN SMALL LETTER R + "\x{0073}" => "\x73", # LATIN SMALL LETTER S + "\x{0074}" => "\x74", # LATIN SMALL LETTER T + "\x{0075}" => "\x75", # LATIN SMALL LETTER U + "\x{0076}" => "\x76", # LATIN SMALL LETTER V + "\x{0077}" => "\x77", # LATIN SMALL LETTER W + "\x{0078}" => "\x78", # LATIN SMALL LETTER X + 
"\x{0079}" => "\x79", # LATIN SMALL LETTER Y + "\x{007A}" => "\x7A", # LATIN SMALL LETTER Z + "\x{000C}" => "\x1B\x0A", # FORM FEED + "\x{005B}" => "\x1B\x3C", # LEFT SQUARE BRACKET + "\x{005C}" => "\x1B\x2F", # REVERSE SOLIDUS + "\x{005D}" => "\x1B\x3E", # RIGHT SQUARE BRACKET + "\x{005E}" => "\x1B\x14", # CIRCUMFLEX ACCENT + "\x{007B}" => "\x1B\x28", # LEFT CURLY BRACKET + "\x{007C}" => "\x1B\x40", # VERTICAL LINE + "\x{007D}" => "\x1B\x29", # RIGHT CURLY BRACKET + "\x{007E}" => "\x1B\x3D", # TILDE + "\x{00A0}" => "\x1B", # NO-BREAK SPACE + "\x{00A1}" => "\x40", # INVERTED EXCLAMATION MARK + "\x{00A3}" => "\x01", # POUND SIGN + "\x{00A4}" => "\x24", # CURRENCY SIGN + "\x{00A5}" => "\x03", # YEN SIGN + "\x{00A7}" => "\x5F", # SECTION SIGN + "\x{00BF}" => "\x60", # INVERTED QUESTION MARK + "\x{00C4}" => "\x5B", # LATIN CAPITAL LETTER A WITH DIAERESIS + "\x{00C5}" => "\x0E", # LATIN CAPITAL LETTER A WITH RING ABOVE + "\x{00C6}" => "\x1C", # LATIN CAPITAL LETTER AE + "\x{00C9}" => "\x1F", # LATIN CAPITAL LETTER E WITH ACUTE + "\x{00D1}" => "\x5D", # LATIN CAPITAL LETTER N WITH TILDE + "\x{00D6}" => "\x5C", # LATIN CAPITAL LETTER O WITH DIAERESIS + "\x{00D8}" => "\x0B", # LATIN CAPITAL LETTER O WITH STROKE + "\x{00DC}" => "\x5E", # LATIN CAPITAL LETTER U WITH DIAERESIS + "\x{00DF}" => "\x1E", # LATIN SMALL LETTER SHARP S + "\x{00E0}" => "\x7F", # LATIN SMALL LETTER A WITH GRAVE + "\x{00E4}" => "\x7B", # LATIN SMALL LETTER A WITH DIAERESIS + "\x{00E5}" => "\x0F", # LATIN SMALL LETTER A WITH RING ABOVE + "\x{00E6}" => "\x1D", # LATIN SMALL LETTER AE + #"\x{00E7}" => "\x09", # LATIN SMALL LETTER C WITH CEDILLA + "\x{00C7}" => "\x09", # LATIN CAPITAL LETTER C WITH CEDILLA + "\x{00E8}" => "\x04", # LATIN SMALL LETTER E WITH GRAVE + "\x{00E9}" => "\x05", # LATIN SMALL LETTER E WITH ACUTE + "\x{00EC}" => "\x07", # LATIN SMALL LETTER I WITH GRAVE + "\x{00F1}" => "\x7D", # LATIN SMALL LETTER N WITH TILDE + "\x{00F2}" => "\x08", # LATIN SMALL LETTER O WITH GRAVE + "\x{00F6}" => 
"\x7C", # LATIN SMALL LETTER O WITH DIAERESIS + "\x{00F8}" => "\x0C", # LATIN SMALL LETTER O WITH STROKE + "\x{00F9}" => "\x06", # LATIN SMALL LETTER U WITH GRAVE + "\x{00FC}" => "\x7E", # LATIN SMALL LETTER U WITH DIAERESIS + "\x{0393}" => "\x13", # GREEK CAPITAL LETTER GAMMA + "\x{0394}" => "\x10", # GREEK CAPITAL LETTER DELTA + "\x{0398}" => "\x19", # GREEK CAPITAL LETTER THETA + "\x{039B}" => "\x14", # GREEK CAPITAL LETTER LAMDA + "\x{039E}" => "\x1A", # GREEK CAPITAL LETTER XI + "\x{03A0}" => "\x16", # GREEK CAPITAL LETTER PI + "\x{03A3}" => "\x18", # GREEK CAPITAL LETTER SIGMA + "\x{03A6}" => "\x12", # GREEK CAPITAL LETTER PHI + "\x{03A8}" => "\x17", # GREEK CAPITAL LETTER PSI + "\x{03A9}" => "\x15", # GREEK CAPITAL LETTER OMEGA + "\x{20AC}" => "\x1B\x65", # EURO SIGN +); +our %GSM2UNI = reverse %UNI2GSM; +our $ESC = "\x1b"; +our $ATMARK = "\x40"; +our $FBCHAR = "\x3F"; +our $NBSP = "\x{00A0}"; + +#define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" + +sub decode ($$;$) { + my ( $obj, $bytes, $chk ) = @_; + return undef unless defined $bytes; + my $str = substr($bytes, 0, 0); # to propagate taintedness; + while ( length $bytes ) { + my $c = substr( $bytes, 0, 1, '' ); + my $u; + if ( $c eq "\x00" ) { + my $c2 = substr( $bytes, 0, 1, '' ); + $u = + !length $c2 ? $ATMARK + : $c2 eq "\x00" ? "\x{0000}" + : exists $GSM2UNI{$c2} ? $ATMARK . $GSM2UNI{$c2} + : $chk + ? croak sprintf( "\\x%02X\\x%02X does not map to Unicode", + ord($c), ord($c2) ) + : $ATMARK . $FBCHAR; + + } + elsif ( $c eq $ESC ) { + my $c2 = substr( $bytes, 0, 1, '' ); + $u = + exists $GSM2UNI{ $c . $c2 } ? $GSM2UNI{ $c . $c2 } + : exists $GSM2UNI{$c2} ? $NBSP . $GSM2UNI{$c2} + : $chk + ? croak sprintf( "\\x%02X\\x%02X does not map to Unicode", + ord($c), ord($c2) ) + : $NBSP . $FBCHAR; + } + else { + $u = + exists $GSM2UNI{$c} + ? $GSM2UNI{$c} + : $chk ? ref $chk eq 'CODE' + ? 
$chk->( ord $c ) + : croak sprintf( "\\x%02X does not map to Unicode", ord($c) ) + : $FBCHAR; + } + $str .= $u; + } + $_[1] = $bytes if $chk; + return $str; +} + +#define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" + +sub encode($$;$) { + my ( $obj, $str, $chk ) = @_; + return undef unless defined $str; + my $bytes = substr($str, 0, 0); # to propagate taintedness + while ( length $str ) { + my $u = substr( $str, 0, 1, '' ); + my $c; + $bytes .= + exists $UNI2GSM{$u} + ? $UNI2GSM{$u} + : $chk ? ref $chk eq 'CODE' + ? $chk->( ord($u) ) + : croak sprintf( "\\x{%04x} does not map to %s", + ord($u), $obj->name ) + : $FBCHAR; + } + $_[1] = $str if $chk; + return $bytes; +} + +1; +__END__ + +=head1 NAME + +Encode::GSM0338 -- ETSI GSM 03.38 Encoding + +=head1 SYNOPSIS + + use Encode qw/encode decode/; + $gsm0338 = encode("gsm0338", $utf8); # loads Encode::GSM0338 implicitly + $utf8 = decode("gsm0338", $gsm0338); # ditto + +=head1 DESCRIPTION + +GSM0338 is for GSM handsets. Though it shares alphanumerals with ASCII, +control character ranges and other parts are mapped very differently, +mainly to store Greek characters. There are also escape sequences +(starting with 0x1B) to cover e.g. the Euro sign. + +This was once handled by L<Encode::Byte> but because of all those +unusual specifications, Encode 2.20 has relocated the support to +this module. + +=head1 NOTES + +Unlike most other encodings, the following always croaks on error +for any $chk that evaluates to true. + + $gsm0338 = encode("gsm0338", $utf8, $chk); + $utf8 = decode("gsm0338", $gsm0338, $chk); + +So if you want to check the validity of the encoding, surround the +expression with C<eval {}> block as follows; + + eval { + $utf8 = decode("gsm0338", $gsm0338, $chk); + } or do { + # handle exception here + }; + +=head1 BUGS + +ETSI GSM 03.38 Encoding itself. + +Mapping \x00 to '@' causes too much pain everywhere. + +Its use of \x1b (escape) is also very questionable. 
+
+Because of those two, the code paging approach used in ucm-based
+Encoding SOMETIMES fails so this module was written.
+
+=head1 SEE ALSO
+
+L<Encode>
+
+=cut
diff --git a/lib/Encode/Guess.pm b/lib/Encode/Guess.pm
new file mode 100644
index 0000000..41fc19b
--- /dev/null
+++ b/lib/Encode/Guess.pm
@@ -0,0 +1,356 @@
+package Encode::Guess;
+use strict;
+use warnings;
+use Encode qw(:fallbacks find_encoding);
+our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+my $Canon = 'Guess';
+use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
+our %DEF_SUSPECTS = map { $_ => find_encoding($_) } qw(ascii utf8);
+my $obj = bless {
+    Name     => $Canon,
+    Suspects => {%DEF_SUSPECTS},
+} => __PACKAGE__;
+Encode::define_encoding($obj, $Canon);
+
+use parent qw(Encode::Encoding);
+sub needs_lines { 1 }
+sub perlio_ok { 0 }
+
+our @EXPORT = qw(guess_encoding);
+our $NoUTFAutoGuess = 0;
+our $UTF8_BOM = pack( "C3", 0xef, 0xbb, 0xbf );
+
+sub import { # Exporter not used so we do it on our own
+    my $callpkg = caller;
+    for my $item (@EXPORT) {
+        no strict 'refs';
+        *{"$callpkg\::$item"} = \&{"$item"};
+    }
+    set_suspects(@_);
+}
+
+sub set_suspects {    # reset the suspects to the defaults (ascii, utf8), then add the given names
+    my $class = shift;
+    my $self = ref($class) ? $class : $Encode::Encoding{$Canon};
+    $self->{Suspects} = {%DEF_SUSPECTS};
+    $self->add_suspects(@_);
+}
+
+sub add_suspects {    # add names to the current suspects; dies on an unknown encoding
+    my $class = shift;
+    my $self = ref($class) ? $class : $Encode::Encoding{$Canon};
+    for my $c (@_) {
+        my $e = find_encoding($c) or die "Unknown encoding: $c";
+        $self->{Suspects}{ $e->name } = $e;
+        DEBUG and warn "Added: ", $e->name;
+    }
+}
+
+sub decode($$;$) {    # decode with the guessed encoding; croaks with guess()'s message when guessing fails
+    my ( $obj, $octet, $chk ) = @_;
+    my $guessed = guess( $obj, $octet );
+    unless ( ref($guessed) ) {
+        require Carp;
+        Carp::croak($guessed);
+    }
+    my $utf8 = $guessed->decode( $octet, $chk || 0 );
+    $_[1] = $octet if $chk;
+    return $utf8;
+}
+
+sub guess_encoding {    # exported function form: guess() against the registered 'Guess' object
+    guess( $Encode::Encoding{$Canon}, @_ );
+}
+
+sub guess {    # returns an encoding object on success, or a plain (non-ref) error string on failure
+    my $class = shift;
+    my $obj = ref($class) ? $class : $Encode::Encoding{$Canon};
+    my $octet = shift;
+
+    # sanity check
+    return "Empty string, empty guess" unless defined $octet and length $octet;
+
+    # cheat 0: utf8 flag;
+    if ( Encode::is_utf8($octet) ) {
+        return find_encoding('utf8') unless $NoUTFAutoGuess;
+        Encode::_utf8_off($octet);
+    }
+
+    # cheat 1: BOM
+    use Encode::Unicode;
+    unless ($NoUTFAutoGuess) {
+        my $BOM = pack( 'C3', unpack( "C3", $octet ) );
+        return find_encoding('utf8')
+          if ( defined $BOM and $BOM eq $UTF8_BOM );
+        $BOM = unpack( 'N', $octet );
+        return find_encoding('UTF-32')
+          if ( defined $BOM and ( $BOM == 0xFeFF or $BOM == 0xFFFe0000 ) );
+        $BOM = unpack( 'n', $octet );
+        return find_encoding('UTF-16')
+          if ( defined $BOM and ( $BOM == 0xFeFF or $BOM == 0xFFFe ) );
+        if ( $octet =~ /\x00/o )
+        { # if \x00 found, we assume UTF-(16|32)(BE|LE)
+            my $utf;
+            my ( $be, $le ) = ( 0, 0 );
+            if ( $octet =~ /\x00\x00/o ) { # UTF-32(BE|LE) assumed
+                $utf = "UTF-32";
+                for my $char ( unpack( 'N*', $octet ) ) {
+                    $char & 0x0000ffff and $be++;
+                    $char & 0xffff0000 and $le++;
+                }
+            }
+            else { # UTF-16(BE|LE) assumed
+                $utf = "UTF-16";
+                for my $char ( unpack( 'n*', $octet ) ) {
+                    $char & 0x00ff and $be++;
+                    $char & 0xff00 and $le++;
+                }
+            }
+            DEBUG and warn "$utf, be == $be, le == $le";
+            $be == $le
+              and return
+              "Encodings ambiguous between $utf BE and LE ($be, $le)";
+            $utf .= ( $be > $le ) ? 'BE' : 'LE';
+            return find_encoding($utf);
+        }
+    }
+    my %try = %{ $obj->{Suspects} };
+    for my $c (@_) {
+        my $e = find_encoding($c) or die "Unknown encoding: $c";
+        $try{ $e->name } = $e;
+        DEBUG and warn "Added: ", $e->name;
+    }
+    my $nline = 1;
+    for my $line ( split /\r\n?|\n/, $octet ) {
+
+        # cheat 2 -- \e in the string
+        if ( $line =~ /\e/o ) {
+            my @keys = keys %try;
+            delete @try{qw/utf8 ascii/};
+            for my $k (@keys) {
+                ref( $try{$k} ) eq 'Encode::XS' and delete $try{$k};
+            }
+        }
+        my %ok = %try;
+
+        # warn join(",", keys %try);
+        for my $k ( keys %try ) {
+            my $scratch = $line;
+            $try{$k}->decode( $scratch, FB_QUIET );
+            if ( $scratch eq '' ) {
+                DEBUG and warn sprintf( "%4d:%-24s ok\n", $nline, $k );
+            }
+            else {
+                use bytes ();
+                DEBUG
+                  and warn sprintf( "%4d:%-24s not ok; %d bytes left\n",
+                    $nline, $k, bytes::length($scratch) );
+                delete $ok{$k};
+            }
+        }
+        %ok or return "No appropriate encodings found!";
+        if ( scalar( keys(%ok) ) == 1 ) {
+            my ($retval) = values(%ok);
+            return $retval;
+        }
+        %try = %ok;
+        $nline++;
+    }
+    $try{ascii}    # no single winner: ascii is the only answer accepted here; one string (not a list) so the prefix survives scalar context
+      or return "Encodings too ambiguous: " . join( " or ", sort keys %try );
+    return $try{ascii};
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::Guess -- Guesses encoding from data
+
+=head1 SYNOPSIS
+
+  # if you are sure $data won't contain anything bogus
+
+  use Encode;
+  use Encode::Guess qw/euc-jp shiftjis 7bit-jis/;
+  my $utf8 = decode("Guess", $data);
+  my $data = encode("Guess", $utf8); # this doesn't work!
+
+  # more elaborate way
+  use Encode::Guess;
+  my $enc = guess_encoding($data, qw/euc-jp shiftjis 7bit-jis/);
+  ref($enc) or die "Can't guess: $enc"; # trap error this way
+  $utf8 = $enc->decode($data);
+  # or
+  $utf8 = decode($enc->name, $data)
+
+=head1 ABSTRACT
+
+Encode::Guess enables you to guess in what encoding a given data is
+encoded, or at least tries to.
+
+=head1 DESCRIPTION
+
+By default, it checks only ascii, utf8 and UTF-16/32 with BOM.
+ + use Encode::Guess; # ascii/utf8/BOMed UTF + +To use it more practically, you have to give the names of encodings to +check (I<suspects> as follows). The name of suspects can either be +canonical names or aliases. + +CAVEAT: Unlike UTF-(16|32), BOM in utf8 is NOT AUTOMATICALLY STRIPPED. + + # tries all major Japanese Encodings as well + use Encode::Guess qw/euc-jp shiftjis 7bit-jis/; + +If the C<$Encode::Guess::NoUTFAutoGuess> variable is set to a true +value, no heuristics will be applied to UTF8/16/32, and the result +will be limited to the suspects and C<ascii>. + +=over 4 + +=item Encode::Guess->set_suspects + +You can also change the internal suspects list via C<set_suspects> +method. + + use Encode::Guess; + Encode::Guess->set_suspects(qw/euc-jp shiftjis 7bit-jis/); + +=item Encode::Guess->add_suspects + +Or you can use C<add_suspects> method. The difference is that +C<set_suspects> flushes the current suspects list while +C<add_suspects> adds. + + use Encode::Guess; + Encode::Guess->add_suspects(qw/euc-jp shiftjis 7bit-jis/); + # now the suspects are euc-jp,shiftjis,7bit-jis, AND + # euc-kr,euc-cn, and big5-eten + Encode::Guess->add_suspects(qw/euc-kr euc-cn big5-eten/); + +=item Encode::decode("Guess" ...) + +When you are content with suspects list, you can now + + my $utf8 = Encode::decode("Guess", $data); + +=item Encode::Guess->guess($data) + +But it will croak if: + +=over + +=item * + +Two or more suspects remain + +=item * + +No suspects left + +=back + +So you should instead try this; + + my $decoder = Encode::Guess->guess($data); + +On success, $decoder is an object that is documented in +L<Encode::Encoding>. So you can now do this; + + my $utf8 = $decoder->decode($data); + +On failure, $decoder now contains an error message so the whole thing +would be as follows; + + my $decoder = Encode::Guess->guess($data); + die $decoder unless ref($decoder); + my $utf8 = $decoder->decode($data); + +=item guess_encoding($data, [, I<list of suspects>]) + +You can also try C<guess_encoding> function which is exported by +default. 
It takes $data to check and it also takes the list of +suspects by option. The optional suspect list is I<not reflected> to +the internal suspects list. + + my $decoder = guess_encoding($data, qw/euc-jp euc-kr euc-cn/); + die $decoder unless ref($decoder); + my $utf8 = $decoder->decode($data); + # check only ascii, utf8 and UTF-(16|32) with BOM + my $decoder = guess_encoding($data); + +=back + +=head1 CAVEATS + +=over 4 + +=item * + +Because of the algorithm used, ISO-8859 series and other single-byte +encodings do not work well unless either one of ISO-8859 is the only +one suspect (besides ascii and utf8). + + use Encode::Guess; + # perhaps ok + my $decoder = guess_encoding($data, 'latin1'); + # definitely NOT ok + my $decoder = guess_encoding($data, qw/latin1 greek/); + +The reason is that Encode::Guess guesses encoding by trial and error. +It first splits $data into lines and tries to decode the line for each +suspect. It keeps it going until all but one encoding is eliminated +out of suspects list. ISO-8859 series is just too successful for most +cases (because it fills almost all code points in \x00-\xff). + +=item * + +Do not mix national standard encodings and the corresponding vendor +encodings. + + # a very bad idea + my $decoder + = guess_encoding($data, qw/shiftjis MacJapanese cp932/); + +The reason is that vendor encoding is usually a superset of national +standard so it becomes too ambiguous for most cases. + +=item * + +On the other hand, mixing various national standard encodings +automagically works unless $data is too short to allow for guessing. + + # This is ok if $data is long enough + my $decoder = + guess_encoding($data, qw/euc-cn + euc-jp shiftjis 7bit-jis + euc-kr + big5-eten/); + +=item * + +DO NOT PUT TOO MANY SUSPECTS! Don't you try something like this! + + my $decoder = guess_encoding($data, + Encode->encodings(":all")); + +=back + +It is, after all, just a guess. You should always be explicit when it +comes to encodings. 
But there are some, especially Japanese, +environment that guess-coding is a must. Use this module with care. + +=head1 TO DO + +Encode::Guess does not work on EBCDIC platforms. + +=head1 SEE ALSO + +L, L + +=cut + diff --git a/lib/Encode/JP/H2Z.pm b/lib/Encode/JP/H2Z.pm new file mode 100644 index 0000000..f8e2230 --- /dev/null +++ b/lib/Encode/JP/H2Z.pm @@ -0,0 +1,176 @@ +# +# $Id: H2Z.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $ +# + +package Encode::JP::H2Z; + +use strict; +use warnings; + +our $RCSID = q$Id: H2Z.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $; +our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; + +use Encode::CJKConstants qw(:all); + +use vars qw(%_D2Z $_PAT_D2Z + %_Z2D $_PAT_Z2D + %_H2Z $_PAT_H2Z + %_Z2H $_PAT_Z2H); + +%_H2Z = ( + "\x8e\xa1" => "\xa1\xa3", #�� + "\x8e\xa2" => "\xa1\xd6", #�� + "\x8e\xa3" => "\xa1\xd7", #�� + "\x8e\xa4" => "\xa1\xa2", #�� + "\x8e\xa5" => "\xa1\xa6", #�� + "\x8e\xa6" => "\xa5\xf2", #�� + "\x8e\xa7" => "\xa5\xa1", #�� + "\x8e\xa8" => "\xa5\xa3", #�� + "\x8e\xa9" => "\xa5\xa5", #�� + "\x8e\xaa" => "\xa5\xa7", #�� + "\x8e\xab" => "\xa5\xa9", #�� + "\x8e\xac" => "\xa5\xe3", #�� + "\x8e\xad" => "\xa5\xe5", #�� + "\x8e\xae" => "\xa5\xe7", #�� + "\x8e\xaf" => "\xa5\xc3", #�� + "\x8e\xb0" => "\xa1\xbc", #�� + "\x8e\xb1" => "\xa5\xa2", #�� + "\x8e\xb2" => "\xa5\xa4", #�� + "\x8e\xb3" => "\xa5\xa6", #�� + "\x8e\xb4" => "\xa5\xa8", #�� + "\x8e\xb5" => "\xa5\xaa", #�� + "\x8e\xb6" => "\xa5\xab", #�� + "\x8e\xb7" => "\xa5\xad", #�� + "\x8e\xb8" => "\xa5\xaf", #�� + "\x8e\xb9" => "\xa5\xb1", #�� + "\x8e\xba" => "\xa5\xb3", #�� + "\x8e\xbb" => "\xa5\xb5", #�� + "\x8e\xbc" => "\xa5\xb7", #�� + "\x8e\xbd" => "\xa5\xb9", #�� + "\x8e\xbe" => "\xa5\xbb", #�� + "\x8e\xbf" => "\xa5\xbd", #�� + "\x8e\xc0" => "\xa5\xbf", #�� + "\x8e\xc1" => "\xa5\xc1", #�� + "\x8e\xc2" => "\xa5\xc4", #�� + "\x8e\xc3" => "\xa5\xc6", #�� + "\x8e\xc4" => "\xa5\xc8", #�� + "\x8e\xc5" => "\xa5\xca", #�� + "\x8e\xc6" 
=> "\xa5\xcb", #�� + "\x8e\xc7" => "\xa5\xcc", #�� + "\x8e\xc8" => "\xa5\xcd", #�� + "\x8e\xc9" => "\xa5\xce", #�� + "\x8e\xca" => "\xa5\xcf", #�� + "\x8e\xcb" => "\xa5\xd2", #�� + "\x8e\xcc" => "\xa5\xd5", #�� + "\x8e\xcd" => "\xa5\xd8", #�� + "\x8e\xce" => "\xa5\xdb", #�� + "\x8e\xcf" => "\xa5\xde", #�� + "\x8e\xd0" => "\xa5\xdf", #�� + "\x8e\xd1" => "\xa5\xe0", #�� + "\x8e\xd2" => "\xa5\xe1", #�� + "\x8e\xd3" => "\xa5\xe2", #�� + "\x8e\xd4" => "\xa5\xe4", #�� + "\x8e\xd5" => "\xa5\xe6", #�� + "\x8e\xd6" => "\xa5\xe8", #�� + "\x8e\xd7" => "\xa5\xe9", #�� + "\x8e\xd8" => "\xa5\xea", #�� + "\x8e\xd9" => "\xa5\xeb", #�� + "\x8e\xda" => "\xa5\xec", #�� + "\x8e\xdb" => "\xa5\xed", #�� + "\x8e\xdc" => "\xa5\xef", #�� + "\x8e\xdd" => "\xa5\xf3", #�� + "\x8e\xde" => "\xa1\xab", #�� + "\x8e\xdf" => "\xa1\xac", #�� +); + +%_D2Z = ( + "\x8e\xb6\x8e\xde" => "\xa5\xac", #�� + "\x8e\xb7\x8e\xde" => "\xa5\xae", #�� + "\x8e\xb8\x8e\xde" => "\xa5\xb0", #�� + "\x8e\xb9\x8e\xde" => "\xa5\xb2", #�� + "\x8e\xba\x8e\xde" => "\xa5\xb4", #�� + "\x8e\xbb\x8e\xde" => "\xa5\xb6", #�� + "\x8e\xbc\x8e\xde" => "\xa5\xb8", #�� + "\x8e\xbd\x8e\xde" => "\xa5\xba", #�� + "\x8e\xbe\x8e\xde" => "\xa5\xbc", #�� + "\x8e\xbf\x8e\xde" => "\xa5\xbe", #�� + "\x8e\xc0\x8e\xde" => "\xa5\xc0", #�� + "\x8e\xc1\x8e\xde" => "\xa5\xc2", #�� + "\x8e\xc2\x8e\xde" => "\xa5\xc5", #�� + "\x8e\xc3\x8e\xde" => "\xa5\xc7", #�� + "\x8e\xc4\x8e\xde" => "\xa5\xc9", #�� + "\x8e\xca\x8e\xde" => "\xa5\xd0", #�� + "\x8e\xcb\x8e\xde" => "\xa5\xd3", #�� + "\x8e\xcc\x8e\xde" => "\xa5\xd6", #�� + "\x8e\xcd\x8e\xde" => "\xa5\xd9", #�� + "\x8e\xce\x8e\xde" => "\xa5\xdc", #�� + "\x8e\xca\x8e\xdf" => "\xa5\xd1", #�� + "\x8e\xcb\x8e\xdf" => "\xa5\xd4", #�� + "\x8e\xcc\x8e\xdf" => "\xa5\xd7", #�� + "\x8e\xcd\x8e\xdf" => "\xa5\xda", #�� + "\x8e\xce\x8e\xdf" => "\xa5\xdd", #�� + "\x8e\xb3\x8e\xde" => "\xa5\xf4", #�� +); + +# init only once; + +#$_PAT_D2Z = join("|", keys %_D2Z); +#$_PAT_H2Z = join("|", keys %_H2Z); + +%_Z2H = reverse 
%_H2Z; +%_Z2D = reverse %_D2Z; + +#$_PAT_Z2H = join("|", keys %_Z2H); +#$_PAT_Z2D = join("|", keys %_Z2D); + +sub h2z { + no warnings qw(uninitialized); + my $r_str = shift; + my ($keep_dakuten) = @_; + my $n = 0; + unless ($keep_dakuten) { + $n = ( + $$r_str =~ s( + ($RE{EUC_KANA} + (?:\x8e[\xde\xdf])?) + ){ + my $str = $1; + $_D2Z{$str} || $_H2Z{$str} || + # in case dakuten and handakuten are side-by-side! + $_H2Z{substr($str,0,2)} . $_H2Z{substr($str,2,2)}; + }eogx + ); + } + else { + $n = ( + $$r_str =~ s( + ($RE{EUC_KANA}) + ){ + $_H2Z{$1}; + }eogx + ); + } + $n; +} + +sub z2h { + my $r_str = shift; + my $n = ( + $$r_str =~ s( + ($RE{EUC_C}|$RE{EUC_0212}|$RE{EUC_KANA}) + ){ + $_Z2D{$1} || $_Z2H{$1} || $1; + }eogx + ); + $n; +} + +1; +__END__ + + +=head1 NAME + +Encode::JP::H2Z -- internally used by Encode::JP::2022_JP* + +=cut diff --git a/lib/Encode/JP/JIS7.pm b/lib/Encode/JP/JIS7.pm new file mode 100644 index 0000000..6fc383c --- /dev/null +++ b/lib/Encode/JP/JIS7.pm @@ -0,0 +1,168 @@ +package Encode::JP::JIS7; +use strict; +use warnings; +our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; + +use Encode qw(:fallbacks); + +for my $name ( '7bit-jis', 'iso-2022-jp', 'iso-2022-jp-1' ) { + my $h2z = ( $name eq '7bit-jis' ) ? 0 : 1; + my $jis0212 = ( $name eq 'iso-2022-jp' ) ? 
0 : 1; + + my $obj = bless { + Name => $name, + h2z => $h2z, + jis0212 => $jis0212, + } => __PACKAGE__; + Encode::define_encoding($obj, $name); +} + +use parent qw(Encode::Encoding); + +# we override this to 1 so PerlIO works +sub needs_lines { 1 } + +use Encode::CJKConstants qw(:all); + +# +# decode is identical for all 2022 variants +# + +sub decode($$;$) { + my ( $obj, $str, $chk ) = @_; + return undef unless defined $str; + my $residue = ''; + if ($chk) { + $str =~ s/([^\x00-\x7f].*)$//so and $residue = $1; + } + $residue .= jis_euc( \$str ); + $_[1] = $residue if $chk; + return Encode::decode( 'euc-jp', $str, FB_PERLQQ ); +} + +# +# encode is different +# + +sub encode($$;$) { + require Encode::JP::H2Z; + my ( $obj, $utf8, $chk ) = @_; + return undef unless defined $utf8; + + # empty the input string in the stack so perlio is ok + $_[1] = '' if $chk; + my ( $h2z, $jis0212 ) = @$obj{qw(h2z jis0212)}; + my $octet = Encode::encode( 'euc-jp', $utf8, $chk || 0 ); + $h2z and &Encode::JP::H2Z::h2z( \$octet ); + euc_jis( \$octet, $jis0212 ); + return $octet; +} + +# +# cat_decode +# +my $re_scan_jis_g = qr{ + \G ( ($RE{JIS_0212}) | $RE{JIS_0208} | + ($RE{ISO_ASC}) | ($RE{JIS_KANA}) | ) + ([^\e]*) +}x; + +sub cat_decode { # ($obj, $dst, $src, $pos, $trm, $chk) + my ( $obj, undef, undef, $pos, $trm ) = @_; # currently ignores $chk + my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ]; + local ${^ENCODING}; + use bytes; + my $opos = pos($$rsrc); + pos($$rsrc) = $pos; + while ( $$rsrc =~ /$re_scan_jis_g/gc ) { + my ( $esc, $esc_0212, $esc_asc, $esc_kana, $chunk ) = + ( $1, $2, $3, $4, $5 ); + + unless ($chunk) { $esc or last; next; } + + if ( $esc && !$esc_asc ) { + $chunk =~ tr/\x21-\x7e/\xa1-\xfe/; + if ($esc_kana) { + $chunk =~ s/([\xa1-\xdf])/\x8e$1/og; + } + elsif ($esc_0212) { + $chunk =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og; + } + $chunk = Encode::decode( 'euc-jp', $chunk, 0 ); + } + elsif ( ( my $npos = index( $chunk, $trm ) ) >= 0 ) { + $$rdst .= substr( $chunk, 0, $npos 
+ length($trm) );
+            $$rpos += length($esc) + $npos + length($trm);
+            pos($$rsrc) = $opos;
+            return 1;
+        }
+        $$rdst .= $chunk;
+        $$rpos = pos($$rsrc);
+    }
+    $$rpos = pos($$rsrc);
+    pos($$rsrc) = $opos;
+    return '';
+}
+
+# JIS<->EUC
+my $re_scan_jis = qr{
+   (?:($RE{JIS_0212})|$RE{JIS_0208}|($RE{ISO_ASC})|($RE{JIS_KANA}))([^\e]*)
+}x;
+
+sub jis_euc {    # rewrites $$r_str from JIS escapes to EUC bytes in place; returns the tail cut off at a leftover ESC ('' if none)
+    local ${^ENCODING};
+    my $r_str = shift;
+    $$r_str =~ s($re_scan_jis)
+    {
+        my ($esc_0212, $esc_asc, $esc_kana, $chunk) =
+            ($1, $2, $3, $4);
+        if (!$esc_asc) {
+            $chunk =~ tr/\x21-\x7e/\xa1-\xfe/;
+            if ($esc_kana) {
+                $chunk =~ s/([\xa1-\xdf])/\x8e$1/og;
+            }
+            elsif ($esc_0212) {
+                $chunk =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og;
+            }
+        }
+        $chunk;
+    }geox;
+    my $residue = ( $$r_str =~ s/(\e.*)$//so ) ? $1 : '';    # s/// returns a count, so the removed tail must be read from $1
+    return $residue;
+}
+
+sub euc_jis {
+    no warnings qw(uninitialized);
+    local ${^ENCODING};
+    my $r_str = shift;
+    my $jis0212 = shift;
+    $$r_str =~ s{
+        ((?:$RE{EUC_C})+|(?:$RE{EUC_KANA})+|(?:$RE{EUC_0212})+)
+    }{
+        my $chunk = $1;
+        my $esc =
+            ( $chunk =~ tr/\x8E//d ) ? $ESC{KANA} :
+            ( $chunk =~ tr/\x8F//d ) ? $ESC{JIS_0212} :
+            $ESC{JIS_0208};
+        if ($esc eq $ESC{JIS_0212} && !$jis0212){
+            # fallback to '?'
+            $chunk =~ tr/\xA1-\xFE/\x3F/;
+        }else{
+            $chunk =~ tr/\xA1-\xFE/\x21-\x7E/;
+        }
+        $esc . $chunk . $ESC{ASC};
+    }geox;
+    $$r_str =~ s/\Q$ESC{ASC}\E
+        (\Q$ESC{KANA}\E|\Q$ESC{JIS_0212}\E|\Q$ESC{JIS_0208}\E)/$1/gox;
+    $$r_str;
+}
+
+1;
+__END__
+
+
+=head1 NAME
+
+Encode::JP::JIS7 -- internally used by Encode::JP
+
+=cut
diff --git a/lib/Encode/KR/2022_KR.pm b/lib/Encode/KR/2022_KR.pm
new file mode 100644
index 0000000..1223264
--- /dev/null
+++ b/lib/Encode/KR/2022_KR.pm
@@ -0,0 +1,83 @@
+package Encode::KR::2022_KR;
+use strict;
+use warnings;
+our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." .
"%02d" x $#r, @r }; + +use Encode qw(:fallbacks); + +use parent qw(Encode::Encoding); +__PACKAGE__->Define('iso-2022-kr'); + +sub needs_lines { 1 } + +sub perlio_ok { + return 0; # for the time being +} + +sub decode { + my ( $obj, $str, $chk ) = @_; + return undef unless defined $str; + my $res = $str; + my $residue = iso_euc( \$res ); + + # This is for PerlIO + $_[1] = $residue if $chk; + return Encode::decode( 'euc-kr', $res, FB_PERLQQ ); +} + +sub encode { + my ( $obj, $utf8, $chk ) = @_; + return undef unless defined $utf8; + + # empty the input string in the stack so perlio is ok + $_[1] = '' if $chk; + my $octet = Encode::encode( 'euc-kr', $utf8, FB_PERLQQ ); + euc_iso( \$octet ); + return $octet; +} + +use Encode::CJKConstants qw(:all); + +# ISO<->EUC + +sub iso_euc { + my $r_str = shift; + $$r_str =~ s/$RE{'2022_KR'}//gox; # remove the designator + $$r_str =~ s{ # replace characters in GL + \x0e # between SO(\x0e) and SI(\x0f) + ([^\x0f]*) # with characters in GR + \x0f + } + { + my $out= $1; + $out =~ tr/\x21-\x7e/\xa1-\xfe/; + $out; + }geox; + my ($residue) = ( $$r_str =~ s/(\e.*)$//so ); + return $residue; +} + +sub euc_iso { + no warnings qw(uninitialized); + my $r_str = shift; + substr( $$r_str, 0, 0 ) = + $ESC{'2022_KR'}; # put the designator at the beg. + $$r_str =~ + s{ # move KS X 1001 characters in GR to GL + ($RE{EUC_C}+) # and enclose them with SO and SI + }{ + my $str = $1; + $str =~ tr/\xA1-\xFE/\x21-\x7E/; + "\x0e" . $str . "\x0f"; + }geox; + $$r_str; +} + +1; +__END__ + +=head1 NAME + +Encode::KR::2022_KR -- internally used by Encode::KR + +=cut diff --git a/lib/Encode/MIME/Header.pm b/lib/Encode/MIME/Header.pm new file mode 100644 index 0000000..848de99 --- /dev/null +++ b/lib/Encode/MIME/Header.pm @@ -0,0 +1,427 @@ +package Encode::MIME::Header; +use strict; +use warnings; + +our $VERSION = do { my @r = ( q$Revision: 2.28 $ =~ /\d+/g ); sprintf "%d." . 
"%02d" x $#r, @r }; + +use Carp (); +use Encode (); +use MIME::Base64 (); + +my %seed = ( + decode_b => 1, # decodes 'B' encoding ? + decode_q => 1, # decodes 'Q' encoding ? + encode => 'B', # encode with 'B' or 'Q' ? + charset => 'UTF-8', # encode charset + bpl => 75, # bytes per line +); + +my @objs; + +push @objs, bless { + %seed, + Name => 'MIME-Header', +} => __PACKAGE__; + +push @objs, bless { + %seed, + decode_q => 0, + Name => 'MIME-B', +} => __PACKAGE__; + +push @objs, bless { + %seed, + decode_b => 0, + encode => 'Q', + Name => 'MIME-Q', +} => __PACKAGE__; + +Encode::define_encoding($_, $_->{Name}) foreach @objs; + +use parent qw(Encode::Encoding); + +sub needs_lines { 1 } +sub perlio_ok { 0 } + +# RFC 2047 and RFC 2231 grammar +my $re_charset = qr/[!"#\$%&'+\-0-9A-Z\\\^_`a-z\{\|\}~]+/; +my $re_language = qr/[A-Za-z]{1,8}(?:-[0-9A-Za-z]{1,8})*/; +my $re_encoding = qr/[QqBb]/; +my $re_encoded_text = qr/[^\?]*/; +my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/; +my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding\?$re_encoded_text)\?=/; +my $re_capture_encoded_word_split = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/; + +# in strict mode check also for valid base64 characters and also for valid quoted printable codes +my $re_encoding_strict_b = qr/[Bb]/; +my $re_encoding_strict_q = qr/[Qq]/; +my $re_encoded_text_strict_b = qr/[0-9A-Za-z\+\/]*={0,2}/; +my $re_encoded_text_strict_q = qr/(?:[\x21-\x3C\x3E\x40-\x7E]|=[0-9A-Fa-f]{2})*/; # NOTE: first part are printable US-ASCII except ?, =, SPACE and TAB +my $re_encoded_word_strict = qr/=\?$re_charset(?:\*$re_language)?\?(?:$re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/; +my $re_capture_encoded_word_strict = 
qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/; + +my $re_newline = qr/(?:\r\n|[\r\n])/; + +# in strict mode encoded words must be always separated by spaces or tabs (or folded newline) +# except in comments when separator between words and comment round brackets can be omitted +my $re_word_begin_strict = qr/(?:(?:[ \t]|\A)\(?|(?:[^\\]|\A)\)\()/; +my $re_word_sep_strict = qr/(?:$re_newline?[ \t])+/; +my $re_word_end_strict = qr/(?:\)\(|\)?(?:$re_newline?[ \t]|\z))/; + +my $re_match = qr/()((?:$re_encoded_word\s*)*$re_encoded_word)()/; # the leading () stands in for the word-begin group of $re_match_strict, so decode() can read $2/$3 with either pattern +my $re_match_strict = qr/($re_word_begin_strict)((?:$re_encoded_word_strict$re_word_sep_strict)*$re_encoded_word_strict)(?=$re_word_end_strict)/; + +my $re_capture = qr/$re_capture_encoded_word(?:\s*)?/; +my $re_capture_strict = qr/$re_capture_encoded_word_strict$re_word_sep_strict?/; + +our $STRICT_DECODE = 0; # true selects the *_strict patterns in decode() and a single MIME::Base64::decode call in _decode_b() + +sub decode($$;$) { # decode every MIME encoded-word found in $str and return the resulting string (undef for undefined input) + my ($obj, $str, $chk) = @_; # $obj: encoding object whose decode_b/decode_q entries enable the B/Q variants; $chk: optional CHECK value + return undef unless defined $str; + + my $re_match_decode = $STRICT_DECODE ? $re_match_strict : $re_match; + my $re_capture_decode = $STRICT_DECODE ? $re_capture_strict : $re_capture; + + my $stop = 0; # set once an error is hit under Encode::RETURN_ON_ERR, or when the input is exhausted + my $output = substr($str, 0, 0); # to propagate taintedness + + # decode each line separately, match whole continuous folded line at one call + 1 while not $stop and $str =~ s{^((?:[^\r\n]*(?:$re_newline[ \t])?)*)($re_newline)?}{ + + my $line = $1; + my $sep = defined $2 ?
$2 : ''; + + $stop = 1 unless length($line) or length($sep); # empty match: nothing left in $str, leave the outer loop + + # NOTE: this code partially could break $chk support + # in non strict mode concat consecutive encoded mime words with same charset, language and encoding + # fixes breaking inside multi-byte characters + 1 while not $STRICT_DECODE and $line =~ s/$re_capture_encoded_word_split\s*=\?\1\2\?\3\?($re_encoded_text)\?=/=\?$1$2\?$3\?$4$5\?=/so; + + # process sequence of encoded MIME words at once + 1 while not $stop and $line =~ s{^(.*?)$re_match_decode}{ + + my $begin = $1 . $2; + my $words = $3; + + $begin =~ tr/\r\n//d; + $output .= $begin; + + # decode one MIME word + 1 while not $stop and $words =~ s{^(.*?)($re_capture_decode)}{ + + $output .= $1; + my $orig = $2; # the complete encoded word (with trailing separator) as matched in the input + my $charset = $3; + my ($mime_enc, $text) = split /\?/, $5; # $5 is split at "?" into the encoding letter and the encoded text + + $text =~ tr/\r\n//d; + + my $enc = Encode::find_mime_encoding($charset); + + # in non strict mode allow also perl encoding aliases + if ( not defined $enc and not $STRICT_DECODE ) { + # make sure that decoded string will be always strict UTF-8 + $charset = 'UTF-8' if lc($charset) eq 'utf8'; + $enc = Encode::find_encoding($charset); + } + + if ( not defined $enc ) { + Carp::croak qq(Unknown charset "$charset") if not ref $chk and $chk and $chk & Encode::DIE_ON_ERR; + Carp::carp qq(Unknown charset "$charset") if not ref $chk and $chk and $chk & Encode::WARN_ON_ERR; + $stop = 1 if not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR; + $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace + $stop ? $orig : ''; # replacement value: keep the word in $words when stopping, otherwise consume it + } else { + if ( uc($mime_enc) eq 'B' and $obj->{decode_b} ) { + my $decoded = _decode_b($enc, $text, $chk); + $stop = 1 if not defined $decoded and not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR; + $output .= (defined $decoded ? $decoded : $text) unless $stop; # on failure without RETURN_ON_ERR the still-encoded text is emitted + $stop ?
$orig : ''; + } elsif ( uc($mime_enc) eq 'Q' and $obj->{decode_q} ) { + my $decoded = _decode_q($enc, $text, $chk); + $stop = 1 if not defined $decoded and not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR; + $output .= (defined $decoded ? $decoded : $text) unless $stop; # on failure without RETURN_ON_ERR the still-encoded text is emitted + $stop ? $orig : ''; + } else { + Carp::croak qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk and $chk & Encode::DIE_ON_ERR; + Carp::carp qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk and $chk & Encode::WARN_ON_ERR; + $stop = 1 if not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR; + $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace + $stop ? $orig : ''; + } + } + + }se; + + if ( not $stop ) { + $output .= $words; + $words = ''; + } + + $words; # replacement value: empty unless decoding stopped early + + }se; + + if ( not $stop ) { + $line =~ tr/\r\n//d; + $output .= $line . $sep; + $line = ''; + $sep = ''; + } + + $line . $sep; # replacement value: the unprocessed rest of this line stays in $str when stopped + + }se; + + $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC); # write what is left of $str back into the caller's variable + return $output; +} + +sub _decode_b { # Base64 variant: turn $text into octets, then decode them through _decode_octets() + my ($enc, $text, $chk) = @_; + # MIME::Base64::decode ignores everything after a '=' padding character + # in non strict mode split string after each sequence of padding characters and decode each substring + my $octets = $STRICT_DECODE ?
+ MIME::Base64::decode($text) : + join('', map { MIME::Base64::decode($_) } split /(?<==)(?=[^=])/, $text); + return _decode_octets($enc, $octets, $chk); +} + +sub _decode_q { # "Q" variant: undo the Q escapes of $text, then decode the octets through _decode_octets() + my ($enc, $text, $chk) = @_; + $text =~ tr/_/ /; # "_" stands for a space + $text =~ s/=([0-9A-Fa-f]{2})/pack('C', hex($1))/eg; # =XX is one octet written as two hex digits + return _decode_octets($enc, $text, $chk); +} + +sub _decode_octets { # decode $octets with $enc; undef when an integer CHECK is set and input is left over + my ($enc, $octets, $chk) = @_; + defined $chk or $chk = 0; + !ref($chk) && $chk and $chk &= ~Encode::LEAVE_SRC; # LEAVE_SRC is cleared; what remains in $octets is inspected below + my $decoded = $enc->decode($octets, $chk); + my $incomplete = !ref($chk) && $chk && $octets ne ''; + return $incomplete ? undef : $decoded; +} + +sub encode($$;$) { # encode $str into MIME encoded-words and fold the result; undef for undefined input + my ($obj, $str, $chk) = @_; + defined $str or return undef; + my $folded = $obj->_fold_line($obj->_encode_string($str, $chk)); + !ref($chk) && $chk && !($chk & Encode::LEAVE_SRC) and $_[1] = $str; # write what is left of $str back into the caller's variable + return $folded . substr($str, 0, 0); # to propagate taintedness +} + +sub _fold_line { # break $line at whitespace into pieces joined by CRLF, using $obj->{bpl} as the length limit + my ($obj, $line) = @_; + my $bpl = $obj->{bpl}; + my @pieces = (); + + while ( length($line) ) { + if ( $line =~ s/^(.{0,$bpl})(\s|\z)// ) { # a prefix of at most $bpl characters ending at whitespace or at the end + my ($head, $ws) = ($1, $2); + push @pieces, $head, ( length($line) ? "\r\n" . $ws : () ); + } elsif ( $line =~ s/(\s)(.*)$// ) { # no such prefix: cut at a whitespace further on + my ($ws, $rest) = ($1, $2); + push @pieces, $line; + push @pieces, "\r\n" . $ws if length($line = $rest); + } else { # no whitespace left: emit the remainder unfolded + push @pieces, $line; + last; + } + } + + return join('', @pieces); +} + +sub _encode_string { + my ($obj, $str, $chk) = @_; + my $wordlen = $obj->{bpl} > 76 ? 76 : $obj->{bpl}; + my $enc = Encode::find_mime_encoding($obj->{charset}); + my $enc_chk = $chk; + $enc_chk = 0 unless defined $enc_chk; + $enc_chk |= Encode::LEAVE_SRC if not ref $enc_chk and $enc_chk; + my @result = (); + my $octets = ''; + while ( length( my $chr = substr($str, 0, 1, '') ) ) { + my $seq = $enc->encode($chr, $enc_chk); + if ( not length($seq) ) { + substr($str, 0, 0, $chr); + last; + } + if ( $obj->_encoded_word_len($octets .
$seq) > $wordlen ) { + push @result, $obj->_encode_word($octets); + $octets = ''; + } + $octets .= $seq; + } + length($octets) and push @result, $obj->_encode_word($octets); + $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC); + return join(' ', @result); +} + +sub _encode_word { # wrap $octets into one "=?charset?encoding?text?=" encoded-word + my ($obj, $octets) = @_; + my $charset = $obj->{charset}; + my $encode = $obj->{encode}; # 'B' selects Base64, anything else the Q encoder + my $text = $encode eq 'B' ? _encode_b($octets) : _encode_q($octets); + return "=?$charset?$encode?$text?="; +} + +sub _encoded_word_len { # length that _encode_word($octets) would have, computed without encoding anything + my ($obj, $octets) = @_; + my $charset = $obj->{charset}; + my $encode = $obj->{encode}; + my $text_len = $encode eq 'B' ? _encoded_b_len($octets) : _encoded_q_len($octets); + return length("=?$charset?$encode??=") + $text_len; +} + +sub _encode_b { # Base64 text of $octets; the empty second argument suppresses line breaks + my ($octets) = @_; + return MIME::Base64::encode($octets, ''); +} + +sub _encoded_b_len { # number of characters _encode_b($octets) produces + my ($octets) = @_; + return int( ( length($octets) + 2 ) / 3 ) * 4; # 4 characters per started group of 3 octets; int() because Perl's "/" is not integer division +} + +my $re_invalid_q_char = qr/[^0-9A-Za-z !*+\-\/]/; + +sub _encode_q { # "Q" text of $octets: =XX for every octet matching $re_invalid_q_char, "_" for each space + my ($octets) = @_; + $octets =~ s{($re_invalid_q_char)}{ + join('', map { sprintf('=%02X', $_) } unpack('C*', $1)) + }egox; + $octets =~ s/ /_/go; + return $octets; +} + +sub _encoded_q_len { # number of characters _encode_q($octets) produces: 3 per escaped octet, 1 for any other + my ($octets) = @_; + my $invalid_count = () = $octets =~ /$re_invalid_q_char/sgo; + return ( $invalid_count * 3 ) + ( length($octets) - $invalid_count ); +} + +1; +__END__ + +=head1 NAME + +Encode::MIME::Header -- MIME encoding for an unstructured email header + +=head1 SYNOPSIS + + use Encode qw(encode decode); + + my $mime_str = encode("MIME-Header", "Sample:Text \N{U+263A}"); + # $mime_str is "=?UTF-8?B?U2FtcGxlOlRleHQg4pi6?=" + + my $mime_q_str = encode("MIME-Q", "Sample:Text \N{U+263A}"); + # $mime_q_str is "=?UTF-8?Q?Sample=3AText_=E2=98=BA?=" + + my $str = decode("MIME-Header", + "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\r\n " . + "=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=" + ); + # $str is "If you can read this you understand the example."
+ + use Encode qw(decode :fallbacks); + use Encode::MIME::Header; + local $Encode::MIME::Header::STRICT_DECODE = 1; + my $strict_string = decode("MIME-Header", $mime_string, FB_CROAK); + # use strict decoding and croak on errors + +=head1 ABSTRACT + +This module implements L<RFC 2047|https://tools.ietf.org/html/rfc2047> MIME +encoding for an unstructured field body of the email header. It can also be +used for L<RFC 822|https://tools.ietf.org/html/rfc822> 'text' token. However, +it cannot be used directly for the whole header with the field name or for the +structured header fields like From, To, Cc, Message-Id, etc... There are 3 +encoding names supported by this module: C<MIME-Header>, C<MIME-B> and +C<MIME-Q>. + +=head1 DESCRIPTION + +Decode method takes an unstructured field body of the email header (or +L<RFC 822|https://tools.ietf.org/html/rfc822> 'text' token) as its input and +decodes each MIME encoded-word from input string to a sequence of bytes +according to L<RFC 2047|https://tools.ietf.org/html/rfc2047> and +L<RFC 2231|https://tools.ietf.org/html/rfc2231>. Subsequently, each sequence +of bytes with the corresponding MIME charset is decoded with +L<the Encode module|Encode> and finally, one output string is returned. Text +parts of the input string which do not contain MIME encoded-word stay +unmodified in the output string. Folded newlines between two consecutive MIME +encoded-words are discarded, others are preserved in the output string. +C<MIME-B> can decode Base64 variant, C<MIME-Q> can decode Quoted-Printable +variant and C<MIME-Header> can decode both of them. If L<Encode module|Encode> +does not support particular MIME charset or chosen variant then an action based +on L<CHECK flags|Encode/Handling Malformed Data> is performed (by default, the +MIME encoded-word is not decoded). + +Encode method takes a scalar string as its input and uses +L<strict UTF-8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder for encoding it to UTF-8 +bytes. Then a sequence of UTF-8 bytes is encoded into MIME encoded-words +(C<MIME-Header> and C<MIME-B> use a Base64 variant while C<MIME-Q> uses a +Quoted-Printable variant) where each MIME encoded-word is limited to 75 +characters. MIME encoded-words are separated by C<CRLF SPACE> and joined to +one output string. Output string is suitable for unstructured field body of +the email header. + +Both encode and decode methods propagate +L<CHECK flags|Encode/Handling Malformed Data> when encoding and decoding the +MIME charset.
+ +=head1 BUGS + +Versions prior to 2.22 (part of Encode 2.83) have a malfunctioning decoder +and encoder. The MIME encoder infamously inserted additional spaces or +discarded white spaces between consecutive MIME encoded-words, which led to +invalid MIME headers produced by this module. The MIME decoder had a tendency +to discard white spaces, incorrectly interpret data or attempt to decode Base64 +MIME encoded-words as Quoted-Printable. These problems were fixed in version +2.22. It is highly recommended not to use any version prior to 2.22! + +Versions prior to 2.24 (part of Encode 2.87) ignored +L<CHECK flags|Encode/Handling Malformed Data>. The MIME encoder used +L<not strict utf8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder for input Unicode +strings which could lead to invalid UTF-8 sequences. MIME decoder used also +L<not strict utf8|Encode/UTF-8 vs. utf8 vs. UTF8> decoder and additionally +called the decode method with a C<Encode::FB_PERLQQ> flag (thus user-specified +L<CHECK flags|Encode/Handling Malformed Data> were ignored). Moreover, it +automatically croaked when a MIME encoded-word contained unknown encoding. +Since version 2.24, this module uses +L<strict UTF-8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder and decoder. And +L<CHECK flags|Encode/Handling Malformed Data> are correctly propagated. + +Since version 2.22 (part of Encode 2.83), the MIME encoder should be fully +compliant to L<RFC 2047|https://tools.ietf.org/html/rfc2047> and +L<RFC 2231|https://tools.ietf.org/html/rfc2231>. Due to the aforementioned +bugs in previous versions of the MIME encoder, there is a I<less strict> +compatible mode for the MIME decoder which is used by default. It should be +able to decode MIME encoded-words encoded by pre 2.22 versions of this module. +However, note that this is not correct according to +L<RFC 2047|https://tools.ietf.org/html/rfc2047>. + +In default I<not strict> mode the MIME decoder attempts to decode every substring +which looks like a MIME encoded-word. Therefore, the MIME encoded-words do not +need to be separated by white space. To enforce a correct I<strict> mode, set +variable C<$Encode::MIME::Header::STRICT_DECODE> to 1 e.g.
by localizing: + + use Encode::MIME::Header; + local $Encode::MIME::Header::STRICT_DECODE = 1; + +=head1 AUTHORS + +Pali E<lt>pali@cpan.orgE<gt> + +=head1 SEE ALSO + +L<Encode>, +L<RFC 822|https://tools.ietf.org/html/rfc822>, +L<RFC 2047|https://tools.ietf.org/html/rfc2047>, +L<RFC 2231|https://tools.ietf.org/html/rfc2231> + +=cut diff --git a/lib/Encode/MIME/Header/ISO_2022_JP.pm b/lib/Encode/MIME/Header/ISO_2022_JP.pm new file mode 100644 index 0000000..dc1e427 --- /dev/null +++ b/lib/Encode/MIME/Header/ISO_2022_JP.pm @@ -0,0 +1,133 @@ +package Encode::MIME::Header::ISO_2022_JP; + +use strict; +use warnings; + +use parent qw(Encode::MIME::Header); + +my $obj = + bless { decode_b => '1', decode_q => '1', encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } => + __PACKAGE__; +Encode::define_encoding($obj, 'MIME-Header-ISO_2022_JP'); + +use constant HEAD => '=?ISO-2022-JP?B?'; +use constant TAIL => '?='; + +use Encode::CJKConstants qw(%RE); + +our $VERSION = do { my @r = ( q$Revision: 1.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; + +# I owe the below codes totally to +# Jcode by Dan Kogai & http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64 + +sub encode { # encode $str as a header body using ISO-2022-JP "B" encoded-words; undef for undefined input + my $self = shift; + my $str = shift; + return undef unless defined $str; + + utf8::encode($str) if ( Encode::is_utf8($str) ); + Encode::from_to( $str, 'utf8', 'euc-jp' ); # the helpers below work on EUC-JP octets + + my ($trailing_crlf) = ( $str =~ /(\n|\r|\x0d\x0a)$/o ); + + $str = _mime_unstructured_header( $str, $self->{bpl} ); + + not $trailing_crlf and $str =~ s/(\n|\r|\x0d\x0a)$//o; # only keep a final line break if the input had one + + return $str; +} + +sub _mime_unstructured_header { # build the folded header text for $oldheader (EUC-JP octets) with lines of at most $bpl characters + my ( $oldheader, $bpl ) = @_; + my $crlf = $oldheader =~ /\n$/; + my ( $header, @words, @wordstmp, $i ) = (''); + + $oldheader =~ s/\s+$//; + + @wordstmp = split /\s+/, $oldheader; + + for ( $i = 0 ; $i < $#wordstmp ; $i++ ) { # merge neighbouring non-ASCII words so that they share one encoded-word + if ( $wordstmp[$i] !~ /^[\x21-\x7E]+$/ + and $wordstmp[ $i + 1 ] !~ /^[\x21-\x7E]+$/ ) + { + $wordstmp[ $i + 1 ] = "$wordstmp[$i] $wordstmp[$i + 1]"; + } + else { + push( @words, $wordstmp[$i] ); + } + } + + push( @words, $wordstmp[-1] ) if @wordstmp; # empty or all-whitespace input has no last word; pushing undef caused "uninitialized" warnings below + + for my $word (@words) { + if ( $word =~ /^[\x21-\x7E]+$/ ) { # printable ASCII is emitted as is + $header =~
/(?:.*\n)*(.*)/; + if ( length($1) + length($word) > $bpl ) { + $header .= "\n $word"; + } + else { + $header .= $word; + } + } + else { + $header = _add_encoded_word( $word, $header, $bpl ); + } + + $header =~ /(?:.*\n)*(.*)/; # $1 is the last (current) line of $header + + if ( length($1) == $bpl ) { + $header .= "\n "; + } + else { + $header .= ' '; + } + } + + $header =~ s/\n? $//mg; + + $crlf ? "$header\n" : $header; +} + +sub _add_encoded_word { # append $str (EUC-JP octets) to $line as ISO-2022-JP encoded-words; returns the completed lines followed by the current one + my ( $str, $line, $bpl ) = @_; + my $result = ''; + + while ( length($str) ) { + my $target = $str; + $str = ''; + + if ( + length($line) + 22 + + ( $target =~ /^(?:$RE{EUC_0212}|$RE{EUC_C})/o ) * 8 > $bpl ) + { + $line =~ s/[ \t\n\r]*$/\n/; + $result .= $line; + $line = ' '; + } + + while (1) { + my $iso_2022_jp = $target; + Encode::from_to( $iso_2022_jp, 'euc-jp', 'iso-2022-jp' ); + + my $encoded = + HEAD . MIME::Base64::encode_base64( $iso_2022_jp, '' ) . TAIL; + + if ( length($encoded) + length($line) > $bpl ) { + $target =~ + s/($RE{EUC_0212}|$RE{EUC_KANA}|$RE{EUC_C}|$RE{ASCII})$//o; + $str = $1 . $str; # too long for this line: move the last character back to $str and retry with a shorter $target + } + else { + $line .= $encoded; + last; + } + } + + } + + $result . $line; +} + +1; +__END__ + diff --git a/lib/Encode/MIME/Name.pm b/lib/Encode/MIME/Name.pm new file mode 100644 index 0000000..72ec79a --- /dev/null +++ b/lib/Encode/MIME/Name.pm @@ -0,0 +1,103 @@ +package Encode::MIME::Name; +use strict; +use warnings; +our $VERSION = do { my @r = ( q$Revision: 1.3 $ =~ /\d+/g ); sprintf "%d." .
"%02d" x $#r, @r }; + +# NOTE: This table must be 1:1 mapping +our %MIME_NAME_OF = ( + 'AdobeStandardEncoding' => 'Adobe-Standard-Encoding', + 'AdobeSymbol' => 'Adobe-Symbol-Encoding', + 'ascii' => 'US-ASCII', + 'big5-hkscs' => 'Big5-HKSCS', + 'cp1026' => 'IBM1026', + 'cp1047' => 'IBM1047', + 'cp1250' => 'windows-1250', + 'cp1251' => 'windows-1251', + 'cp1252' => 'windows-1252', + 'cp1253' => 'windows-1253', + 'cp1254' => 'windows-1254', + 'cp1255' => 'windows-1255', + 'cp1256' => 'windows-1256', + 'cp1257' => 'windows-1257', + 'cp1258' => 'windows-1258', + 'cp37' => 'IBM037', + 'cp424' => 'IBM424', + 'cp437' => 'IBM437', + 'cp500' => 'IBM500', + 'cp775' => 'IBM775', + 'cp850' => 'IBM850', + 'cp852' => 'IBM852', + 'cp855' => 'IBM855', + 'cp857' => 'IBM857', + 'cp860' => 'IBM860', + 'cp861' => 'IBM861', + 'cp862' => 'IBM862', + 'cp863' => 'IBM863', + 'cp864' => 'IBM864', + 'cp865' => 'IBM865', + 'cp866' => 'IBM866', + 'cp869' => 'IBM869', + 'cp936' => 'GBK', + 'euc-cn' => 'EUC-CN', + 'euc-jp' => 'EUC-JP', + 'euc-kr' => 'EUC-KR', + #'gb2312-raw' => 'GB2312', # no, you're wrong, I18N::Charset + 'hp-roman8' => 'hp-roman8', + 'hz' => 'HZ-GB-2312', + 'iso-2022-jp' => 'ISO-2022-JP', + 'iso-2022-jp-1' => 'ISO-2022-JP-1', + 'iso-2022-kr' => 'ISO-2022-KR', + 'iso-8859-1' => 'ISO-8859-1', + 'iso-8859-10' => 'ISO-8859-10', + 'iso-8859-13' => 'ISO-8859-13', + 'iso-8859-14' => 'ISO-8859-14', + 'iso-8859-15' => 'ISO-8859-15', + 'iso-8859-16' => 'ISO-8859-16', + 'iso-8859-2' => 'ISO-8859-2', + 'iso-8859-3' => 'ISO-8859-3', + 'iso-8859-4' => 'ISO-8859-4', + 'iso-8859-5' => 'ISO-8859-5', + 'iso-8859-6' => 'ISO-8859-6', + 'iso-8859-7' => 'ISO-8859-7', + 'iso-8859-8' => 'ISO-8859-8', + 'iso-8859-9' => 'ISO-8859-9', + #'jis0201-raw' => 'JIS_X0201', + #'jis0208-raw' => 'JIS_C6226-1983', + #'jis0212-raw' => 'JIS_X0212-1990', + 'koi8-r' => 'KOI8-R', + 'koi8-u' => 'KOI8-U', + #'ksc5601-raw' => 'KS_C_5601-1987', + 'shiftjis' => 'Shift_JIS', + 'UTF-16' => 'UTF-16', + 'UTF-16BE' => 
'UTF-16BE', + 'UTF-16LE' => 'UTF-16LE', + 'UTF-32' => 'UTF-32', + 'UTF-32BE' => 'UTF-32BE', + 'UTF-32LE' => 'UTF-32LE', + 'UTF-7' => 'UTF-7', + 'utf-8-strict' => 'UTF-8', + 'viscii' => 'VISCII', +); + +# NOTE: %MIME_NAME_OF is still 1:1 mapping, so it can be inverted here (keys are upper-cased MIME names) +our %ENCODE_NAME_OF = map { uc $MIME_NAME_OF{$_} => $_ } keys %MIME_NAME_OF; + +# Add additional 1:N mapping (must stay after the inversion above, otherwise 'UTF-8' would map back ambiguously) +$MIME_NAME_OF{'utf8'} = 'UTF-8'; + +sub get_mime_name($) { $MIME_NAME_OF{$_[0]} }; # MIME name for an Encode name (exact-case lookup), undef if unknown + +sub get_encode_name($) { $ENCODE_NAME_OF{uc $_[0]} }; # Encode name for a MIME name (case-insensitive lookup), undef if unknown + +1; +__END__ + +=head1 NAME + +Encode::MIME::Name -- internally used by Encode + +=head1 SEE ALSO + +L<I18N::Charset> + +=cut diff --git a/lib/Encode/PerlIO.pod b/lib/Encode/PerlIO.pod new file mode 100644 index 0000000..1a9269a --- /dev/null +++ b/lib/Encode/PerlIO.pod @@ -0,0 +1,167 @@ +=head1 NAME + +Encode::PerlIO -- a detailed document on Encode and PerlIO + +=head1 Overview + +It is very common to want to do encoding transformations when +reading or writing files, network connections, pipes etc. +If Perl is configured to use the new 'perlio' IO system then +C<Encode> provides a "layer" (see L<PerlIO>) which can transform +data as it is read or written. + +Here is how the blind poet would modernise the encoding: + + use Encode; + open(my $iliad,'<:encoding(iso-8859-7)','iliad.greek'); + open(my $utf8,'>:utf8','iliad.utf8'); + my @epic = <$iliad>; + print $utf8 @epic; + close($utf8); + close($iliad); + +In addition, the new IO system can also be configured to read/write +UTF-8 encoded characters (as noted above, this is efficient): + + open(my $fh,'>:utf8','anything'); + print $fh "Any \x{0021} string \N{SMILEY FACE}\n"; + +Either of the above forms of "layer" specifications can be made the default +for a lexical scope with the C<open> pragma. See L<open>. + +Once a handle is open, its layers can be altered using C<binmode>.
+ +Without any such configuration, or if Perl itself is built using the +system's own IO, then write operations assume that the file handle +accepts only I<bytes> and will C<die> if a character larger than 255 is +written to the handle. When reading, each octet from the handle becomes +a byte-in-a-character. Note that this default is the same behaviour +as bytes-only languages (including Perl before v5.6) would have, +and is sufficient to handle native 8-bit encodings e.g. iso-8859-1, +EBCDIC etc. and any legacy mechanisms for handling other encodings +and binary data. + +In other cases, it is the program's responsibility to transform +characters into bytes using the API above before doing writes, and to +transform the bytes read from a handle into characters before doing +"character operations" (e.g. C<lc>, C</\W+/>, ...). + +You can also use PerlIO to convert larger amounts of data you don't +want to bring into memory. For example, to convert between ISO-8859-1 +(Latin 1) and UTF-8 (or UTF-EBCDIC in EBCDIC machines): + + open(F, "<:encoding(iso-8859-1)", "data.txt") or die $!; + open(G, ">:utf8", "data.utf") or die $!; + while (<F>) { print G } + + # Could also do "print G <F>" but that would pull + # the whole file into memory just to write it out again. + +More examples: + + open(my $f, "<:encoding(cp1252)") + open(my $g, ">:encoding(iso-8859-2)") + open(my $h, ">:encoding(latin9)") # iso-8859-15 + +See also L<encoding> for how to change the default encoding of the +data in your script. + +=head1 How does it work? + +Here is a crude diagram of how filehandle, PerlIO, and Encode +interact. + + filehandle <-> PerlIO PerlIO <-> scalar (read/printed) + \ / + Encode + +When PerlIO receives data from either direction, it fills a buffer +(currently with 1024 bytes) and passes the buffer to Encode. +Encode tries to convert the valid part and passes it back to PerlIO, +leaving invalid parts (usually a partial character) in the buffer.
+PerlIO then appends more data to the buffer, calls Encode again, +and so on until the data stream ends. + +To do so, PerlIO always calls (de|en)code methods with CHECK set to 1. +This ensures that the method stops at the right place when it +encounters a partial character. The following is what happens when +PerlIO and Encode try to encode (from utf8) more than 1024 bytes +and the buffer boundary happens to be in the middle of a character. + + A B C .... ~ \x{3000} .... + 41 42 43 .... 7E e3 80 80 .... + <- buffer ---------------> + << encoded >>>>>>>>>> + <- next buffer ------ + +Encode converts from the beginning to \x7E, leaving \xe3 in the buffer +because it is invalid (partial character). + +Unfortunately, this scheme does not work well with escape-based +encodings such as ISO-2022-JP. + +=head1 Line Buffering + +Now let's see what happens when you try to decode from ISO-2022-JP and +the buffer ends in the middle of a character. + + JIS208-ESC \x{5f3e} + A B C .... ~ \e $ B |DAN | .... + 41 42 43 .... 7E 1b 24 42 43 46 .... + <- buffer ---------------------------> + << encoded >>>>>>>>>>>>>>>>>>>>>>> + +As you see, the next buffer begins with \x43. But \x43 is 'C' in +ASCII, which is wrong in this case because we are now in JISX 0208 +area so it has to convert \x43\x46, not \x43. Unlike utf8 and EUC, +in escape-based encodings you can't tell if a given octet is a whole +character or just part of it. + +Fortunately PerlIO also supports line buffer if you tell PerlIO to use +one instead of fixed buffer. Since ISO-2022-JP is guaranteed to revert to ASCII at the end of the line, partial +character will never happen when line buffer is used. + +To tell PerlIO to use line buffer, implement -E<gt>needs_lines method +for your encoding object. See L<Encode::Encoding> for details. + +Thanks to these efforts most encodings that come with Encode support +PerlIO but that still leaves the following encodings.
+ + iso-2022-kr + MIME-B + MIME-Header + MIME-Q + +Fortunately iso-2022-kr is hardly used (according to Jungshik) and +MIME-* are very unlikely to be fed to PerlIO because they are for mail +headers. See L<Encode::MIME::Header> for details. + +=head2 How can I tell whether my encoding fully supports PerlIO ? + +As of this writing, any encoding whose class belongs to Encode::XS and +Encode::Unicode works. The Encode module has a C<perlio_ok> method +which you can use before applying PerlIO encoding to the filehandle. +Here is an example: + + my $use_perlio = perlio_ok($enc); + my $layer = $use_perlio ? "<:encoding($enc)" : "<:raw"; + open my $fh, $layer, $file or die "$file : $!"; + while(<$fh>){ + $_ = decode($enc, $_) unless $use_perlio; + # .... + } + +=head1 SEE ALSO + +L<Encode::Encoding>, +L<Encode::Supported>, +L<Encode::PerlIO>, +L<encoding>, +L<perlebcdic>, +L<perlfunc/open>, +L<perlunicode>, +L<utf8>, +the Perl Unicode Mailing List E<lt>perl-unicode@perl.orgE<gt> + +=cut + diff --git a/lib/Encode/Supported.pod b/lib/Encode/Supported.pod new file mode 100644 index 0000000..b23f6ca --- /dev/null +++ b/lib/Encode/Supported.pod @@ -0,0 +1,901 @@ +=head1 NAME + +Encode::Supported -- Encodings supported by Encode + +=head1 DESCRIPTION + +=head2 Encoding Names + +Encoding names are case insensitive. White space in names +is ignored. In addition, an encoding may have aliases. +Each encoding has one "canonical" name. The "canonical" +name is chosen from the names of the encoding by picking +the first in the following sequence (with a few exceptions). + +=over 2 + +=item * + +The name used by the Perl community. That includes 'utf8' and 'ascii'. +Unlike aliases, canonical names directly reach the method so such +frequently used words like 'utf8' don't need to do alias lookups. + +=item * + +The MIME name as defined in IETF RFCs. This includes all "iso-"s. + +=item * + +The name in the IANA registry. + +=item * + +The name used by the organization that defined it. + +=back + +In case I<de jure> canonical names differ from that of the Encode +module, they are always aliased if it ever be implemented.
So you can +safely tell if a given encoding is implemented or not just by passing +the canonical name. + +Because of all the alias issues, and because in the general case +encodings have state, "Encode" uses an encoding object internally +once an operation is in progress. + +=head1 Supported Encodings + +As of Perl 5.8.0, at least the following encodings are recognized. +Note that unless otherwise specified, they are all case insensitive +(via alias) and all occurrence of spaces are replaced with '-'. +In other words, "ISO 8859 1" and "iso-8859-1" are identical. + +Encodings are categorized and implemented in several different modules +but you don't have to C to make them available for +most cases. Encode.pm will automatically load those modules on demand. + +=head2 Built-in Encodings + +The following encodings are always available. + + Canonical Aliases Comments & References + ---------------------------------------------------------------- + ascii US-ascii ISO-646-US [ECMA] + ascii-ctrl Special Encoding + iso-8859-1 latin1 [ISO] + null Special Encoding + utf8 UTF-8 [RFC2279] + ---------------------------------------------------------------- + +I and I are special. "null" fails for all character +so when you set fallback mode to PERLQQ, HTMLCREF or XMLCREF, ALL +CHARACTERS will fall back to character references. Ditto for +"ascii-ctrl" except for control characters. For fallback modes, see +L. + +=head2 Encode::Unicode -- other Unicode encodings + +Unicode coding schemes other than native utf8 are supported by +Encode::Unicode, which will be autoloaded on demand. + + ---------------------------------------------------------------- + UCS-2BE UCS-2, iso-10646-1 [IANA, UC] + UCS-2LE [UC] + UTF-16 [UC] + UTF-16BE [UC] + UTF-16LE [UC] + UTF-32 [UC] + UTF-32BE UCS-4 [UC] + UTF-32LE [UC] + UTF-7 [RFC2152] + ---------------------------------------------------------------- + +To find how (UCS-2|UTF-(16|32))(LE|BE)? differ from one another, +see L. 
+ +UTF-7 is a special encoding which "re-encodes" UTF-16BE into a 7-bit +encoding. It is implemented separately by Encode::Unicode::UTF7. + +=head2 Encode::Byte -- Extended ASCII + +Encode::Byte implements most single-byte encodings except for +Symbols and EBCDIC. The following encodings are based on single-byte +encodings implemented as extended ASCII. Most of them map +\x80-\xff (upper half) to non-ASCII characters. + +=over 2 + +=item ISO-8859 and corresponding vendor mappings + +Since there are so many, they are presented in table format with +languages and corresponding encoding names by vendors. Note that +the table is sorted in order of ISO-8859 and the corresponding vendor +mappings are slightly different from that of ISO. See +L for details. + + Lang/Regions ISO/Other Std. DOS Windows Macintosh Others + ---------------------------------------------------------------- + N. America (ASCII) cp437 AdobeStandardEncoding + cp863 (DOSCanadaF) + W. Europe iso-8859-1 cp850 cp1252 MacRoman nextstep + hp-roman8 + cp860 (DOSPortuguese) + Cntrl. Europe iso-8859-2 cp852 cp1250 MacCentralEurRoman + MacCroatian + MacRomanian + MacRumanian + Latin3[1] iso-8859-3 + Latin4[2] iso-8859-4 + Cyrillics iso-8859-5 cp855 cp1251 MacCyrillic + (See also next section) cp866 MacUkrainian + Arabic iso-8859-6 cp864 cp1256 MacArabic + cp1006 MacFarsi + Greek iso-8859-7 cp737 cp1253 MacGreek + cp869 (DOSGreek2) + Hebrew iso-8859-8 cp862 cp1255 MacHebrew + Turkish iso-8859-9 cp857 cp1254 MacTurkish + Nordics iso-8859-10 cp865 + cp861 MacIcelandic + MacSami + Thai iso-8859-11[3] cp874 MacThai + (iso-8859-12 is nonexistent. Reserved for Indics?) + Baltics iso-8859-13 cp775 cp1257 + Celtics iso-8859-14 + Latin9 [4] iso-8859-15 + Latin10 iso-8859-16 + Vietnamese viscii cp1258 MacVietnamese + ---------------------------------------------------------------- + + [1] Esperanto, Maltese, and Turkish. Turkish is now on 8859-9. + [2] Baltics. Now on 8859-10, except for Latvian. 
+ [3] TIS 620 + Non-Breaking Space (0xA0 / U+00A0) + [4] Nicknamed Latin0; the Euro sign as well as French and Finnish + letters that are missing from 8859-1 were added. + +All cp* are also available as ibm-*, ms-*, and windows-* . See also +L. + +Macintosh encodings don't seem to be registered in such entities as +IANA. "Canonical" names in Encode are based upon Apple's Tech Note +1150. See L +for details. + +=item KOI8 - De Facto Standard for the Cyrillic world + +Though ISO-8859 does have ISO-8859-5, the KOI8 series is far more +popular in the Net. L comes with the following KOI charsets. +For gory details, see L + + ---------------------------------------------------------------- + koi8-f + koi8-r cp878 [RFC1489] + koi8-u [RFC2319] + ---------------------------------------------------------------- + +=back + +=head2 gsm0338 - Hentai Latin 1 + +GSM0338 is for GSM handsets. Though it shares alphanumerals with +ASCII, control character ranges and other parts are mapped very +differently, mainly to store Greek characters. There are also escape +sequences (starting with 0x1B) to cover e.g. the Euro sign. + +This was once handled by L but because of all those +unusual specifications, Encode 2.20 has relocated the support to +L. See L for details. + +=over 2 + +=item gsm0338 support before 2.19 + +Some special cases like a trailing 0x00 byte or a lone 0x1B byte are not +well-defined and decode() will return an empty string for them. +One possible workaround is + + $gsm =~ s/\x00\z/\x00\x00/; + $uni = decode("gsm0338", $gsm); + $uni .= "\xA0" if $gsm =~ /\x1B\z/; + +Note that the Encode implementation of GSM0338 does not implement the +reuse of Latin capital letters as Greek capital letters (for example, +the 0x5A is U+005A (LATIN CAPITAL LETTER Z), not U+0396 (GREEK CAPITAL +LETTER ZETA). + +The GSM0338 is also covered in Encode::Byte even though it is not +an "extended ASCII" encoding. 
+ +=back + +=head2 CJK: Chinese, Japanese, Korean (Multibyte) + +Note that Vietnamese is listed above. Also read "Encoding vs Charset" +below. Also note that these are implemented in distinct modules by +countries, due to the size concerns (simplified Chinese is mapped +to 'CN', continental China, while traditional Chinese is mapped to +'TW', Taiwan). Please refer to their respective documentation pages. + +=over 2 + +=item Encode::CN -- Continental China + + Standard DOS/Win Macintosh Comment/Reference + ---------------------------------------------------------------- + euc-cn [1] MacChineseSimp + (gbk) cp936 [2] + gb12345-raw { GB12345 without CES } + gb2312-raw { GB2312 without CES } + hz + iso-ir-165 + ---------------------------------------------------------------- + + [1] GB2312 is aliased to this. See L + [2] gbk is aliased to this. See L + +=item Encode::JP -- Japan + + Standard DOS/Win Macintosh Comment/Reference + ---------------------------------------------------------------- + euc-jp + shiftjis cp932 macJapanese + 7bit-jis + iso-2022-jp [RFC1468] + iso-2022-jp-1 [RFC2237] + jis0201-raw { JIS X 0201 (roman + halfwidth kana) without CES } + jis0208-raw { JIS X 0208 (Kanji + fullwidth kana) without CES } + jis0212-raw { JIS X 0212 (Extended Kanji) without CES } + ---------------------------------------------------------------- + +=item Encode::KR -- Korea + + Standard DOS/Win Macintosh Comment/Reference + ---------------------------------------------------------------- + euc-kr MacKorean [RFC1557] + cp949 [1] + iso-2022-kr [RFC1557] + johab [KS X 1001:1998, Annex 3] + ksc5601-raw { KSC5601 without CES } + ---------------------------------------------------------------- + + [1] ks_c_5601-1987, (x-)?windows-949, and uhc are aliased to this. + See below. 
+ +=item Encode::TW -- Taiwan + + Standard DOS/Win Macintosh Comment/Reference + ---------------------------------------------------------------- + big5-eten cp950 MacChineseTrad {big5 aliased to big5-eten} + big5-hkscs + ---------------------------------------------------------------- + +=item Encode::HanExtra -- More Chinese via CPAN + +Due to the size concerns, additional Chinese encodings below are +distributed separately on CPAN, under the name Encode::HanExtra. + + Standard DOS/Win Macintosh Comment/Reference + ---------------------------------------------------------------- + big5ext CMEX's Big5e Extension + big5plus CMEX's Big5+ Extension + cccii Chinese Character Code for Information Interchange + euc-tw EUC (Extended Unix Character) + gb18030 GBK with Traditional Characters + ---------------------------------------------------------------- + +=item Encode::JIS2K -- JIS X 0213 encodings via CPAN + +Due to size concerns, additional Japanese encodings below are +distributed separately on CPAN, under the name Encode::JIS2K. + + Standard DOS/Win Macintosh Comment/Reference + ---------------------------------------------------------------- + euc-jisx0213 + shiftjisx0213 + iso-2022-jp-3 + jis0213-1-raw + jis0213-2-raw + ---------------------------------------------------------------- + +=back + +=head2 Miscellaneous encodings + +=over 2 + +=item Encode::EBCDIC + +See L<perlebcdic> for details. + + ---------------------------------------------------------------- + cp37 + cp500 + cp875 + cp1026 + cp1047 + posix-bc + ---------------------------------------------------------------- + +=item Encode::Symbols + +For symbols and dingbats. + + ---------------------------------------------------------------- + symbol + dingbats + MacDingbats + AdobeZdingbat + AdobeSymbol + ---------------------------------------------------------------- + +=item Encode::MIME::Header + +Strictly speaking, MIME header encoding documented in RFC 2047 is more +of encapsulation than encoding.
However, their support in modern +world is imperative so they are supported. + + ---------------------------------------------------------------- + MIME-Header [RFC2047] + MIME-B [RFC2047] + MIME-Q [RFC2047] + ---------------------------------------------------------------- + +=item Encode::Guess + +This one is not a name of encoding but a utility that lets you pick up +the most appropriate encoding for a data out of given I. See +L for details. + +=back + +=head1 Unsupported encodings + +The following encodings are not supported as yet; some because they +are rarely used, some because of technical difficulties. They may +be supported by external modules via CPAN in the future, however. + +=over 2 + +=item ISO-2022-JP-2 [RFC1554] + +Not very popular yet. Needs Unicode Database or equivalent to +implement encode() (because it includes JIS X 0208/0212, KSC5601, and +GB2312 simultaneously, whose code points in Unicode overlap. So you +need to lookup the database to determine to what character set a given +Unicode character should belong). + +=item ISO-2022-CN [RFC1922] + +Not very popular. Needs CNS 11643-1 and -2 which are not available in +this module. CNS 11643 is supported (via euc-tw) in Encode::HanExtra. +Audrey Tang may add support for this encoding in her module in future. + +=item Various HP-UX encodings + +The following are unsupported due to the lack of mapping data. + + '8' - arabic8, greek8, hebrew8, kana8, thai8, and turkish8 + '15' - japanese15, korean15, and roi15 + +=item Cyrillic encoding ISO-IR-111 + +Anton Tagunov doubts its usefulness. + +=item ISO-8859-8-1 [Hebrew] + +None of the Encode team knows Hebrew enough (ISO-8859-8, cp1255 and +MacHebrew are supported because and just because there were mappings +available at L). Contributions welcome. + +=item ISIRI 3342, Iran System, ISIRI 2900 [Farsi] + +Ditto. + +=item Thai encoding TCVN + +Ditto. 
+ +=item Vietnamese encodings VPS + +Though Jungshik Shin has reported that Mozilla supports this encoding, +it was too late before 5.8.0 for us to add it. In the future, it +may be available via a separate module. See +L +and +L +if you are interested in helping us. + +=item Various Mac encodings + +The following are unsupported due to the lack of mapping data. + + MacArmenian, MacBengali, MacBurmese, MacEthiopic + MacExtArabic, MacGeorgian, MacKannada, MacKhmer + MacLaotian, MacMalayalam, MacMongolian, MacOriya + MacSinhalese, MacTamil, MacTelugu, MacTibetan + MacVietnamese + +The rest which are already available are based upon the vendor mappings +at L . + +=item (Mac) Indic encodings + +The maps for the following are available at L +but remain unsupported because those encodings need an algorithmic +approach, currently unsupported by F<enc2xs>: + + MacDevanagari + MacGurmukhi + MacGujarati + +For details, please see C at +L . + +I believe this issue is prevalent not only for Mac Indics but also in +other Indic encodings, but the above were the only Indic encoding +maps that I could find at L . + +=back + +=head1 Encoding vs. Charset -- terminology + +We are used to using the term (character) I<encoding> and I<character set> interchangeably. But just as confusing the terms byte and +character is dangerous and the terms should be differentiated when +needed, we need to differentiate I<encoding> and I<character set>. + +To understand that, here is a description of how we make computers +grok our characters. + +=over 2 + +=item * + +First we start with which characters to include. We call this +collection of characters I<character repertoire>. + +=item * + +Then we have to give each character a unique ID so your computer can +tell the difference between 'a' and 'A'. This itemized character +repertoire is now a I<character set>. + +=item * + +If your computer can grok the character set without further +processing, you can go ahead and use it. This is called a I<coded character set> (CCS) or I<raw character encoding>. ASCII is used this +way for most cases.
+ +=item * + +But in many cases, especially multi-byte CJK encodings, you have to +tweak a little more. Your network connection may not accept any data +with the Most Significant Bit set, and your computer may not be able to +tell if a given byte is a whole character or just half of it. So you +have to I<encode> the character set to use it. + +A I<character encoding scheme> (CES) determines how to encode a given +character set, or a set of multiple character sets. 7bit ISO-2022 is +an example of a CES. You switch between character sets via I<escape sequences>. + +=back + +Technically, or mathematically, speaking, a character set encoded in +such a CES that maps character by character may form a CCS. EUC is such +an example. The CES of EUC is as follows: + +=over 2 + +=item * + +Map ASCII unchanged. + +=item * + +Map such a character set that consists of 94 or 96 powered by N +members by adding 0x80 to each byte. + +=item * + +You can also use 0x8e and 0x8f to indicate that the following sequence of +characters belongs to yet another character set. To each following byte +is added the value 0x80. + +=back + +By carefully looking at the encoded byte sequence, you can find that the +byte sequence forms a unique number. In that sense, EUC is a CCS +generated by a CES above from up to four CCS (complicated?). UTF-8 +falls into this category. See L<perlunicode> to find out how +UTF-8 maps Unicode to a byte sequence. + +You may also have found out by now why 7bit ISO-2022 cannot comprise +a CCS. If you look at a byte sequence \x21\x21, you can't tell if +it is two !'s or IDEOGRAPHIC SPACE. EUC maps the latter to \xA1\xA1 +so you have no trouble differentiating between "!!" and S<" ">. + +=head1 Encoding Classification (by Anton Tagunov and Dan Kogai) + +This section tries to classify the supported encodings by their +applicability for information exchange over the Internet and to +choose the most suitable aliases to name them in the context of +such communication.
+ +=over 2 + +=item * + +To (en|de)code encodings marked by C<(**)>, you need +C<Encode::HanExtra>, available from CPAN. + +=back + +Encoding names + + US-ASCII UTF-8 ISO-8859-* KOI8-R + Shift_JIS EUC-JP ISO-2022-JP ISO-2022-JP-1 + EUC-KR Big5 GB2312 + +are registered with IANA as preferred MIME names and may +be used over the Internet. + +C<Shift_JIS> has been officialized by JIS X 0208:1997. +L</Microsoft-related naming mess> gives details. + +C<GB2312> is the IANA name for C<EUC-CN>. +See L</Microsoft-related naming mess> for details. + +C<GB_2312-80> I<raw> encoding is available as C<gb2312-raw> +with Encode. See L<Encode::CN> for details. + + EUC-CN + KOI8-U [RFC2319] + +have not been registered with IANA (as of March 2002) but +seem to be supported by major web browsers. +The IANA name for C<EUC-CN> is C<GB2312>. + + KS_C_5601-1987 + +is heavily misused. +See L</Microsoft-related naming mess> for details. + +C<KS_C_5601-1987> I<raw> encoding is available as C<ksc5601-raw> +with Encode. See L<Encode::KR> for details. + + UTF-16 UTF-16BE UTF-16LE + +are IANA-registered C<charset>s. See [RFC 2781] for details. +Jungshik Shin reports that UTF-16 with a BOM is well accepted +by MS IE 5/6 and NS 4/6. Beware however that + +=over 2 + +=item * + +C<UTF-16> support in any software you're going to be +using/interoperating with has probably been less tested +than C<UTF-8> support + +=item * + +C<UTF-8> coded data seamlessly passes traditional +command piping (C<cat>, C<more>, etc.) while C<UTF-16> coded +data is likely to cause confusion (with its zero bytes, +for example) + +=item * + +it is beyond the power of words to describe the way HTML browsers +encode non-C<ASCII> form data. To get a general impression, visit +L. +While encoding of form data has stabilized for C<UTF-8> encoded pages +(at least IE 5/6, NS 6, and Opera 6 behave consistently), be sure to +expect fun (and cross-browser discrepancies) with C<UTF-16> encoded +pages! + +=back + +The rule of thumb is to use C<UTF-8> unless you know what +you're doing and unless you really benefit from using C<UTF-16>. + + ISO-IR-165 [RFC1345] + VISCII + GB 12345 + GB 18030 (**) (see links below) + EUC-TW (**) + +are totally valid encodings but not registered at IANA.
+The names under which they are listed here are probably the +most widely-known names for these encodings and are recommended +names. + + BIG5PLUS (**) + +is a proprietary name. + +=head2 Microsoft-related naming mess + +Microsoft products misuse the following names: + +=over 2 + +=item KS_C_5601-1987 + +Microsoft extension to C. + +Proper names: C, C, C (as used by Mozilla). + +See L +for details. + +Encode aliases C to C to reflect this common +misusage. I C encoding is available as +C. + +See L for details. + +=item GB2312 + +Microsoft extension to C. + +Proper names: C, C. + +C has been registered in the C meaning at +IANA. This has partially repaired the situation: Microsoft's +C has become a superset of the official C. + +Encode aliases C to C in full agreement with +IANA registration. C is supported separately. +I C encoding is available as C. + +See L for details. + +=item Big5 + +Microsoft extension to C. + +Proper name: C. + +Encode separately supports C and C. + +=item Shift_JIS + +Microsoft's understanding of C. + +JIS has not endorsed the full Microsoft standard however. +The official C includes only JIS X 0201 and JIS X 0208 +character sets, while Microsoft has always used C +to encode a wider character repertoire. See C registration for +C. + +As a historical predecessor, Microsoft's variant +probably has more rights for the name, though it may be objected +that Microsoft shouldn't have used JIS as part of the name +in the first place. + +Unambiguous name: C. C name (also used by Mozilla, and +provided as an alias by Encode): C. + +Encode separately supports C and C. + +=back + +=head1 Glossary + +=over 2 + +=item character repertoire + +A collection of unique characters. A I set in the strictest +sense. At this stage, characters are not numbered. + +=item coded character set (CCS) + +A character set that is mapped in a way computers can use directly. +Many character encodings, including EUC, fall in this category. 
+ +=item character encoding scheme (CES) + +An algorithm to map a character set to a byte sequence. You don't +have to be able to tell which character set a given byte sequence +belongs to. 7-bit ISO-2022 is a CES but it cannot be a CCS. EUC is an +example of being both a CCS and CES. + +=item charset (in MIME context) + +has long been used in the meaning of C<encoding>, CES. + +While the word combination C<character set> has lost this meaning +in MIME context since [RFC 2130], the C<charset> abbreviation has +retained it. This is how [RFC 2277] and [RFC 2278] bless C<charset>: + + This document uses the term "charset" to mean a set of rules for + mapping from a sequence of octets to a sequence of characters, such + as the combination of a coded character set and a character encoding + scheme; this is also what is used as an identifier in MIME "charset=" + parameters, and registered in the IANA charset registry ... (Note + that this is NOT a term used by other standards bodies, such as ISO). + [RFC 2277] + +=item EUC + +Extended Unix Character. See ISO-2022. + +=item ISO-2022 + +A CES that was carefully designed to coexist with ASCII. There are a 7 +bit version and an 8 bit version. + +The 7 bit version switches character set via escape sequence so it +cannot form a CCS. Since this is more difficult to handle in programs +than the 8 bit version, the 7 bit version is not very popular except for +iso-2022-jp, the I<de facto> standard CES for e-mails. + +The 8 bit version can form a CCS. EUC and ISO-8859 are two examples +thereof. Pre-5.6 perl could use them as string literals. + +=item UCS + +Short for I<Universal Character Set>. When you say just UCS, it means +I<Unicode>. + +=item UCS-2 + +ISO/IEC 10646 encoding form: Universal Character Set coded in two +octets. + +=item Unicode + +A character set that aims to include all character repertoires of the +world. Many character sets in various national as well as industrial +standards have become, in a way, just subsets of Unicode. + +=item UTF + +Short for I<Unicode Transformation Format>.
Determines how to map a +Unicode character into a byte sequence. + +=item UTF-16 + +A UTF in 16-bit encoding. Can either be in big endian or little +endian. The big endian version is called UTF-16BE (equal to UCS-2 + +surrogate support) and the little endian version is called UTF-16LE. + +=back + +=head1 See Also + +L, +L, +L, L, L, L, +L, L +L, L + +=head1 References + +=over 2 + +=item ECMA + +European Computer Manufacturers Association +L + +=over 2 + +=item ECMA-035 (eq C) + +L + +The specification of ISO-2022 is available from the link above. + +=back + +=item IANA + +Internet Assigned Numbers Authority +L + +=over 2 + +=item Assigned Charset Names by IANA + +L + +Most of the C in Encode derive from this list +so you can directly apply the string you have extracted from MIME +header of mails and web pages. + +=back + +=item ISO + +International Organization for Standardization +L + +=item RFC + +Request For Comments -- need I say more? +L, L, +L + +=item UC + +Unicode Consortium +L + +=over 2 + +=item Unicode Glossary + +L + +The glossary of this document is based upon this site. + +=back + +=back + +=head2 Other Notable Sites + +=over 2 + +=item czyborra.com + +L + +Contains a lot of useful information, especially gory details of ISO +vs. vendor mappings. + +=item CJK.inf + +L + +Somewhat obsolete (last update in 1996), but still useful. Also try + +L + +You will find brief info on C, C and mostly on C. + +=item Jungshik Shin's Hangul FAQ + +L + +And especially its subject 8. + +L + +A comprehensive overview of the Korean (C) standards. + +=item debian.org: "Introduction to i18n" + +A brief description for most of the mentioned CJK encodings is +contained in +L + +=back + +=head2 Offline sources + +=over 2 + +=item C by Ken Lunde + +CJKV Information Processing +1999 O'Reilly & Associates, ISBN : 1-56592-224-7 + +The modern successor of C. 
+ +Features a comprehensive coverage of CJKV character sets and +encodings along with many other issues faced by anyone trying +to better support CJKV languages/scripts in all the areas of +information processing. + +To purchase this book, visit +L +or your favourite bookstore. + +=back + +=cut diff --git a/lib/Encode/Unicode/UTF7.pm b/lib/Encode/Unicode/UTF7.pm new file mode 100644 index 0000000..e686477 --- /dev/null +++ b/lib/Encode/Unicode/UTF7.pm @@ -0,0 +1,133 @@ +# +# $Id: UTF7.pm,v 2.10 2017/06/10 17:23:50 dankogai Exp $ +# +package Encode::Unicode::UTF7; +use strict; +use warnings; +use parent qw(Encode::Encoding); +__PACKAGE__->Define('UTF-7'); +our $VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +use MIME::Base64; +use Encode qw(find_encoding); + +# +# Algorithms taken from Unicode::String by Gisle Aas +# + +our $OPTIONAL_DIRECT_CHARS = 1; +my $specials = quotemeta "\'(),-./:?"; +$OPTIONAL_DIRECT_CHARS + and $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}"; + +# \s will not work because it matches U+3000 IDEOGRAPHIC SPACE +# We use qr/[\n\r\t\ ]/ instead +my $re_asis = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/; +my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/; +my $e_utf16 = find_encoding("UTF-16BE"); + +sub needs_lines { 1 } + +sub encode($$;$) { + my ( $obj, $str, $chk ) = @_; + return undef unless defined $str; + my $len = length($str); + pos($str) = 0; + my $bytes = substr($str, 0, 0); # to propagate taintedness + while ( pos($str) < $len ) { + if ( $str =~ /\G($re_asis+)/ogc ) { + my $octets = $1; + utf8::downgrade($octets); + $bytes .= $octets; + } + elsif ( $str =~ /\G($re_encoded+)/ogsc ) { + if ( $1 eq "+" ) { + $bytes .= "+-"; + } + else { + my $s = $1; + my $base64 = encode_base64( $e_utf16->encode($s), '' ); + $base64 =~ s/=+$//; + $bytes .= "+$base64-"; + } + } + else { + die "This should not happen! (pos=" . pos($str) .
")"; + } + } + $_[1] = '' if $chk; + return $bytes; +} + +sub decode($$;$) { + use re 'taint'; + my ( $obj, $bytes, $chk ) = @_; + return undef unless defined $bytes; + my $len = length($bytes); + my $str = substr($bytes, 0, 0); # to propagate taintedness; + pos($bytes) = 0; + no warnings 'uninitialized'; + while ( pos($bytes) < $len ) { + if ( $bytes =~ /\G([^+]+)/ogc ) { + $str .= $1; + } + elsif ( $bytes =~ /\G\+-/ogc ) { + $str .= "+"; + } + elsif ( $bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc ) { + my $base64 = $1; + my $pad = length($base64) % 4; + $base64 .= "=" x ( 4 - $pad ) if $pad; + $str .= $e_utf16->decode( decode_base64($base64) ); + } + elsif ( $bytes =~ /\G\+/ogc ) { + $^W and warn "Bad UTF7 data escape"; + $str .= "+"; + } + else { + die "This should not happen " . pos($bytes); + } + } + $_[1] = '' if $chk; + return $str; +} +1; +__END__ + +=head1 NAME + +Encode::Unicode::UTF7 -- UTF-7 encoding + +=head1 SYNOPSIS + + use Encode qw/encode decode/; + $utf7 = encode("UTF-7", $utf8); + $utf8 = decode("UTF-7", $utf7); + +=head1 ABSTRACT + +This module implements UTF-7 encoding documented in RFC 2152. UTF-7, +as its name suggests, is a 7-bit re-encoded version of UTF-16BE. It +is designed to be MTA-safe and was expected to be a standard way to +exchange Unicode text via mail. But with the advent of UTF-8 and +8-bit compliant MTAs, UTF-7 is hardly ever used. + +UTF-7 was not supported by Encode until version 1.95 because of that. +But since Unicode::String, a module by Gisle Aas which adds Unicode support +to non-utf8-savvy perls, did support UTF-7, UTF-7 support was added +so Encode can supersede Unicode::String 100%. + +=head1 In Practice + +When you want to encode Unicode for mails and web pages, however, do +not use UTF-7 unless you are sure your recipients and readers can +handle it. Very few MUAs and WWW Browsers support it these days (only +Mozilla seems to support it). For general cases, use UTF-8 for +message body and MIME-Header for header instead.
+ +=head1 SEE ALSO + +L<Encode>, L<Encode::Unicode>, L<Unicode::String> + +RFC 2152 L<https://www.rfc-editor.org/rfc/rfc2152> + +=cut diff --git a/t/Aliases.t b/t/Aliases.t new file mode 100644 index 0000000..8d4752b --- /dev/null +++ b/t/Aliases.t @@ -0,0 +1,198 @@ +#!../perl + +BEGIN { + if ($ENV{'PERL_CORE'}){ + chdir 't'; + unshift @INC, '../lib'; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } +} + +use strict; +use Encode; +use Encode::Alias; +my %a2c; +my @override_tests; +my $ON_EBCDIC; + +sub init_a2c{ + %a2c = ( + 'US-ascii' => 'ascii', + 'ISO-646-US' => 'ascii', + 'UTF-8' => 'utf-8-strict', + 'en_US.UTF-8' => 'utf-8-strict', + 'UCS-2' => 'UCS-2BE', + 'UCS2' => 'UCS-2BE', + 'iso-10646-1' => 'UCS-2BE', + 'ucs2-le' => 'UCS-2LE', + 'ucs2-be' => 'UCS-2BE', + 'utf16' => 'UTF-16', + 'utf32' => 'UTF-32', + 'utf16-be' => 'UTF-16BE', + 'utf32-be' => 'UTF-32BE', + 'utf16-le' => 'UTF-16LE', + 'utf32-le' => 'UTF-32LE', + 'UCS4-BE' => 'UTF-32BE', + 'UCS-4-LE' => 'UTF-32LE', + 'cyrillic' => 'iso-8859-5', + 'arabic' => 'iso-8859-6', + 'greek' => 'iso-8859-7', + 'hebrew' => 'iso-8859-8', + 'thai' => 'iso-8859-11', + 'tis620' => 'iso-8859-11', + 'tis-620' => 'iso-8859-11', + 'WinLatin1' => 'cp1252', + 'WinLatin2' => 'cp1250', + 'WinCyrillic' => 'cp1251', + 'WinGreek' => 'cp1253', + 'WinTurkish' => 'cp1254', + 'WinHebrew' => 'cp1255', + 'WinArabic' => 'cp1256', + 'WinBaltic' => 'cp1257', + 'WinVietnamese' => 'cp1258', + 'Macintosh' => 'MacRoman', + 'koi8r' => 'koi8-r', + 'koi8u' => 'koi8-u', + 'ja_JP.euc' => $ON_EBCDIC ? '' : 'euc-jp', + 'x-euc-jp' => $ON_EBCDIC ? '' : 'euc-jp', + 'zh_CN.euc' => $ON_EBCDIC ? '' : 'euc-cn', + 'x-euc-cn' => $ON_EBCDIC ? '' : 'euc-cn', + 'ko_KR.euc' => $ON_EBCDIC ? '' : 'euc-kr', + 'x-euc-kr' => $ON_EBCDIC ? '' : 'euc-kr', + 'ujis' => $ON_EBCDIC ? '' : 'euc-jp', + 'Shift_JIS' => $ON_EBCDIC ? '' : 'shiftjis', + 'x-sjis' => $ON_EBCDIC ? '' : 'shiftjis', + 'jis' => $ON_EBCDIC ? '' : '7bit-jis', + 'big-5' => $ON_EBCDIC ?
'' : 'big5-eten', + 'zh_TW.Big5' => $ON_EBCDIC ? '' : 'big5-eten', + 'tca-big5' => $ON_EBCDIC ? '' : 'big5-eten', + 'big5-hk' => $ON_EBCDIC ? '' : 'big5-hkscs', + 'hkscs-big5' => $ON_EBCDIC ? '' : 'big5-hkscs', + 'GB_2312-80' => $ON_EBCDIC ? '' : 'euc-cn', + 'KS_C_5601-1987' => $ON_EBCDIC ? '' : 'cp949', + # + 'gb12345-raw' => $ON_EBCDIC ? '' : 'gb12345-raw', + 'gb2312-raw' => $ON_EBCDIC ? '' : 'gb2312-raw', + 'jis0201-raw' => $ON_EBCDIC ? '' : 'jis0201-raw', + 'jis0208-raw' => $ON_EBCDIC ? '' : 'jis0208-raw', + 'jis0212-raw' => $ON_EBCDIC ? '' : 'jis0212-raw', + 'ksc5601-raw' => $ON_EBCDIC ? '' : 'ksc5601-raw', + 'cp65000' => 'UTF-7', + 'cp65001' => 'utf-8-strict', + ); + + for my $i (1..11,13..16){ + $a2c{"ISO 8859 $i"} = "iso-8859-$i"; + } + for my $i (1..10){ + $a2c{"ISO Latin $i"} = "iso-8859-$Encode::Alias::Latin2iso[$i]"; + } + for my $k (keys %Encode::Alias::Winlatin2cp){ + my $v = $Encode::Alias::Winlatin2cp{$k}; + $a2c{"Win" . ucfirst($k)} = "cp" . $v; + $a2c{"IBM-$v"} = $a2c{"MS-$v"} = "cp" . $v; + $a2c{"cp-" . $v} = "cp" . 
$v; + } + my @a2c = keys %a2c; + for my $k (@a2c){ + $a2c{uc($k)} = $a2c{$k}; + $a2c{lc($k)} = $a2c{$k}; + $a2c{lcfirst($k)} = $a2c{$k}; + $a2c{ucfirst($k)} = $a2c{$k}; + } +} + +BEGIN{ + $ON_EBCDIC = ord("A") == 193; + @ARGV and $ON_EBCDIC = $ARGV[0] eq 'EBCDIC'; + $Encode::ON_EBCDIC = $ON_EBCDIC; + init_a2c(); + @override_tests = qw( + myascii:cp1252 + mygreek:cp1253 + myhebrew:iso-8859-2 + myarabic:cp1256 + ueightsomething:utf-8-strict + unknown: + ); +} + +if ($ON_EBCDIC){ + delete @Encode::ExtModule{ + qw(euc-cn gb2312 gb12345 gbk cp936 iso-ir-165 MacChineseSimp + euc-jp iso-2022-jp 7bit-jis shiftjis MacJapanese cp932 + euc-kr ksc5601 cp949 MacKorean + big5 big5-hkscs cp950 MacChineseTrad + gb18030 big5plus euc-tw) + }; +} + +use Test::More tests => (scalar keys %a2c) * 3 + @override_tests; + +print "# alias test; \$ON_EBCDIC == $ON_EBCDIC\n"; + +foreach my $a (keys %a2c){ + print "# $a => $a2c{$a}\n"; + my $e = Encode::find_encoding($a); + is((defined($e) and $e->name), $a2c{$a},$a) + or warn "alias was $a";; +} + +# now we override some of the aliases and see if it works fine + +define_alias( + qr/ascii/i => '"WinLatin1"', + qr/cyrillic/i => '"WinCyrillic"', + qr/arabic/i => '"WinArabic"', + qr/greek/i => '"WinGreek"', + qr/hebrew/i => '"WinHebrew"' + ); + +Encode::find_encoding("myhebrew"); # pollute alias cache + +define_alias( sub { + my $enc = shift; + return "iso-8859-2" if $enc =~ /hebrew/i; + return "does-not-exist" if $enc =~ /arabic/i; # should then use other override alias + return "utf-8" if $enc =~ /eight/i; + return "unknown"; +}); + +print "# alias test with alias overrides\n"; + +for my $test (@override_tests) { + my($a, $c) = split /:/, $test; + my $e = Encode::find_encoding($a); + is((defined($e) and $e->name), $c, $a); +} + +print "# alias undef test\n"; + +Encode::Alias->undef_aliases; +foreach my $a (keys %a2c){ + my $e = Encode::find_encoding($a); + ok(!defined($e) || $e->name =~ /-raw$/o,"Undef $a") + or warn "alias was $a"; +} + +print
"# alias reinit test\n"; + +Encode::Alias->init_aliases; +init_a2c(); +foreach my $a (keys %a2c){ + my $e = Encode::find_encoding($a); + is((defined($e) and $e->name), $a2c{$a}, "Reinit $a") + or warn "alias was $a"; +} +__END__ +for my $k (keys %a2c){ + $k =~ /[A-Z]/ and next; + print "$k => $a2c{$k}\n"; +} + + + diff --git a/t/CJKT.t b/t/CJKT.t new file mode 100644 index 0000000..264daf0 --- /dev/null +++ b/t/CJKT.t @@ -0,0 +1,110 @@ +BEGIN { + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } +# should work w/o PerlIO now! +# unless (PerlIO::Layer->find('perlio')){ +# print "1..0 # Skip: PerlIO required\n"; +# exit 0; +# } + $| = 1; +} +use strict; +use Test::More tests => 60; +use Encode; +use File::Basename; +use File::Spec; +use File::Compare qw(compare_text); +our $DEBUG = shift || 0; + +my %Charset = + ( + 'big5-eten' => [qw(big5-eten)], + 'big5-hkscs' => [qw(big5-hkscs)], + gb2312 => [qw(euc-cn hz)], + jisx0201 => [qw(euc-jp shiftjis 7bit-jis)], + jisx0208 => [qw(euc-jp shiftjis 7bit-jis iso-2022-jp iso-2022-jp-1)], + jisx0212 => [qw(euc-jp 7bit-jis iso-2022-jp-1)], + ksc5601 => [qw(euc-kr iso-2022-kr johab)], + ); + + +my $dir = dirname(__FILE__); +my $seq = 1; + +for my $charset (sort keys %Charset){ + my ($src, $uni, $dst, $txt); + + my $transcoder = find_encoding($Charset{$charset}[0]) or die; + + my $src_enc = File::Spec->catfile($dir,"$charset.enc"); + my $src_utf = File::Spec->catfile($dir,"$charset.utf"); + my $dst_enc = File::Spec->catfile($dir,"$$.enc"); + my $dst_utf = File::Spec->catfile($dir,"$$.utf"); + + open $src, "<$src_enc" or die "$src_enc : $!"; + + if (PerlIO::Layer->find('perlio')){ + binmode($src, ":bytes"); # needed when :utf8 in default open layer + } + + $txt = join('',<$src>); + close($src); + + eval { $uni = $transcoder->decode($txt, 1) } or print $@; + ok(defined($uni), 
"decode $charset"); $seq++; + is(length($txt),0, "decode $charset completely"); $seq++; + + open $dst, ">$dst_utf" or die "$dst_utf : $!"; + if (PerlIO::Layer->find('perlio')){ + binmode($dst, ":utf8"); + print $dst $uni; + }else{ # ugh! + binmode($dst); + my $raw = $uni; Encode::_utf8_off($raw); + print $dst $raw; + } + + close($dst); + is(compare_text($dst_utf, $src_utf), 0, "$dst_utf eq $src_utf") + or ($DEBUG and rename $dst_utf, "$dst_utf.$seq"); + $seq++; + + open $src, "<$src_utf" or die "$src_utf : $!"; + if (PerlIO::Layer->find('perlio')){ + binmode($src, ":utf8"); + $uni = join('', <$src>); + }else{ # ugh! + binmode($src); + $uni = join('', <$src>); + Encode::_utf8_on($uni); + } + close $src; + + my $unisave = $uni; + eval { $txt = $transcoder->encode($uni,1) } or print $@; + ok(defined($txt), "encode $charset"); $seq++; + is(length($uni), 0, "encode $charset completely"); $seq++; + $uni = $unisave; + + open $dst,">$dst_enc" or die "$dst_utf : $!"; + binmode($dst); + print $dst $txt; + close($dst); + is(compare_text($src_enc, $dst_enc), 0 => "$dst_enc eq $src_enc") + or ($DEBUG and rename $dst_enc, "$dst_enc.$seq"); + $seq++; + + unlink($dst_utf, $dst_enc); + + for my $encoding (@{$Charset{$charset}}){ + my $rt = decode($encoding, encode($encoding, $uni)); + is ($rt, $uni, "RT $encoding"); + } +} diff --git a/t/Encode.t b/t/Encode.t new file mode 100644 index 0000000..0536b4b --- /dev/null +++ b/t/Encode.t @@ -0,0 +1,211 @@ +BEGIN { + if ($ENV{'PERL_CORE'}){ + chdir 't'; + unshift @INC, '../lib'; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } +} +use strict; +use Test::More; +use Encode qw(from_to encode decode + encode_utf8 decode_utf8 + find_encoding is_utf8); +use charnames qw(greek); +my @encodings = grep(/iso-?8859/,Encode::encodings()); +my $n = 2; +my @character_set = ('0'..'9', 
'A'..'Z', 'a'..'z'); +my @source = qw(ascii iso8859-1 cp1250); +my @destiny = qw(cp1047 cp37 posix-bc); +my @ebcdic_sets = qw(cp1047 cp37 posix-bc); +plan tests => 38+$n*@encodings + 2*@source*@destiny*@character_set + 2*@ebcdic_sets*256 + 6 + 3 + 3*8 + 2; + +my $str = join('',map(chr($_),0x20..0x7E)); +my $cpy = $str; +is length($str),from_to($cpy,'iso8859-1','Unicode'),"Length Wrong"; +is $cpy,$str,"ASCII mangled by translating from iso8859-1 to Unicode"; +$cpy = $str; +is from_to($cpy,'Unicode','iso8859-1'),length($str),"Length wrong"; +is $cpy,$str,"ASCII mangled by translating from Unicode to iso8859-1"; + +$str = join('',map(chr($_),0xa0..0xff)); +$cpy = $str; +is length($str),from_to($cpy,'iso8859-1','Unicode'),"Length Wrong"; + +my $sym = Encode->getEncoding('symbol'); +my $uni = $sym->decode(encode(ascii => 'a')); +is "\N{alpha}",substr($uni,0,1),"alpha does not map to symbol 'a'"; +$str = $sym->encode("\N{Beta}"); +is "B",decode(ascii => substr($str,0,1)),"Symbol 'B' does not map to Beta"; + +foreach my $enc (qw(symbol dingbats ascii),@encodings) + { + my $tab = Encode->getEncoding($enc); + is 1,defined($tab),"Could not load $enc"; + $str = join('',map(chr($_),0x20..0x7E)); + $uni = $tab->decode($str); + $cpy = $tab->encode($uni); + is $cpy,$str,"$enc mangled translating to Unicode and back"; + } + +# On ASCII based machines see if we can map several codepoints from +# three distinct ASCII sets to three distinct EBCDIC coded character sets. +# On EBCDIC machines see if we can map from three EBCDIC sets to three +# distinct ASCII sets. 
+ +my @expectation = (240..249, 193..201,209..217,226..233, 129..137,145..153,162..169); +if (ord('A') != 65) { + my @temp = @destiny; + @destiny = @source; + @source = @temp; + undef(@temp); + @expectation = (48..57, 65..90, 97..122); +} + +foreach my $to (@destiny) + { + foreach my $from (@source) + { + my @expected = @expectation; + foreach my $chr (@character_set) + { + my $native_chr = $chr; + my $cpy = $chr; + my $rc = from_to($cpy,$from,$to); + is 1,$rc,"Could not translate from $from to $to"; + is ord($cpy),shift(@expected),"mangled translating $native_chr from $from to $to"; + } + } + } + +# On either ASCII or EBCDIC machines ensure we can take the full one +# byte repertoire to EBCDIC sets and back. + +my $enc_as = 'iso8859-1'; +foreach my $enc_eb (@ebcdic_sets) + { + foreach my $ord (0..255) + { + $str = chr($ord); + my $rc = from_to($str,$enc_as,$enc_eb); + $rc += from_to($str,$enc_eb,$enc_as); + is $rc,2,"return code for $ord $enc_eb -> $enc_as -> $enc_eb was not obtained"; + is $ord,ord($str),"$enc_as mangled translating $ord to $enc_eb and back"; + } + } + +my $mime = find_encoding('iso-8859-2'); +is defined($mime),1,"Cannot find MIME-ish'iso-8859-2'"; +my $x11 = find_encoding('iso8859-2'); +is defined($x11),1,"Cannot find X11-ish 'iso8859-2'"; +is $mime,$x11,"iso8598-2 and iso-8859-2 not same"; +my $spc = find_encoding('iso 8859-2'); +is defined($spc),1,"Cannot find 'iso 8859-2'"; +is $spc,$mime,"iso 8859-2 and iso-8859-2 not same"; + +for my $i (256,128,129,256) + { + my $c = chr($i); + my $s = "$c\n".sprintf("%02X",$i); + is utf8::valid($s),1,"concat of $i botched"; + utf8::upgrade($s); + is utf8::valid($s),1,"concat of $i botched"; + } + +# Spot check a few points in/out of utf8 +for my $i (ord('A'),128,256,0x20AC) + { + my $c = chr($i); + my $o = encode_utf8($c); + is decode_utf8($o),$c,"decode_utf8 not inverse of encode_utf8 for $i"; + is encode('utf8',$c),$o,"utf8 encode by name broken for $i"; + is decode('utf8',$o),$c,"utf8 decode by name
broken for $i"; + } + + +# is_utf8 + +ok( is_utf8("\x{100}")); +ok(! is_utf8("a")); +ok(! is_utf8("")); +"\x{100}" =~ /(.)/; +ok( is_utf8($1)); # ID 20011127.151 +$a = $1; +ok( is_utf8($a)); +$a = "\x{100}"; +chop $a; +ok( is_utf8($a)); # weird but true: an empty UTF-8 string + +# non-string arguments +package Encode::Dummy; +use overload q("") => sub { $_[0]->[0] }; +sub new { my $class = shift; bless [ @_ ] => $class } +package main; +ok(decode(latin1 => Encode::Dummy->new("foobar")), "foobar"); +ok(encode(utf8 => Encode::Dummy->new("foobar")), "foobar"); + +# RT#91569 +# decode_utf8 with non-string arguments +ok(decode_utf8(*1), "*main::1"); + +# hash keys +foreach my $name ("UTF-16LE", "UTF-8", "Latin1") { + my $key = (keys %{{ "whatever\x{CA}" => '' }})[0]; + my $kopy = $key; + encode($name, $kopy, Encode::FB_CROAK); + is $key, "whatever\x{CA}", "encode $name with shared hash key scalars"; + undef $key; + $key = (keys %{{ "whatever\x{CA}" => '' }})[0]; + $kopy = $key; + encode($name, $kopy, Encode::FB_CROAK | Encode::LEAVE_SRC); + is $key, "whatever\x{CA}", "encode $name with LEAVE_SRC and shared hash key scalars"; + undef $key; + $key = (keys %{{ "whatever" => '' }})[0]; + $kopy = $key; + decode($name, $kopy, Encode::FB_CROAK); + is $key, "whatever", "decode $name with shared hash key scalars"; + undef $key; + $key = (keys %{{ "whatever" => '' }})[0]; + $kopy = $key; + decode($name, $kopy, Encode::FB_CROAK | Encode::LEAVE_SRC); + is $key, "whatever", "decode $name with LEAVE_SRC and shared hash key scalars"; + + my $enc = find_encoding($name); + undef $key; + $key = (keys %{{ "whatever\x{CA}" => '' }})[0]; + $kopy = $key; + $enc->encode($kopy, Encode::FB_CROAK); + is $key, "whatever\x{CA}", "encode obj $name with shared hash key scalars"; + undef $key; + $key = (keys %{{ "whatever\x{CA}" => '' }})[0]; + $kopy = $key; + $enc->encode($kopy, Encode::FB_CROAK | Encode::LEAVE_SRC); + is $key, "whatever\x{CA}", "encode obj $name with LEAVE_SRC and shared hash key 
scalars"; + undef $key; + $key = (keys %{{ "whatever" => '' }})[0]; + $kopy = $key; + $enc->decode($kopy, Encode::FB_CROAK); + is $key, "whatever", "decode obj $name with shared hash key scalars"; + undef $key; + $key = (keys %{{ "whatever" => '' }})[0]; + $kopy = $key; + $enc->decode($kopy, Encode::FB_CROAK | Encode::LEAVE_SRC); + is $key, "whatever", "decode obj $name with LEAVE_SRC and shared hash key scalars"; +} + +my $latin1 = find_encoding('latin1'); +my $orig = "\316"; +$orig =~ /(.)/; +is $latin1->encode($1), $orig, '[cpan #115168] passing magic regex globals to encode'; +SKIP: { + skip "Perl Version ($]) is older than v5.16", 1 if $] < 5.016; + *a = $orig; + is $latin1->encode(*a), '*main::'.$orig, '[cpan #115168] passing typeglobs to encode'; +} diff --git a/t/Encoder.t b/t/Encoder.t new file mode 100644 index 0000000..bfb4d8e --- /dev/null +++ b/t/Encoder.t @@ -0,0 +1,50 @@ +# +# $Id: Encoder.t,v 2.1 2013/09/14 07:51:59 dankogai Exp $ +# + +BEGIN { + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + $| = 1; +} + +use strict; +#use Test::More 'no_plan'; +use Test::More tests => 516; +use Encode::Encoder qw(encoder); +use MIME::Base64; +package Encode::Base64; +use parent 'Encode::Encoding'; +__PACKAGE__->Define('base64'); +use MIME::Base64; +sub encode{ + my ($obj, $data) = @_; + return encode_base64($data); +} +sub decode{ + my ($obj, $data) = @_; + return decode_base64($data); +} + +package main; + +my $e = encoder("foo", "ascii"); +ok ($e->data("bar")); +is ($e->data, "bar"); +ok ($e->encoding("latin1")); +is ($e->encoding, "iso-8859-1"); + +my $data = ''; +for my $i (0..255){ + no warnings; + $data .= chr($i); + my $base64 = encode_base64($data); + is(encoder($data)->base64, $base64, "encode"); + is(encoder($base64)->bytes('base64'), $data, "decode"); +} + +1; +__END__ diff --git a/t/Mod_EUCJP.pm b/t/Mod_EUCJP.pm new file mode 100644 index 0000000..4f381c5 --- 
/dev/null +++ b/t/Mod_EUCJP.pm @@ -0,0 +1,23 @@ +# $Id: Mod_EUCJP.pm,v 2.1 2013/02/18 02:23:56 dankogai Exp $ +# This file is in euc-jp +package Mod_EUCJP; +no warnings "deprecated"; +use encoding "euc-jp"; +sub new { + my $class = shift; + my $str = shift || qw/���ʸ����/; + my $self = bless { + str => '', + }, $class; + $self->set($str); + $self; +} +sub set { + my ($self,$str) = @_; + $self->{str} = $str; + $self; +} +sub str { shift->{str}; } +sub put { print shift->{str}; } +1; +__END__ diff --git a/t/Unicode.t b/t/Unicode.t new file mode 100644 index 0000000..2cc5d54 --- /dev/null +++ b/t/Unicode.t @@ -0,0 +1,209 @@ +# +# $Id: Unicode.t,v 2.3 2012/08/05 23:08:49 dankogai Exp $ +# +# This script is written entirely in ASCII, even though quoted literals +# do include non-BMP unicode characters -- Are you happy, jhi? +# + +BEGIN { + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + $| = 1; +} + +use strict; +#use Test::More 'no_plan'; +use Test::More tests => 56; +use Encode qw(encode decode find_encoding); + +# +# see +# http://www.unicode.org/unicode/reports/tr19/ +# + +my $dankogai = "\x{5c0f}\x{98fc}\x{3000}\x{5f3e}"; +my $nasty = "$dankogai\x{1abcd}"; +my $fallback = "$dankogai\x{fffd}\x{fffd}"; + +#hi: (0x1abcd - 0x10000) / 0x400 + 0xD800 = 0xd82a +#lo: (0x1abcd - 0x10000) % 0x400 + 0xDC00 = 0xdfcd + +my $n_16be = + pack("C*", map {hex($_)} qw<5c 0f 98 fc 30 00 5f 3e d8 2a df cd>); +my $n_16le = + pack("C*", map {hex($_)} qw<0f 5c fc 98 00 30 3e 5f 2a d8 cd df>); +my $f_16be = + pack("C*", map {hex($_)} qw<5c 0f 98 fc 30 00 5f 3e ff fd>); +my $f_16le = + pack("C*", map {hex($_)} qw<0f 5c fc 98 00 30 3e 5f fd ff>); +my $n_32be = + pack("C*", map {hex($_)} + qw<00 00 5c 0f 00 00 98 fc 00 00 30 00 00 00 5f 3e 00 01 ab cd>); +my $n_32le = + pack("C*", map {hex($_)} + qw<0f 5c 00 00 fc 98 00 00 00 30 
00 00 3e 5f 00 00 cd ab 01 00>); + +my $n_16bb = pack('n', 0xFeFF) . $n_16be; +my $n_16lb = pack('v', 0xFeFF) . $n_16le; +my $n_32bb = pack('N', 0xFeFF) . $n_32be; +my $n_32lb = pack('V', 0xFeFF) . $n_32le; + +is($n_16be, encode('UTF-16BE', $nasty), qq{encode UTF-16BE}); +is($n_16le, encode('UTF-16LE', $nasty), qq{encode UTF-16LE}); +is($n_32be, encode('UTF-32BE', $nasty), qq{encode UTF-32BE}); +is($n_32le, encode('UTF-32LE', $nasty), qq{encode UTF-16LE}); + +is($nasty, decode('UTF-16BE', $n_16be), qq{decode UTF-16BE}); +is($nasty, decode('UTF-16LE', $n_16le), qq{decode UTF-16LE}); +is($nasty, decode('UTF-32BE', $n_32be), qq{decode UTF-32BE}); +is($nasty, decode('UTF-32LE', $n_32le), qq{decode UTF-32LE}); + +is($n_16bb, encode('UTF-16', $nasty), qq{encode UTF-16}); +is($n_32bb, encode('UTF-32', $nasty), qq{encode UTF-32}); +is($nasty, decode('UTF-16', $n_16bb), qq{decode UTF-16, bom=be}); +is($nasty, decode('UTF-16', $n_16lb), qq{decode UTF-16, bom=le}); +is($nasty, decode('UTF-32', $n_32bb), qq{decode UTF-32, bom=be}); +is($nasty, decode('UTF-32', $n_32lb), qq{decode UTF-32, bom=le}); + +is(decode('UCS-2BE', $n_16be), $fallback, "decode UCS-2BE: fallback"); +is(decode('UCS-2LE', $n_16le), $fallback, "decode UCS-2LE: fallback"); +eval { decode('UCS-2BE', $n_16be, 1) }; +is (index($@,'UCS-2BE:'), 0, "decode UCS-2BE: exception"); +eval { decode('UCS-2LE', $n_16le, 1) }; +is (index($@,'UCS-2LE:'), 0, "decode UCS-2LE: exception"); +is(encode('UCS-2BE', $nasty), $f_16be, "encode UCS-2BE: fallback"); +is(encode('UCS-2LE', $nasty), $f_16le, "encode UCS-2LE: fallback"); +eval { encode('UCS-2BE', $nasty, 1) }; +is(index($@, 'UCS-2BE'), 0, "encode UCS-2BE: exception"); +eval { encode('UCS-2LE', $nasty, 1) }; +is(index($@, 'UCS-2LE'), 0, "encode UCS-2LE: exception"); + +{ + my %tests = ( + 'UCS-2BE' => 'n*', + 'UCS-2LE' => 'v*', + 'UTF-16BE' => 'n*', + 'UTF-16LE' => 'v*', + 'UTF-32BE' => 'N*', + 'UTF-32LE' => 'V*', + ); + + while (my ($enc, $pack) = each(%tests)) { + 
is(decode($enc, pack($pack, 0xD800, 0x263A)), "\x{FFFD}\x{263A}", + "decode $enc (HI surrogate followed by WHITE SMILING FACE)"); + is(decode($enc, pack($pack, 0xDC00, 0x263A)), "\x{FFFD}\x{263A}", + "decode $enc (LO surrogate followed by WHITE SMILING FACE)"); + } +} + +{ + my %tests = ( + 'UTF-16BE' => 'n*', + 'UTF-16LE' => 'v*', + ); + + while (my ($enc, $pack) = each(%tests)) { + is(decode($enc, pack($pack, 0xD800)), "\x{FFFD}", + "decode $enc (HI surrogate)"); + is(decode($enc, pack($pack, 0x263A, 0xD800)), "\x{263A}\x{FFFD}", + "decode $enc (WHITE SMILING FACE followed by HI surrogate)"); + } +} + +{ + my %tests = ( + 'UTF-16BE' => 'n*', + 'UTF-16LE' => 'v*', + ); + + while (my ($enc, $pack) = each(%tests)) { + is(encode($enc, "\x{110000}"), pack($pack, 0xFFFD), + "ordinals greater than U+10FFFF is replaced with U+FFFD"); + } +} + +# +# SvGROW test for (en|de)code_xs +# +SKIP: { + my $utf8 = ''; + for my $j (0,0x10){ + for my $i (0..0xffff){ + $j == 0 and (0xD800 <= $i && $i <= 0xDFFF) and next; + $utf8 .= ord($j+$i); + } + for my $major ('UTF-16', 'UTF-32'){ + for my $minor ('BE', 'LE'){ + my $enc = $major.$minor; + is(decode($enc, encode($enc, $utf8)), $utf8, "$enc RT"); + } + } + } +}; + +# +# CJKT vs. UTF-7 +# + +use File::Spec; +use File::Basename; + +my $dir = dirname(__FILE__); +opendir my $dh, $dir or die "$dir:$!"; +my @file = sort grep {/\.utf$/o} readdir $dh; +closedir $dh; +for my $file (@file){ + my $path = File::Spec->catfile($dir, $file); + open my $fh, '<', $path or die "$path:$!"; + my $content; + if (PerlIO::Layer->find('perlio')){ + binmode $fh => ':utf8'; + $content = join('' => <$fh>); + }else{ # ugh! 
+ binmode $fh; + $content = join('' => <$fh>); + Encode::_utf8_on($content) + } + close $fh; + is(decode("UTF-7", encode("UTF-7", $content)), $content, + "UTF-7 RT:$file"); +} + +# Magic +{ + # see http://rt.perl.org/rt3//Ticket/Display.html?id=60472 + my $work = chr(0x100); + my $encoding = find_encoding("UTF16-BE"); + my $tied; + tie $tied, SomeScalar => \$work; + my $result = $encoding->encode($tied, 1); + is($work, "", "check set magic was applied"); +} + +package SomeScalar; +use Tie::Scalar; +use vars qw(@ISA); +BEGIN { @ISA = 'Tie::Scalar' } + +sub TIESCALAR { + my ($class, $ref) = @_; + return bless $ref, $class; +} + +sub FETCH { + ${$_[0]} +} + +sub STORE { + ${$_[0]} = $_[1]; +} + +1; +__END__ diff --git a/t/at-cn.t b/t/at-cn.t new file mode 100644 index 0000000..c82225e --- /dev/null +++ b/t/at-cn.t @@ -0,0 +1,147 @@ +BEGIN { + if ($ENV{'PERL_CORE'}){ + chdir 't'; + unshift @INC, '../lib'; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + $| = 1; +} + +use strict; +use Test::More tests => 29; +use Encode; + +no utf8; # we have raw Chinese encodings here + +BEGIN { + use_ok('Encode::CN'); +} + +# Since JP.t already tests basic file IO, we will just focus on +# internal encode / decode test here. Unfortunately, to test +# against all the UniHan characters will take a huge disk space, +# not to mention the time it will take, and the fact that Perl +# did not bundle UniHan.txt anyway. + +# So, here we just test a typical snippet spanning multiple Unicode +# blocks, and hope it can point out obvious errors. + +run_tests('Simplified Chinese only', { + 'utf' => ( +12298.26131.32463.12299.31532.19968.21350. +24406.26352.65306. +22823.21705.20094.20803.65292.19975.29289.36164.22987.65292. +20035.32479.22825.12290. +20113.34892.38632.26045.65292.21697.29289.27969.24418.12290. 
+22823.26126.22987.32456.65292.20845.20301.26102.25104.65292. +26102.20056.20845.40857.20197.24481.22825.12290. +20094.36947.21464.21270.65292.21508.27491.24615.21629.65292. +20445.21512.22823.21644.65292.20035.21033.36126.12290. +39318.20986.24246.29289.65292.19975.22269.21688.23425.12290 + ), + + 'euc-cn' => join('', +'���׾�����һ��', +'��Ի��', +'����ǬԪ��������ʼ��', +'��ͳ�졣', +'������ʩ��Ʒ�����Ρ�', +'����ʼ�գ���λʱ�ɣ�', +'ʱ�����������졣', +'Ǭ���仯������������', +'���ϴ�ͣ������ꡣ', +'�׳�������������', + ), + + 'gb2312-raw' => join('', +'!6RW>-!75ZR;XT', +'ehT;#:', +'4sTUG,T*#,MrNoWJJ<#,', +'DKM3Ll!#', +'TFPPSjJ)#,F7NoAwPN!#', +'4sCwJ join('', +'!6RW>-!75ZR;XT', +'ehT;#:', +'4sTUG,T*#,MrNoWJJ<#,', +'DKM3Ll!#', +'TFPPSjJ)#,F7NoAwPN!#', +'4sCwJ ( +35937.26352.65306.10. +22825.34892.20581.65292.21531.23376.20197.33258.24378.19981.24687.12290.10. +28508.40857.21247.29992.65292.38451.22312.19979.20063.12290.32. +35265.40857.22312.30000.65292.24503.26045.26222.20063.12290.32. +32456.26085.20094.20094.65292.21453.22797.36947.20063.12290.10. +25110.36291.22312.28170.65292.36827.26080.21646.20063.12290.39134. +40857.22312.22825.65292.22823.20154.36896.20063.12290.32. +20130.40857.26377.24724.65292.30408.19981.21487.20037.20063.12290.10. 
+29992.20061.65292.22825.24503.19981.21487.20026.39318.20063.12290 + ), + + 'cp936' => join(chr(10), +'��Ի��', +'���н�����������ǿ��Ϣ��', +'DZ�����ã�������Ҳ�� ���������ʩ��Ҳ�� ����ǬǬ��������Ҳ��', +'��Ծ��Ԩ�����޾�Ҳ���������죬������Ҳ�� �����лڣ�ӯ���ɾ�Ҳ��', +'�þţ���²���Ϊ��Ҳ��', + ), + + 'hz' => join(chr(10), +'~{OsT;#:~}', +'~{LlPP=!#,>}WSRTWTG?2;O"!#~}', +'~{G1AzNpSC#,QtTZOBR2!#~} ~{<{AzTZLo#,5BJ)FUR2!#~} ~{VUHUG,G,#,74845@R2!#~}', +'~{;rT>TZT(#,=xN^>LR2!#7IAzTZLl#,4sHKTlR2!#~} ~{?:AzSP;Z#,S/2;?I>CR2!#~}', +'~{SC>E#,Ll5B2;?IN*JWR2!#~}', + ), +}); + +run_tests('Traditional Chinese', { + 'utf', => 20094.65306.20803.12289.20136.12289.21033.12289.35998, + 'gb12345-raw' => 'G,#:T*!":`!"@{!"Uj', + 'gbk' => 'Ǭ��Ԫ���ࡢ����ؑ', +}); + +sub run_tests { + my ($title, $tests) = @_; + my $utf = delete $tests->{'utf'}; + + # $enc = encoding, $str = content + foreach my $enc (sort keys %{$tests}) { + my $str = $tests->{$enc}; + + is(Encode::decode($enc, $str), $utf, "[$enc] decode - $title"); + is(Encode::encode($enc, $utf), $str, "[$enc] encode - $title"); + + my $str2 = $str; + my $utf8 = Encode::encode('utf-8', $utf); + + Encode::from_to($str2, $enc, 'utf-8'); + is($str2, $utf8, "[$enc] from_to => utf8 - $title"); + + Encode::from_to($utf8, 'utf-8', $enc); # convert $utf8 as $enc + is($utf8, $str, "[$enc] utf8 => from_to - $title"); + } +} diff --git a/t/at-tw.t b/t/at-tw.t new file mode 100644 index 0000000..203fc34 --- /dev/null +++ b/t/at-tw.t @@ -0,0 +1,98 @@ +BEGIN { + if (! 
-d 'blib' and -d 't'){ chdir 't' }; + unshift @INC, '../lib'; + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + unless (find PerlIO::Layer 'perlio') { + print "1..0 # Skip: PerlIO was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + $| = 1; +} + +use strict; +use Test::More tests => 17; +use Encode; + +no utf8; # we have raw Chinese encodings here + +BEGIN { + use_ok('Encode::TW'); +} + +# Since JP.t already tests basic file IO, we will just focus on +# internal encode / decode test here. Unfortunately, to test +# against all the UniHan characters will take a huge disk space, +# not to mention the time it will take, and the fact that Perl +# did not bundle UniHan.txt anyway. + +# So, here we just test a typical snippet spanning multiple Unicode +# blocks, and hope it can point out obvious errors. + +run_tests('Basic Big5 range', { + 'utf' => ( +24093.39640.38525.20043.33495.35028.20846.65292. +26389.30343.32771.26352.20271.24248.65108. +25885.25552.35998.20110.23391.38508.20846.65292. +24799.24218.23493.21566.20197.38477.65108 + ), + + 'big5' => (join('', +'�Ұ������]�Ǥ��A�ӬӦҤ�B�e�Q', +'�ᴣ�s�_�s�����A�����G�^�H���Q', + )), + + 'big5-hkscs'=> (join('', +'�Ұ������]�Ǥ��A�ӬӦҤ�B�e�Q', +'�ᴣ�s�_�s�����A�����G�^�H���Q', + )), + + 'cp950' => (join('', +'�Ұ������]�Ǥ��A�ӬӦҤ�B�e�Q', +'�ᴣ�s�_�s�����A�����G�^�H���Q', + )), +}); + +run_tests('Hong Kong Extensions', { + 'utf' => ( +24863.35613.25152.26377.20351.29992.32.80.101.114.108.32. +22021.26379.21451.65292.32102.25105.21707.22021. +25903.25345.12289.24847.35211.21644.40723.21237. +22914.26524.32232.30908.26377.20219.20309.37679.28431. 
+65292.35531.21578.35380.25105.21707.12290 + ), + + 'big5-hkscs' => join('', +'�P�©Ҧ��ϥ� Perl ��B�͡A���ڒ]�����B�N���M���y', +'�p�G�s�X��������|�A�Чi�D�ڒ]�C' + ), +}); + +sub run_tests { + my ($title, $tests) = @_; + my $utf = delete $tests->{'utf'}; + + # $enc = encoding, $str = content + foreach my $enc (sort keys %{$tests}) { + my $str = $tests->{$enc}; + + is(Encode::decode($enc, $str), $utf, "[$enc] decode - $title"); + is(Encode::encode($enc, $utf), $str, "[$enc] encode - $title"); + + my $str2 = $str; + my $utf8 = Encode::encode('utf-8', $utf); + + Encode::from_to($str2, $enc, 'utf-8'); + is($str2, $utf8, "[$enc] from_to => utf8 - $title"); + + Encode::from_to($utf8, 'utf-8', $enc); # convert $utf8 as $enc + is($utf8, $str, "[$enc] utf8 => from_to - $title"); + } +} diff --git a/t/big5-eten.enc b/t/big5-eten.enc new file mode 100644 index 0000000..dc6be51 --- /dev/null +++ b/t/big5-eten.enc @@ -0,0 +1,444 @@ +0xa140: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa160: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa1a0: �������������������������������������������������������������� +0xa1c0: �����¡áġšơǡȡɡʡˡ̡͡ΡϡСѡҡӡԡա֡סء١ڡۡܡݡޡ� +0xa1e0: ������������������������������������������� +0xa240: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa260: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa2a0: ������ ������������������������������������������������ +0xa2c0: �����¢âĢŢƢǢȢɢʢˢ̢͢΢ϢТѢҢӢԢբ֢עآ٢ڢۢܢݢޢ� +0xa2e0: ������������������������������������������� +0xa340: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa360: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa3a0: �������������������������������������������������������������� +0xa3e0: �� +0xa440: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa460: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa4a0: 
�������������������������������������������������������������� +0xa4c0: �����¤äĤŤƤǤȤɤʤˤ̤ͤΤϤФѤҤӤԤդ֤פؤ٤ڤۤܤݤޤ� +0xa4e0: ������������������������������������������� +0xa540: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa560: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa5a0: �������������������������������������������������������������� +0xa5c0: �����¥åĥťƥǥȥɥʥ˥̥ͥΥϥХѥҥӥԥե֥ץإ٥ڥۥܥݥޥ� +0xa5e0: ������������������������������������������� +0xa640: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa660: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa6a0: �������������������������������������������������������������� +0xa6c0: �����¦æĦŦƦǦȦɦʦ˦̦ͦΦϦЦѦҦӦԦզ֦צئ٦ڦۦܦݦަ� +0xa6e0: ������������������������������������������� +0xa740: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa760: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa7a0: �������������������������������������������������������������� +0xa7c0: �����§çħŧƧǧȧɧʧ˧̧ͧΧϧЧѧҧӧԧէ֧קا٧ڧۧܧݧާ� +0xa7e0: ������������������������������������������� +0xa840: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa860: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa8a0: �������������������������������������������������������������� +0xa8c0: �����¨èĨŨƨǨȨɨʨ˨̨ͨΨϨШѨҨӨԨը֨רب٨ڨۨܨݨި� +0xa8e0: ������������������������������������������� +0xa940: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa960: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa9a0: �������������������������������������������������������������� +0xa9c0: �����©éĩũƩǩȩɩʩ˩̩ͩΩϩЩѩҩөԩթ֩שة٩ک۩ܩݩީ� +0xa9e0: ������������������������������������������� +0xaa40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xaa60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xaaa0: �������������������������������������������������������������� +0xaac0: 
�����ªêĪŪƪǪȪɪʪ˪̪ͪΪϪЪѪҪӪԪժ֪תت٪ڪ۪ܪݪު� +0xaae0: ������������������������������������������� +0xab40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xab60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xaba0: �������������������������������������������������������������� +0xabc0: �����«ëīūƫǫȫɫʫ˫̫ͫΫϫЫѫҫӫԫի֫׫ث٫ګ۫ܫݫޫ� +0xabe0: ������������������������������������������� +0xac40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xac60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xaca0: �������������������������������������������������������������� +0xacc0: �����¬ìĬŬƬǬȬɬʬˬ̬ͬάϬЬѬҬӬԬլ֬׬ج٬ڬ۬ܬݬެ� +0xace0: ������������������������������������������� +0xad40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xad60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xada0: �������������������������������������������������������������� +0xadc0: �����­íĭŭƭǭȭɭʭ˭̭ͭέϭЭѭҭӭԭխ֭׭ح٭ڭۭܭݭޭ� +0xade0: ������������������������������������������� +0xae40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xae60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xaea0: �������������������������������������������������������������� +0xaec0: �����®îĮŮƮǮȮɮʮˮ̮ͮήϮЮѮҮӮԮծ֮׮خٮڮۮܮݮޮ� +0xaee0: ������������������������������������������� +0xaf40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xaf60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xafa0: �������������������������������������������������������������� +0xafc0: �����¯ïįůƯǯȯɯʯ˯̯ͯίϯЯѯүӯԯկ֯ׯدٯگۯܯݯޯ� +0xafe0: ������������������������������������������� +0xb040: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb060: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb0a0: �������������������������������������������������������������� +0xb0c0: �����°ðİŰưǰȰɰʰ˰̰ͰΰϰаѰҰӰ԰հְװذٰڰ۰ܰݰް� +0xb0e0: ������������������������������������������� +0xb140: 
�@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb160: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb1a0: �������������������������������������������������������������� +0xb1c0: �����±ñıűƱDZȱɱʱ˱̱ͱαϱбѱұӱԱձֱױرٱڱ۱ܱݱޱ� +0xb1e0: ������������������������������������������� +0xb240: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb260: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb2a0: �������������������������������������������������������������� +0xb2c0: �����²òIJŲƲDzȲɲʲ˲̲ͲβϲвѲҲӲԲղֲײزٲڲ۲ܲݲ޲� +0xb2e0: ������������������������������������������� +0xb340: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb360: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb3a0: �������������������������������������������������������������� +0xb3c0: �����³óijųƳdzȳɳʳ˳̳ͳγϳгѳҳӳԳճֳ׳سٳڳ۳ܳݳ޳� +0xb3e0: ������������������������������������������� +0xb440: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb460: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb4a0: �������������������������������������������������������������� +0xb4c0: �����´ôĴŴƴǴȴɴʴ˴̴ʹδϴдѴҴӴԴմִ״شٴڴ۴ܴݴ޴� +0xb4e0: ������������������������������������������� +0xb540: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb560: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb5a0: �������������������������������������������������������������� +0xb5c0: �����µõĵŵƵǵȵɵʵ˵̵͵εϵеѵҵӵԵյֵ׵صٵڵ۵ܵݵ޵� +0xb5e0: ������������������������������������������� +0xb640: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb660: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb6a0: �������������������������������������������������������������� +0xb6c0: �����¶öĶŶƶǶȶɶʶ˶̶Ͷζ϶жѶҶӶԶնֶ׶ضٶڶ۶ܶݶ޶� +0xb6e0: ������������������������������������������� +0xb740: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb760: 
�`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb7a0: �������������������������������������������������������������� +0xb7c0: �����·÷ķŷƷǷȷɷʷ˷̷ͷηϷзѷҷӷԷշַ׷طٷڷ۷ܷݷ޷� +0xb7e0: ������������������������������������������� +0xb840: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb860: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb8a0: �������������������������������������������������������������� +0xb8c0: �����¸øĸŸƸǸȸɸʸ˸̸͸θϸиѸҸӸԸոָ׸ظٸڸ۸ܸݸ޸� +0xb8e0: ������������������������������������������� +0xb940: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb960: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb9a0: �������������������������������������������������������������� +0xb9c0: �����¹ùĹŹƹǹȹɹʹ˹̹͹ιϹйѹҹӹԹչֹ׹عٹڹ۹ܹݹ޹� +0xb9e0: ������������������������������������������� +0xba40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xba60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xbaa0: �������������������������������������������������������������� +0xbac0: �����ºúĺźƺǺȺɺʺ˺̺ͺκϺкѺҺӺԺպֺ׺غٺںۺܺݺ޺� +0xbae0: ������������������������������������������� +0xbb40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xbb60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xbba0: �������������������������������������������������������������� +0xbbc0: �����»ûĻŻƻǻȻɻʻ˻̻ͻλϻлѻһӻԻջֻ׻ػٻڻۻܻݻ޻� +0xbbe0: ������������������������������������������� +0xbc40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xbc60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xbca0: �������������������������������������������������������������� +0xbcc0: �����¼üļżƼǼȼɼʼ˼̼ͼμϼмѼҼӼԼռּ׼ؼټڼۼܼݼ޼� +0xbce0: ������������������������������������������� +0xbd40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xbd60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xbda0: 
�������������������������������������������������������������� +0xbdc0: �����½ýĽŽƽǽȽɽʽ˽̽ͽνϽнѽҽӽԽսֽ׽ؽٽڽ۽ܽݽ޽� +0xbde0: ������������������������������������������� +0xbe40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xbe60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xbea0: �������������������������������������������������������������� +0xbec0: �����¾þľžƾǾȾɾʾ˾̾;ξϾоѾҾӾԾվ־׾ؾپھ۾ܾݾ޾� +0xbee0: ������������������������������������������� +0xbf40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xbf60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xbfa0: �������������������������������������������������������������� +0xbfc0: �����¿ÿĿſƿǿȿɿʿ˿̿ͿοϿпѿҿӿԿտֿ׿ؿٿڿۿܿݿ޿� +0xbfe0: ������������������������������������������� +0xc040: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc060: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc0a0: �������������������������������������������������������������� +0xc0c0: ���������������������������������������������������������������� +0xc0e0: �������������������������������������������������������������� +0xc140: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc160: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc1a0: �������������������������������������������������������������� +0xc1c0: ���������������������������������������������������������������� +0xc1e0: �������������������������������������������������������������� +0xc240: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc260: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc2a0: ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ +0xc2c0: ���������������������������������������������������������������� +0xc2e0: �������������������������������������������������������������� +0xc340: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc360: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ 
+0xc3a0: áâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ +0xc3c0: ���������������������������������������������������������������� +0xc3e0: �������������������������������������������������������������� +0xc440: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc460: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc4a0: ġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿ +0xc4c0: ���������������������������������������������������������������� +0xc4e0: �������������������������������������������������������������� +0xc540: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc560: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc5a0: šŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſ +0xc5c0: ���������������������������������������������������������������� +0xc5e0: �������������������������������������������������������������� +0xc640: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc660: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc6a0: ơƢƣƤƥƦƧƨƩƪƫƬƭƮƯưƱƲƳƴƵƶƷƸƹƺƻƼƽƾƿ +0xc6c0: ���������������������������������������������������������������� +0xc6e0: �������������������������������������������������������������� +0xc740: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc760: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc7a0: ǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸǹǺǻǼǽǾǿ +0xc7c0: ���������������������������������������������������������������� +0xc7e0: �������������������������������������������������������������� +0xc840: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc860: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc8a0: ȡȢȣȤȥȦȧȨȩȪȫȬȭȮȯȰȱȲȳȴȵȶȷȸȹȺȻȼȽȾȿ +0xc8c0: ���������������������������������������������������������������� +0xc8e0: ������������������������������������ �������������������� +0xc940: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc960: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ 
+0xc9a0: ɡɢɣɤɥɦɧɨɩɪɫɬɭɮɯɰɱɲɳɴɵɶɷɸɹɺɻɼɽɾɿ +0xc9c0: ���������������������������������������������������������������� +0xc9e0: �������������������������������������������������������������� +0xca40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xca60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xcaa0: ʡʢʣʤʥʦʧʨʩʪʫʬʭʮʯʰʱʲʳʴʵʶʷʸʹʺʻʼʽʾʿ +0xcac0: ���������������������������������������������������������������� +0xcae0: �������������������������������������������������������������� +0xcb40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xcb60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xcba0: ˡˢˣˤ˥˦˧˨˩˪˫ˬ˭ˮ˯˰˱˲˳˴˵˶˷˸˹˺˻˼˽˾˿ +0xcbc0: ���������������������������������������������������������������� +0xcbe0: �������������������������������������������������������������� +0xcc40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xcc60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xcca0: ̴̵̶̷̸̡̢̧̨̣̤̥̦̩̪̫̬̭̮̯̰̱̲̳̹̺̻̼̽̾̿ +0xccc0: ���������������������������������������������������������������� +0xcce0: �������������������������������������������������������������� +0xcd40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xcd60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xcda0: ͣͤͥͦͧͨͩͪͫͬͭͮͯ͢͡ͰͱͲͳʹ͵Ͷͷ͸͹ͺͻͼͽ;Ϳ +0xcdc0: ���������������������������������������������������������������� +0xcde0: �������������������������������������������������������������� +0xce40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xce60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xcea0: Ρ΢ΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξο +0xcec0: ���������������������������������������������������������������� +0xcee0: �������������������������������������������������������������� +0xcf40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xcf60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ 
+0xcfa0: ϡϢϣϤϥϦϧϨϩϪϫϬϭϮϯϰϱϲϳϴϵ϶ϷϸϹϺϻϼϽϾϿ +0xcfc0: ���������������������������������������������������������������� +0xcfe0: �������������������������������������������������������������� +0xd040: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd060: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd0a0: СТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп +0xd0c0: ���������������������������������������������������������������� +0xd0e0: �������������������������������������������������������������� +0xd140: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd160: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd1a0: ѡѢѣѤѥѦѧѨѩѪѫѬѭѮѯѰѱѲѳѴѵѶѷѸѹѺѻѼѽѾѿ +0xd1c0: ���������������������������������������������������������������� +0xd1e0: �������������������������������������������������������������� +0xd240: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd260: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd2a0: ҡҢңҤҥҦҧҨҩҪҫҬҭҮүҰұҲҳҴҵҶҷҸҹҺһҼҽҾҿ +0xd2c0: ���������������������������������������������������������������� +0xd2e0: �������������������������������������������������������������� +0xd340: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd360: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd3a0: ӡӢӣӤӥӦӧӨөӪӫӬӭӮӯӰӱӲӳӴӵӶӷӸӹӺӻӼӽӾӿ +0xd3c0: ���������������������������������������������������������������� +0xd3e0: �������������������������������������������������������������� +0xd440: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd460: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd4a0: ԡԢԣԤԥԦԧԨԩԪԫԬԭԮԯ԰ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿ +0xd4c0: ���������������������������������������������������������������� +0xd4e0: �������������������������������������������������������������� +0xd540: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd560: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ 
+0xd5a0: աբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտ +0xd5c0: ���������������������������������������������������������������� +0xd5e0: �������������������������������������������������������������� +0xd640: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd660: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd6a0: ְֱֲֳִֵֶַָֹֺֻּֽ֢֣֤֥֦֧֪֭֮֡֨֩֫֬֯־ֿ +0xd6c0: ���������������������������������������������������������������� +0xd6e0: �������������������������������������������������������������� +0xd740: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd760: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd7a0: סעףפץצקרשת׫׬׭׮ׯװױײ׳״׵׶׷׸׹׺׻׼׽׾׿ +0xd7c0: ���������������������������������������������������������������� +0xd7e0: �������������������������������������������������������������� +0xd840: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd860: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd8a0: ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿ +0xd8c0: ���������������������������������������������������������������� +0xd8e0: �������������������������������������������������������������� +0xd940: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd960: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd9a0: ١٢٣٤٥٦٧٨٩٪٫٬٭ٮٯٰٱٲٳٴٵٶٷٸٹٺٻټٽپٿ +0xd9c0: ���������������������������������������������������������������� +0xd9e0: �������������������������������������������������������������� +0xda40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xda60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xdaa0: ڡڢڣڤڥڦڧڨکڪګڬڭڮگڰڱڲڳڴڵڶڷڸڹںڻڼڽھڿ +0xdac0: ���������������������������������������������������������������� +0xdae0: �������������������������������������������������������������� +0xdb40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xdb60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ 
+0xdba0: ۣۡۢۤۥۦۧۨ۩۪ۭ۫۬ۮۯ۰۱۲۳۴۵۶۷۸۹ۺۻۼ۽۾ۿ +0xdbc0: ���������������������������������������������������������������� +0xdbe0: �������������������������������������������������������������� +0xdc40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xdc60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xdca0: ܡܢܣܤܥܦܧܨܩܪܫܬܭܮܯܱܴܷܸܹܻܼܾܰܲܳܵܶܺܽܿ +0xdcc0: ���������������������������������������������������������������� +0xdce0: �������������������������������������������������������������� +0xdd40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xdd60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xdda0: ݡݢݣݤݥݦݧݨݩݪݫݬݭݮݯݰݱݲݳݴݵݶݷݸݹݺݻݼݽݾݿ +0xddc0: ���������������������������������������������������������������� +0xdde0: �������������������������������������������������������������� +0xde40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xde60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xdea0: ޡޢޣޤޥަާިީުޫެޭޮޯްޱ޲޳޴޵޶޷޸޹޺޻޼޽޾޿ +0xdec0: ���������������������������������������������������������������� +0xdee0: �������������������������������������������������������������� +0xdf40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xdf60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xdfa0: ߡߢߣߤߥߦߧߨߩߪ߲߫߬߭߮߯߰߱߳ߴߵ߶߷߸߹ߺ߻߼߽߾߿ +0xdfc0: ���������������������������������������������������������������� +0xdfe0: �������������������������������������������������������������� +0xe040: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe060: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe0a0: ������������������������������� +0xe0c0: ���������������������������������������������������������������� +0xe0e0: �������������������������������������������������������������� +0xe140: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe160: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ 
+0xe1a0: ������������������������������� +0xe1c0: ���������������������������������������������������������������� +0xe1e0: �������������������������������������������������������������� +0xe240: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe260: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe2a0: ������������������������������� +0xe2c0: ���������������������������������������������������������������� +0xe2e0: �������������������������������������������������������������� +0xe340: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe360: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe3a0: ������������������������������� +0xe3c0: ���������������������������������������������������������������� +0xe3e0: �������������������������������������������������������������� +0xe440: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe460: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe4a0: ������������������������������� +0xe4c0: ���������������������������������������������������������������� +0xe4e0: �������������������������������������������������������������� +0xe540: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe560: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe5a0: ������������������������������� +0xe5c0: ���������������������������������������������������������������� +0xe5e0: �������������������������������������������������������������� +0xe640: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe660: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe6a0: ������������������������������� +0xe6c0: ���������������������������������������������������������������� +0xe6e0: �������������������������������������������������������������� +0xe740: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe760: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ 
+0xe7a0: ������������������������������� +0xe7c0: ���������������������������������������������������������������� +0xe7e0: �������������������������������������������������������������� +0xe840: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe860: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe8a0: ������������������������������� +0xe8c0: ���������������������������������������������������������������� +0xe8e0: �������������������������������������������������������������� +0xe940: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe960: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe9a0: ������������������������������� +0xe9c0: ���������������������������������������������������������������� +0xe9e0: �������������������������������������������������������������� +0xea40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xea60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xeaa0: ������������������������������� +0xeac0: ���������������������������������������������������������������� +0xeae0: �������������������������������������������������������������� +0xeb40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xeb60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xeba0: ������������������������������� +0xebc0: ���������������������������������������������������������������� +0xebe0: �������������������������������������������������������������� +0xec40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xec60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xeca0: ������������������������������� +0xecc0: ���������������������������������������������������������������� +0xece0: �������������������������������������������������������������� +0xed40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xed60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ 
+0xeda0: �������������������������������������������������������������� +0xedc0: ���������������������������������������������������������������� +0xede0: �������������������������������������������������������������� +0xee40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xee60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xeea0: ������������������������������� +0xeec0: ���������������������������������������������������������������� +0xeee0: �������������������������������������������������������������� +0xef40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xef60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xefa0: ������������������������������� +0xefc0: ���������������������������������������������������������������� +0xefe0: �������������������������������������������������������������� +0xf040: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf060: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf0a0: ������������������������������� +0xf0c0: ���������������������������������������������������������������� +0xf0e0: �������������������������������������������������������������� +0xf140: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf160: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf1a0: ������������������������������� +0xf1c0: ���������������������������������������������������������������� +0xf1e0: �������������������������������������������������������������� +0xf240: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf260: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf2a0: ������������������������������� +0xf2c0: ���������������������������������������������������������������� +0xf2e0: �������������������������������������������������������������� +0xf340: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf360: 
�`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf3a0: ������������������������������� +0xf3c0: ���������������������������������������������������������������� +0xf3e0: �������������������������������������������������������������� +0xf440: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf460: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf4a0: �������������������������������������������������������������� +0xf4c0: ���������������������������������������������������������������� +0xf4e0: �������������������������������������������������������������� +0xf540: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf560: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf5a0: �������������������������������������������������������������� +0xf5c0: ���������������������������������������������������������������� +0xf5e0: �������������������������������������������������������������� +0xf640: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf660: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf6a0: �������������������������������������������������������������� +0xf6c0: ���������������������������������������������������������������� +0xf6e0: �������������������������������������������������������������� +0xf740: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf760: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf7a0: �������������������������������������������������������������� +0xf7c0: ���������������������������������������������������������������� +0xf7e0: �������������������������������������������������������������� +0xf840: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf860: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf8a0: �������������������������������������������������������������� +0xf8c0: ���������������������������������������������������������������� 
+0xf8e0: �������������������������������������������������������������� +0xf940: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf960: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf9a0: �������������������������������������������������������������� +0xf9c0: ���������������������������������������������������������������� +0xf9e0: ���������������������������������������������������� �� diff --git a/t/big5-eten.utf b/t/big5-eten.utf new file mode 100644 index 0000000..6b4b99a --- /dev/null +++ b/t/big5-eten.utf @@ -0,0 +1,444 @@ +0xa140:  ,、。.‧;:?!︰…‥﹐﹑﹒·﹔﹕﹖﹗|–︱—︳╴︴﹏()︵ +0xa160: ︶{}︷︸〔〕︹︺【】︻︼《》︽︾〈〉︿﹀「」﹁﹂『』﹃﹄﹙﹚ +0xa1a0: ﹛﹜﹝﹞‘’“”〝〞‵′#&*※§〃○●△▲◎☆★◇◆□■▽▼ +0xa1c0: ㊣℅¯ ̄_ˍ﹉﹊﹍﹎﹋﹌﹟﹠﹡+-×÷±√<>=≦≧≠∞≒≡﹢﹣ +0xa1e0: ﹤﹥﹦~∩∪⊥∠∟⊿㏒㏑∫∮∵∴♀♂⊕⊙↑↓←→↖↗↙↘∥∣/ +0xa240: \∕﹨$¥〒¢£%@℃℉﹩﹪﹫㏕㎜㎝㎞㏎㎡㎎㎏㏄°兙兛兞兝兡兣嗧 +0xa260: 瓩糎▁▂▃▄▅▆▇█▏▎▍▌▋▊▉┼┴┬┤├▔─│▕┌┐└┘╭ +0xa2a0: ╮╰╯ ◢◣◥◤╱╲╳0123456789ⅠⅡⅢⅣⅤⅥⅦ +0xa2c0: ⅧⅨⅩ〡〢〣〤〥〦〧〨〩〸〹〺ABCDEFGHIJKLMNOPQ +0xa2e0: RSTUVWXYZabcdefghijklmnopqrstuv +0xa340: wxyzΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδ +0xa360: εζηθικλμνξοπρστυφχψωㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏ +0xa3a0: ㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩ˙ˉˊˇˋ +0xa3e0: € +0xa440: 一乙丁七乃九了二人儿入八几刀刁力匕十卜又三下丈上丫丸凡久么也乞于 +0xa460: 亡兀刃勺千叉口土士夕大女子孑孓寸小尢尸山川工己已巳巾干廾弋弓才 +0xa4a0: 丑丐不中丰丹之尹予云井互五亢仁什仃仆仇仍今介仄元允內六兮公冗凶 +0xa4c0: 分切刈勻勾勿化匹午升卅卞厄友及反壬天夫太夭孔少尤尺屯巴幻廿弔引心 +0xa4e0: 戈戶手扎支文斗斤方日曰月木欠止歹毋比毛氏水火爪父爻片牙牛犬王丙 +0xa540: 世丕且丘主乍乏乎以付仔仕他仗代令仙仞充兄冉冊冬凹出凸刊加功包匆北 +0xa560: 匝仟半卉卡占卯卮去可古右召叮叩叨叼司叵叫另只史叱台句叭叻四囚外 +0xa5a0: 央失奴奶孕它尼巨巧左市布平幼弁弘弗必戊打扔扒扑斥旦朮本未末札正 +0xa5c0: 母民氐永汁汀氾犯玄玉瓜瓦甘生用甩田由甲申疋白皮皿目矛矢石示禾穴立 +0xa5e0: 丞丟乒乓乩亙交亦亥仿伉伙伊伕伍伐休伏仲件任仰仳份企伋光兇兆先全 +0xa640: 共再冰列刑划刎刖劣匈匡匠印危吉吏同吊吐吁吋各向名合吃后吆吒因回囝 +0xa660: 圳地在圭圬圯圩夙多夷夸妄奸妃好她如妁字存宇守宅安寺尖屹州帆并年 +0xa6a0: 式弛忙忖戎戌戍成扣扛托收早旨旬旭曲曳有朽朴朱朵次此死氖汝汗汙江 +0xa6c0: 池汐汕污汛汍汎灰牟牝百竹米糸缶羊羽老考而耒耳聿肉肋肌臣自至臼舌舛 +0xa6e0: 舟艮色艾虫血行衣西阡串亨位住佇佗佞伴佛何估佐佑伽伺伸佃佔似但佣 +0xa740: 作你伯低伶余佝佈佚兌克免兵冶冷別判利刪刨劫助努劬匣即卵吝吭吞吾否 +0xa760: 呎吧呆呃吳呈呂君吩告吹吻吸吮吵吶吠吼呀吱含吟听囪困囤囫坊坑址坍 +0xa7a0: 均坎圾坐坏圻壯夾妝妒妨妞妣妙妖妍妤妓妊妥孝孜孚孛完宋宏尬局屁尿 +0xa7c0: 尾岐岑岔岌巫希序庇床廷弄弟彤形彷役忘忌志忍忱快忸忪戒我抄抗抖技扶 +0xa7e0: 抉扭把扼找批扳抒扯折扮投抓抑抆改攻攸旱更束李杏材村杜杖杞杉杆杠 +0xa840: 杓杗步每求汞沙沁沈沉沅沛汪決沐汰沌汨沖沒汽沃汲汾汴沆汶沍沔沘沂灶 +0xa860: 
灼災灸牢牡牠狄狂玖甬甫男甸皂盯矣私秀禿究系罕肖肓肝肘肛肚育良芒 +0xa8a0: 芋芍見角言谷豆豕貝赤走足身車辛辰迂迆迅迄巡邑邢邪邦那酉釆里防阮 +0xa8c0: 阱阪阬並乖乳事些亞享京佯依侍佳使佬供例來侃佰併侈佩佻侖佾侏侑佺兔 +0xa8e0: 兒兕兩具其典冽函刻券刷刺到刮制剁劾劻卒協卓卑卦卷卸卹取叔受味呵 +0xa940: 咖呸咕咀呻呷咄咒咆呼咐呱呶和咚呢周咋命咎固垃坷坪坩坡坦坤坼夜奉奇 +0xa960: 奈奄奔妾妻委妹妮姑姆姐姍始姓姊妯妳姒姅孟孤季宗定官宜宙宛尚屈居 +0xa9a0: 屆岷岡岸岩岫岱岳帘帚帖帕帛帑幸庚店府底庖延弦弧弩往征彿彼忝忠忽 +0xa9c0: 念忿怏怔怯怵怖怪怕怡性怩怫怛或戕房戾所承拉拌拄抿拂抹拒招披拓拔拋 +0xa9e0: 拈抨抽押拐拙拇拍抵拚抱拘拖拗拆抬拎放斧於旺昔易昌昆昂明昀昏昕昊 +0xaa40: 昇服朋杭枋枕東果杳杷枇枝林杯杰板枉松析杵枚枓杼杪杲欣武歧歿氓氛泣 +0xaa60: 注泳沱泌泥河沽沾沼波沫法泓沸泄油況沮泗泅泱沿治泡泛泊沬泯泜泖泠 +0xaaa0: 炕炎炒炊炙爬爭爸版牧物狀狎狙狗狐玩玨玟玫玥甽疝疙疚的盂盲直知矽 +0xaac0: 社祀祁秉秈空穹竺糾罔羌羋者肺肥肢肱股肫肩肴肪肯臥臾舍芳芝芙芭芽芟 +0xaae0: 芹花芬芥芯芸芣芰芾芷虎虱初表軋迎返近邵邸邱邶采金長門阜陀阿阻附 +0xab40: 陂隹雨青非亟亭亮信侵侯便俠俑俏保促侶俘俟俊俗侮俐俄係俚俎俞侷兗冒 +0xab60: 冑冠剎剃削前剌剋則勇勉勃勁匍南卻厚叛咬哀咨哎哉咸咦咳哇哂咽咪品 +0xaba0: 哄哈咯咫咱咻咩咧咿囿垂型垠垣垢城垮垓奕契奏奎奐姜姘姿姣姨娃姥姪 +0xabc0: 姚姦威姻孩宣宦室客宥封屎屏屍屋峙峒巷帝帥帟幽庠度建弈弭彥很待徊律 +0xabe0: 徇後徉怒思怠急怎怨恍恰恨恢恆恃恬恫恪恤扁拜挖按拼拭持拮拽指拱拷 +0xac40: 拯括拾拴挑挂政故斫施既春昭映昧是星昨昱昤曷柿染柱柔某柬架枯柵柩柯 +0xac60: 柄柑枴柚查枸柏柞柳枰柙柢柝柒歪殃殆段毒毗氟泉洋洲洪流津洌洱洞洗 +0xaca0: 活洽派洶洛泵洹洧洸洩洮洵洎洫炫為炳炬炯炭炸炮炤爰牲牯牴狩狠狡玷 +0xacc0: 珊玻玲珍珀玳甚甭畏界畎畋疫疤疥疢疣癸皆皇皈盈盆盃盅省盹相眉看盾盼 +0xace0: 眇矜砂研砌砍祆祉祈祇禹禺科秒秋穿突竿竽籽紂紅紀紉紇約紆缸美羿耄 +0xad40: 耐耍耑耶胖胥胚胃胄背胡胛胎胞胤胝致舢苧范茅苣苛苦茄若茂茉苒苗英茁 +0xad60: 苜苔苑苞苓苟苯茆虐虹虻虺衍衫要觔計訂訃貞負赴赳趴軍軌述迦迢迪迥 +0xada0: 迭迫迤迨郊郎郁郃酋酊重閂限陋陌降面革韋韭音頁風飛食首香乘亳倌倍 +0xadc0: 倣俯倦倥俸倩倖倆值借倚倒們俺倀倔倨俱倡個候倘俳修倭倪俾倫倉兼冤冥 +0xade0: 冢凍凌准凋剖剜剔剛剝匪卿原厝叟哨唐唁唷哼哥哲唆哺唔哩哭員唉哮哪 +0xae40: 哦唧唇哽唏圃圄埂埔埋埃堉夏套奘奚娑娘娜娟娛娓姬娠娣娩娥娌娉孫屘宰 +0xae60: 害家宴宮宵容宸射屑展屐峭峽峻峪峨峰島崁峴差席師庫庭座弱徒徑徐恙 +0xaea0: 恣恥恐恕恭恩息悄悟悚悍悔悌悅悖扇拳挈拿捎挾振捕捂捆捏捉挺捐挽挪 +0xaec0: 挫挨捍捌效敉料旁旅時晉晏晃晒晌晅晁書朔朕朗校核案框桓根桂桔栩梳栗 +0xaee0: 桌桑栽柴桐桀格桃株桅栓栘桁殊殉殷氣氧氨氦氤泰浪涕消涇浦浸海浙涓 +0xaf40: 浬涉浮浚浴浩涌涊浹涅浥涔烊烘烤烙烈烏爹特狼狹狽狸狷玆班琉珮珠珪珞 +0xaf60: 畔畝畜畚留疾病症疲疳疽疼疹痂疸皋皰益盍盎眩真眠眨矩砰砧砸砝破砷 +0xafa0: 砥砭砠砟砲祕祐祠祟祖神祝祗祚秤秣秧租秦秩秘窄窈站笆笑粉紡紗紋紊 +0xafc0: 素索純紐紕級紜納紙紛缺罟羔翅翁耆耘耕耙耗耽耿胱脂胰脅胭胴脆胸胳脈 +0xafe0: 能脊胼胯臭臬舀舐航舫舨般芻茫荒荔荊茸荐草茵茴荏茲茹茶茗荀茱茨荃 +0xb040: 虔蚊蚪蚓蚤蚩蚌蚣蚜衰衷袁袂衽衹記訐討訌訕訊託訓訖訏訑豈豺豹財貢起 +0xb060: 躬軒軔軏辱送逆迷退迺迴逃追逅迸邕郡郝郢酒配酌釘針釗釜釙閃院陣陡 +0xb0a0: 陛陝除陘陞隻飢馬骨高鬥鬲鬼乾偺偽停假偃偌做偉健偶偎偕偵側偷偏倏 +0xb0c0: 偯偭兜冕凰剪副勒務勘動匐匏匙匿區匾參曼商啪啦啄啞啡啃啊唱啖問啕唯 +0xb0e0: 啤唸售啜唬啣唳啁啗圈國圉域堅堊堆埠埤基堂堵執培夠奢娶婁婉婦婪婀 +0xb140: 娼婢婚婆婊孰寇寅寄寂宿密尉專將屠屜屝崇崆崎崛崖崢崑崩崔崙崤崧崗巢 +0xb160: 常帶帳帷康庸庶庵庾張強彗彬彩彫得徙從徘御徠徜恿患悉悠您惋悴惦悽 +0xb1a0: 情悻悵惜悼惘惕惆惟悸惚惇戚戛扈掠控捲掖探接捷捧掘措捱掩掉掃掛捫 +0xb1c0: 推掄授掙採掬排掏掀捻捩捨捺敝敖救教敗啟敏敘敕敔斜斛斬族旋旌旎晝晚 +0xb1e0: 
晤晨晦晞曹勗望梁梯梢梓梵桿桶梱梧梗械梃棄梭梆梅梔條梨梟梡梂欲殺 +0xb240: 毫毬氫涎涼淳淙液淡淌淤添淺清淇淋涯淑涮淞淹涸混淵淅淒渚涵淚淫淘淪 +0xb260: 深淮淨淆淄涪淬涿淦烹焉焊烽烯爽牽犁猜猛猖猓猙率琅琊球理現琍瓠瓶 +0xb2a0: 瓷甜產略畦畢異疏痔痕疵痊痍皎盔盒盛眷眾眼眶眸眺硫硃硎祥票祭移窒 +0xb2c0: 窕笠笨笛第符笙笞笮粒粗粕絆絃統紮紹紼絀細紳組累終紲紱缽羞羚翌翎習 +0xb2e0: 耜聊聆脯脖脣脫脩脰脤舂舵舷舶船莎莞莘荸莢莖莽莫莒莊莓莉莠荷荻荼 +0xb340: 莆莧處彪蛇蛀蚶蛄蚵蛆蛋蚱蚯蛉術袞袈被袒袖袍袋覓規訪訝訣訥許設訟訛 +0xb360: 訢豉豚販責貫貨貪貧赧赦趾趺軛軟這逍通逗連速逝逐逕逞造透逢逖逛途 +0xb3a0: 部郭都酗野釵釦釣釧釭釩閉陪陵陳陸陰陴陶陷陬雀雪雩章竟頂頃魚鳥鹵 +0xb3c0: 鹿麥麻傢傍傅備傑傀傖傘傚最凱割剴創剩勞勝勛博厥啻喀喧啼喊喝喘喂喜 +0xb3e0: 喪喔喇喋喃喳單喟唾喲喚喻喬喱啾喉喫喙圍堯堪場堤堰報堡堝堠壹壺奠 +0xb440: 婷媚婿媒媛媧孳孱寒富寓寐尊尋就嵌嵐崴嵇巽幅帽幀幃幾廊廁廂廄弼彭復 +0xb460: 循徨惑惡悲悶惠愜愣惺愕惰惻惴慨惱愎惶愉愀愒戟扉掣掌描揀揩揉揆揍 +0xb4a0: 插揣提握揖揭揮捶援揪換摒揚揹敞敦敢散斑斐斯普晰晴晶景暑智晾晷曾 +0xb4c0: 替期朝棺棕棠棘棗椅棟棵森棧棹棒棲棣棋棍植椒椎棉棚楮棻款欺欽殘殖殼 +0xb4e0: 毯氮氯氬港游湔渡渲湧湊渠渥渣減湛湘渤湖湮渭渦湯渴湍渺測湃渝渾滋 +0xb540: 溉渙湎湣湄湲湩湟焙焚焦焰無然煮焜牌犄犀猶猥猴猩琺琪琳琢琥琵琶琴琯 +0xb560: 琛琦琨甥甦畫番痢痛痣痙痘痞痠登發皖皓皴盜睏短硝硬硯稍稈程稅稀窘 +0xb5a0: 窗窖童竣等策筆筐筒答筍筋筏筑粟粥絞結絨絕紫絮絲絡給絢絰絳善翔翕 +0xb5c0: 耋聒肅腕腔腋腑腎脹腆脾腌腓腴舒舜菩萃菸萍菠菅萋菁華菱菴著萊菰萌菌 +0xb5e0: 菽菲菊萸萎萄菜萇菔菟虛蛟蛙蛭蛔蛛蛤蛐蛞街裁裂袱覃視註詠評詞証詁 +0xb640: 詔詛詐詆訴診訶詖象貂貯貼貳貽賁費賀貴買貶貿貸越超趁跎距跋跚跑跌跛 +0xb660: 跆軻軸軼辜逮逵週逸進逶鄂郵鄉郾酣酥量鈔鈕鈣鈉鈞鈍鈐鈇鈑閔閏開閑 +0xb6a0: 間閒閎隊階隋陽隅隆隍陲隄雁雅雄集雇雯雲韌項順須飧飪飯飩飲飭馮馭 +0xb6c0: 黃黍黑亂傭債傲傳僅傾催傷傻傯僇剿剷剽募勦勤勢勣匯嗟嗨嗓嗦嗎嗜嗇嗑 +0xb6e0: 嗣嗤嗯嗚嗡嗅嗆嗥嗉園圓塞塑塘塗塚塔填塌塭塊塢塒塋奧嫁嫉嫌媾媽媼 +0xb740: 媳嫂媲嵩嵯幌幹廉廈弒彙徬微愚意慈感想愛惹愁愈慎慌慄慍愾愴愧愍愆愷 +0xb760: 戡戢搓搾搞搪搭搽搬搏搜搔損搶搖搗搆敬斟新暗暉暇暈暖暄暘暍會榔業 +0xb7a0: 楚楷楠楔極椰概楊楨楫楞楓楹榆楝楣楛歇歲毀殿毓毽溢溯滓溶滂源溝滇 +0xb7c0: 滅溥溘溼溺溫滑準溜滄滔溪溧溴煎煙煩煤煉照煜煬煦煌煥煞煆煨煖爺牒猷 +0xb7e0: 獅猿猾瑯瑚瑕瑟瑞瑁琿瑙瑛瑜當畸瘀痰瘁痲痱痺痿痴痳盞盟睛睫睦睞督 +0xb840: 睹睪睬睜睥睨睢矮碎碰碗碘碌碉硼碑碓硿祺祿禁萬禽稜稚稠稔稟稞窟窠筷 +0xb860: 節筠筮筧粱粳粵經絹綑綁綏絛置罩罪署義羨群聖聘肆肄腱腰腸腥腮腳腫 +0xb8a0: 腹腺腦舅艇蒂葷落萱葵葦葫葉葬葛萼萵葡董葩葭葆虞虜號蛹蜓蜈蜇蜀蛾 +0xb8c0: 蛻蜂蜃蜆蜊衙裟裔裙補裘裝裡裊裕裒覜解詫該詳試詩詰誇詼詣誠話誅詭詢 +0xb8e0: 詮詬詹詻訾詨豢貊貉賊資賈賄貲賃賂賅跡跟跨路跳跺跪跤跦躲較載軾輊 +0xb940: 辟農運遊道遂達逼違遐遇遏過遍遑逾遁鄒鄗酬酪酩釉鈷鉗鈸鈽鉀鈾鉛鉋鉤 +0xb960: 鉑鈴鉉鉍鉅鈹鈿鉚閘隘隔隕雍雋雉雊雷電雹零靖靴靶預頑頓頊頒頌飼飴 +0xb9a0: 飽飾馳馱馴髡鳩麂鼎鼓鼠僧僮僥僖僭僚僕像僑僱僎僩兢凳劃劂匱厭嗾嘀 +0xb9c0: 嘛嘗嗽嘔嘆嘉嘍嘎嗷嘖嘟嘈嘐嗶團圖塵塾境墓墊塹墅塽壽夥夢夤奪奩嫡嫦 +0xb9e0: 嫩嫗嫖嫘嫣孵寞寧寡寥實寨寢寤察對屢嶄嶇幛幣幕幗幔廓廖弊彆彰徹慇 +0xba40: 愿態慷慢慣慟慚慘慵截撇摘摔撤摸摟摺摑摧搴摭摻敲斡旗旖暢暨暝榜榨榕 +0xba60: 槁榮槓構榛榷榻榫榴槐槍榭槌榦槃榣歉歌氳漳演滾漓滴漩漾漠漬漏漂漢 +0xbaa0: 滿滯漆漱漸漲漣漕漫漯澈漪滬漁滲滌滷熔熙煽熊熄熒爾犒犖獄獐瑤瑣瑪 +0xbac0: 瑰瑭甄疑瘧瘍瘋瘉瘓盡監瞄睽睿睡磁碟碧碳碩碣禎福禍種稱窪窩竭端管箕 +0xbae0: 箋筵算箝箔箏箸箇箄粹粽精綻綰綜綽綾綠緊綴網綱綺綢綿綵綸維緒緇綬 +0xbb40: 罰翠翡翟聞聚肇腐膀膏膈膊腿膂臧臺與舔舞艋蓉蒿蓆蓄蒙蒞蒲蒜蓋蒸蓀蓓 +0xbb60: 蒐蒼蓑蓊蜿蜜蜻蜢蜥蜴蜘蝕蜷蜩裳褂裴裹裸製裨褚裯誦誌語誣認誡誓誤 +0xbba0: 
說誥誨誘誑誚誧豪貍貌賓賑賒赫趙趕跼輔輒輕輓辣遠遘遜遣遙遞遢遝遛 +0xbbc0: 鄙鄘鄞酵酸酷酴鉸銀銅銘銖鉻銓銜銨鉼銑閡閨閩閣閥閤隙障際雌雒需靼鞅 +0xbbe0: 韶頗領颯颱餃餅餌餉駁骯骰髦魁魂鳴鳶鳳麼鼻齊億儀僻僵價儂儈儉儅凜 +0xbc40: 劇劈劉劍劊勰厲嘮嘻嘹嘲嘿嘴嘩噓噎噗噴嘶嘯嘰墀墟增墳墜墮墩墦奭嬉嫻 +0xbc60: 嬋嫵嬌嬈寮寬審寫層履嶝嶔幢幟幡廢廚廟廝廣廠彈影德徵慶慧慮慝慕憂 +0xbca0: 慼慰慫慾憧憐憫憎憬憚憤憔憮戮摩摯摹撞撲撈撐撰撥撓撕撩撒撮播撫撚 +0xbcc0: 撬撙撢撳敵敷數暮暫暴暱樣樟槨樁樞標槽模樓樊槳樂樅槭樑歐歎殤毅毆漿 +0xbce0: 潼澄潑潦潔澆潭潛潸潮澎潺潰潤澗潘滕潯潠潟熟熬熱熨牖犛獎獗瑩璋璃 +0xbd40: 瑾璀畿瘠瘩瘟瘤瘦瘡瘢皚皺盤瞎瞇瞌瞑瞋磋磅確磊碾磕碼磐稿稼穀稽稷稻 +0xbd60: 窯窮箭箱範箴篆篇篁箠篌糊締練緯緻緘緬緝編緣線緞緩綞緙緲緹罵罷羯 +0xbda0: 翩耦膛膜膝膠膚膘蔗蔽蔚蓮蔬蔭蔓蔑蔣蔡蔔蓬蔥蓿蔆螂蝴蝶蝠蝦蝸蝨蝙 +0xbdc0: 蝗蝌蝓衛衝褐複褒褓褕褊誼諒談諄誕請諸課諉諂調誰論諍誶誹諛豌豎豬賠 +0xbde0: 賞賦賤賬賭賢賣賜質賡赭趟趣踫踐踝踢踏踩踟踡踞躺輝輛輟輩輦輪輜輞 +0xbe40: 輥適遮遨遭遷鄰鄭鄧鄱醇醉醋醃鋅銻銷鋪銬鋤鋁銳銼鋒鋇鋰銲閭閱霄霆震 +0xbe60: 霉靠鞍鞋鞏頡頫頜颳養餓餒餘駝駐駟駛駑駕駒駙骷髮髯鬧魅魄魷魯鴆鴉 +0xbea0: 鴃麩麾黎墨齒儒儘儔儐儕冀冪凝劑劓勳噙噫噹噩噤噸噪器噥噱噯噬噢噶 +0xbec0: 壁墾壇壅奮嬝嬴學寰導彊憲憑憩憊懍憶憾懊懈戰擅擁擋撻撼據擄擇擂操撿 +0xbee0: 擒擔撾整曆曉暹曄曇暸樽樸樺橙橫橘樹橄橢橡橋橇樵機橈歙歷氅濂澱澡 +0xbf40: 濃澤濁澧澳激澹澶澦澠澴熾燉燐燒燈燕熹燎燙燜燃燄獨璜璣璘璟璞瓢甌甍 +0xbf60: 瘴瘸瘺盧盥瞠瞞瞟瞥磨磚磬磧禦積穎穆穌穋窺篙簑築篤篛篡篩篦糕糖縊 +0xbfa0: 縑縈縛縣縞縝縉縐罹羲翰翱翮耨膳膩膨臻興艘艙蕊蕙蕈蕨蕩蕃蕉蕭蕪蕞 +0xbfc0: 螃螟螞螢融衡褪褲褥褫褡親覦諦諺諫諱謀諜諧諮諾謁謂諷諭諳諶諼豫豭貓 +0xbfe0: 賴蹄踱踴蹂踹踵輻輯輸輳辨辦遵遴選遲遼遺鄴醒錠錶鋸錳錯錢鋼錫錄錚 +0xc040: 錐錦錡錕錮錙閻隧隨險雕霎霑霖霍霓霏靛靜靦鞘頰頸頻頷頭頹頤餐館餞餛 +0xc060: 餡餚駭駢駱骸骼髻髭鬨鮑鴕鴣鴦鴨鴒鴛默黔龍龜優償儡儲勵嚎嚀嚐嚅嚇 +0xc0a0: 嚏壕壓壑壎嬰嬪嬤孺尷屨嶼嶺嶽嶸幫彌徽應懂懇懦懋戲戴擎擊擘擠擰擦 +0xc0c0: 擬擱擢擭斂斃曙曖檀檔檄檢檜櫛檣橾檗檐檠歜殮毚氈濘濱濟濠濛濤濫濯澀 +0xc0e0: 濬濡濩濕濮濰燧營燮燦燥燭燬燴燠爵牆獰獲璩環璦璨癆療癌盪瞳瞪瞰瞬 +0xc140: 瞧瞭矯磷磺磴磯礁禧禪穗窿簇簍篾篷簌篠糠糜糞糢糟糙糝縮績繆縷縲繃縫 +0xc160: 總縱繅繁縴縹繈縵縿縯罄翳翼聱聲聰聯聳臆臃膺臂臀膿膽臉膾臨舉艱薪 +0xc1a0: 薄蕾薜薑薔薯薛薇薨薊虧蟀蟑螳蟒蟆螫螻螺蟈蟋褻褶襄褸褽覬謎謗謙講 +0xc1c0: 謊謠謝謄謐豁谿豳賺賽購賸賻趨蹉蹋蹈蹊轄輾轂轅輿避遽還邁邂邀鄹醣醞 +0xc1e0: 醜鍍鎂錨鍵鍊鍥鍋錘鍾鍬鍛鍰鍚鍔闊闋闌闈闆隱隸雖霜霞鞠韓顆颶餵騁 +0xc240: 駿鮮鮫鮪鮭鴻鴿麋黏點黜黝黛鼾齋叢嚕嚮壙壘嬸彞懣戳擴擲擾攆擺擻擷斷 +0xc260: 曜朦檳檬櫃檻檸櫂檮檯歟歸殯瀉瀋濾瀆濺瀑瀏燻燼燾燸獷獵璧璿甕癖癘 +0xc2a0: 癒瞽瞿瞻瞼礎禮穡穢穠竄竅簫簧簪簞簣簡糧織繕繞繚繡繒繙罈翹翻職聶 +0xc2c0: 臍臏舊藏薩藍藐藉薰薺薹薦蟯蟬蟲蟠覆覲觴謨謹謬謫豐贅蹙蹣蹦蹤蹟蹕軀 +0xc2e0: 轉轍邇邃邈醫醬釐鎔鎊鎖鎢鎳鎮鎬鎰鎘鎚鎗闔闖闐闕離雜雙雛雞霤鞣鞦 +0xc340: 鞭韹額顏題顎顓颺餾餿餽餮馥騎髁鬃鬆魏魎魍鯊鯉鯽鯈鯀鵑鵝鵠黠鼕鼬儳 +0xc360: 嚥壞壟壢寵龐廬懲懷懶懵攀攏曠曝櫥櫝櫚櫓瀛瀟瀨瀚瀝瀕瀘爆爍牘犢獸 +0xc3a0: 獺璽瓊瓣疇疆癟癡矇礙禱穫穩簾簿簸簽簷籀繫繭繹繩繪羅繳羶羹羸臘藩 +0xc3c0: 藝藪藕藤藥藷蟻蠅蠍蟹蟾襠襟襖襞譁譜識證譚譎譏譆譙贈贊蹼蹲躇蹶蹬蹺 +0xc3e0: 蹴轔轎辭邊邋醱醮鏡鏑鏟鏃鏈鏜鏝鏖鏢鏍鏘鏤鏗鏨關隴難霪霧靡韜韻類 +0xc440: 願顛颼饅饉騖騙鬍鯨鯧鯖鯛鶉鵡鵲鵪鵬麒麗麓麴勸嚨嚷嚶嚴嚼壤孀孃孽寶 +0xc460: 巉懸懺攘攔攙曦朧櫬瀾瀰瀲爐獻瓏癢癥礦礪礬礫竇競籌籃籍糯糰辮繽繼 +0xc4a0: 纂罌耀臚艦藻藹蘑藺蘆蘋蘇蘊蠔蠕襤覺觸議譬警譯譟譫贏贍躉躁躅躂醴 +0xc4c0: 釋鐘鐃鏽闡霰飄饒饑馨騫騰騷騵鰓鰍鹹麵黨鼯齟齣齡儷儸囁囀囂夔屬巍懼 +0xc4e0: 懾攝攜斕曩櫻欄櫺殲灌爛犧瓖瓔癩矓籐纏續羼蘗蘭蘚蠣蠢蠡蠟襪襬覽譴 +0xc540: 
護譽贓躊躍躋轟辯醺鐮鐳鐵鐺鐸鐲鐫闢霸霹露響顧顥饗驅驃驀騾髏魔魑鰭 +0xc560: 鰥鶯鶴鷂鶸麝黯鼙齜齦齧儼儻囈囊囉孿巔巒彎懿攤權歡灑灘玀瓤疊癮癬 +0xc5a0: 禳籠籟聾聽臟襲襯觼讀贖贗躑躓轡酈鑄鑑鑒霽霾韃韁顫饕驕驍髒鬚鱉鰱 +0xc5c0: 鰾鰻鷓鷗鼴齬齪龔囌巖戀攣攫攪曬欐瓚竊籤籣籥纓纖纔臢蘸蘿蠱變邐邏鑣 +0xc5e0: 鑠鑤靨顯饜驚驛驗髓體髑鱔鱗鱖鷥麟黴囑壩攬灞癱癲矗罐羈蠶蠹衢讓讒 +0xc640: 讖艷贛釀鑪靂靈靄韆顰驟鬢魘鱟鷹鷺鹼鹽鼇齷齲廳欖灣籬籮蠻觀躡釁鑲鑰 +0xc660: 顱饞髖鬣黌灤矚讚鑷韉驢驥纜讜躪釅鑽鑾鑼鱷鱸黷豔鑿鸚爨驪鬱鸛鸞籲 +0xc6a0: ①②③④⑤⑥⑦⑧⑨⑩⑴⑵⑶⑷⑸⑹⑺⑻⑼⑽ⅰⅱⅲⅳⅴⅵⅶⅷⅸⅹ⼂ +0xc6c0: ⼃⼅⼇⼌⼍⼎⼓⼖⼙⼛⼢⼧⼮⼳⼴⼵⼹⼺⽁⽆⽧⽨⾡⾪¨ˆヽヾゝゞ +0xc6e0: 々〆〇ー[]✽ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじ +0xc740: すずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへ +0xc760: べぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんァアィイ +0xc7a0: ゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッ +0xc7c0: ツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャ +0xc7e0: ヤュユョヨラリルレロヮワヰヱヲンヴヵヶАБВГДЕЁЖЗИЙК +0xc840: ЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзий +0xc860: клмнопрстуфхцчшщъыьэюя⇧↸↹𠃌乚𠂊刂 +0xc8a0: 冈𧘇 +0xc8c0: ¬¦'"㈱№℡゛゜⺀⺄⺆⺇⺈⺊⺌⺍⺕⺜ +0xc8e0: ⺝⺥⺧⺪⺬⺮⺶⺼⺾⻆⻊⻌⻍⻏⻖⻗⻞⻣ ʃɐɛɔɵœøŋʊɪ +0xc940: 乂乜凵匚厂万丌乇亍囗兀屮彳丏冇与丮亓仂仉仈冘勼卬厹圠夃夬尐巿旡殳 +0xc960: 毌气爿丱丼仨仜仩仡仝仚刌匜卌圢圣夗夯宁宄尒尻屴屳帄庀庂忉戉扐氕 +0xc9a0: 氶汃氿氻犮犰玊禸肊阞伎优伬仵伔仱伀价伈伝伂伅伢伓伄仴伒冱刓刉刐 +0xc9c0: 劦匢匟卍厊吇囡囟圮圪圴夼妀奼妅奻奾奷奿孖尕尥屼屺屻屾巟幵庄异弚彴 +0xc9e0: 忕忔忏扜扞扤扡扦扢扙扠扚扥旯旮朾朹朸朻机朿朼朳氘汆汒汜汏汊汔汋 +0xca40: 汌灱牞犴犵玎甪癿穵网艸艼芀艽艿虍襾邙邗邘邛邔阢阤阠阣佖伻佢佉体佤 +0xca60: 伾佧佒佟佁佘伭伳伿佡冏冹刜刞刡劭劮匉卣卲厎厏吰吷吪呔呅吙吜吥吘 +0xcaa0: 吽呏呁吨吤呇囮囧囥坁坅坌坉坋坒夆奀妦妘妠妗妎妢妐妏妧妡宎宒尨尪 +0xcac0: 岍岏岈岋岉岒岊岆岓岕巠帊帎庋庉庌庈庍弅弝彸彶忒忑忐忭忨忮忳忡忤忣 +0xcae0: 忺忯忷忻怀忴戺抃抌抎抏抔抇扱扻扺扰抁抈扷扽扲扴攷旰旴旳旲旵杅杇 +0xcb40: 杙杕杌杈杝杍杚杋毐氙氚汸汧汫沄沋沏汱汯汩沚汭沇沕沜汦汳汥汻沎灴灺 +0xcb60: 牣犿犽狃狆狁犺狅玕玗玓玔玒町甹疔疕皁礽耴肕肙肐肒肜芐芏芅芎芑芓 +0xcba0: 芊芃芄豸迉辿邟邡邥邞邧邠阰阨阯阭丳侘佼侅佽侀侇佶佴侉侄佷佌侗佪 +0xcbc0: 侚佹侁佸侐侜侔侞侒侂侕佫佮冞冼冾刵刲刳剆刱劼匊匋匼厒厔咇呿咁咑咂 +0xcbe0: 咈呫呺呾呥呬呴呦咍呯呡呠咘呣呧呤囷囹坯坲坭坫坱坰坶垀坵坻坳坴坢 +0xcc40: 坨坽夌奅妵妺姏姎妲姌姁妶妼姃姖妱妽姀姈妴姇孢孥宓宕屄屇岮岤岠岵岯 +0xcc60: 岨岬岟岣岭岢岪岧岝岥岶岰岦帗帔帙弨弢弣弤彔徂彾彽忞忥怭怦怙怲怋 +0xcca0: 怴怊怗怳怚怞怬怢怍怐怮怓怑怌怉怜戔戽抭抴拑抾抪抶拊抮抳抯抻抩抰 +0xccc0: 抸攽斨斻昉旼昄昒昈旻昃昋昍昅旽昑昐曶朊枅杬枎枒杶杻枘枆构杴枍枌杺 +0xcce0: 枟枑枙枃杽极杸杹枔欥殀歾毞氝沓泬泫泮泙沶泔沭泧沷泐泂沺泃泆泭泲 +0xcd40: 泒泝沴沊沝沀泞泀洰泍泇沰泹泏泩泑炔炘炅炓炆炄炑炖炂炚炃牪狖狋狘狉 +0xcd60: 狜狒狔狚狌狑玤玡玭玦玢玠玬玝瓝瓨甿畀甾疌疘皯盳盱盰盵矸矼矹矻矺 +0xcda0: 矷祂礿秅穸穻竻籵糽耵肏肮肣肸肵肭舠芠苀芫芚芘芛芵芧芮芼芞芺芴芨 +0xcdc0: 芡芩苂芤苃芶芢虰虯虭虮豖迒迋迓迍迖迕迗邲邴邯邳邰阹阽阼阺陃俍俅俓 +0xcde0: 侲俉俋俁俔俜俙侻侳俛俇俖侺俀侹俬剄剉勀勂匽卼厗厖厙厘咺咡咭咥哏 +0xce40: 哃茍咷咮哖咶哅哆咠呰咼咢咾呲哞咰垵垞垟垤垌垗垝垛垔垘垏垙垥垚垕壴 +0xce60: 复奓姡姞姮娀姱姝姺姽姼姶姤姲姷姛姩姳姵姠姾姴姭宨屌峐峘峌峗峋峛 +0xcea0: 峞峚峉峇峊峖峓峔峏峈峆峎峟峸巹帡帢帣帠帤庰庤庢庛庣庥弇弮彖徆怷 +0xcec0: 
怹恔恲恞恅恓恇恉恛恌恀恂恟怤恄恘恦恮扂扃拏挍挋拵挎挃拫拹挏挌拸拶 +0xcee0: 挀挓挔拺挕拻拰敁敃斪斿昶昡昲昵昜昦昢昳昫昺昝昴昹昮朏朐柁柲柈枺 +0xcf40: 柜枻柸柘柀枷柅柫柤柟枵柍枳柷柶柮柣柂枹柎柧柰枲柼柆柭柌枮柦柛柺柉 +0xcf60: 柊柃柪柋欨殂殄殶毖毘毠氠氡洨洴洭洟洼洿洒洊泚洳洄洙洺洚洑洀洝浂 +0xcfa0: 洁洘洷洃洏浀洇洠洬洈洢洉洐炷炟炾炱炰炡炴炵炩牁牉牊牬牰牳牮狊狤 +0xcfc0: 狨狫狟狪狦狣玅珌珂珈珅玹玶玵玴珫玿珇玾珃珆玸珋瓬瓮甮畇畈疧疪癹盄 +0xcfe0: 眈眃眄眅眊盷盻盺矧矨砆砑砒砅砐砏砎砉砃砓祊祌祋祅祄秕种秏秖秎窀 +0xd040: 穾竑笀笁籺籸籹籿粀粁紃紈紁罘羑羍羾耇耎耏耔耷胘胇胠胑胈胂胐胅胣胙 +0xd060: 胜胊胕胉胏胗胦胍臿舡芔苙苾苹茇苨茀苕茺苫苖苴苬苡苲苵茌苻苶苰苪 +0xd0a0: 苤苠苺苳苭虷虴虼虳衁衎衧衪衩觓訄訇赲迣迡迮迠郱邽邿郕郅邾郇郋郈 +0xd0c0: 釔釓陔陏陑陓陊陎倞倅倇倓倢倰倛俵俴倳倷倬俶俷倗倜倠倧倵倯倱倎党冔 +0xd0e0: 冓凊凄凅凈凎剡剚剒剞剟剕剢勍匎厞唦哢唗唒哧哳哤唚哿唄唈哫唑唅哱 +0xd140: 唊哻哷哸哠唎唃唋圁圂埌堲埕埒垺埆垽垼垸垶垿埇埐垹埁夎奊娙娖娭娮娕 +0xd160: 娏娗娊娞娳孬宧宭宬尃屖屔峬峿峮峱峷崀峹帩帨庨庮庪庬弳弰彧恝恚恧 +0xd1a0: 恁悢悈悀悒悁悝悃悕悛悗悇悜悎戙扆拲挐捖挬捄捅挶捃揤挹捋捊挼挩捁 +0xd1c0: 挴捘捔捙挭捇挳捚捑挸捗捀捈敊敆旆旃旄旂晊晟晇晑朒朓栟栚桉栲栳栻桋 +0xd1e0: 桏栖栱栜栵栫栭栯桎桄栴栝栒栔栦栨栮桍栺栥栠欬欯欭欱欴歭肂殈毦毤 +0xd240: 毨毣毢毧氥浺浣浤浶洍浡涒浘浢浭浯涑涍淯浿涆浞浧浠涗浰浼浟涂涘洯浨 +0xd260: 涋浾涀涄洖涃浻浽浵涐烜烓烑烝烋缹烢烗烒烞烠烔烍烅烆烇烚烎烡牂牸 +0xd2a0: 牷牶猀狺狴狾狶狳狻猁珓珙珥珖玼珧珣珩珜珒珛珔珝珚珗珘珨瓞瓟瓴瓵 +0xd2c0: 甡畛畟疰痁疻痄痀疿疶疺皊盉眝眛眐眓眒眣眑眕眙眚眢眧砣砬砢砵砯砨砮 +0xd2e0: 砫砡砩砳砪砱祔祛祏祜祓祒祑秫秬秠秮秭秪秜秞秝窆窉窅窋窌窊窇竘笐 +0xd340: 笄笓笅笏笈笊笎笉笒粄粑粊粌粈粍粅紞紝紑紎紘紖紓紟紒紏紌罜罡罞罠罝 +0xd360: 罛羖羒翃翂翀耖耾耹胺胲胹胵脁胻脀舁舯舥茳茭荄茙荑茥荖茿荁茦茜茢 +0xd3a0: 荂荎茛茪茈茼荍茖茤茠茷茯茩荇荅荌荓茞茬荋茧荈虓虒蚢蚨蚖蚍蚑蚞蚇 +0xd3c0: 蚗蚆蚋蚚蚅蚥蚙蚡蚧蚕蚘蚎蚝蚐蚔衃衄衭衵衶衲袀衱衿衯袃衾衴衼訒豇豗 +0xd3e0: 豻貤貣赶赸趵趷趶軑軓迾迵适迿迻逄迼迶郖郠郙郚郣郟郥郘郛郗郜郤酐 +0xd440: 酎酏釕釢釚陜陟隼飣髟鬯乿偰偪偡偞偠偓偋偝偲偈偍偁偛偊偢倕偅偟偩偫 +0xd460: 偣偤偆偀偮偳偗偑凐剫剭剬剮勖勓匭厜啵啶唼啍啐唴唪啑啢唶唵唰啒啅 +0xd4a0: 唌唲啥啎唹啈唭唻啀啋圊圇埻堔埢埶埜埴堀埭埽堈埸堋埳埏堇埮埣埲埥 +0xd4c0: 埬埡堎埼堐埧堁堌埱埩埰堍堄奜婠婘婕婧婞娸娵婭婐婟婥婬婓婤婗婃婝婒 +0xd4e0: 婄婛婈媎娾婍娹婌婰婩婇婑婖婂婜孲孮寁寀屙崞崋崝崚崠崌崨崍崦崥崏 +0xd540: 崰崒崣崟崮帾帴庱庴庹庲庳弶弸徛徖徟悊悐悆悾悰悺惓惔惏惤惙惝惈悱惛 +0xd560: 悷惊悿惃惍惀挲捥掊掂捽掽掞掭掝掗掫掎捯掇掐据掯捵掜捭掮捼掤挻掟 +0xd5a0: 捸掅掁掑掍捰敓旍晥晡晛晙晜晢朘桹梇梐梜桭桮梮梫楖桯梣梬梩桵桴梲 +0xd5c0: 梏桷梒桼桫桲梪梀桱桾梛梖梋梠梉梤桸桻梑梌梊桽欶欳欷欸殑殏殍殎殌氪 +0xd5e0: 淀涫涴涳湴涬淩淢涷淶淔渀淈淠淟淖涾淥淜淝淛淴淊涽淭淰涺淕淂淏淉 +0xd640: 淐淲淓淽淗淍淣涻烺焍烷焗烴焌烰焄烳焐烼烿焆焓焀烸烶焋焂焎牾牻牼牿 +0xd660: 猝猗猇猑猘猊猈狿猏猞玈珶珸珵琄琁珽琇琀珺珼珿琌琋珴琈畤畣痎痒痏 +0xd6a0: 痋痌痑痐皏皉盓眹眯眭眱眲眴眳眽眥眻眵硈硒硉硍硊硌砦硅硐祤祧祩祪 +0xd6c0: 祣祫祡离秺秸秶秷窏窔窐笵筇笴笥笰笢笤笳笘笪笝笱笫笭笯笲笸笚笣粔粘 +0xd6e0: 粖粣紵紽紸紶紺絅紬紩絁絇紾紿絊紻紨罣羕羜羝羛翊翋翍翐翑翇翏翉耟 +0xd740: 耞耛聇聃聈脘脥脙脛脭脟脬脞脡脕脧脝脢舑舸舳舺舴舲艴莐莣莨莍荺荳莤 +0xd760: 荴莏莁莕莙荵莔莩荽莃莌莝莛莪莋荾莥莯莈莗莰荿莦莇莮荶莚虙虖蚿蚷 +0xd7a0: 蛂蛁蛅蚺蚰蛈蚹蚳蚸蛌蚴蚻蚼蛃蚽蚾衒袉袕袨袢袪袚袑袡袟袘袧袙袛袗 +0xd7c0: 袤袬袌袓袎覂觖觙觕訰訧訬訞谹谻豜豝豽貥赽赻赹趼跂趹趿跁軘軞軝軜軗 +0xd7e0: 軠軡逤逋逑逜逌逡郯郪郰郴郲郳郔郫郬郩酖酘酚酓酕釬釴釱釳釸釤釹釪 +0xd840: 釫釷釨釮镺閆閈陼陭陫陱陯隿靪頄飥馗傛傕傔傞傋傣傃傌傎傝偨傜傒傂傇 +0xd860: 
兟凔匒匑厤厧喑喨喥喭啷噅喢喓喈喏喵喁喣喒喤啽喌喦啿喕喡喎圌堩堷 +0xd8a0: 堙堞堧堣堨埵塈堥堜堛堳堿堶堮堹堸堭堬堻奡媯媔媟婺媢媞婸媦婼媥媬 +0xd8c0: 媕媮娷媄媊媗媃媋媩婻婽媌媜媏媓媝寪寍寋寔寑寊寎尌尰崷嵃嵫嵁嵋崿崵 +0xd8e0: 嵑嵎嵕崳崺嵒崽崱嵙嵂崹嵉崸崼崲崶嵀嵅幄幁彘徦徥徫惉悹惌惢惎惄愔 +0xd940: 惲愊愖愅惵愓惸惼惾惁愃愘愝愐惿愄愋扊掔掱掰揎揥揨揯揃撝揳揊揠揶揕 +0xd960: 揲揵摡揟掾揝揜揄揘揓揂揇揌揋揈揰揗揙攲敧敪敤敜敨敥斌斝斞斮旐旒 +0xd9a0: 晼晬晻暀晱晹晪晲朁椌棓椄棜椪棬棪棱椏棖棷棫棤棶椓椐棳棡椇棌椈楰 +0xd9c0: 梴椑棯棆椔棸棐棽棼棨椋椊椗棎棈棝棞棦棴棑椆棔棩椕椥棇欹欻欿欼殔殗 +0xd9e0: 殙殕殽毰毲毳氰淼湆湇渟湉溈渼渽湅湢渫渿湁湝湳渜渳湋湀湑渻渃渮湞 +0xda40: 湨湜湡渱渨湠湱湫渹渢渰湓湥渧湸湤湷湕湹湒湦渵渶湚焠焞焯烻焮焱焣焥 +0xda60: 焢焲焟焨焺焛牋牚犈犉犆犅犋猒猋猰猢猱猳猧猲猭猦猣猵猌琮琬琰琫琖 +0xdaa0: 琚琡琭琱琤琣琝琩琠琲瓻甯畯畬痧痚痡痦痝痟痤痗皕皒盚睆睇睄睍睅睊 +0xdac0: 睎睋睌矞矬硠硤硥硜硭硱硪确硰硩硨硞硢祴祳祲祰稂稊稃稌稄窙竦竤筊笻 +0xdae0: 筄筈筌筎筀筘筅粢粞粨粡絘絯絣絓絖絧絪絏絭絜絫絒絔絩絑絟絎缾缿罥 +0xdb40: 罦羢羠羡翗聑聏聐胾胔腃腊腒腏腇脽腍脺臦臮臷臸臹舄舼舽舿艵茻菏菹萣 +0xdb60: 菀菨萒菧菤菼菶萐菆菈菫菣莿萁菝菥菘菿菡菋菎菖菵菉萉萏菞萑萆菂菳 +0xdba0: 菕菺菇菑菪萓菃菬菮菄菻菗菢萛菛菾蛘蛢蛦蛓蛣蛚蛪蛝蛫蛜蛬蛩蛗蛨蛑 +0xdbc0: 衈衖衕袺裗袹袸裀袾袶袼袷袽袲褁裉覕覘覗觝觚觛詎詍訹詙詀詗詘詄詅詒 +0xdbe0: 詈詑詊詌詏豟貁貀貺貾貰貹貵趄趀趉跘跓跍跇跖跜跏跕跙跈跗跅軯軷軺 +0xdc40: 軹軦軮軥軵軧軨軶軫軱軬軴軩逭逴逯鄆鄬鄄郿郼鄈郹郻鄁鄀鄇鄅鄃酡酤酟 +0xdc60: 酢酠鈁鈊鈥鈃鈚鈦鈏鈌鈀鈒釿釽鈆鈄鈧鈂鈜鈤鈙鈗鈅鈖镻閍閌閐隇陾隈 +0xdca0: 隉隃隀雂雈雃雱雰靬靰靮頇颩飫鳦黹亃亄亶傽傿僆傮僄僊傴僈僂傰僁傺 +0xdcc0: 傱僋僉傶傸凗剺剸剻剼嗃嗛嗌嗐嗋嗊嗝嗀嗔嗄嗩喿嗒喍嗏嗕嗢嗖嗈嗲嗍嗙 +0xdce0: 嗂圔塓塨塤塏塍塉塯塕塎塝塙塥塛堽塣塱壼嫇嫄嫋媺媸媱媵媰媿嫈媻嫆 +0xdd40: 媷嫀嫊媴媶嫍媹媐寖寘寙尟尳嵱嵣嵊嵥嵲嵬嵞嵨嵧嵢巰幏幎幊幍幋廅廌廆 +0xdd60: 廋廇彀徯徭惷慉慊愫慅愶愲愮慆愯慏愩慀戠酨戣戥戤揅揱揫搐搒搉搠搤 +0xdda0: 搳摃搟搕搘搹搷搢搣搌搦搰搨摁搵搯搊搚摀搥搧搋揧搛搮搡搎敯斒旓暆 +0xddc0: 暌暕暐暋暊暙暔晸朠楦楟椸楎楢楱椿楅楪椹楂楗楙楺楈楉椵楬椳椽楥棰楸 +0xdde0: 椴楩楀楯楄楶楘楁楴楌椻楋椷楜楏楑椲楒椯楻椼歆歅歃歂歈歁殛嗀毻毼 +0xde40: 毹毷毸溛滖滈溏滀溟溓溔溠溱溹滆滒溽滁溞滉溷溰滍溦滏溲溾滃滜滘溙溒 +0xde60: 溎溍溤溡溿溳滐滊溗溮溣煇煔煒煣煠煁煝煢煲煸煪煡煂煘煃煋煰煟煐煓 +0xdea0: 煄煍煚牏犍犌犑犐犎猼獂猻猺獀獊獉瑄瑊瑋瑒瑑瑗瑀瑏瑐瑎瑂瑆瑍瑔瓡 +0xdec0: 瓿瓾瓽甝畹畷榃痯瘏瘃痷痾痼痹痸瘐痻痶痭痵痽皙皵盝睕睟睠睒睖睚睩睧 +0xdee0: 睔睙睭矠碇碚碔碏碄碕碅碆碡碃硹碙碀碖硻祼禂祽祹稑稘稙稒稗稕稢稓 +0xdf40: 稛稐窣窢窞竫筦筤筭筴筩筲筥筳筱筰筡筸筶筣粲粴粯綈綆綀綍絿綅絺綎絻 +0xdf60: 綃絼綌綔綄絽綒罭罫罧罨罬羦羥羧翛翜耡腤腠腷腜腩腛腢腲朡腞腶腧腯 +0xdfa0: 腄腡舝艉艄艀艂艅蓱萿葖葶葹蒏蒍葥葑葀蒆葧萰葍葽葚葙葴葳葝蔇葞萷 +0xdfc0: 萺萴葺葃葸萲葅萩菙葋萯葂萭葟葰萹葎葌葒葯蓅蒎萻葇萶萳葨葾葄萫葠葔 +0xdfe0: 葮葐蜋蜄蛷蜌蛺蛖蛵蝍蛸蜎蜉蜁蛶蜍蜅裖裋裍裎裞裛裚裌裐覅覛觟觥觤 +0xe040: 觡觠觢觜触詶誆詿詡訿詷誂誄詵誃誁詴詺谼豋豊豥豤豦貆貄貅賌赨赩趑趌 +0xe060: 趎趏趍趓趔趐趒跰跠跬跱跮跐跩跣跢跧跲跫跴輆軿輁輀輅輇輈輂輋遒逿 +0xe0a0: 遄遉逽鄐鄍鄏鄑鄖鄔鄋鄎酮酯鉈鉒鈰鈺鉦鈳鉥鉞銃鈮鉊鉆鉭鉬鉏鉠鉧鉯 +0xe0c0: 鈶鉡鉰鈱鉔鉣鉐鉲鉎鉓鉌鉖鈲閟閜閞閛隒隓隑隗雎雺雽雸雵靳靷靸靲頏頍 +0xe0e0: 頎颬飶飹馯馲馰馵骭骫魛鳪鳭鳧麀黽僦僔僗僨僳僛僪僝僤僓僬僰僯僣僠 +0xe140: 凘劀劁勩勫匰厬嘧嘕嘌嘒嗼嘏嘜嘁嘓嘂嗺嘝嘄嗿嗹墉塼墐墘墆墁塿塴墋塺 +0xe160: 墇墑墎塶墂墈塻墔墏壾奫嫜嫮嫥嫕嫪嫚嫭嫫嫳嫢嫠嫛嫬嫞嫝嫙嫨嫟孷寠 +0xe1a0: 寣屣嶂嶀嵽嶆嵺嶁嵷嶊嶉嶈嵾嵼嶍嵹嵿幘幙幓廘廑廗廎廜廕廙廒廔彄彃 +0xe1c0: 彯徶愬愨慁慞慱慳慒慓慲慬憀慴慔慺慛慥愻慪慡慖戩戧戫搫摍摛摝摴摶摲 +0xe1e0: 
摳摽摵摦撦摎撂摞摜摋摓摠摐摿搿摬摫摙摥摷敳斠暡暠暟朅朄朢榱榶槉 +0xe240: 榠槎榖榰榬榼榑榙榎榧榍榩榾榯榿槄榽榤槔榹槊榚槏榳榓榪榡榞槙榗榐槂 +0xe260: 榵榥槆歊歍歋殞殟殠毃毄毾滎滵滱漃漥滸漷滻漮漉潎漙漚漧漘漻漒滭漊 +0xe2a0: 漶潳滹滮漭潀漰漼漵滫漇漎潃漅滽滶漹漜滼漺漟漍漞漈漡熇熐熉熀熅熂 +0xe2c0: 熏煻熆熁熗牄牓犗犕犓獃獍獑獌瑢瑳瑱瑵瑲瑧瑮甀甂甃畽疐瘖瘈瘌瘕瘑瘊 +0xe2e0: 瘔皸瞁睼瞅瞂睮瞀睯睾瞃碲碪碴碭碨硾碫碞碥碠碬碢碤禘禊禋禖禕禔禓 +0xe340: 禗禈禒禐稫穊稰稯稨稦窨窫窬竮箈箜箊箑箐箖箍箌箛箎箅箘劄箙箤箂粻粿 +0xe360: 粼粺綧綷緂綣綪緁緀緅綝緎緄緆緋緌綯綹綖綼綟綦綮綩綡緉罳翢翣翥翞 +0xe3a0: 耤聝聜膉膆膃膇膍膌膋舕蒗蒤蒡蒟蒺蓎蓂蒬蒮蒫蒹蒴蓁蓍蒪蒚蒱蓐蒝蒧 +0xe3c0: 蒻蒢蒔蓇蓌蒛蒩蒯蒨蓖蒘蒶蓏蒠蓗蓔蓒蓛蒰蒑虡蜳蜣蜨蝫蝀蜮蜞蜡蜙蜛蝃 +0xe3e0: 蜬蝁蜾蝆蜠蜲蜪蜭蜼蜒蜺蜱蜵蝂蜦蜧蜸蜤蜚蜰蜑裷裧裱裲裺裾裮裼裶裻 +0xe440: 裰裬裫覝覡覟覞觩觫觨誫誙誋誒誏誖谽豨豩賕賏賗趖踉踂跿踍跽踊踃踇踆 +0xe460: 踅跾踀踄輐輑輎輍鄣鄜鄠鄢鄟鄝鄚鄤鄡鄛酺酲酹酳銥銤鉶銛鉺銠銔銪銍 +0xe4a0: 銦銚銫鉹銗鉿銣鋮銎銂銕銢鉽銈銡銊銆銌銙銧鉾銇銩銝銋鈭隞隡雿靘靽 +0xe4c0: 靺靾鞃鞀鞂靻鞄鞁靿韎韍頖颭颮餂餀餇馝馜駃馹馻馺駂馽駇骱髣髧鬾鬿魠 +0xe4e0: 魡魟鳱鳲鳵麧僿儃儰僸儆儇僶僾儋儌僽儊劋劌勱勯噈噂噌嘵噁噊噉噆噘 +0xe540: 噚噀嘳嘽嘬嘾嘸嘪嘺圚墫墝墱墠墣墯墬墥墡壿嫿嫴嫽嫷嫶嬃嫸嬂嫹嬁嬇嬅 +0xe560: 嬏屧嶙嶗嶟嶒嶢嶓嶕嶠嶜嶡嶚嶞幩幝幠幜緳廛廞廡彉徲憋憃慹憱憰憢憉 +0xe5a0: 憛憓憯憭憟憒憪憡憍慦憳戭摮摰撖撠撅撗撜撏撋撊撌撣撟摨撱撘敶敺敹 +0xe5c0: 敻斲斳暵暰暩暲暷暪暯樀樆樗槥槸樕槱槤樠槿槬槢樛樝槾樧槲槮樔槷槧橀 +0xe5e0: 樈槦槻樍槼槫樉樄樘樥樏槶樦樇槴樖歑殥殣殢殦氁氀毿氂潁漦潾澇濆澒 +0xe640: 澍澉澌潢潏澅潚澖潶潬澂潕潲潒潐潗澔澓潝漀潡潫潽潧澐潓澋潩潿澕潣潷 +0xe660: 潪潻熲熯熛熰熠熚熩熵熝熥熞熤熡熪熜熧熳犘犚獘獒獞獟獠獝獛獡獚獙 +0xe6a0: 獢璇璉璊璆璁瑽璅璈瑼瑹甈甇畾瘥瘞瘙瘝瘜瘣瘚瘨瘛皜皝皞皛瞍瞏瞉瞈 +0xe6c0: 磍碻磏磌磑磎磔磈磃磄磉禚禡禠禜禢禛歶稹窲窴窳箷篋箾箬篎箯箹篊箵糅 +0xe6e0: 糈糌糋緷緛緪緧緗緡縃緺緦緶緱緰緮緟罶羬羰羭翭翫翪翬翦翨聤聧膣膟 +0xe740: 膞膕膢膙膗舖艏艓艒艐艎艑蔤蔻蔏蔀蔩蔎蔉蔍蔟蔊蔧蔜蓻蔫蓺蔈蔌蓴蔪蓲 +0xe760: 蔕蓷蓫蓳蓼蔒蓪蓩蔖蓾蔨蔝蔮蔂蓽蔞蓶蔱蔦蓧蓨蓰蓯蓹蔘蔠蔰蔋蔙蔯虢 +0xe7a0: 蝖蝣蝤蝷蟡蝳蝘蝔蝛蝒蝡蝚蝑蝞蝭蝪蝐蝎蝟蝝蝯蝬蝺蝮蝜蝥蝏蝻蝵蝢蝧 +0xe7c0: 蝩衚褅褌褔褋褗褘褙褆褖褑褎褉覢覤覣觭觰觬諏諆誸諓諑諔諕誻諗誾諀諅 +0xe7e0: 諘諃誺誽諙谾豍貏賥賟賙賨賚賝賧趠趜趡趛踠踣踥踤踮踕踛踖踑踙踦踧 +0xe840: 踔踒踘踓踜踗踚輬輤輘輚輠輣輖輗遳遰遯遧遫鄯鄫鄩鄪鄲鄦鄮醅醆醊醁醂 +0xe860: 醄醀鋐鋃鋄鋀鋙銶鋏鋱鋟鋘鋩鋗鋝鋌鋯鋂鋨鋊鋈鋎鋦鋍鋕鋉鋠鋞鋧鋑鋓 +0xe8a0: 銵鋡鋆銴镼閬閫閮閰隤隢雓霅霈霂靚鞊鞎鞈韐韏頞頝頦頩頨頠頛頧颲餈 +0xe8c0: 飺餑餔餖餗餕駜駍駏駓駔駎駉駖駘駋駗駌骳髬髫髳髲髱魆魃魧魴魱魦魶魵 +0xe8e0: 魰魨魤魬鳼鳺鳽鳿鳷鴇鴀鳹鳻鴈鴅鴄麃黓鼏鼐儜儓儗儚儑凞匴叡噰噠噮 +0xe940: 噳噦噣噭噲噞噷圜圛壈墽壉墿墺壂墼壆嬗嬙嬛嬡嬔嬓嬐嬖嬨嬚嬠嬞寯嶬嶱 +0xe960: 嶩嶧嶵嶰嶮嶪嶨嶲嶭嶯嶴幧幨幦幯廩廧廦廨廥彋徼憝憨憖懅憴懆懁懌憺 +0xe9a0: 憿憸憌擗擖擐擏擉撽撉擃擛擳擙攳敿敼斢曈暾曀曊曋曏暽暻暺曌朣樴橦 +0xe9c0: 橉橧樲橨樾橝橭橶橛橑樨橚樻樿橁橪橤橐橏橔橯橩橠樼橞橖橕橍橎橆歕歔 +0xe9e0: 歖殧殪殫毈毇氄氃氆澭濋澣濇澼濎濈潞濄澽澞濊澨瀄澥澮澺澬澪濏澿澸 +0xea40: 澢濉澫濍澯澲澰燅燂熿熸燖燀燁燋燔燊燇燏熽燘熼燆燚燛犝犞獩獦獧獬獥 +0xea60: 獫獪瑿璚璠璔璒璕璡甋疀瘯瘭瘱瘽瘳瘼瘵瘲瘰皻盦瞚瞝瞡瞜瞛瞢瞣瞕瞙 +0xeaa0: 瞗磝磩磥磪磞磣磛磡磢磭磟磠禤穄穈穇窶窸窵窱窷篞篣篧篝篕篥篚篨篹 +0xeac0: 篔篪篢篜篫篘篟糒糔糗糐糑縒縡縗縌縟縠縓縎縜縕縚縢縋縏縖縍縔縥縤罃 +0xeae0: 罻罼罺羱翯耪耩聬膱膦膮膹膵膫膰膬膴膲膷膧臲艕艖艗蕖蕅蕫蕍蕓蕡蕘 +0xeb40: 蕀蕆蕤蕁蕢蕄蕑蕇蕣蔾蕛蕱蕎蕮蕵蕕蕧蕠薌蕦蕝蕔蕥蕬虣虥虤螛螏螗螓螒 +0xeb60: 螈螁螖螘蝹螇螣螅螐螑螝螄螔螜螚螉褞褦褰褭褮褧褱褢褩褣褯褬褟觱諠 +0xeba0: 
諢諲諴諵諝謔諤諟諰諈諞諡諨諿諯諻貑貒貐賵賮賱賰賳赬赮趥趧踳踾踸 +0xebc0: 蹀蹅踶踼踽蹁踰踿躽輶輮輵輲輹輷輴遶遹遻邆郺鄳鄵鄶醓醐醑醍醏錧錞錈 +0xebe0: 錟錆錏鍺錸錼錛錣錒錁鍆錭錎錍鋋錝鋺錥錓鋹鋷錴錂錤鋿錩錹錵錪錔錌 +0xec40: 錋鋾錉錀鋻錖閼闍閾閹閺閶閿閵閽隩雔霋霒霐鞙鞗鞔韰韸頵頯頲餤餟餧餩 +0xec60: 馞駮駬駥駤駰駣駪駩駧骹骿骴骻髶髺髹髷鬳鮀鮅鮇魼魾魻鮂鮓鮒鮐魺鮕 +0xeca0: 魽鮈鴥鴗鴠鴞鴔鴩鴝鴘鴢鴐鴙鴟麈麆麇麮麭黕黖黺鼒鼽儦儥儢儤儠儩勴 +0xecc0: 嚓嚌嚍嚆嚄嚃噾嚂噿嚁壖壔壏壒嬭嬥嬲嬣嬬嬧嬦嬯嬮孻寱寲嶷幬幪徾徻懃 +0xece0: 憵憼懧懠懥懤懨懞擯擩擣擫擤擨斁斀斶旚曒檍檖檁檥檉檟檛檡檞檇檓檎 +0xed40: 檕檃檨檤檑橿檦檚檅檌檒歛殭氉濌澩濴濔濣濜濭濧濦濞濲濝濢濨燡燱燨燲 +0xed60: 燤燰燢獳獮獯璗璲璫璐璪璭璱璥璯甐甑甒甏疄癃癈癉癇皤盩瞵瞫瞲瞷瞶 +0xeda0: 瞴瞱瞨矰磳磽礂磻磼磲礅磹磾礄禫禨穜穛穖穘穔穚窾竀竁簅簏篲簀篿篻 +0xedc0: 簎篴簋篳簂簉簃簁篸篽簆篰篱簐簊糨縭縼繂縳顈縸縪繉繀繇縩繌縰縻縶繄 +0xede0: 縺罅罿罾罽翴翲耬膻臄臌臊臅臇膼臩艛艚艜薃薀薏薧薕薠薋薣蕻薤薚薞 +0xee40: 蕷蕼薉薡蕺蕸蕗薎薖薆薍薙薝薁薢薂薈薅蕹蕶薘薐薟虨螾螪螭蟅螰螬螹螵 +0xee60: 螼螮蟉蟃蟂蟌螷螯蟄蟊螴螶螿螸螽蟞螲褵褳褼褾襁襒褷襂覭覯覮觲觳謞 +0xeea0: 謘謖謑謅謋謢謏謒謕謇謍謈謆謜謓謚豏豰豲豱豯貕貔賹赯蹎蹍蹓蹐蹌蹇 +0xeec0: 轃轀邅遾鄸醚醢醛醙醟醡醝醠鎡鎃鎯鍤鍖鍇鍼鍘鍜鍶鍉鍐鍑鍠鍭鎏鍌鍪鍹 +0xeee0: 鍗鍕鍒鍏鍱鍷鍻鍡鍞鍣鍧鎀鍎鍙闇闀闉闃闅閷隮隰隬霠霟霘霝霙鞚鞡鞜 +0xef40: 鞞鞝韕韔韱顁顄顊顉顅顃餥餫餬餪餳餲餯餭餱餰馘馣馡騂駺駴駷駹駸駶駻 +0xef60: 駽駾駼騃骾髾髽鬁髼魈鮚鮨鮞鮛鮦鮡鮥鮤鮆鮢鮠鮯鴳鵁鵧鴶鴮鴯鴱鴸鴰 +0xefa0: 鵅鵂鵃鴾鴷鵀鴽翵鴭麊麉麍麰黈黚黻黿鼤鼣鼢齔龠儱儭儮嚘嚜嚗嚚嚝嚙 +0xefc0: 奰嬼屩屪巀幭幮懘懟懭懮懱懪懰懫懖懩擿攄擽擸攁攃擼斔旛曚曛曘櫅檹檽 +0xefe0: 櫡櫆檺檶檷櫇檴檭歞毉氋瀇瀌瀍瀁瀅瀔瀎濿瀀濻瀦濼濷瀊爁燿燹爃燽獶 +0xf040: 璸瓀璵瓁璾璶璻瓂甔甓癜癤癙癐癓癗癚皦皽盬矂瞺磿礌礓礔礉礐礒礑禭禬 +0xf060: 穟簜簩簙簠簟簭簝簦簨簢簥簰繜繐繖繣繘繢繟繑繠繗繓羵羳翷翸聵臑臒 +0xf0a0: 臐艟艞薴藆藀藃藂薳薵薽藇藄薿藋藎藈藅薱薶藒蘤薸薷薾虩蟧蟦蟢蟛蟫 +0xf0c0: 蟪蟥蟟蟳蟤蟔蟜蟓蟭蟘蟣螤蟗蟙蠁蟴蟨蟝襓襋襏襌襆襐襑襉謪謧謣謳謰謵 +0xf0e0: 譇謯謼謾謱謥謷謦謶謮謤謻謽謺豂豵貙貘貗賾贄贂贀蹜蹢蹠蹗蹖蹞蹥蹧 +0xf140: 蹛蹚蹡蹝蹩蹔轆轇轈轋鄨鄺鄻鄾醨醥醧醯醪鎵鎌鎒鎷鎛鎝鎉鎧鎎鎪鎞鎦鎕 +0xf160: 鎈鎙鎟鎍鎱鎑鎲鎤鎨鎴鎣鎥闒闓闑隳雗雚巂雟雘雝霣霢霥鞬鞮鞨鞫鞤鞪 +0xf1a0: 鞢鞥韗韙韖韘韺顐顑顒颸饁餼餺騏騋騉騍騄騑騊騅騇騆髀髜鬈鬄鬅鬩鬵 +0xf1c0: 魊魌魋鯇鯆鯃鮿鯁鮵鮸鯓鮶鯄鮹鮽鵜鵓鵏鵊鵛鵋鵙鵖鵌鵗鵒鵔鵟鵘鵚麎麌 +0xf1e0: 黟鼁鼀鼖鼥鼫鼪鼩鼨齌齕儴儵劖勷厴嚫嚭嚦嚧嚪嚬壚壝壛夒嬽嬾嬿巃幰 +0xf240: 徿懻攇攐攍攉攌攎斄旞旝曞櫧櫠櫌櫑櫙櫋櫟櫜櫐櫫櫏櫍櫞歠殰氌瀙瀧瀠瀖 +0xf260: 瀫瀡瀢瀣瀩瀗瀤瀜瀪爌爊爇爂爅犥犦犤犣犡瓋瓅璷瓃甖癠矉矊矄矱礝礛 +0xf2a0: 礡礜礗礞禰穧穨簳簼簹簬簻糬糪繶繵繸繰繷繯繺繲繴繨罋罊羃羆羷翽翾 +0xf2c0: 聸臗臕艤艡艣藫藱藭藙藡藨藚藗藬藲藸藘藟藣藜藑藰藦藯藞藢蠀蟺蠃蟶蟷 +0xf2e0: 蠉蠌蠋蠆蟼蠈蟿蠊蠂襢襚襛襗襡襜襘襝襙覈覷覶觶譐譈譊譀譓譖譔譋譕 +0xf340: 譑譂譒譗豃豷豶貚贆贇贉趬趪趭趫蹭蹸蹳蹪蹯蹻軂轒轑轏轐轓辴酀鄿醰醭 +0xf360: 鏞鏇鏏鏂鏚鏐鏹鏬鏌鏙鎩鏦鏊鏔鏮鏣鏕鏄鏎鏀鏒鏧镽闚闛雡霩霫霬霨霦 +0xf3a0: 鞳鞷鞶韝韞韟顜顙顝顗颿颽颻颾饈饇饃馦馧騚騕騥騝騤騛騢騠騧騣騞騜 +0xf3c0: 騔髂鬋鬊鬎鬌鬷鯪鯫鯠鯞鯤鯦鯢鯰鯔鯗鯬鯜鯙鯥鯕鯡鯚鵷鶁鶊鶄鶈鵱鶀鵸 +0xf3e0: 鶆鶋鶌鵽鵫鵴鵵鵰鵩鶅鵳鵻鶂鵯鵹鵿鶇鵨麔麑黀黼鼭齀齁齍齖齗齘匷嚲 +0xf440: 嚵嚳壣孅巆巇廮廯忀忁懹攗攖攕攓旟曨曣曤櫳櫰櫪櫨櫹櫱櫮櫯瀼瀵瀯瀷瀴 +0xf460: 瀱灂瀸瀿瀺瀹灀瀻瀳灁爓爔犨獽獼璺皫皪皾盭矌矎矏矍矲礥礣礧礨礤礩 +0xf4a0: 禲穮穬穭竷籉籈籊籇籅糮繻繾纁纀羺翿聹臛臙舋艨艩蘢藿蘁藾蘛蘀藶蘄 +0xf4c0: 蘉蘅蘌藽蠙蠐蠑蠗蠓蠖襣襦覹觷譠譪譝譨譣譥譧譭趮躆躈躄轙轖轗轕轘轚 +0xf4e0: 邍酃酁醷醵醲醳鐋鐓鏻鐠鐏鐔鏾鐕鐐鐨鐙鐍鏵鐀鏷鐇鐎鐖鐒鏺鐉鏸鐊鏿 +0xf540: 
鏼鐌鏶鐑鐆闞闠闟霮霯鞹鞻韽韾顠顢顣顟飁飂饐饎饙饌饋饓騲騴騱騬騪騶 +0xf560: 騩騮騸騭髇髊髆鬐鬒鬑鰋鰈鯷鰅鰒鯸鱀鰇鰎鰆鰗鰔鰉鶟鶙鶤鶝鶒鶘鶐鶛 +0xf5a0: 鶠鶔鶜鶪鶗鶡鶚鶢鶨鶞鶣鶿鶩鶖鶦鶧麙麛麚黥黤黧黦鼰鼮齛齠齞齝齙龑 +0xf5c0: 儺儹劘劗囃嚽嚾孈孇巋巏廱懽攛欂櫼欃櫸欀灃灄灊灈灉灅灆爝爚爙獾甗癪 +0xf5e0: 矐礭礱礯籔籓糲纊纇纈纋纆纍罍羻耰臝蘘蘪蘦蘟蘣蘜蘙蘧蘮蘡蘠蘩蘞蘥 +0xf640: 蠩蠝蠛蠠蠤蠜蠫衊襭襩襮襫觺譹譸譅譺譻贐贔趯躎躌轞轛轝酆酄酅醹鐿鐻 +0xf660: 鐶鐩鐽鐼鐰鐹鐪鐷鐬鑀鐱闥闤闣霵霺鞿韡顤飉飆飀饘饖騹騽驆驄驂驁騺 +0xf6a0: 騿髍鬕鬗鬘鬖鬺魒鰫鰝鰜鰬鰣鰨鰩鰤鰡鶷鶶鶼鷁鷇鷊鷏鶾鷅鷃鶻鶵鷎鶹 +0xf6c0: 鶺鶬鷈鶱鶭鷌鶳鷍鶲鹺麜黫黮黭鼛鼘鼚鼱齎齥齤龒亹囆囅囋奱孋孌巕巑廲 +0xf6e0: 攡攠攦攢欋欈欉氍灕灖灗灒爞爟犩獿瓘瓕瓙瓗癭皭礵禴穰穱籗籜籙籛籚 +0xf740: 糴糱纑罏羇臞艫蘴蘵蘳蘬蘲蘶蠬蠨蠦蠪蠥襱覿覾觻譾讄讂讆讅譿贕躕躔躚 +0xf760: 躒躐躖躗轠轢酇鑌鑐鑊鑋鑏鑇鑅鑈鑉鑆霿韣顪顩飋饔饛驎驓驔驌驏驈驊 +0xf7a0: 驉驒驐髐鬙鬫鬻魖魕鱆鱈鰿鱄鰹鰳鱁鰼鰷鰴鰲鰽鰶鷛鷒鷞鷚鷋鷐鷜鷑鷟 +0xf7c0: 鷩鷙鷘鷖鷵鷕鷝麶黰鼵鼳鼲齂齫龕龢儽劙壨壧奲孍巘蠯彏戁戃戄攩攥斖曫 +0xf7e0: 欑欒欏毊灛灚爢玂玁玃癰矔籧籦纕艬蘺虀蘹蘼蘱蘻蘾蠰蠲蠮蠳襶襴襳觾 +0xf840: 讌讎讋讈豅贙躘轤轣醼鑢鑕鑝鑗鑞韄韅頀驖驙鬞鬟鬠鱒鱘鱐鱊鱍鱋鱕鱙鱌 +0xf860: 鱎鷻鷷鷯鷣鷫鷸鷤鷶鷡鷮鷦鷲鷰鷢鷬鷴鷳鷨鷭黂黐黲黳鼆鼜鼸鼷鼶齃齏 +0xf8a0: 齱齰齮齯囓囍孎屭攭曭曮欓灟灡灝灠爣瓛瓥矕礸禷禶籪纗羉艭虃蠸蠷蠵 +0xf8c0: 衋讔讕躞躟躠躝醾醽釂鑫鑨鑩雥靆靃靇韇韥驞髕魙鱣鱧鱦鱢鱞鱠鸂鷾鸇鸃 +0xf8e0: 鸆鸅鸀鸁鸉鷿鷽鸄麠鼞齆齴齵齶囔攮斸欘欙欗欚灢爦犪矘矙礹籩籫糶纚 +0xf940: 纘纛纙臠臡虆虇虈襹襺襼襻觿讘讙躥躤躣鑮鑭鑯鑱鑳靉顲饟鱨鱮鱭鸋鸍鸐 +0xf960: 鸏鸒鸑麡黵鼉齇齸齻齺齹圞灦籯蠼趲躦釃鑴鑸鑶鑵驠鱴鱳鱱鱵鸔鸓黶鼊 +0xf9a0: 龤灨灥糷虪蠾蠽蠿讞貜躩軉靋顳顴飌饡馫驤驦驧鬤鸕鸗齈戇欞爧虌躨钂 +0xf9c0: 钀钁驩驨鬮鸙爩虋讟钃鱹麷癵驫鱺鸝灩灪麤齾齉龘碁銹裏墻恒粧嫺╔╦╗ +0xf9e0: ╠╬╣╚╩╝╒╤╕╞╪╡╘╧╛╓╥╖╟╫╢╙╨╜║═ ▓ diff --git a/t/big5-hkscs.enc b/t/big5-hkscs.enc new file mode 100644 index 0000000..8270539 --- /dev/null +++ b/t/big5-hkscs.enc @@ -0,0 +1,590 @@ +0x8840: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x8860: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x88a0: �������������������� +0x8940: �@�A �C �F�G�H�I �L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x8960: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x89a0: ������������ �������� ������ ���������������������� +0x89c0: ���‰� �ʼnƉljȉɉʉˉ͉̉ΉωЉщ҉ӉԉՉ։׉؉ىډۉ܉݉މ� +0x89e0: ������������������������������������������� +0x8a40: �@�A �C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x8a60: �`�a�b �d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t �v�w�x�y�z�{�|�}�~ +0x8aa0: �������������������� ���������� ���������������� ���������� +0x8ac0: �����ŠÊĊŊƊ� �Ɋʊˊ� �ΊϊЊъҊӊԊՊ֊׊؊يڊۊ� �� +0x8ae0: ������������������������ ������������������ +0x8b40: 
�@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S �U�V�W�X�Y�Z�[�\�]�^�_ +0x8b60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x8ba0: �������������������������������������������������������������� +0x8bc0: �����‹ËċŋƋNjȋɋʋˋ̋͋΋ϋЋыҋӋԋՋ֋׋؋ًڋۋ� �ދ� +0x8be0: ����������������������������������������� +0x8c40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x8c60: �`�a �c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x8ca0: ���������� �������������������������������������������������� +0x8cc0: �����ŒÌČ� �Ɍʌˌ� �ΌόЌьҌӌԌՌ֌׌،ٌ� �� +0x8d60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x8da0: �������������������������������������������������������������� +0x8dc0: �����ÍčōƍǍȍɍʍˍ͍̍΍ύЍэҍӍԍՍ֍׍؍ٍڍۍ܍ݍލ� +0x8de0: ������������������������������������������� +0x8e40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x8e60: �`�a�b�c�d�e�f�g�h �j�k�l�m�n �p�q�r�s�t�u�v�w�x�y�z�{�|�} +0x8ea0: �������������������� ���������������� ���������������������� +0x8ec0: �����ŽÎĎŎƎǎȎɎʎˎ� �Ύ� �юҎӎԎՎ֎׎؎َڎێ܎ݎގ� +0x8ee0: ������������������������������������������� +0x8f40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V �X�Y�Z�[�\�]�^�_ +0x8f60: �`�a�b�c�d�e�f�g�h �j�k�l�m �o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x8fa0: �������������������������������������������������������������� +0x8fc0: �����ÏďŏƏǏȏɏ� �͏ΏϏЏяҏӏԏՏ֏׏؏ُڏۏ܏ݏޏ� +0x8fe0: ����������������������������������������� +0x9040: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x9060: �`�a�b�c�d�e�f�g�h�i�j�k�l �n�o�p�q�r�s�t�u�v�w�x�y �{�|�}�~ +0x90a0: �������������������������������������������������������������� +0x90c0: �����ÐĐŐƐǐȐɐʐː̐͐ΐϐАѐҐӐԐՐ֐אِؐڐ� �ݐސ� +0x90e0: ������������������� ������������������������ +0x9140: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x9160: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x91a0: ������������������������������������������������������������ +0x91c0: 
�����‘ÑđőƑǑȑɑʑˑ̑͑ΑϑБёґӑԑՑ֑בّؑڑۑܑݑޑ� +0x91e0: ������������������������������������������� +0x9240: �@�A�B�C �E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x9260: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x92a0: ���������������������������� �������������������������� +0x92c0: �����’ÒĒŒƒ� �ɒʒ˒̒͒Βϒ� �ҒӒԒՒ֒גْؒڒےܒݒޒ� +0x92e0: ������������������������������������������� +0x9340: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x9360: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x93a0: �������������������������������������������������������������� +0x93c0: �����“ÓēœƓǓȓɓʓ˓͓̓ΓϓГѓғӓԓՓ֓דؓٓړۓܓݓޓ� +0x93e0: ������������������������������������������� +0x9440: �@�A�B�C�D�E�F �H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x9460: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x94a0: �������������������������������������������������������������� +0x94c0: �����”ÔĔŔƔǔȔ� �˔͔̔ΔϔДєҔӔԔՔ֔הؔٔڔ۔ܔݔޔ� +0x94e0: ������������������������������������������� +0x9540: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x9560: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x95a0: �������������������������������������������������������������� +0x95c0: �����•ÕĕŕƕǕȕɕʕ˕͕̕ΕϕЕѕҕӕԕՕ֕ו� �ڕەܕݕޕ� +0x95e0: ������������������������������������������� +0x9640: �@�A�B�C �E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x9660: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x96a0: �������������������������������������������������������������� +0x96c0: �����–ÖĖŖƖǖȖɖʖ˖̖͖ΖϖЖіҖӖԖՖ֖זٖؖږۖܖݖޖ� +0x96e0: ��������������� ���������������������� ���� +0x9740: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x9760: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x97a0: �������������������������������������������������������������� +0x97c0: �����—×ėŗƗǗȗɗʗ˗̗͗ΗϗЗїҗӗԗ՗֗חؗٗڗۗܗݗޗ� +0x97e0: ������������������������������������������� +0x9840: 
�@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x9860: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x98a0: �������������������������������������������������������������� +0x98c0: �����˜ØĘŘƘǘȘɘʘ˘̘͘ΘϘИјҘӘԘ՘֘טؘ٘ژۘܘݘޘ� +0x98e0: ������������������������������������������� +0x9940: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x9960: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x99a0: �������������������������������������������������������������� +0x99c0: �����™ÙęřƙǙșəʙ˙̙͙ΙϙЙљҙәԙՙ֙יؙٙڙۙܙݙޙ� +0x99e0: ������������������������������������������� +0x9a40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x9a60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x9aa0: �������������������������������������������������������������� +0x9ac0: �����šÚĚŚƚǚȚɚʚ˚͚̚ΚϚКњҚӚԚ՚֚ךؚٚښۚܚݚޚ� +0x9ae0: ������������������������������������������� +0x9b40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x9b60: �` �b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u �w �y�z �|�}�~ +0x9ba0: �������������������������������������������������������������� +0x9bc0: �����›Ûě� �Ǜțɛʛ˛̛͛ΛϛЛћқӛԛ՛֛כ؛ٛڛۛܛ� �� +0x9be0: �������������� ����������� ���������������� +0x9c40: �@�A �C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R �T�U�V�W�X�Y�Z�[�\�]�^�_ +0x9c60: �`�a �c�d�e�f�g �i�j �l�m�n�o�p�q�r�s�t�u�v �x�y�z�{�|�}�~ +0x9ca0: ������������������������������������������������������ ���� +0x9cc0: �����œÜĜŜƜǜȜɜʜ˜̜͜Μ� �ќҜӜԜ՜֜ל؜ٜڜۜܜݜޜ� +0x9ce0: ������������������������������������������� +0x9d40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V �X�Y �[�\�]�^�_ +0x9d60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x9da0: �������������������������������������������������������������� +0x9dc0: ������ �ŝƝǝȝɝʝ˝̝͝ΝϝНѝҝӝԝ՝֝ם؝ٝڝ۝ܝݝޝ� +0x9de0: ������������������������������������������� +0x9e40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x9e60: 
�`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x9ea0: ���������������� �������������������������������������������� +0x9ec0: �����žÞĞŞƞǞȞɞʞ˞̞͞ΞϞОўҞӞԞ՞֞מ؞ٞڞ۞ܞݞޞ� +0x9ee0: ����������������� ���������������������� �� +0x9f40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0x9f60: �a�b�c�d�e �g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0x9fa0: �������������������������������������������������������������� +0x9fc0: �����ŸßğşƟǟȟɟ� �̟͟ΟϟПџҟӟԟ՟֟� �ٟڟ۟ܟݟޟ� +0x9fe0: ������������������������������������������� +0xa040: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa060: �`�a�b �d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v �x�y�z�{�|�}�~ +0xa0a0: �������������������������������������������������������������� +0xa0c0: ����� àĠŠƠǠȠɠʠˠ̠͠ΠϠРѠҠӠ� �֠נؠ٠ڠ۠ܠݠ� +0xa0e0: ����� �������������������������������������� +0xa140: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa160: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa1a0: �������������������������������������������������������������� +0xa1c0: �����¡áġšơǡȡɡʡˡ̡͡ΡϡСѡҡӡԡա֡סء١ڡۡܡݡޡ� +0xa1e0: ������������������������������������������� +0xa240: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa260: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa2a0: ������ ������������������������������������������������ +0xa2c0: �����¢âĢŢƢǢȢɢʢˢ̢͢΢ϢТѢҢӢԢբ֢עآ٢ڢۢܢݢޢ� +0xa2e0: ������������������������������������������� +0xa340: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa360: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa3a0: �������������������������������������������������������������� +0xa3e0: �� +0xa440: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa460: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa4a0: �������������������������������������������������������������� +0xa4c0: �����¤äĤŤƤǤȤɤʤˤ̤ͤΤϤФѤҤӤԤդ֤פؤ٤ڤۤܤݤޤ� +0xa4e0: 
������������������������������������������� +0xa540: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa560: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa5a0: �������������������������������������������������������������� +0xa5c0: �����¥åĥťƥǥȥɥʥ˥̥ͥΥϥХѥҥӥԥե֥ץإ٥ڥۥܥݥޥ� +0xa5e0: ������������������������������������������� +0xa640: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa660: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa6a0: �������������������������������������������������������������� +0xa6c0: �����¦æĦŦƦǦȦɦʦ˦̦ͦΦϦЦѦҦӦԦզ֦צئ٦ڦۦܦݦަ� +0xa6e0: ������������������������������������������� +0xa740: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa760: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa7a0: �������������������������������������������������������������� +0xa7c0: �����§çħŧƧǧȧɧʧ˧̧ͧΧϧЧѧҧӧԧէ֧קا٧ڧۧܧݧާ� +0xa7e0: ������������������������������������������� +0xa840: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa860: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa8a0: �������������������������������������������������������������� +0xa8c0: �����¨èĨŨƨǨȨɨʨ˨̨ͨΨϨШѨҨӨԨը֨רب٨ڨۨܨݨި� +0xa8e0: ������������������������������������������� +0xa940: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xa960: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xa9a0: �������������������������������������������������������������� +0xa9c0: �����©éĩũƩǩȩɩʩ˩̩ͩΩϩЩѩҩөԩթ֩שة٩ک۩ܩݩީ� +0xa9e0: ������������������������������������������� +0xaa40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xaa60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xaaa0: �������������������������������������������������������������� +0xaac0: �����ªêĪŪƪǪȪɪʪ˪̪ͪΪϪЪѪҪӪԪժ֪תت٪ڪ۪ܪݪު� +0xaae0: ������������������������������������������� +0xab40: 
�@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xab60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xaba0: �������������������������������������������������������������� +0xabc0: �����«ëīūƫǫȫɫʫ˫̫ͫΫϫЫѫҫӫԫի֫׫ث٫ګ۫ܫݫޫ� +0xabe0: ������������������������������������������� +0xac40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xac60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xaca0: �������������������������������������������������������������� +0xacc0: �����¬ìĬŬƬǬȬɬʬˬ̬ͬάϬЬѬҬӬԬլ֬׬ج٬ڬ۬ܬݬެ� +0xace0: ������������������������������������������� +0xad40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xad60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xada0: �������������������������������������������������������������� +0xadc0: �����­íĭŭƭǭȭɭʭ˭̭ͭέϭЭѭҭӭԭխ֭׭ح٭ڭۭܭݭޭ� +0xade0: ������������������������������������������� +0xae40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xae60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xaea0: �������������������������������������������������������������� +0xaec0: �����®îĮŮƮǮȮɮʮˮ̮ͮήϮЮѮҮӮԮծ֮׮خٮڮۮܮݮޮ� +0xaee0: ������������������������������������������� +0xaf40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xaf60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xafa0: �������������������������������������������������������������� +0xafc0: �����¯ïįůƯǯȯɯʯ˯̯ͯίϯЯѯүӯԯկ֯ׯدٯگۯܯݯޯ� +0xafe0: ������������������������������������������� +0xb040: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb060: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb0a0: �������������������������������������������������������������� +0xb0c0: �����°ðİŰưǰȰɰʰ˰̰ͰΰϰаѰҰӰ԰հְװذٰڰ۰ܰݰް� +0xb0e0: ������������������������������������������� +0xb140: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb160: 
�`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb1a0: �������������������������������������������������������������� +0xb1c0: �����±ñıűƱDZȱɱʱ˱̱ͱαϱбѱұӱԱձֱױرٱڱ۱ܱݱޱ� +0xb1e0: ������������������������������������������� +0xb240: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb260: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb2a0: �������������������������������������������������������������� +0xb2c0: �����²òIJŲƲDzȲɲʲ˲̲ͲβϲвѲҲӲԲղֲײزٲڲ۲ܲݲ޲� +0xb2e0: ������������������������������������������� +0xb340: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb360: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb3a0: �������������������������������������������������������������� +0xb3c0: �����³óijųƳdzȳɳʳ˳̳ͳγϳгѳҳӳԳճֳ׳سٳڳ۳ܳݳ޳� +0xb3e0: ������������������������������������������� +0xb440: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb460: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb4a0: �������������������������������������������������������������� +0xb4c0: �����´ôĴŴƴǴȴɴʴ˴̴ʹδϴдѴҴӴԴմִ״شٴڴ۴ܴݴ޴� +0xb4e0: ������������������������������������������� +0xb540: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb560: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb5a0: �������������������������������������������������������������� +0xb5c0: �����µõĵŵƵǵȵɵʵ˵̵͵εϵеѵҵӵԵյֵ׵صٵڵ۵ܵݵ޵� +0xb5e0: ������������������������������������������� +0xb640: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb660: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb6a0: �������������������������������������������������������������� +0xb6c0: �����¶öĶŶƶǶȶɶʶ˶̶Ͷζ϶жѶҶӶԶնֶ׶ضٶڶ۶ܶݶ޶� +0xb6e0: ������������������������������������������� +0xb740: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb760: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb7a0: 
�������������������������������������������������������������� +0xb7c0: �����·÷ķŷƷǷȷɷʷ˷̷ͷηϷзѷҷӷԷշַ׷طٷڷ۷ܷݷ޷� +0xb7e0: ������������������������������������������� +0xb840: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb860: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb8a0: �������������������������������������������������������������� +0xb8c0: �����¸øĸŸƸǸȸɸʸ˸̸͸θϸиѸҸӸԸոָ׸ظٸڸ۸ܸݸ޸� +0xb8e0: ������������������������������������������� +0xb940: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xb960: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xb9a0: �������������������������������������������������������������� +0xb9c0: �����¹ùĹŹƹǹȹɹʹ˹̹͹ιϹйѹҹӹԹչֹ׹عٹڹ۹ܹݹ޹� +0xb9e0: ������������������������������������������� +0xba40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xba60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xbaa0: �������������������������������������������������������������� +0xbac0: �����ºúĺźƺǺȺɺʺ˺̺ͺκϺкѺҺӺԺպֺ׺غٺںۺܺݺ޺� +0xbae0: ������������������������������������������� +0xbb40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xbb60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xbba0: �������������������������������������������������������������� +0xbbc0: �����»ûĻŻƻǻȻɻʻ˻̻ͻλϻлѻһӻԻջֻ׻ػٻڻۻܻݻ޻� +0xbbe0: ������������������������������������������� +0xbc40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xbc60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xbca0: �������������������������������������������������������������� +0xbcc0: �����¼üļżƼǼȼɼʼ˼̼ͼμϼмѼҼӼԼռּ׼ؼټڼۼܼݼ޼� +0xbce0: ������������������������������������������� +0xbd40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xbd60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xbda0: �������������������������������������������������������������� +0xbdc0: 
�����½ýĽŽƽǽȽɽʽ˽̽ͽνϽнѽҽӽԽսֽ׽ؽٽڽ۽ܽݽ޽� +0xbde0: ������������������������������������������� +0xbe40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xbe60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xbea0: �������������������������������������������������������������� +0xbec0: �����¾þľžƾǾȾɾʾ˾̾;ξϾоѾҾӾԾվ־׾ؾپھ۾ܾݾ޾� +0xbee0: ������������������������������������������� +0xbf40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xbf60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xbfa0: �������������������������������������������������������������� +0xbfc0: �����¿ÿĿſƿǿȿɿʿ˿̿ͿοϿпѿҿӿԿտֿ׿ؿٿڿۿܿݿ޿� +0xbfe0: ������������������������������������������� +0xc040: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc060: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc0a0: �������������������������������������������������������������� +0xc0c0: ���������������������������������������������������������������� +0xc0e0: �������������������������������������������������������������� +0xc140: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc160: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc1a0: �������������������������������������������������������������� +0xc1c0: ���������������������������������������������������������������� +0xc1e0: �������������������������������������������������������������� +0xc240: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc260: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc2a0: ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ +0xc2c0: ���������������������������������������������������������������� +0xc2e0: �������������������������������������������������������������� +0xc340: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc360: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc3a0: áâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ +0xc3c0: 
���������������������������������������������������������������� +0xc3e0: �������������������������������������������������������������� +0xc440: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc460: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc4a0: ġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿ +0xc4c0: ���������������������������������������������������������������� +0xc4e0: �������������������������������������������������������������� +0xc540: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc560: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc5a0: šŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſ +0xc5c0: ���������������������������������������������������������������� +0xc5e0: �������������������������������������������������������������� +0xc640: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc660: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc6a0: ơƢƣƤƥƦƧƨƩƪƫƬƭƮƯưƱƲƳƴƵƶƷƸƹƺƻƼƽƾƿ +0xc6c0: ���������������������������������������������������������������� +0xc6e0: �������������������������������������������������������������� +0xc740: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc760: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc7a0: ǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸǹǺǻǼǽǾǿ +0xc7c0: ���������������������������������������������������������������� +0xc7e0: �������������������������������������������������������������� +0xc840: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc860: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc8a0: ȡȢȣȤȥȦȧȨȩȪȫȬȭȮȯȰȱȲȳȴȵȶȷȸȹȺȻȼȽȾȿ +0xc8c0: ���������������������������������������������������������������� +0xc8e0: ������������������������������������ �������������������� +0xc940: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xc960: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xc9a0: ɡɢɣɤɥɦɧɨɩɪɫɬɭɮɯɰɱɲɳɴɵɶɷɸɹɺɻɼɽɾɿ +0xc9c0: 
���������������������������������������������������������������� +0xc9e0: �������������������������������������������������������������� +0xca40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xca60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xcaa0: ʡʢʣʤʥʦʧʨʩʪʫʬʭʮʯʰʱʲʳʴʵʶʷʸʹʺʻʼʽʾʿ +0xcac0: ���������������������������������������������������������������� +0xcae0: �������������������������������������������������������������� +0xcb40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xcb60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xcba0: ˡˢˣˤ˥˦˧˨˩˪˫ˬ˭ˮ˯˰˱˲˳˴˵˶˷˸˹˺˻˼˽˾˿ +0xcbc0: ���������������������������������������������������������������� +0xcbe0: �������������������������������������������������������������� +0xcc40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xcc60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xcca0: ̴̵̶̷̸̡̢̧̨̣̤̥̦̩̪̫̬̭̮̯̰̱̲̳̹̺̻̼̽̾̿ +0xccc0: ���������������������������������������������������������������� +0xcce0: �������������������������������������������������������������� +0xcd40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xcd60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xcda0: ͣͤͥͦͧͨͩͪͫͬͭͮͯ͢͡ͰͱͲͳʹ͵Ͷͷ͸͹ͺͻͼͽ;Ϳ +0xcdc0: ���������������������������������������������������������������� +0xcde0: �������������������������������������������������������������� +0xce40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xce60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xcea0: Ρ΢ΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξο +0xcec0: ���������������������������������������������������������������� +0xcee0: �������������������������������������������������������������� +0xcf40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xcf60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xcfa0: ϡϢϣϤϥϦϧϨϩϪϫϬϭϮϯϰϱϲϳϴϵ϶ϷϸϹϺϻϼϽϾϿ +0xcfc0: 
���������������������������������������������������������������� +0xcfe0: �������������������������������������������������������������� +0xd040: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd060: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd0a0: СТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп +0xd0c0: ���������������������������������������������������������������� +0xd0e0: �������������������������������������������������������������� +0xd140: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd160: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd1a0: ѡѢѣѤѥѦѧѨѩѪѫѬѭѮѯѰѱѲѳѴѵѶѷѸѹѺѻѼѽѾѿ +0xd1c0: ���������������������������������������������������������������� +0xd1e0: �������������������������������������������������������������� +0xd240: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd260: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd2a0: ҡҢңҤҥҦҧҨҩҪҫҬҭҮүҰұҲҳҴҵҶҷҸҹҺһҼҽҾҿ +0xd2c0: ���������������������������������������������������������������� +0xd2e0: �������������������������������������������������������������� +0xd340: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd360: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd3a0: ӡӢӣӤӥӦӧӨөӪӫӬӭӮӯӰӱӲӳӴӵӶӷӸӹӺӻӼӽӾӿ +0xd3c0: ���������������������������������������������������������������� +0xd3e0: �������������������������������������������������������������� +0xd440: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd460: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd4a0: ԡԢԣԤԥԦԧԨԩԪԫԬԭԮԯ԰ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿ +0xd4c0: ���������������������������������������������������������������� +0xd4e0: �������������������������������������������������������������� +0xd540: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd560: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd5a0: աբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտ +0xd5c0: 
���������������������������������������������������������������� +0xd5e0: �������������������������������������������������������������� +0xd640: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd660: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd6a0: ְֱֲֳִֵֶַָֹֺֻּֽ֢֣֤֥֦֧֪֭֮֡֨֩֫֬֯־ֿ +0xd6c0: ���������������������������������������������������������������� +0xd6e0: �������������������������������������������������������������� +0xd740: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd760: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd7a0: סעףפץצקרשת׫׬׭׮ׯװױײ׳״׵׶׷׸׹׺׻׼׽׾׿ +0xd7c0: ���������������������������������������������������������������� +0xd7e0: �������������������������������������������������������������� +0xd840: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd860: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd8a0: ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿ +0xd8c0: ���������������������������������������������������������������� +0xd8e0: �������������������������������������������������������������� +0xd940: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xd960: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xd9a0: ١٢٣٤٥٦٧٨٩٪٫٬٭ٮٯٰٱٲٳٴٵٶٷٸٹٺٻټٽپٿ +0xd9c0: ���������������������������������������������������������������� +0xd9e0: �������������������������������������������������������������� +0xda40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xda60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xdaa0: ڡڢڣڤڥڦڧڨکڪګڬڭڮگڰڱڲڳڴڵڶڷڸڹںڻڼڽھڿ +0xdac0: ���������������������������������������������������������������� +0xdae0: �������������������������������������������������������������� +0xdb40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xdb60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xdba0: ۣۡۢۤۥۦۧۨ۩۪ۭ۫۬ۮۯ۰۱۲۳۴۵۶۷۸۹ۺۻۼ۽۾ۿ +0xdbc0: 
���������������������������������������������������������������� +0xdbe0: �������������������������������������������������������������� +0xdc40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xdc60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xdca0: ܡܢܣܤܥܦܧܨܩܪܫܬܭܮܯܱܴܷܸܹܻܼܾܰܲܳܵܶܺܽܿ +0xdcc0: ���������������������������������������������������������������� +0xdce0: �������������������������������������������������������������� +0xdd40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xdd60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xdda0: ݡݢݣݤݥݦݧݨݩݪݫݬݭݮݯݰݱݲݳݴݵݶݷݸݹݺݻݼݽݾݿ +0xddc0: ���������������������������������������������������������������� +0xdde0: �������������������������������������������������������������� +0xde40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xde60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xdea0: ޡޢޣޤޥަާިީުޫެޭޮޯްޱ޲޳޴޵޶޷޸޹޺޻޼޽޾޿ +0xdec0: ���������������������������������������������������������������� +0xdee0: �������������������������������������������������������������� +0xdf40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xdf60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xdfa0: ߡߢߣߤߥߦߧߨߩߪ߲߫߬߭߮߯߰߱߳ߴߵ߶߷߸߹ߺ߻߼߽߾߿ +0xdfc0: ���������������������������������������������������������������� +0xdfe0: �������������������������������������������������������������� +0xe040: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe060: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe0a0: ������������������������������� +0xe0c0: ���������������������������������������������������������������� +0xe0e0: �������������������������������������������������������������� +0xe140: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe160: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe1a0: ������������������������������� +0xe1c0: 
���������������������������������������������������������������� +0xe1e0: �������������������������������������������������������������� +0xe240: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe260: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe2a0: ������������������������������� +0xe2c0: ���������������������������������������������������������������� +0xe2e0: �������������������������������������������������������������� +0xe340: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe360: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe3a0: ������������������������������� +0xe3c0: ���������������������������������������������������������������� +0xe3e0: �������������������������������������������������������������� +0xe440: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe460: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe4a0: ������������������������������� +0xe4c0: ���������������������������������������������������������������� +0xe4e0: �������������������������������������������������������������� +0xe540: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe560: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe5a0: ������������������������������� +0xe5c0: ���������������������������������������������������������������� +0xe5e0: �������������������������������������������������������������� +0xe640: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe660: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe6a0: ������������������������������� +0xe6c0: ���������������������������������������������������������������� +0xe6e0: �������������������������������������������������������������� +0xe740: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe760: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe7a0: ������������������������������� +0xe7c0: 
���������������������������������������������������������������� +0xe7e0: �������������������������������������������������������������� +0xe840: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe860: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe8a0: ������������������������������� +0xe8c0: ���������������������������������������������������������������� +0xe8e0: �������������������������������������������������������������� +0xe940: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xe960: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xe9a0: ������������������������������� +0xe9c0: ���������������������������������������������������������������� +0xe9e0: �������������������������������������������������������������� +0xea40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xea60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xeaa0: ������������������������������� +0xeac0: ���������������������������������������������������������������� +0xeae0: �������������������������������������������������������������� +0xeb40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xeb60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xeba0: ������������������������������� +0xebc0: ���������������������������������������������������������������� +0xebe0: �������������������������������������������������������������� +0xec40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xec60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xeca0: ������������������������������� +0xecc0: ���������������������������������������������������������������� +0xece0: �������������������������������������������������������������� +0xed40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xed60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xeda0: 
�������������������������������������������������������������� +0xedc0: ���������������������������������������������������������������� +0xede0: �������������������������������������������������������������� +0xee40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xee60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xeea0: ������������������������������� +0xeec0: ���������������������������������������������������������������� +0xeee0: �������������������������������������������������������������� +0xef40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xef60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xefa0: ������������������������������� +0xefc0: ���������������������������������������������������������������� +0xefe0: �������������������������������������������������������������� +0xf040: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf060: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf0a0: ������������������������������� +0xf0c0: ���������������������������������������������������������������� +0xf0e0: �������������������������������������������������������������� +0xf140: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf160: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf1a0: ������������������������������� +0xf1c0: ���������������������������������������������������������������� +0xf1e0: �������������������������������������������������������������� +0xf240: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf260: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf2a0: ������������������������������� +0xf2c0: ���������������������������������������������������������������� +0xf2e0: �������������������������������������������������������������� +0xf340: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf360: 
�`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf3a0: ������������������������������� +0xf3c0: ���������������������������������������������������������������� +0xf3e0: �������������������������������������������������������������� +0xf440: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf460: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf4a0: �������������������������������������������������������������� +0xf4c0: ���������������������������������������������������������������� +0xf4e0: �������������������������������������������������������������� +0xf540: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf560: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf5a0: �������������������������������������������������������������� +0xf5c0: ���������������������������������������������������������������� +0xf5e0: �������������������������������������������������������������� +0xf640: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf660: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf6a0: �������������������������������������������������������������� +0xf6c0: ���������������������������������������������������������������� +0xf6e0: �������������������������������������������������������������� +0xf740: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf760: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf7a0: �������������������������������������������������������������� +0xf7c0: ���������������������������������������������������������������� +0xf7e0: �������������������������������������������������������������� +0xf840: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf860: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf8a0: �������������������������������������������������������������� +0xf8c0: ���������������������������������������������������������������� 
+0xf8e0: �������������������������������������������������������������� +0xf940: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xf960: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xf9a0: �������������������������������������������������������������� +0xf9c0: ���������������������������������������������������������������� +0xf9e0: ���������������������������������������������������� �� +0xfa40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^ +0xfa60: �`�a�b�c�d�e �g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xfaa0: �������������������������������������������������������� ���� +0xfac0: ���������� ������������������������������ �������������������� +0xfae0: �������������������������������������������������������������� +0xfb40: �@�A�B�C�D�E�F�G �I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xfb60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xfba0: ���������������������������������������������� �������������� +0xfbc0: ���������������������������������������������������������������� +0xfbe0: �������������������������������������� ���������� ���������� +0xfc40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N �P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xfc60: �`�a�b�c�d�e�f�g�h�i�j�k �m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xfca0: ������������������������������������������������ ������������ +0xfcc0: ���������������������������������������������������������������� +0xfce0: ���� ���������������������������� �������������������������� +0xfd40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q�R�S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xfd60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n�o�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xfda0: �������������������������������������������� ���� �������� +0xfdc0: ���������������������������������������������������������������� +0xfde0: ���������������������������������� �������������������������� +0xfe40: �@�A�B�C�D�E�F�G�H�I�J�K�L�M�N�O�P�Q �S�T�U�V�W�X�Y�Z�[�\�]�^�_ +0xfe60: �`�a�b�c�d�e�f�g�h�i�j�k�l�m�n 
�p�q�r�s�t�u�v�w�x�y�z�{�|�}�~ +0xfea0: ������������������ ������������������������������������������ +0xfec0: ���������������������������������������������������������� ���� +0xfee0: �������������������������������������������������������������� diff --git a/t/big5-hkscs.utf b/t/big5-hkscs.utf new file mode 100644 index 0000000..73d7f63 --- /dev/null +++ b/t/big5-hkscs.utf @@ -0,0 +1,590 @@ +0x8840: ĀÁǍÀĒÉĚÈŌÓ +0x8860: ǑÒẾỀÊāáǎàɑēéěèīíǐìōóǒòūúǔùǖǘǚ +0x88a0: ǜüếềêɡ +0x8940:  攊 丽滝鵎釟 撑会伨侨兖兴农凤务动医华发变团声处备夲 +0x8960: 头学实実岚庆总斉柾栄桥济炼电纤纬纺织经统缆缷艺苏药视设询车轧轮 +0x89a0: 琑糼緍楆竉刧 醌碸酞肼 贋胶 肟黇䳍鷉鸌䰾鸊㗁 +0x89c0: 溚舾甙 䤑马骏龙禇两亁亀亇亿仫伷㑌侽㹈倃傈㑽㒓㒥円夅凛 +0x89e0: 凼刅争剹劐匧㗇厩㕑厰㕓参吣㕭㕲㚁咓咣咴咹哐哯唘唣唨㖘唿㖥㖿嗗㗅 +0x8a40: 唥 喐㧬蹆䁓睺㨴䟕擝撍蹾 +0x8a60:  䟴骲㩧㿭㔆鵮頕 䏙撴哣㧻 +0x8aa0: 擪蹨 䠋㿺塳 啹䂻䎺 䪴膪飵 +0x8ac0: 捹㧾跀嚡摼㹃  㦒㨆㕸噒㒼氽  +0x8ae0: 羓㗻㾓 㿹搲 +0x8b40: 㨘閪哌苄喹 鰦骶煀腭胬尜脴 +0x8b60: 㞗卟醶㗝㘉嚯婔垜 +0x8ba0: 㜃墙剨㘚箲孨䠀䬬鼧䧧鰟鮍嗻㗲嚉 +0x8bc0: 丨夂靑乛亻㔾尣彑忄㣺扌攵歺氵氺灬爫丬犭罒礻糹罓㓁 耂 +0x8be0: 肀卝衤见讠贝钅镸长门韦页风飞饣鱼鸟黄歯龜丷阝户钢 +0x8c40: 倻淾㷉袏灷峵䬠㕙愢辧釶熑朙玺㲋䬐磤琂冮䀉橣 +0x8c60: 䈣蘏 稪靕灍匤鏴盙矝亣俰傼丯众吴綋墒壐庒庙忂斋 +0x8ca0: 椙橃泿 爀玌㻛嬕璹讃窓篬糃繬苸薗袐躹迏蕟駠鈡 +0x8cc0: 䁱䊢娚 顨杫䉶圽 藖芿䲁嵻宖 繛 +0x8d60: 崾嵈嵖㷼㠏嶤嶹㠠㠸幂庽弥徃㤈㤔㤿㥍惗愽峥㦉憷憹懏㦸戬抐拥挘㧸嚱 +0x8da0: 㨃揢揻搇摚㩋擀崕嘡龟㪗斆㪽旿晓㫲暒㬢朖㭂枤栀㭘桊梄㭲㭱㭻椉楃牜 +0x8dc0: 楤榟榅㮼槖㯝橥橴橱檂㯬檙㯲檫檵櫔櫶殁毁毪汵沪㳋洂洆洦涁㳯涤涱渕渘 +0x8de0: 温溆溻滢滚齿滨滩漤漴㵆澁澾㵪㵵熷岙㶊瀬㶑灐灔灯灿炉䏁㗱 +0x8e40: 垾焾㙎榢孴穉穥穽窻窰竂竃燑䇊竚竝竪䇯咲笋筕笩 +0x8e60: 箢筯莜篐萡箒 㶭蒒篺 簵籄粃粦晽糉糇糦籴糳糵 +0x8ea0: 繧䔝絝璍綉綫焵綳 緤㴓緵緥 繮纒䌫鑬縧罀罁罇礶 +0x8ec0: 駡羗羣䕜䔃翺 耈耝 耯耻耼聡䦉朥肧脇 +0x8ee0: 脚墰汿擧舘橓䑺舩俹蓢荢芪椛䇛 +0x8f40: 蕋苐茚㛁艻苢茘㶿茝嗬莅䔋莬 菓㑾橗蕚㒖 +0x8f60: 葘葱㷓䓤檧葊祘 蓞 莑䒠蒓蓤䉀䕃蔴嫲䔧蕳䔖枿蘖 +0x8fa0: 藁蘂䕪蘨㙈号虾蝱蟮螱蟚蠏噡虬桖䘏衅衆 +0x8fc0: 衞袜䙛袴袵揁装睷覇覊 覧覼觧誜瞓釾誐竩䜓煼謌謟 +0x8fe0: 謿譌譍誩讐讛誯䘕衏貛㜥賖贒贃賛灜贑㻐 +0x9040: 趩㭼竧躭躶軃鋔輙輭辥錃辳䤪廸迹 +0x9060: 㦀逷遡邨郄邮 酧㫰醩釄粬鈎沟鉁鉢  +0x90a0: 錬鍫炏嫃䥥鉄鍳鑛躼閅閦鐦閠濶䊹䧟氜陻隖 +0x90c0: 
䅬隣懚隶磵隽双䦡霱虂霶䨏䔽䖅灵孁霛 靗孊 +0x90e0: 靟鐥僐鞉鞟鞱鞾韀韒韠韮琜 韵䫑頴頳顋顦㬎㵑 +0x9140: 飊颷飈飇䫿喰飡飦飬鍸餹䭲駵騌騻騐驘㛄髠髢髴䰎 +0x9160: 鬔鬭倴鬴㣃魐魀婅鮎鰂鯿鰌鷔鵾鶃鸎梈 +0x91a0: 鷄鴹麐麕麞麢䴴麪麯黁㭠㧥㴝伲㞾鼂鼈䮖鐤鼗 +0x91c0: 鼹嚟嚊齅馸韲葿齢齩竜龎爖䮾煷玞禟鍩鏳鋬鎁鏋 +0x91e0: 爗㻫睲穃烐煾炣㻇㜢㛡㜣坛 +0x9240: 蔃 葕䓴柹㜳㰕㷧塬栐䁗哋嚞 +0x9260: 嚒鏆鎜仸儫㠙亼佋侊婨㦙㐵伩諚亘 +0x92a0: 働儍侢伃佂倮偬傁俌俥偘僼 湶浲冨凃䓝 +0x92c0: 赺剙劤勡 䙺熌槑 㻞璙琔瑖玘䮎叐㖄爏喴 +0x92e0: 响圝鉝雴鍦埝垍坿㘾壋媙娬妸銏婾嫏娒㛵洅瑃娡 +0x9340: 媁鏠璌焅䥲鐈鎽㞠尞岞幞幈廍孏㜁㛝㛓脪 +0x9360: 弌弎婫孄蘔衠恾忛㺸懀憙憘恵 +0x93a0: 摱㨩挷撶挱揑护搻敫楲㯴唍曎 +0x93c0: 㫠䆐磮㑤暎晫䮓昰晣昞㣑 +0x93e0: 㮙瓐㮖枏梶栞㯄檾㡣樳橒櫉欅攑梘橌㯗橺歗鎠鋲 +0x9440: 銉鑧涥漋 㶏渄娽渊塇洤硂焻烱牐犇犔兹瑺 +0x9460: 㼆㺱悧㻳瓌琼鎇琷䒟䕑疃㽣㽘畕癳㬙瑨㫻 +0x94a0: 㷍㻿釺圲鍂僟睸眎眏睻㩞琸璛㺿䃈錇 +0x94c0: 砞碍碈磒珐祙䄎 蒖禥樭稺秴䅮䄲鈵秱㖗啫㕰㚪 +0x94e0: 竢婙娍磰娪竾䇹籝籭䈑糍粎籼粮檲緜縇緓罎 +0x9540: 綗䉪柖埄翝笧笌駦虅驣樜㧢騟蒀 +0x9560: 䓪脷䐂胆脉腂飃艢艥葓蘐媆䅿嬫嫤蚠蠭娂 +0x95a0: 衮佅袇袿裦襥襍襔㺭蒣䛵䛏㟲訽訜彍鈫旔焩烄鵭 +0x95c0: 貟賩妚矃姰䍮㛔踪躧輰轊䋴汘澻䢛潹溋鯩㚵邻 啱䤆醻鐄䁢 +0x95e0: 鐧蓥訫閙閧閗閖瑅㻂㻧随㻌琒瑫㻼靁 +0x9640: 桇䨝 鍨銺嬑譩䤼珹鞛靱餸巁頟鋶釥䓀 +0x9660: 飜㼀鈪䤥萔餻饍㷽馛䭯馪驜檏騡嫾騯䮐馼䮽䮗鍽塲堢 +0x96a0: 硄棅㵽鑘㤧慐愇鱏鱓鱻鰵鰐魿鯏鮟鴡䲮鸘䲰鴌 +0x96c0: 鶥蒽藼䔳萠藮秢䤭㵢鏛銾鍈碹鉷鑍俤㑀 +0x96e0: 遤砽硔碶硋㚚佲濚濙 瀞吔垻壳垊鴖埗焴㒯燫  +0x9740: 愌嫎娋䊼㜬䭻鎻鎸葲妔綨鋥珢㻩璴 +0x9760: 㻡櫘珳珻㻖瑈炥銄珦鍟錱鎆䤵煫 +0x97a0: 嚤唂秄緾䔮鐁㜊妰媡㛢㚰鉟婹鍴 +0x97c0: 㳍䪖㦊僴㵩㵌煵䋻渏䓫浗灧沯㳖渂漌㵯畑㚼㓈䚀㻚䡱姄 +0x97e0: 鉮䤾轁堒埈㛖烾梹楧樚萾䓟䓎 +0x9840: 漗茽菭妉媂婡婱㜭姯㛇熎鎐暚婮娫樫 +0x9860: 焝侰峂樌炦焳㶥泟繥姫崯㷳彜綤萦 +0x98a0: 咅坾㿥瀃嵰玏糓俈翧狍猐猸猹獁獈㺩遬燵 +0x98c0: 珡臶㻊県㻑沢国琙琞琟㻢㻰㻴㻺瓓㼎㽓畂畭畲疍㽼痈痜㿀癍㿗癴㿜発 +0x98e0: 熈嘣覀塩䀝睃䀹条䁅㗛瞘䁪䁯属瞾矋売砘点砜䂨砹硇硑硦葈礳栃礲䄃 +0x9940: 䄉禑禙辻稆込䅧窑䆲窼艹䇄竏竛䇏両筢筬筻簒簛䉠䉺类粜䊌粸䊔糭输烀 +0x9960: 総緔緐緽羮羴犟䎗耠耥笹耮耱联㷌垴炠肷胩䏭脌猪脎脒畠脔䐁㬹腖腙腚 +0x99a0: 䐓堺腼膄䐥膓䐭膥埯臁臤艔䒏芦艶苊苘苿䒰荗险榊萅烵葤惣蒈䔄蒾蓡蓸 +0x99c0: 蔐蔸蕒䔻蕯蕰藠䕷虲蚒蚲蛯际螋䘆䘗袮裿褤襇覑訩訸誔誴豑賔賲贜䞘塟 +0x99e0: 跃䟭仮踺嗘坔蹱嗵躰䠷軎転軤軭軲辷迁迊迌逳駄䢭飠鈓䤞鈨鉘鉫銱銮銿 +0x9a40: 鋣鋫鋳鋴鋽鍃鎄鎭䥅䥑麿鐗匁鐝鐭鐾䥪鑔鑹锭関䦧间阳䧥枠䨤靀䨵鞲韂噔 +0x9a60: 
䫤惨颹䬙飱塄餎餙冴餜餷饂饝饢䭰駅䮝騼鬏窃魩鮁鯝鯱鯴䱭鰠㝯鵉鰺 +0x9aa0: 黾噐鶓鶽鷀鷼银辶鹻麬麱麽黆铜黢黱黸竈齄椚铃妬塀铁㞹 +0x9ac0: 块煳呪咞惧噺楕鰯螥 +0x9ae0: 尠帋朞㙇卤蒭讁乸炻 +0x9b40: 拃熘桕槩㛈苽覥辠鞸顇骽 +0x9b60:  徱晈暿   墵朎 +0x9ba0: 椘䣐䪸凒妟㮾垈㦛 +0x9bc0: 㝢譞 爉奥軚劏圿煱喼 㑳 +0x9be0: 䜘偦㓻 䝼垡煑 遖譢 +0x9c40: 嵛 諪䯀鑥憕娧 嚹乪陖涏㘘襷 +0x9c60:  筂 穅 騦㙟禃崬 䛐画补墶 +0x9ca0: 㜜㱔銁錰氹钟蠧裵溸㦤㚹 䔿暶 +0x9cc0: 襃囖䃟㦡熭荦 䲷筃祾澵樃 +0x9ce0: 厢鎿栶靝嶅圕頣嶫斾槕叒㰑朶 +0x9d40: 㗊䣺揦砈鉕䏲䏟姸  㷷 +0x9d60: 运犏嚋纟䲤镇熢䶑递䶜达嗁 +0x9da0: 辺边䔉繿潖檱仪㓤㜺躀㷫亚嚿踎 +0x9dc0: 孭揞 攰嘭吚㷆䱽嘢嘞罉奵蝰东脗鵞贘瘻鱅癎 +0x9de0: 瞹鍅吲腈苷嘥脲萘肽嗪祢噃吖㗎嘅嗱曱㘭甴嗰喺咗啲廐 +0x9e40: 麫絚嗞抝靭咔賍燶酶揼掹揾啩鱲冚㓟冧呍唞唓癦踭疱肶蠄螆 +0x9e60: 裇膶萜䓬猄宐茋噻酰鈈牦䝎䃺 +0x9ea0: 鱝攟䣳 熣纎鵐业丄㕷嬍沲卧㚬㧜卽㚥墚舭 +0x9ec0: 呋垪㩒獴䴉鯭䱛葜挮紥㨪逈勌㹴㙺䗩 +0x9ee0: 癀嫰硺䞶墧䂿噼鮋嵴癔麅䳡 㟻愙噝垧刴㖭 鵼 +0x9f40: 籖鬹埞屓擓蚭凾嶎霃麁遌笟鬂峑箣扨挵髿篏鬪籾 +0x9f60: 籂粆鰕篼鬉 鰛齚啳寃俽麘俲剠㸆勑坧偖妷帒韈鶫轜呩鞴饀鞺匬愰 +0x9fa0: 椬叚鰊鴂䰻陁榀傦畆駚剳酙隁酜酑捿櫊嘑醎畺抅獏籰 +0x9fc0: 盖鮝个莾衂届槀 坺刟巵从氱伹咜哚劚趂㗾 㗳歒酼龥鮗頮 +0x9fe0: 颴骺麨麄煺笔毺蠘罸嘠蹷齓跔蹏鸜踁抂踨蹵竓稾磘泪詧瘇 +0xa040: 鼦泎蟖痃硓贌狢獱謭猂瓱賫蘯徺袠䒷詾惽癧髗鵄鍮 +0xa060: 鮏蟵 賷猬霡鮰㗖犲䰇籑饊慙䰄麖慽坟慯抦戹 㩜懢厪捤栂㗒 +0xa0a0: 嵗迚僙礆匲阸䁥矾糂糚稭聦聣絍甅瓲覔舚朌聢聛瓰 +0xa0c0: 脃眤覉畓螩蟎臈螌詉貭譃眫瓸蓚㘵榲趦覩 涹蟁瓧㷛煶悤憜㳑 +0xa0e0: 恷罱 惩䭾删㰘峁䕢嬟齐麦 +0xa140:  ,、。.‧;:?!︰…‥﹐﹑﹒·﹔﹕﹖﹗|–︱—︳╴︴﹏()︵ +0xa160: ︶{}︷︸〔〕︹︺【】︻︼《》︽︾〈〉︿﹀「」﹁﹂『』﹃﹄﹙﹚ +0xa1a0: ﹛﹜﹝﹞‘’“”〝〞‵′#&*※§〃○●△▲◎☆★◇◆□■▽▼ +0xa1c0: ㊣℅¯ ̄_ˍ﹉﹊﹍﹎﹋﹌﹟﹠﹡+-×÷±√<>=≦≧≠∞≒≡﹢﹣ +0xa1e0: ﹤﹥﹦~∩∪⊥∠∟⊿㏒㏑∫∮∵∴♀♂⊕⊙↑↓←→↖↗↙↘∥∣/ +0xa240: \∕﹨$¥〒¢£%@℃℉﹩﹪﹫㏕㎜㎝㎞㏎㎡㎎㎏㏄°兙兛兞兝兡兣嗧 +0xa260: 瓩糎▁▂▃▄▅▆▇█▏▎▍▌▋▊▉┼┴┬┤├▔─│▕┌┐└┘╭ +0xa2a0: ╮╰╯ ◢◣◥◤╱╲╳0123456789ⅠⅡⅢⅣⅤⅥⅦ +0xa2c0: ⅧⅨⅩ〡〢〣〤〥〦〧〨〩〸〹〺ABCDEFGHIJKLMNOPQ +0xa2e0: RSTUVWXYZabcdefghijklmnopqrstuv +0xa340: wxyzΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδ +0xa360: εζηθικλμνξοπρστυφχψωㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏ +0xa3a0: ㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩ˙ˉˊˇˋ +0xa3e0: € +0xa440: 一乙丁七乃九了二人儿入八几刀刁力匕十卜又三下丈上丫丸凡久么也乞于 +0xa460: 
亡兀刃勺千叉口土士夕大女子孑孓寸小尢尸山川工己已巳巾干廾弋弓才 +0xa4a0: 丑丐不中丰丹之尹予云井互五亢仁什仃仆仇仍今介仄元允內六兮公冗凶 +0xa4c0: 分切刈勻勾勿化匹午升卅卞厄友及反壬天夫太夭孔少尤尺屯巴幻廿弔引心 +0xa4e0: 戈戶手扎支文斗斤方日曰月木欠止歹毋比毛氏水火爪父爻片牙牛犬王丙 +0xa540: 世丕且丘主乍乏乎以付仔仕他仗代令仙仞充兄冉冊冬凹出凸刊加功包匆北 +0xa560: 匝仟半卉卡占卯卮去可古右召叮叩叨叼司叵叫另只史叱台句叭叻四囚外 +0xa5a0: 央失奴奶孕它尼巨巧左市布平幼弁弘弗必戊打扔扒扑斥旦朮本未末札正 +0xa5c0: 母民氐永汁汀氾犯玄玉瓜瓦甘生用甩田由甲申疋白皮皿目矛矢石示禾穴立 +0xa5e0: 丞丟乒乓乩亙交亦亥仿伉伙伊伕伍伐休伏仲件任仰仳份企伋光兇兆先全 +0xa640: 共再冰列刑划刎刖劣匈匡匠印危吉吏同吊吐吁吋各向名合吃后吆吒因回囝 +0xa660: 圳地在圭圬圯圩夙多夷夸妄奸妃好她如妁字存宇守宅安寺尖屹州帆并年 +0xa6a0: 式弛忙忖戎戌戍成扣扛托收早旨旬旭曲曳有朽朴朱朵次此死氖汝汗汙江 +0xa6c0: 池汐汕污汛汍汎灰牟牝百竹米糸缶羊羽老考而耒耳聿肉肋肌臣自至臼舌舛 +0xa6e0: 舟艮色艾虫血行衣西阡串亨位住佇佗佞伴佛何估佐佑伽伺伸佃佔似但佣 +0xa740: 作你伯低伶余佝佈佚兌克免兵冶冷別判利刪刨劫助努劬匣即卵吝吭吞吾否 +0xa760: 呎吧呆呃吳呈呂君吩告吹吻吸吮吵吶吠吼呀吱含吟听囪困囤囫坊坑址坍 +0xa7a0: 均坎圾坐坏圻壯夾妝妒妨妞妣妙妖妍妤妓妊妥孝孜孚孛完宋宏尬局屁尿 +0xa7c0: 尾岐岑岔岌巫希序庇床廷弄弟彤形彷役忘忌志忍忱快忸忪戒我抄抗抖技扶 +0xa7e0: 抉扭把扼找批扳抒扯折扮投抓抑抆改攻攸旱更束李杏材村杜杖杞杉杆杠 +0xa840: 杓杗步每求汞沙沁沈沉沅沛汪決沐汰沌汨沖沒汽沃汲汾汴沆汶沍沔沘沂灶 +0xa860: 灼災灸牢牡牠狄狂玖甬甫男甸皂盯矣私秀禿究系罕肖肓肝肘肛肚育良芒 +0xa8a0: 芋芍見角言谷豆豕貝赤走足身車辛辰迂迆迅迄巡邑邢邪邦那酉釆里防阮 +0xa8c0: 阱阪阬並乖乳事些亞享京佯依侍佳使佬供例來侃佰併侈佩佻侖佾侏侑佺兔 +0xa8e0: 兒兕兩具其典冽函刻券刷刺到刮制剁劾劻卒協卓卑卦卷卸卹取叔受味呵 +0xa940: 咖呸咕咀呻呷咄咒咆呼咐呱呶和咚呢周咋命咎固垃坷坪坩坡坦坤坼夜奉奇 +0xa960: 奈奄奔妾妻委妹妮姑姆姐姍始姓姊妯妳姒姅孟孤季宗定官宜宙宛尚屈居 +0xa9a0: 屆岷岡岸岩岫岱岳帘帚帖帕帛帑幸庚店府底庖延弦弧弩往征彿彼忝忠忽 +0xa9c0: 念忿怏怔怯怵怖怪怕怡性怩怫怛或戕房戾所承拉拌拄抿拂抹拒招披拓拔拋 +0xa9e0: 拈抨抽押拐拙拇拍抵拚抱拘拖拗拆抬拎放斧於旺昔易昌昆昂明昀昏昕昊 +0xaa40: 昇服朋杭枋枕東果杳杷枇枝林杯杰板枉松析杵枚枓杼杪杲欣武歧歿氓氛泣 +0xaa60: 注泳沱泌泥河沽沾沼波沫法泓沸泄油況沮泗泅泱沿治泡泛泊沬泯泜泖泠 +0xaaa0: 炕炎炒炊炙爬爭爸版牧物狀狎狙狗狐玩玨玟玫玥甽疝疙疚的盂盲直知矽 +0xaac0: 社祀祁秉秈空穹竺糾罔羌羋者肺肥肢肱股肫肩肴肪肯臥臾舍芳芝芙芭芽芟 +0xaae0: 芹花芬芥芯芸芣芰芾芷虎虱初表軋迎返近邵邸邱邶采金長門阜陀阿阻附 +0xab40: 陂隹雨青非亟亭亮信侵侯便俠俑俏保促侶俘俟俊俗侮俐俄係俚俎俞侷兗冒 +0xab60: 冑冠剎剃削前剌剋則勇勉勃勁匍南卻厚叛咬哀咨哎哉咸咦咳哇哂咽咪品 +0xaba0: 哄哈咯咫咱咻咩咧咿囿垂型垠垣垢城垮垓奕契奏奎奐姜姘姿姣姨娃姥姪 +0xabc0: 姚姦威姻孩宣宦室客宥封屎屏屍屋峙峒巷帝帥帟幽庠度建弈弭彥很待徊律 +0xabe0: 徇後徉怒思怠急怎怨恍恰恨恢恆恃恬恫恪恤扁拜挖按拼拭持拮拽指拱拷 +0xac40: 拯括拾拴挑挂政故斫施既春昭映昧是星昨昱昤曷柿染柱柔某柬架枯柵柩柯 +0xac60: 柄柑枴柚查枸柏柞柳枰柙柢柝柒歪殃殆段毒毗氟泉洋洲洪流津洌洱洞洗 +0xaca0: 活洽派洶洛泵洹洧洸洩洮洵洎洫炫為炳炬炯炭炸炮炤爰牲牯牴狩狠狡玷 +0xacc0: 珊玻玲珍珀玳甚甭畏界畎畋疫疤疥疢疣癸皆皇皈盈盆盃盅省盹相眉看盾盼 +0xace0: 眇矜砂研砌砍祆祉祈祇禹禺科秒秋穿突竿竽籽紂紅紀紉紇約紆缸美羿耄 +0xad40: 耐耍耑耶胖胥胚胃胄背胡胛胎胞胤胝致舢苧范茅苣苛苦茄若茂茉苒苗英茁 +0xad60: 苜苔苑苞苓苟苯茆虐虹虻虺衍衫要觔計訂訃貞負赴赳趴軍軌述迦迢迪迥 +0xada0: 迭迫迤迨郊郎郁郃酋酊重閂限陋陌降面革韋韭音頁風飛食首香乘亳倌倍 +0xadc0: 倣俯倦倥俸倩倖倆值借倚倒們俺倀倔倨俱倡個候倘俳修倭倪俾倫倉兼冤冥 +0xade0: 
冢凍凌准凋剖剜剔剛剝匪卿原厝叟哨唐唁唷哼哥哲唆哺唔哩哭員唉哮哪 +0xae40: 哦唧唇哽唏圃圄埂埔埋埃堉夏套奘奚娑娘娜娟娛娓姬娠娣娩娥娌娉孫屘宰 +0xae60: 害家宴宮宵容宸射屑展屐峭峽峻峪峨峰島崁峴差席師庫庭座弱徒徑徐恙 +0xaea0: 恣恥恐恕恭恩息悄悟悚悍悔悌悅悖扇拳挈拿捎挾振捕捂捆捏捉挺捐挽挪 +0xaec0: 挫挨捍捌效敉料旁旅時晉晏晃晒晌晅晁書朔朕朗校核案框桓根桂桔栩梳栗 +0xaee0: 桌桑栽柴桐桀格桃株桅栓栘桁殊殉殷氣氧氨氦氤泰浪涕消涇浦浸海浙涓 +0xaf40: 浬涉浮浚浴浩涌涊浹涅浥涔烊烘烤烙烈烏爹特狼狹狽狸狷玆班琉珮珠珪珞 +0xaf60: 畔畝畜畚留疾病症疲疳疽疼疹痂疸皋皰益盍盎眩真眠眨矩砰砧砸砝破砷 +0xafa0: 砥砭砠砟砲祕祐祠祟祖神祝祗祚秤秣秧租秦秩秘窄窈站笆笑粉紡紗紋紊 +0xafc0: 素索純紐紕級紜納紙紛缺罟羔翅翁耆耘耕耙耗耽耿胱脂胰脅胭胴脆胸胳脈 +0xafe0: 能脊胼胯臭臬舀舐航舫舨般芻茫荒荔荊茸荐草茵茴荏茲茹茶茗荀茱茨荃 +0xb040: 虔蚊蚪蚓蚤蚩蚌蚣蚜衰衷袁袂衽衹記訐討訌訕訊託訓訖訏訑豈豺豹財貢起 +0xb060: 躬軒軔軏辱送逆迷退迺迴逃追逅迸邕郡郝郢酒配酌釘針釗釜釙閃院陣陡 +0xb0a0: 陛陝除陘陞隻飢馬骨高鬥鬲鬼乾偺偽停假偃偌做偉健偶偎偕偵側偷偏倏 +0xb0c0: 偯偭兜冕凰剪副勒務勘動匐匏匙匿區匾參曼商啪啦啄啞啡啃啊唱啖問啕唯 +0xb0e0: 啤唸售啜唬啣唳啁啗圈國圉域堅堊堆埠埤基堂堵執培夠奢娶婁婉婦婪婀 +0xb140: 娼婢婚婆婊孰寇寅寄寂宿密尉專將屠屜屝崇崆崎崛崖崢崑崩崔崙崤崧崗巢 +0xb160: 常帶帳帷康庸庶庵庾張強彗彬彩彫得徙從徘御徠徜恿患悉悠您惋悴惦悽 +0xb1a0: 情悻悵惜悼惘惕惆惟悸惚惇戚戛扈掠控捲掖探接捷捧掘措捱掩掉掃掛捫 +0xb1c0: 推掄授掙採掬排掏掀捻捩捨捺敝敖救教敗啟敏敘敕敔斜斛斬族旋旌旎晝晚 +0xb1e0: 晤晨晦晞曹勗望梁梯梢梓梵桿桶梱梧梗械梃棄梭梆梅梔條梨梟梡梂欲殺 +0xb240: 毫毬氫涎涼淳淙液淡淌淤添淺清淇淋涯淑涮淞淹涸混淵淅淒渚涵淚淫淘淪 +0xb260: 深淮淨淆淄涪淬涿淦烹焉焊烽烯爽牽犁猜猛猖猓猙率琅琊球理現琍瓠瓶 +0xb2a0: 瓷甜產略畦畢異疏痔痕疵痊痍皎盔盒盛眷眾眼眶眸眺硫硃硎祥票祭移窒 +0xb2c0: 窕笠笨笛第符笙笞笮粒粗粕絆絃統紮紹紼絀細紳組累終紲紱缽羞羚翌翎習 +0xb2e0: 耜聊聆脯脖脣脫脩脰脤舂舵舷舶船莎莞莘荸莢莖莽莫莒莊莓莉莠荷荻荼 +0xb340: 莆莧處彪蛇蛀蚶蛄蚵蛆蛋蚱蚯蛉術袞袈被袒袖袍袋覓規訪訝訣訥許設訟訛 +0xb360: 訢豉豚販責貫貨貪貧赧赦趾趺軛軟這逍通逗連速逝逐逕逞造透逢逖逛途 +0xb3a0: 部郭都酗野釵釦釣釧釭釩閉陪陵陳陸陰陴陶陷陬雀雪雩章竟頂頃魚鳥鹵 +0xb3c0: 鹿麥麻傢傍傅備傑傀傖傘傚最凱割剴創剩勞勝勛博厥啻喀喧啼喊喝喘喂喜 +0xb3e0: 喪喔喇喋喃喳單喟唾喲喚喻喬喱啾喉喫喙圍堯堪場堤堰報堡堝堠壹壺奠 +0xb440: 婷媚婿媒媛媧孳孱寒富寓寐尊尋就嵌嵐崴嵇巽幅帽幀幃幾廊廁廂廄弼彭復 +0xb460: 循徨惑惡悲悶惠愜愣惺愕惰惻惴慨惱愎惶愉愀愒戟扉掣掌描揀揩揉揆揍 +0xb4a0: 插揣提握揖揭揮捶援揪換摒揚揹敞敦敢散斑斐斯普晰晴晶景暑智晾晷曾 +0xb4c0: 替期朝棺棕棠棘棗椅棟棵森棧棹棒棲棣棋棍植椒椎棉棚楮棻款欺欽殘殖殼 +0xb4e0: 毯氮氯氬港游湔渡渲湧湊渠渥渣減湛湘渤湖湮渭渦湯渴湍渺測湃渝渾滋 +0xb540: 溉渙湎湣湄湲湩湟焙焚焦焰無然煮焜牌犄犀猶猥猴猩琺琪琳琢琥琵琶琴琯 +0xb560: 琛琦琨甥甦畫番痢痛痣痙痘痞痠登發皖皓皴盜睏短硝硬硯稍稈程稅稀窘 +0xb5a0: 窗窖童竣等策筆筐筒答筍筋筏筑粟粥絞結絨絕紫絮絲絡給絢絰絳善翔翕 +0xb5c0: 耋聒肅腕腔腋腑腎脹腆脾腌腓腴舒舜菩萃菸萍菠菅萋菁華菱菴著萊菰萌菌 +0xb5e0: 菽菲菊萸萎萄菜萇菔菟虛蛟蛙蛭蛔蛛蛤蛐蛞街裁裂袱覃視註詠評詞証詁 +0xb640: 詔詛詐詆訴診訶詖象貂貯貼貳貽賁費賀貴買貶貿貸越超趁跎距跋跚跑跌跛 +0xb660: 跆軻軸軼辜逮逵週逸進逶鄂郵鄉郾酣酥量鈔鈕鈣鈉鈞鈍鈐鈇鈑閔閏開閑 +0xb6a0: 間閒閎隊階隋陽隅隆隍陲隄雁雅雄集雇雯雲韌項順須飧飪飯飩飲飭馮馭 +0xb6c0: 黃黍黑亂傭債傲傳僅傾催傷傻傯僇剿剷剽募勦勤勢勣匯嗟嗨嗓嗦嗎嗜嗇嗑 +0xb6e0: 嗣嗤嗯嗚嗡嗅嗆嗥嗉園圓塞塑塘塗塚塔填塌塭塊塢塒塋奧嫁嫉嫌媾媽媼 +0xb740: 媳嫂媲嵩嵯幌幹廉廈弒彙徬微愚意慈感想愛惹愁愈慎慌慄慍愾愴愧愍愆愷 +0xb760: 戡戢搓搾搞搪搭搽搬搏搜搔損搶搖搗搆敬斟新暗暉暇暈暖暄暘暍會榔業 +0xb7a0: 
楚楷楠楔極椰概楊楨楫楞楓楹榆楝楣楛歇歲毀殿毓毽溢溯滓溶滂源溝滇 +0xb7c0: 滅溥溘溼溺溫滑準溜滄滔溪溧溴煎煙煩煤煉照煜煬煦煌煥煞煆煨煖爺牒猷 +0xb7e0: 獅猿猾瑯瑚瑕瑟瑞瑁琿瑙瑛瑜當畸瘀痰瘁痲痱痺痿痴痳盞盟睛睫睦睞督 +0xb840: 睹睪睬睜睥睨睢矮碎碰碗碘碌碉硼碑碓硿祺祿禁萬禽稜稚稠稔稟稞窟窠筷 +0xb860: 節筠筮筧粱粳粵經絹綑綁綏絛置罩罪署義羨群聖聘肆肄腱腰腸腥腮腳腫 +0xb8a0: 腹腺腦舅艇蒂葷落萱葵葦葫葉葬葛萼萵葡董葩葭葆虞虜號蛹蜓蜈蜇蜀蛾 +0xb8c0: 蛻蜂蜃蜆蜊衙裟裔裙補裘裝裡裊裕裒覜解詫該詳試詩詰誇詼詣誠話誅詭詢 +0xb8e0: 詮詬詹詻訾詨豢貊貉賊資賈賄貲賃賂賅跡跟跨路跳跺跪跤跦躲較載軾輊 +0xb940: 辟農運遊道遂達逼違遐遇遏過遍遑逾遁鄒鄗酬酪酩釉鈷鉗鈸鈽鉀鈾鉛鉋鉤 +0xb960: 鉑鈴鉉鉍鉅鈹鈿鉚閘隘隔隕雍雋雉雊雷電雹零靖靴靶預頑頓頊頒頌飼飴 +0xb9a0: 飽飾馳馱馴髡鳩麂鼎鼓鼠僧僮僥僖僭僚僕像僑僱僎僩兢凳劃劂匱厭嗾嘀 +0xb9c0: 嘛嘗嗽嘔嘆嘉嘍嘎嗷嘖嘟嘈嘐嗶團圖塵塾境墓墊塹墅塽壽夥夢夤奪奩嫡嫦 +0xb9e0: 嫩嫗嫖嫘嫣孵寞寧寡寥實寨寢寤察對屢嶄嶇幛幣幕幗幔廓廖弊彆彰徹慇 +0xba40: 愿態慷慢慣慟慚慘慵截撇摘摔撤摸摟摺摑摧搴摭摻敲斡旗旖暢暨暝榜榨榕 +0xba60: 槁榮槓構榛榷榻榫榴槐槍榭槌榦槃榣歉歌氳漳演滾漓滴漩漾漠漬漏漂漢 +0xbaa0: 滿滯漆漱漸漲漣漕漫漯澈漪滬漁滲滌滷熔熙煽熊熄熒爾犒犖獄獐瑤瑣瑪 +0xbac0: 瑰瑭甄疑瘧瘍瘋瘉瘓盡監瞄睽睿睡磁碟碧碳碩碣禎福禍種稱窪窩竭端管箕 +0xbae0: 箋筵算箝箔箏箸箇箄粹粽精綻綰綜綽綾綠緊綴網綱綺綢綿綵綸維緒緇綬 +0xbb40: 罰翠翡翟聞聚肇腐膀膏膈膊腿膂臧臺與舔舞艋蓉蒿蓆蓄蒙蒞蒲蒜蓋蒸蓀蓓 +0xbb60: 蒐蒼蓑蓊蜿蜜蜻蜢蜥蜴蜘蝕蜷蜩裳褂裴裹裸製裨褚裯誦誌語誣認誡誓誤 +0xbba0: 說誥誨誘誑誚誧豪貍貌賓賑賒赫趙趕跼輔輒輕輓辣遠遘遜遣遙遞遢遝遛 +0xbbc0: 鄙鄘鄞酵酸酷酴鉸銀銅銘銖鉻銓銜銨鉼銑閡閨閩閣閥閤隙障際雌雒需靼鞅 +0xbbe0: 韶頗領颯颱餃餅餌餉駁骯骰髦魁魂鳴鳶鳳麼鼻齊億儀僻僵價儂儈儉儅凜 +0xbc40: 劇劈劉劍劊勰厲嘮嘻嘹嘲嘿嘴嘩噓噎噗噴嘶嘯嘰墀墟增墳墜墮墩墦奭嬉嫻 +0xbc60: 嬋嫵嬌嬈寮寬審寫層履嶝嶔幢幟幡廢廚廟廝廣廠彈影德徵慶慧慮慝慕憂 +0xbca0: 慼慰慫慾憧憐憫憎憬憚憤憔憮戮摩摯摹撞撲撈撐撰撥撓撕撩撒撮播撫撚 +0xbcc0: 撬撙撢撳敵敷數暮暫暴暱樣樟槨樁樞標槽模樓樊槳樂樅槭樑歐歎殤毅毆漿 +0xbce0: 潼澄潑潦潔澆潭潛潸潮澎潺潰潤澗潘滕潯潠潟熟熬熱熨牖犛獎獗瑩璋璃 +0xbd40: 瑾璀畿瘠瘩瘟瘤瘦瘡瘢皚皺盤瞎瞇瞌瞑瞋磋磅確磊碾磕碼磐稿稼穀稽稷稻 +0xbd60: 窯窮箭箱範箴篆篇篁箠篌糊締練緯緻緘緬緝編緣線緞緩綞緙緲緹罵罷羯 +0xbda0: 翩耦膛膜膝膠膚膘蔗蔽蔚蓮蔬蔭蔓蔑蔣蔡蔔蓬蔥蓿蔆螂蝴蝶蝠蝦蝸蝨蝙 +0xbdc0: 蝗蝌蝓衛衝褐複褒褓褕褊誼諒談諄誕請諸課諉諂調誰論諍誶誹諛豌豎豬賠 +0xbde0: 賞賦賤賬賭賢賣賜質賡赭趟趣踫踐踝踢踏踩踟踡踞躺輝輛輟輩輦輪輜輞 +0xbe40: 輥適遮遨遭遷鄰鄭鄧鄱醇醉醋醃鋅銻銷鋪銬鋤鋁銳銼鋒鋇鋰銲閭閱霄霆震 +0xbe60: 霉靠鞍鞋鞏頡頫頜颳養餓餒餘駝駐駟駛駑駕駒駙骷髮髯鬧魅魄魷魯鴆鴉 +0xbea0: 鴃麩麾黎墨齒儒儘儔儐儕冀冪凝劑劓勳噙噫噹噩噤噸噪器噥噱噯噬噢噶 +0xbec0: 壁墾壇壅奮嬝嬴學寰導彊憲憑憩憊懍憶憾懊懈戰擅擁擋撻撼據擄擇擂操撿 +0xbee0: 擒擔撾整曆曉暹曄曇暸樽樸樺橙橫橘樹橄橢橡橋橇樵機橈歙歷氅濂澱澡 +0xbf40: 濃澤濁澧澳激澹澶澦澠澴熾燉燐燒燈燕熹燎燙燜燃燄獨璜璣璘璟璞瓢甌甍 +0xbf60: 瘴瘸瘺盧盥瞠瞞瞟瞥磨磚磬磧禦積穎穆穌穋窺篙簑築篤篛篡篩篦糕糖縊 +0xbfa0: 縑縈縛縣縞縝縉縐罹羲翰翱翮耨膳膩膨臻興艘艙蕊蕙蕈蕨蕩蕃蕉蕭蕪蕞 +0xbfc0: 螃螟螞螢融衡褪褲褥褫褡親覦諦諺諫諱謀諜諧諮諾謁謂諷諭諳諶諼豫豭貓 +0xbfe0: 賴蹄踱踴蹂踹踵輻輯輸輳辨辦遵遴選遲遼遺鄴醒錠錶鋸錳錯錢鋼錫錄錚 +0xc040: 錐錦錡錕錮錙閻隧隨險雕霎霑霖霍霓霏靛靜靦鞘頰頸頻頷頭頹頤餐館餞餛 +0xc060: 餡餚駭駢駱骸骼髻髭鬨鮑鴕鴣鴦鴨鴒鴛默黔龍龜優償儡儲勵嚎嚀嚐嚅嚇 +0xc0a0: 嚏壕壓壑壎嬰嬪嬤孺尷屨嶼嶺嶽嶸幫彌徽應懂懇懦懋戲戴擎擊擘擠擰擦 +0xc0c0: 擬擱擢擭斂斃曙曖檀檔檄檢檜櫛檣橾檗檐檠歜殮毚氈濘濱濟濠濛濤濫濯澀 +0xc0e0: 濬濡濩濕濮濰燧營燮燦燥燭燬燴燠爵牆獰獲璩環璦璨癆療癌盪瞳瞪瞰瞬 +0xc140: 
瞧瞭矯磷磺磴磯礁禧禪穗窿簇簍篾篷簌篠糠糜糞糢糟糙糝縮績繆縷縲繃縫 +0xc160: 總縱繅繁縴縹繈縵縿縯罄翳翼聱聲聰聯聳臆臃膺臂臀膿膽臉膾臨舉艱薪 +0xc1a0: 薄蕾薜薑薔薯薛薇薨薊虧蟀蟑螳蟒蟆螫螻螺蟈蟋褻褶襄褸褽覬謎謗謙講 +0xc1c0: 謊謠謝謄謐豁谿豳賺賽購賸賻趨蹉蹋蹈蹊轄輾轂轅輿避遽還邁邂邀鄹醣醞 +0xc1e0: 醜鍍鎂錨鍵鍊鍥鍋錘鍾鍬鍛鍰鍚鍔闊闋闌闈闆隱隸雖霜霞鞠韓顆颶餵騁 +0xc240: 駿鮮鮫鮪鮭鴻鴿麋黏點黜黝黛鼾齋叢嚕嚮壙壘嬸彞懣戳擴擲擾攆擺擻擷斷 +0xc260: 曜朦檳檬櫃檻檸櫂檮檯歟歸殯瀉瀋濾瀆濺瀑瀏燻燼燾燸獷獵璧璿甕癖癘 +0xc2a0: 癒瞽瞿瞻瞼礎禮穡穢穠竄竅簫簧簪簞簣簡糧織繕繞繚繡繒繙罈翹翻職聶 +0xc2c0: 臍臏舊藏薩藍藐藉薰薺薹薦蟯蟬蟲蟠覆覲觴謨謹謬謫豐贅蹙蹣蹦蹤蹟蹕軀 +0xc2e0: 轉轍邇邃邈醫醬釐鎔鎊鎖鎢鎳鎮鎬鎰鎘鎚鎗闔闖闐闕離雜雙雛雞霤鞣鞦 +0xc340: 鞭韹額顏題顎顓颺餾餿餽餮馥騎髁鬃鬆魏魎魍鯊鯉鯽鯈鯀鵑鵝鵠黠鼕鼬儳 +0xc360: 嚥壞壟壢寵龐廬懲懷懶懵攀攏曠曝櫥櫝櫚櫓瀛瀟瀨瀚瀝瀕瀘爆爍牘犢獸 +0xc3a0: 獺璽瓊瓣疇疆癟癡矇礙禱穫穩簾簿簸簽簷籀繫繭繹繩繪羅繳羶羹羸臘藩 +0xc3c0: 藝藪藕藤藥藷蟻蠅蠍蟹蟾襠襟襖襞譁譜識證譚譎譏譆譙贈贊蹼蹲躇蹶蹬蹺 +0xc3e0: 蹴轔轎辭邊邋醱醮鏡鏑鏟鏃鏈鏜鏝鏖鏢鏍鏘鏤鏗鏨關隴難霪霧靡韜韻類 +0xc440: 願顛颼饅饉騖騙鬍鯨鯧鯖鯛鶉鵡鵲鵪鵬麒麗麓麴勸嚨嚷嚶嚴嚼壤孀孃孽寶 +0xc460: 巉懸懺攘攔攙曦朧櫬瀾瀰瀲爐獻瓏癢癥礦礪礬礫竇競籌籃籍糯糰辮繽繼 +0xc4a0: 纂罌耀臚艦藻藹蘑藺蘆蘋蘇蘊蠔蠕襤覺觸議譬警譯譟譫贏贍躉躁躅躂醴 +0xc4c0: 釋鐘鐃鏽闡霰飄饒饑馨騫騰騷騵鰓鰍鹹麵黨鼯齟齣齡儷儸囁囀囂夔屬巍懼 +0xc4e0: 懾攝攜斕曩櫻欄櫺殲灌爛犧瓖瓔癩矓籐纏續羼蘗蘭蘚蠣蠢蠡蠟襪襬覽譴 +0xc540: 護譽贓躊躍躋轟辯醺鐮鐳鐵鐺鐸鐲鐫闢霸霹露響顧顥饗驅驃驀騾髏魔魑鰭 +0xc560: 鰥鶯鶴鷂鶸麝黯鼙齜齦齧儼儻囈囊囉孿巔巒彎懿攤權歡灑灘玀瓤疊癮癬 +0xc5a0: 禳籠籟聾聽臟襲襯觼讀贖贗躑躓轡酈鑄鑑鑒霽霾韃韁顫饕驕驍髒鬚鱉鰱 +0xc5c0: 鰾鰻鷓鷗鼴齬齪龔囌巖戀攣攫攪曬欐瓚竊籤籣籥纓纖纔臢蘸蘿蠱變邐邏鑣 +0xc5e0: 鑠鑤靨顯饜驚驛驗髓體髑鱔鱗鱖鷥麟黴囑壩攬灞癱癲矗罐羈蠶蠹衢讓讒 +0xc640: 讖艷贛釀鑪靂靈靄韆顰驟鬢魘鱟鷹鷺鹼鹽鼇齷齲廳欖灣籬籮蠻觀躡釁鑲鑰 +0xc660: 顱饞髖鬣黌灤矚讚鑷韉驢驥纜讜躪釅鑽鑾鑼鱷鱸黷豔鑿鸚爨驪鬱鸛鸞籲 +0xc6a0: ①②③④⑤⑥⑦⑧⑨⑩⑴⑵⑶⑷⑸⑹⑺⑻⑼⑽ⅰⅱⅲⅳⅴⅵⅶⅷⅸⅹ丶 +0xc6c0: 丿亅亠冂冖冫勹匸卩厶夊宀巛⼳广⼵彐彡攴⽆疒⽨辵⾪¨ˆヽヾゝゞ +0xc6e0: 々〆〇ー[]✽ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじ +0xc740: すずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへ +0xc760: べぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんァアィイ +0xc7a0: ゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッ +0xc7c0: ツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャ +0xc7e0: ヤュユョヨラリルレロヮワヰヱヲンヴヵヶАБВГДЕЁЖЗИЙК +0xc840: ЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзий +0xc860: клмнопрстуфхцчшщъыьэюя⇧↸↹乚刂 +0xc8a0: 冈 +0xc8c0: ¬¦'"㈱№℡゛゜⺀⺄⺆⺇⺈⺊⺌⺍⺕⺜ +0xc8e0: ⺝⺥⺧⺪⺬⺮⺶⺼⺾⻆⻊⻌⻍⻏⻖⻗⻞⻣ ʃɐɛɔɵœøŋʊɪ +0xc940: 乂乜凵匚厂万丌乇亍囗兀屮彳丏冇与丮亓仂仉仈冘勼卬厹圠夃夬尐巿旡殳 +0xc960: 毌气爿丱丼仨仜仩仡仝仚刌匜卌圢圣夗夯宁宄尒尻屴屳帄庀庂忉戉扐氕 +0xc9a0: 氶汃氿氻犮犰玊禸肊阞伎优伬仵伔仱伀价伈伝伂伅伢伓伄仴伒冱刓刉刐 +0xc9c0: 劦匢匟卍厊吇囡囟圮圪圴夼妀奼妅奻奾奷奿孖尕尥屼屺屻屾巟幵庄异弚彴 +0xc9e0: 忕忔忏扜扞扤扡扦扢扙扠扚扥旯旮朾朹朸朻机朿朼朳氘汆汒汜汏汊汔汋 +0xca40: 汌灱牞犴犵玎甪癿穵网艸艼芀艽艿虍襾邙邗邘邛邔阢阤阠阣佖伻佢佉体佤 +0xca60: 伾佧佒佟佁佘伭伳伿佡冏冹刜刞刡劭劮匉卣卲厎厏吰吷吪呔呅吙吜吥吘 +0xcaa0: 吽呏呁吨吤呇囮囧囥坁坅坌坉坋坒夆奀妦妘妠妗妎妢妐妏妧妡宎宒尨尪 +0xcac0: 
岍岏岈岋岉岒岊岆岓岕巠帊帎庋庉庌庈庍弅弝彸彶忒忑忐忭忨忮忳忡忤忣 +0xcae0: 忺忯忷忻怀忴戺抃抌抎抏抔抇扱扻扺扰抁抈扷扽扲扴攷旰旴旳旲旵杅杇 +0xcb40: 杙杕杌杈杝杍杚杋毐氙氚汸汧汫沄沋沏汱汯汩沚汭沇沕沜汦汳汥汻沎灴灺 +0xcb60: 牣犿犽狃狆狁犺狅玕玗玓玔玒町甹疔疕皁礽耴肕肙肐肒肜芐芏芅芎芑芓 +0xcba0: 芊芃芄豸迉辿邟邡邥邞邧邠阰阨阯阭丳侘佼侅佽侀侇佶佴侉侄佷佌侗佪 +0xcbc0: 侚佹侁佸侐侜侔侞侒侂侕佫佮冞冼冾刵刲刳剆刱劼匊匋匼厒厔咇呿咁咑咂 +0xcbe0: 咈呫呺呾呥呬呴呦咍呯呡呠咘呣呧呤囷囹坯坲坭坫坱坰坶垀坵坻坳坴坢 +0xcc40: 坨坽夌奅妵妺姏姎妲姌姁妶妼姃姖妱妽姀姈妴姇孢孥宓宕屄屇岮岤岠岵岯 +0xcc60: 岨岬岟岣岭岢岪岧岝岥岶岰岦帗帔帙弨弢弣弤彔徂彾彽忞忥怭怦怙怲怋 +0xcca0: 怴怊怗怳怚怞怬怢怍怐怮怓怑怌怉怜戔戽抭抴拑抾抪抶拊抮抳抯抻抩抰 +0xccc0: 抸攽斨斻昉旼昄昒昈旻昃昋昍昅旽昑昐曶朊枅杬枎枒杶杻枘枆构杴枍枌杺 +0xcce0: 枟枑枙枃杽极杸杹枔欥殀歾毞氝沓泬泫泮泙沶泔沭泧沷泐泂沺泃泆泭泲 +0xcd40: 泒泝沴沊沝沀泞泀洰泍泇沰泹泏泩泑炔炘炅炓炆炄炑炖炂炚炃牪狖狋狘狉 +0xcd60: 狜狒狔狚狌狑玤玡玭玦玢玠玬玝瓝瓨甿畀甾疌疘皯盳盱盰盵矸矼矹矻矺 +0xcda0: 矷祂礿秅穸穻竻籵糽耵肏肮肣肸肵肭舠芠苀芫芚芘芛芵芧芮芼芞芺芴芨 +0xcdc0: 芡芩苂芤苃芶芢虰虯虭虮豖迒迋迓迍迖迕迗邲邴邯邳邰阹阽阼阺陃俍俅俓 +0xcde0: 侲俉俋俁俔俜俙侻侳俛俇俖侺俀侹俬剄剉勀勂匽卼厗厖厙厘咺咡咭咥哏 +0xce40: 哃茍咷咮哖咶哅哆咠呰咼咢咾呲哞咰垵垞垟垤垌垗垝垛垔垘垏垙垥垚垕壴 +0xce60: 复奓姡姞姮娀姱姝姺姽姼姶姤姲姷姛姩姳姵姠姾姴姭宨屌峐峘峌峗峋峛 +0xcea0: 峞峚峉峇峊峖峓峔峏峈峆峎峟峸巹帡帢帣帠帤庰庤庢庛庣庥弇弮彖徆怷 +0xcec0: 怹恔恲恞恅恓恇恉恛恌恀恂恟怤恄恘恦恮扂扃拏挍挋拵挎挃拫拹挏挌拸拶 +0xcee0: 挀挓挔拺挕拻拰敁敃斪斿昶昡昲昵昜昦昢昳昫昺昝昴昹昮朏朐柁柲柈枺 +0xcf40: 柜枻柸柘柀枷柅柫柤柟枵柍枳柷柶柮柣柂枹柎柧柰枲柼柆柭柌枮柦柛柺柉 +0xcf60: 柊柃柪柋欨殂殄殶毖毘毠氠氡洨洴洭洟洼洿洒洊泚洳洄洙洺洚洑洀洝浂 +0xcfa0: 洁洘洷洃洏浀洇洠洬洈洢洉洐炷炟炾炱炰炡炴炵炩牁牉牊牬牰牳牮狊狤 +0xcfc0: 狨狫狟狪狦狣玅珌珂珈珅玹玶玵玴珫玿珇玾珃珆玸珋瓬瓮甮畇畈疧疪癹盄 +0xcfe0: 眈眃眄眅眊盷盻盺矧矨砆砑砒砅砐砏砎砉砃砓祊祌祋祅祄秕种秏秖秎窀 +0xd040: 穾竑笀笁籺籸籹籿粀粁紃紈紁罘羑羍羾耇耎耏耔耷胘胇胠胑胈胂胐胅胣胙 +0xd060: 胜胊胕胉胏胗胦胍臿舡芔苙苾苹茇苨茀苕茺苫苖苴苬苡苲苵茌苻苶苰苪 +0xd0a0: 苤苠苺苳苭虷虴虼虳衁衎衧衪衩觓訄訇赲迣迡迮迠郱邽邿郕郅邾郇郋郈 +0xd0c0: 釔釓陔陏陑陓陊陎倞倅倇倓倢倰倛俵俴倳倷倬俶俷倗倜倠倧倵倯倱倎党冔 +0xd0e0: 冓凊凄凅凈凎剡剚剒剞剟剕剢勍匎厞唦哢唗唒哧哳哤唚哿唄唈哫唑唅哱 +0xd140: 唊哻哷哸哠唎唃唋圁圂埌堲埕埒垺埆垽垼垸垶垿埇埐垹埁夎奊娙娖娭娮娕 +0xd160: 娏娗娊娞娳孬宧宭宬尃屖屔峬峿峮峱峷崀峹帩帨庨庮庪庬弳弰彧恝恚恧 +0xd1a0: 恁悢悈悀悒悁悝悃悕悛悗悇悜悎戙扆拲挐捖挬捄捅挶捃揤挹捋捊挼挩捁 +0xd1c0: 挴捘捔捙挭捇挳捚捑挸捗捀捈敊敆旆旃旄旂晊晟晇晑朒朓栟栚桉栲栳栻桋 +0xd1e0: 桏栖栱栜栵栫栭栯桎桄栴栝栒栔栦栨栮桍栺栥栠欬欯欭欱欴歭肂殈毦毤 +0xd240: 毨毣毢毧氥浺浣浤浶洍浡涒浘浢浭浯涑涍淯浿涆浞浧浠涗浰浼浟涂涘洯浨 +0xd260: 涋浾涀涄洖涃浻浽浵涐烜烓烑烝烋缹烢烗烒烞烠烔烍烅烆烇烚烎烡牂牸 +0xd2a0: 牷牶猀狺狴狾狶狳狻猁珓珙珥珖玼珧珣珩珜珒珛珔珝珚珗珘珨瓞瓟瓴瓵 +0xd2c0: 甡畛畟疰痁疻痄痀疿疶疺皊盉眝眛眐眓眒眣眑眕眙眚眢眧砣砬砢砵砯砨砮 +0xd2e0: 砫砡砩砳砪砱祔祛祏祜祓祒祑秫秬秠秮秭秪秜秞秝窆窉窅窋窌窊窇竘笐 +0xd340: 笄笓笅笏笈笊笎笉笒粄粑粊粌粈粍粅紞紝紑紎紘紖紓紟紒紏紌罜罡罞罠罝 +0xd360: 罛羖羒翃翂翀耖耾耹胺胲胹胵脁胻脀舁舯舥茳茭荄茙荑茥荖茿荁茦茜茢 +0xd3a0: 荂荎茛茪茈茼荍茖茤茠茷茯茩荇荅荌荓茞茬荋茧荈虓虒蚢蚨蚖蚍蚑蚞蚇 +0xd3c0: 蚗蚆蚋蚚蚅蚥蚙蚡蚧蚕蚘蚎蚝蚐蚔衃衄衭衵衶衲袀衱衿衯袃衾衴衼訒豇豗 +0xd3e0: 豻貤貣赶赸趵趷趶軑軓迾迵适迿迻逄迼迶郖郠郙郚郣郟郥郘郛郗郜郤酐 +0xd440: 酎酏釕釢釚陜陟隼飣髟鬯乿偰偪偡偞偠偓偋偝偲偈偍偁偛偊偢倕偅偟偩偫 +0xd460: 
偣偤偆偀偮偳偗偑凐剫剭剬剮勖勓匭厜啵啶唼啍啐唴唪啑啢唶唵唰啒啅 +0xd4a0: 唌唲啥啎唹啈唭唻啀啋圊圇埻堔埢埶埜埴堀埭埽堈埸堋埳埏堇埮埣埲埥 +0xd4c0: 埬埡堎埼堐埧堁堌埱埩埰堍堄奜婠婘婕婧婞娸娵婭婐婟婥婬婓婤婗婃婝婒 +0xd4e0: 婄婛婈媎娾婍娹婌婰婩婇婑婖婂婜孲孮寁寀屙崞崋崝崚崠崌崨崍崦崥崏 +0xd540: 崰崒崣崟崮帾帴庱庴庹庲庳弶弸徛徖徟悊悐悆悾悰悺惓惔惏惤惙惝惈悱惛 +0xd560: 悷惊悿惃惍惀挲捥掊掂捽掽掞掭掝掗掫掎捯掇掐据掯捵掜捭掮捼掤挻掟 +0xd5a0: 捸掅掁掑掍捰敓旍晥晡晛晙晜晢朘桹梇梐梜桭桮梮梫楖桯梣梬梩桵桴梲 +0xd5c0: 梏桷梒桼桫桲梪梀桱桾梛梖梋梠梉梤桸桻梑梌梊桽欶欳欷欸殑殏殍殎殌氪 +0xd5e0: 淀涫涴涳湴涬淩淢涷淶淔渀淈淠淟淖涾淥淜淝淛淴淊涽淭淰涺淕淂淏淉 +0xd640: 淐淲淓淽淗淍淣涻烺焍烷焗烴焌烰焄烳焐烼烿焆焓焀烸烶焋焂焎牾牻牼牿 +0xd660: 猝猗猇猑猘猊猈狿猏猞玈珶珸珵琄琁珽琇琀珺珼珿琌琋珴琈畤畣痎痒痏 +0xd6a0: 痋痌痑痐皏皉盓眹眯眭眱眲眴眳眽眥眻眵硈硒硉硍硊硌砦硅硐祤祧祩祪 +0xd6c0: 祣祫祡离秺秸秶秷窏窔窐笵筇笴笥笰笢笤笳笘笪笝笱笫笭笯笲笸笚笣粔粘 +0xd6e0: 粖粣紵紽紸紶紺絅紬紩絁絇紾紿絊紻紨罣羕羜羝羛翊翋翍翐翑翇翏翉耟 +0xd740: 耞耛聇聃聈脘脥脙脛脭脟脬脞脡脕脧脝脢舑舸舳舺舴舲艴莐莣莨莍荺荳莤 +0xd760: 荴莏莁莕莙荵莔莩荽莃莌莝莛莪莋荾莥莯莈莗莰荿莦莇莮荶莚虙虖蚿蚷 +0xd7a0: 蛂蛁蛅蚺蚰蛈蚹蚳蚸蛌蚴蚻蚼蛃蚽蚾衒袉袕袨袢袪袚袑袡袟袘袧袙袛袗 +0xd7c0: 袤袬袌袓袎覂觖觙觕訰訧訬訞谹谻豜豝豽貥赽赻赹趼跂趹趿跁軘軞軝軜軗 +0xd7e0: 軠軡逤逋逑逜逌逡郯郪郰郴郲郳郔郫郬郩酖酘酚酓酕釬釴釱釳釸釤釹釪 +0xd840: 釫釷釨釮镺閆閈陼陭陫陱陯隿靪頄飥馗傛傕傔傞傋傣傃傌傎傝偨傜傒傂傇 +0xd860: 兟凔匒匑厤厧喑喨喥喭啷噅喢喓喈喏喵喁喣喒喤啽喌喦啿喕喡喎圌堩堷 +0xd8a0: 堙堞堧堣堨埵塈堥堜堛堳堿堶堮堹堸堭堬堻奡媯媔媟婺媢媞婸媦婼媥媬 +0xd8c0: 媕媮娷媄媊媗媃媋媩婻婽媌媜媏媓媝寪寍寋寔寑寊寎尌尰崷嵃嵫嵁嵋崿崵 +0xd8e0: 嵑嵎嵕崳崺嵒崽崱嵙嵂崹嵉崸崼崲崶嵀嵅幄幁彘徦徥徫惉悹惌惢惎惄愔 +0xd940: 惲愊愖愅惵愓惸惼惾惁愃愘愝愐惿愄愋扊掔掱掰揎揥揨揯揃撝揳揊揠揶揕 +0xd960: 揲揵摡揟掾揝揜揄揘揓揂揇揌揋揈揰揗揙攲敧敪敤敜敨敥斌斝斞斮旐旒 +0xd9a0: 晼晬晻暀晱晹晪晲朁椌棓椄棜椪棬棪棱椏棖棷棫棤棶椓椐棳棡椇棌椈楰 +0xd9c0: 梴椑棯棆椔棸棐棽棼棨椋椊椗棎棈棝棞棦棴棑椆棔棩椕椥棇欹欻欿欼殔殗 +0xd9e0: 殙殕殽毰毲毳氰淼湆湇渟湉溈渼渽湅湢渫渿湁湝湳渜渳湋湀湑渻渃渮湞 +0xda40: 湨湜湡渱渨湠湱湫渹渢渰湓湥渧湸湤湷湕湹湒湦渵渶湚焠焞焯烻焮焱焣焥 +0xda60: 焢焲焟焨焺焛牋牚犈犉犆犅犋猒猋猰猢猱猳猧猲猭猦猣猵猌琮琬琰琫琖 +0xdaa0: 琚琡琭琱琤琣琝琩琠琲瓻甯畯畬痧痚痡痦痝痟痤痗皕皒盚睆睇睄睍睅睊 +0xdac0: 睎睋睌矞矬硠硤硥硜硭硱硪确硰硩硨硞硢祴祳祲祰稂稊稃稌稄窙竦竤筊笻 +0xdae0: 筄筈筌筎筀筘筅粢粞粨粡絘絯絣絓絖絧絪絏絭絜絫絒絔絩絑絟絎缾缿罥 +0xdb40: 罦羢羠羡翗聑聏聐胾胔腃腊腒腏腇脽腍脺臦臮臷臸臹舄舼舽舿艵茻菏菹萣 +0xdb60: 菀菨萒菧菤菼菶萐菆菈菫菣莿萁菝菥菘菿菡菋菎菖菵菉萉萏菞萑萆菂菳 +0xdba0: 菕菺菇菑菪萓菃菬菮菄菻菗菢萛菛菾蛘蛢蛦蛓蛣蛚蛪蛝蛫蛜蛬蛩蛗蛨蛑 +0xdbc0: 衈衖衕袺裗袹袸裀袾袶袼袷袽袲褁裉覕覘覗觝觚觛詎詍訹詙詀詗詘詄詅詒 +0xdbe0: 詈詑詊詌詏豟貁貀貺貾貰貹貵趄趀趉跘跓跍跇跖跜跏跕跙跈跗跅軯軷軺 +0xdc40: 軹軦軮軥軵軧軨軶軫軱軬軴軩逭逴逯鄆鄬鄄郿郼鄈郹郻鄁鄀鄇鄅鄃酡酤酟 +0xdc60: 酢酠鈁鈊鈥鈃鈚鈦鈏鈌鈀鈒釿釽鈆鈄鈧鈂鈜鈤鈙鈗鈅鈖镻閍閌閐隇陾隈 +0xdca0: 隉隃隀雂雈雃雱雰靬靰靮頇颩飫鳦黹亃亄亶傽傿僆傮僄僊傴僈僂傰僁傺 +0xdcc0: 傱僋僉傶傸凗剺剸剻剼嗃嗛嗌嗐嗋嗊嗝嗀嗔嗄嗩喿嗒喍嗏嗕嗢嗖嗈嗲嗍嗙 +0xdce0: 嗂圔塓塨塤塏塍塉塯塕塎塝塙塥塛堽塣塱壼嫇嫄嫋媺媸媱媵媰媿嫈媻嫆 +0xdd40: 媷嫀嫊媴媶嫍媹媐寖寘寙尟尳嵱嵣嵊嵥嵲嵬嵞嵨嵧嵢巰幏幎幊幍幋廅廌廆 +0xdd60: 廋廇彀徯徭惷慉慊愫慅愶愲愮慆愯慏愩慀戠酨戣戥戤揅揱揫搐搒搉搠搤 +0xdda0: 搳摃搟搕搘搹搷搢搣搌搦搰搨摁搵搯搊搚摀搥搧搋揧搛搮搡搎敯斒旓暆 +0xddc0: 暌暕暐暋暊暙暔晸朠楦楟椸楎楢楱椿楅楪椹楂楗楙楺楈楉椵楬椳椽楥棰楸 +0xdde0: 
椴楩楀楯楄楶楘楁楴楌椻楋椷楜楏楑椲楒椯楻椼歆歅歃歂歈歁殛嗀毻毼 +0xde40: 毹毷毸溛滖滈溏滀溟溓溔溠溱溹滆滒溽滁溞滉溷溰滍溦滏溲溾滃滜滘溙溒 +0xde60: 溎溍溤溡溿溳滐滊溗溮溣煇煔煒煣煠煁煝煢煲煸煪煡煂煘煃煋煰煟煐煓 +0xdea0: 煄煍煚牏犍犌犑犐犎猼獂猻猺獀獊獉瑄瑊瑋瑒瑑瑗瑀瑏瑐瑎瑂瑆瑍瑔瓡 +0xdec0: 瓿瓾瓽甝畹畷榃痯瘏瘃痷痾痼痹痸瘐痻痶痭痵痽皙皵盝睕睟睠睒睖睚睩睧 +0xdee0: 睔睙睭矠碇碚碔碏碄碕碅碆碡碃硹碙碀碖硻祼禂祽祹稑稘稙稒稗稕稢稓 +0xdf40: 稛稐窣窢窞竫筦筤筭筴筩筲筥筳筱筰筡筸筶筣粲粴粯綈綆綀綍絿綅絺綎絻 +0xdf60: 綃絼綌綔綄絽綒罭罫罧罨罬羦羥羧翛翜耡腤腠腷腜腩腛腢腲朡腞腶腧腯 +0xdfa0: 腄腡舝艉艄艀艂艅蓱萿葖葶葹蒏蒍葥葑葀蒆葧萰葍葽葚葙葴葳葝蔇葞萷 +0xdfc0: 萺萴葺葃葸萲葅萩菙葋萯葂萭葟葰萹葎葌葒葯蓅蒎萻葇萶萳葨葾葄萫葠葔 +0xdfe0: 葮葐蜋蜄蛷蜌蛺蛖蛵蝍蛸蜎蜉蜁蛶蜍蜅裖裋裍裎裞裛裚裌裐覅覛觟觥觤 +0xe040: 觡觠觢觜触詶誆詿詡訿詷誂誄詵誃誁詴詺谼豋豊豥豤豦貆貄貅賌赨赩趑趌 +0xe060: 趎趏趍趓趔趐趒跰跠跬跱跮跐跩跣跢跧跲跫跴輆軿輁輀輅輇輈輂輋遒逿 +0xe0a0: 遄遉逽鄐鄍鄏鄑鄖鄔鄋鄎酮酯鉈鉒鈰鈺鉦鈳鉥鉞銃鈮鉊鉆鉭鉬鉏鉠鉧鉯 +0xe0c0: 鈶鉡鉰鈱鉔鉣鉐鉲鉎鉓鉌鉖鈲閟閜閞閛隒隓隑隗雎雺雽雸雵靳靷靸靲頏頍 +0xe0e0: 頎颬飶飹馯馲馰馵骭骫魛鳪鳭鳧麀黽僦僔僗僨僳僛僪僝僤僓僬僰僯僣僠 +0xe140: 凘劀劁勩勫匰厬嘧嘕嘌嘒嗼嘏嘜嘁嘓嘂嗺嘝嘄嗿嗹墉塼墐墘墆墁塿塴墋塺 +0xe160: 墇墑墎塶墂墈塻墔墏壾奫嫜嫮嫥嫕嫪嫚嫭嫫嫳嫢嫠嫛嫬嫞嫝嫙嫨嫟孷寠 +0xe1a0: 寣屣嶂嶀嵽嶆嵺嶁嵷嶊嶉嶈嵾嵼嶍嵹嵿幘幙幓廘廑廗廎廜廕廙廒廔彄彃 +0xe1c0: 彯徶愬愨慁慞慱慳慒慓慲慬憀慴慔慺慛慥愻慪慡慖戩戧戫搫摍摛摝摴摶摲 +0xe1e0: 摳摽摵摦撦摎撂摞摜摋摓摠摐摿搿摬摫摙摥摷敳斠暡暠暟朅朄朢榱榶槉 +0xe240: 榠槎榖榰榬榼榑榙榎榧榍榩榾榯榿槄榽榤槔榹槊榚槏榳榓榪榡榞槙榗榐槂 +0xe260: 榵榥槆歊歍歋殞殟殠毃毄毾滎滵滱漃漥滸漷滻漮漉潎漙漚漧漘漻漒滭漊 +0xe2a0: 漶潳滹滮漭潀漰漼漵滫漇漎潃漅滽滶漹漜滼漺漟漍漞漈漡熇熐熉熀熅熂 +0xe2c0: 熏煻熆熁熗牄牓犗犕犓獃獍獑獌瑢瑳瑱瑵瑲瑧瑮甀甂甃畽疐瘖瘈瘌瘕瘑瘊 +0xe2e0: 瘔皸瞁睼瞅瞂睮瞀睯睾瞃碲碪碴碭碨硾碫碞碥碠碬碢碤禘禊禋禖禕禔禓 +0xe340: 禗禈禒禐稫穊稰稯稨稦窨窫窬竮箈箜箊箑箐箖箍箌箛箎箅箘劄箙箤箂粻粿 +0xe360: 粼粺綧綷緂綣綪緁緀緅綝緎緄緆緋緌綯綹綖綼綟綦綮綩綡緉罳翢翣翥翞 +0xe3a0: 耤聝聜膉膆膃膇膍膌膋舕蒗蒤蒡蒟蒺蓎蓂蒬蒮蒫蒹蒴蓁蓍蒪蒚蒱蓐蒝蒧 +0xe3c0: 蒻蒢蒔蓇蓌蒛蒩蒯蒨蓖蒘蒶蓏蒠蓗蓔蓒蓛蒰蒑虡蜳蜣蜨蝫蝀蜮蜞蜡蜙蜛蝃 +0xe3e0: 蜬蝁蜾蝆蜠蜲蜪蜭蜼蜒蜺蜱蜵蝂蜦蜧蜸蜤蜚蜰蜑裷裧裱裲裺裾裮裼裶裻 +0xe440: 裰裬裫覝覡覟覞觩觫觨誫誙誋誒誏誖谽豨豩賕賏賗趖踉踂跿踍跽踊踃踇踆 +0xe460: 踅跾踀踄輐輑輎輍鄣鄜鄠鄢鄟鄝鄚鄤鄡鄛酺酲酹酳銥銤鉶銛鉺銠銔銪銍 +0xe4a0: 銦銚銫鉹銗鉿銣鋮銎銂銕銢鉽銈銡銊銆銌銙銧鉾銇銩銝銋鈭隞隡雿靘靽 +0xe4c0: 靺靾鞃鞀鞂靻鞄鞁靿韎韍頖颭颮餂餀餇馝馜駃馹馻馺駂馽駇骱髣髧鬾鬿魠 +0xe4e0: 魡魟鳱鳲鳵麧僿儃儰僸儆儇僶僾儋儌僽儊劋劌勱勯噈噂噌嘵噁噊噉噆噘 +0xe540: 噚噀嘳嘽嘬嘾嘸嘪嘺圚墫墝墱墠墣墯墬墥墡壿嫿嫴嫽嫷嫶嬃嫸嬂嫹嬁嬇嬅 +0xe560: 嬏屧嶙嶗嶟嶒嶢嶓嶕嶠嶜嶡嶚嶞幩幝幠幜緳廛廞廡彉徲憋憃慹憱憰憢憉 +0xe5a0: 憛憓憯憭憟憒憪憡憍慦憳戭摮摰撖撠撅撗撜撏撋撊撌撣撟摨撱撘敶敺敹 +0xe5c0: 敻斲斳暵暰暩暲暷暪暯樀樆樗槥槸樕槱槤樠槿槬槢樛樝槾樧槲槮樔槷槧橀 +0xe5e0: 樈槦槻樍槼槫樉樄樘樥樏槶樦樇槴樖歑殥殣殢殦氁氀毿氂潁漦潾澇濆澒 +0xe640: 澍澉澌潢潏澅潚澖潶潬澂潕潲潒潐潗澔澓潝漀潡潫潽潧澐潓澋潩潿澕潣潷 +0xe660: 潪潻熲熯熛熰熠熚熩熵熝熥熞熤熡熪熜熧熳犘犚獘獒獞獟獠獝獛獡獚獙 +0xe6a0: 獢璇璉璊璆璁瑽璅璈瑼瑹甈甇畾瘥瘞瘙瘝瘜瘣瘚瘨瘛皜皝皞皛瞍瞏瞉瞈 +0xe6c0: 磍碻磏磌磑磎磔磈磃磄磉禚禡禠禜禢禛歶稹窲窴窳箷篋箾箬篎箯箹篊箵糅 +0xe6e0: 糈糌糋緷緛緪緧緗緡縃緺緦緶緱緰緮緟罶羬羰羭翭翫翪翬翦翨聤聧膣膟 +0xe740: 膞膕膢膙膗舖艏艓艒艐艎艑蔤蔻蔏蔀蔩蔎蔉蔍蔟蔊蔧蔜蓻蔫蓺蔈蔌蓴蔪蓲 +0xe760: 蔕蓷蓫蓳蓼蔒蓪蓩蔖蓾蔨蔝蔮蔂蓽蔞蓶蔱蔦蓧蓨蓰蓯蓹蔘蔠蔰蔋蔙蔯虢 +0xe7a0: 
蝖蝣蝤蝷蟡蝳蝘蝔蝛蝒蝡蝚蝑蝞蝭蝪蝐蝎蝟蝝蝯蝬蝺蝮蝜蝥蝏蝻蝵蝢蝧 +0xe7c0: 蝩衚褅褌褔褋褗褘褙褆褖褑褎褉覢覤覣觭觰觬諏諆誸諓諑諔諕誻諗誾諀諅 +0xe7e0: 諘諃誺誽諙谾豍貏賥賟賙賨賚賝賧趠趜趡趛踠踣踥踤踮踕踛踖踑踙踦踧 +0xe840: 踔踒踘踓踜踗踚輬輤輘輚輠輣輖輗遳遰遯遧遫鄯鄫鄩鄪鄲鄦鄮醅醆醊醁醂 +0xe860: 醄醀鋐鋃鋄鋀鋙銶鋏鋱鋟鋘鋩鋗鋝鋌鋯鋂鋨鋊鋈鋎鋦鋍鋕鋉鋠鋞鋧鋑鋓 +0xe8a0: 銵鋡鋆銴镼閬閫閮閰隤隢雓霅霈霂靚鞊鞎鞈韐韏頞頝頦頩頨頠頛頧颲餈 +0xe8c0: 飺餑餔餖餗餕駜駍駏駓駔駎駉駖駘駋駗駌骳髬髫髳髲髱魆魃魧魴魱魦魶魵 +0xe8e0: 魰魨魤魬鳼鳺鳽鳿鳷鴇鴀鳹鳻鴈鴅鴄麃黓鼏鼐儜儓儗儚儑凞匴叡噰噠噮 +0xe940: 噳噦噣噭噲噞噷圜圛壈墽壉墿墺壂墼壆嬗嬙嬛嬡嬔嬓嬐嬖嬨嬚嬠嬞寯嶬嶱 +0xe960: 嶩嶧嶵嶰嶮嶪嶨嶲嶭嶯嶴幧幨幦幯廩廧廦廨廥彋徼憝憨憖懅憴懆懁懌憺 +0xe9a0: 憿憸憌擗擖擐擏擉撽撉擃擛擳擙攳敿敼斢曈暾曀曊曋曏暽暻暺曌朣樴橦 +0xe9c0: 橉橧樲橨樾橝橭橶橛橑樨橚樻樿橁橪橤橐橏橔橯橩橠樼橞橖橕橍橎橆歕歔 +0xe9e0: 歖殧殪殫毈毇氄氃氆澭濋澣濇澼濎濈潞濄澽澞濊澨瀄澥澮澺澬澪濏澿澸 +0xea40: 澢濉澫濍澯澲澰燅燂熿熸燖燀燁燋燔燊燇燏熽燘熼燆燚燛犝犞獩獦獧獬獥 +0xea60: 獫獪瑿璚璠璔璒璕璡甋疀瘯瘭瘱瘽瘳瘼瘵瘲瘰皻盦瞚瞝瞡瞜瞛瞢瞣瞕瞙 +0xeaa0: 瞗磝磩磥磪磞磣磛磡磢磭磟磠禤穄穈穇窶窸窵窱窷篞篣篧篝篕篥篚篨篹 +0xeac0: 篔篪篢篜篫篘篟糒糔糗糐糑縒縡縗縌縟縠縓縎縜縕縚縢縋縏縖縍縔縥縤罃 +0xeae0: 罻罼罺羱翯耪耩聬膱膦膮膹膵膫膰膬膴膲膷膧臲艕艖艗蕖蕅蕫蕍蕓蕡蕘 +0xeb40: 蕀蕆蕤蕁蕢蕄蕑蕇蕣蔾蕛蕱蕎蕮蕵蕕蕧蕠薌蕦蕝蕔蕥蕬虣虥虤螛螏螗螓螒 +0xeb60: 螈螁螖螘蝹螇螣螅螐螑螝螄螔螜螚螉褞褦褰褭褮褧褱褢褩褣褯褬褟觱諠 +0xeba0: 諢諲諴諵諝謔諤諟諰諈諞諡諨諿諯諻貑貒貐賵賮賱賰賳赬赮趥趧踳踾踸 +0xebc0: 蹀蹅踶踼踽蹁踰踿躽輶輮輵輲輹輷輴遶遹遻邆郺鄳鄵鄶醓醐醑醍醏錧錞錈 +0xebe0: 錟錆錏鍺錸錼錛錣錒錁鍆錭錎錍鋋錝鋺錥錓鋹鋷錴錂錤鋿錩錹錵錪錔錌 +0xec40: 錋鋾錉錀鋻錖閼闍閾閹閺閶閿閵閽隩雔霋霒霐鞙鞗鞔韰韸頵頯頲餤餟餧餩 +0xec60: 馞駮駬駥駤駰駣駪駩駧骹骿骴骻髶髺髹髷鬳鮀鮅鮇魼魾魻鮂鮓鮒鮐魺鮕 +0xeca0: 魽鮈鴥鴗鴠鴞鴔鴩鴝鴘鴢鴐鴙鴟麈麆麇麮麭黕黖黺鼒鼽儦儥儢儤儠儩勴 +0xecc0: 嚓嚌嚍嚆嚄嚃噾嚂噿嚁壖壔壏壒嬭嬥嬲嬣嬬嬧嬦嬯嬮孻寱寲嶷幬幪徾徻懃 +0xece0: 憵憼懧懠懥懤懨懞擯擩擣擫擤擨斁斀斶旚曒檍檖檁檥檉檟檛檡檞檇檓檎 +0xed40: 檕檃檨檤檑橿檦檚檅檌檒歛殭氉濌澩濴濔濣濜濭濧濦濞濲濝濢濨燡燱燨燲 +0xed60: 燤燰燢獳獮獯璗璲璫璐璪璭璱璥璯甐甑甒甏疄癃癈癉癇皤盩瞵瞫瞲瞷瞶 +0xeda0: 瞴瞱瞨矰磳磽礂磻磼磲礅磹磾礄禫禨穜穛穖穘穔穚窾竀竁簅簏篲簀篿篻 +0xedc0: 簎篴簋篳簂簉簃簁篸篽簆篰篱簐簊糨縭縼繂縳顈縸縪繉繀繇縩繌縰縻縶繄 +0xede0: 縺罅罿罾罽翴翲耬膻臄臌臊臅臇膼臩艛艚艜薃薀薏薧薕薠薋薣蕻薤薚薞 +0xee40: 蕷蕼薉薡蕺蕸蕗薎薖薆薍薙薝薁薢薂薈薅蕹蕶薘薐薟虨螾螪螭蟅螰螬螹螵 +0xee60: 螼螮蟉蟃蟂蟌螷螯蟄蟊螴螶螿螸螽蟞螲褵褳褼褾襁襒褷襂覭覯覮觲觳謞 +0xeea0: 謘謖謑謅謋謢謏謒謕謇謍謈謆謜謓謚豏豰豲豱豯貕貔賹赯蹎蹍蹓蹐蹌蹇 +0xeec0: 轃轀邅遾鄸醚醢醛醙醟醡醝醠鎡鎃鎯鍤鍖鍇鍼鍘鍜鍶鍉鍐鍑鍠鍭鎏鍌鍪鍹 +0xeee0: 鍗鍕鍒鍏鍱鍷鍻鍡鍞鍣鍧鎀鍎鍙闇闀闉闃闅閷隮隰隬霠霟霘霝霙鞚鞡鞜 +0xef40: 鞞鞝韕韔韱顁顄顊顉顅顃餥餫餬餪餳餲餯餭餱餰馘馣馡騂駺駴駷駹駸駶駻 +0xef60: 駽駾駼騃骾髾髽鬁髼魈鮚鮨鮞鮛鮦鮡鮥鮤鮆鮢鮠鮯鴳鵁鵧鴶鴮鴯鴱鴸鴰 +0xefa0: 鵅鵂鵃鴾鴷鵀鴽翵鴭麊麉麍麰黈黚黻黿鼤鼣鼢齔龠儱儭儮嚘嚜嚗嚚嚝嚙 +0xefc0: 奰嬼屩屪巀幭幮懘懟懭懮懱懪懰懫懖懩擿攄擽擸攁攃擼斔旛曚曛曘櫅檹檽 +0xefe0: 櫡櫆檺檶檷櫇檴檭歞毉氋瀇瀌瀍瀁瀅瀔瀎濿瀀濻瀦濼濷瀊爁燿燹爃燽獶 +0xf040: 璸瓀璵瓁璾璶璻瓂甔甓癜癤癙癐癓癗癚皦皽盬矂瞺磿礌礓礔礉礐礒礑禭禬 +0xf060: 穟簜簩簙簠簟簭簝簦簨簢簥簰繜繐繖繣繘繢繟繑繠繗繓羵羳翷翸聵臑臒 +0xf0a0: 臐艟艞薴藆藀藃藂薳薵薽藇藄薿藋藎藈藅薱薶藒蘤薸薷薾虩蟧蟦蟢蟛蟫 +0xf0c0: 蟪蟥蟟蟳蟤蟔蟜蟓蟭蟘蟣螤蟗蟙蠁蟴蟨蟝襓襋襏襌襆襐襑襉謪謧謣謳謰謵 +0xf0e0: 譇謯謼謾謱謥謷謦謶謮謤謻謽謺豂豵貙貘貗賾贄贂贀蹜蹢蹠蹗蹖蹞蹥蹧 +0xf140: 
蹛蹚蹡蹝蹩蹔轆轇轈轋鄨鄺鄻鄾醨醥醧醯醪鎵鎌鎒鎷鎛鎝鎉鎧鎎鎪鎞鎦鎕 +0xf160: 鎈鎙鎟鎍鎱鎑鎲鎤鎨鎴鎣鎥闒闓闑隳雗雚巂雟雘雝霣霢霥鞬鞮鞨鞫鞤鞪 +0xf1a0: 鞢鞥韗韙韖韘韺顐顑顒颸饁餼餺騏騋騉騍騄騑騊騅騇騆髀髜鬈鬄鬅鬩鬵 +0xf1c0: 魊魌魋鯇鯆鯃鮿鯁鮵鮸鯓鮶鯄鮹鮽鵜鵓鵏鵊鵛鵋鵙鵖鵌鵗鵒鵔鵟鵘鵚麎麌 +0xf1e0: 黟鼁鼀鼖鼥鼫鼪鼩鼨齌齕儴儵劖勷厴嚫嚭嚦嚧嚪嚬壚壝壛夒嬽嬾嬿巃幰 +0xf240: 徿懻攇攐攍攉攌攎斄旞旝曞櫧櫠櫌櫑櫙櫋櫟櫜櫐櫫櫏櫍櫞歠殰氌瀙瀧瀠瀖 +0xf260: 瀫瀡瀢瀣瀩瀗瀤瀜瀪爌爊爇爂爅犥犦犤犣犡瓋瓅璷瓃甖癠矉矊矄矱礝礛 +0xf2a0: 礡礜礗礞禰穧穨簳簼簹簬簻糬糪繶繵繸繰繷繯繺繲繴繨罋罊羃羆羷翽翾 +0xf2c0: 聸臗臕艤艡艣藫藱藭藙藡藨藚藗藬藲藸藘藟藣藜藑藰藦藯藞藢蠀蟺蠃蟶蟷 +0xf2e0: 蠉蠌蠋蠆蟼蠈蟿蠊蠂襢襚襛襗襡襜襘襝襙覈覷覶觶譐譈譊譀譓譖譔譋譕 +0xf340: 譑譂譒譗豃豷豶貚贆贇贉趬趪趭趫蹭蹸蹳蹪蹯蹻軂轒轑轏轐轓辴酀鄿醰醭 +0xf360: 鏞鏇鏏鏂鏚鏐鏹鏬鏌鏙鎩鏦鏊鏔鏮鏣鏕鏄鏎鏀鏒鏧镽闚闛雡霩霫霬霨霦 +0xf3a0: 鞳鞷鞶韝韞韟顜顙顝顗颿颽颻颾饈饇饃馦馧騚騕騥騝騤騛騢騠騧騣騞騜 +0xf3c0: 騔髂鬋鬊鬎鬌鬷鯪鯫鯠鯞鯤鯦鯢鯰鯔鯗鯬鯜鯙鯥鯕鯡鯚鵷鶁鶊鶄鶈鵱鶀鵸 +0xf3e0: 鶆鶋鶌鵽鵫鵴鵵鵰鵩鶅鵳鵻鶂鵯鵹鵿鶇鵨麔麑黀黼鼭齀齁齍齖齗齘匷嚲 +0xf440: 嚵嚳壣孅巆巇廮廯忀忁懹攗攖攕攓旟曨曣曤櫳櫰櫪櫨櫹櫱櫮櫯瀼瀵瀯瀷瀴 +0xf460: 瀱灂瀸瀿瀺瀹灀瀻瀳灁爓爔犨獽獼璺皫皪皾盭矌矎矏矍矲礥礣礧礨礤礩 +0xf4a0: 禲穮穬穭竷籉籈籊籇籅糮繻繾纁纀羺翿聹臛臙舋艨艩蘢藿蘁藾蘛蘀藶蘄 +0xf4c0: 蘉蘅蘌藽蠙蠐蠑蠗蠓蠖襣襦覹觷譠譪譝譨譣譥譧譭趮躆躈躄轙轖轗轕轘轚 +0xf4e0: 邍酃酁醷醵醲醳鐋鐓鏻鐠鐏鐔鏾鐕鐐鐨鐙鐍鏵鐀鏷鐇鐎鐖鐒鏺鐉鏸鐊鏿 +0xf540: 鏼鐌鏶鐑鐆闞闠闟霮霯鞹鞻韽韾顠顢顣顟飁飂饐饎饙饌饋饓騲騴騱騬騪騶 +0xf560: 騩騮騸騭髇髊髆鬐鬒鬑鰋鰈鯷鰅鰒鯸鱀鰇鰎鰆鰗鰔鰉鶟鶙鶤鶝鶒鶘鶐鶛 +0xf5a0: 鶠鶔鶜鶪鶗鶡鶚鶢鶨鶞鶣鶿鶩鶖鶦鶧麙麛麚黥黤黧黦鼰鼮齛齠齞齝齙龑 +0xf5c0: 儺儹劘劗囃嚽嚾孈孇巋巏廱懽攛欂櫼欃櫸欀灃灄灊灈灉灅灆爝爚爙獾甗癪 +0xf5e0: 矐礭礱礯籔籓糲纊纇纈纋纆纍罍羻耰臝蘘蘪蘦蘟蘣蘜蘙蘧蘮蘡蘠蘩蘞蘥 +0xf640: 蠩蠝蠛蠠蠤蠜蠫衊襭襩襮襫觺譹譸譅譺譻贐贔趯躎躌轞轛轝酆酄酅醹鐿鐻 +0xf660: 鐶鐩鐽鐼鐰鐹鐪鐷鐬鑀鐱闥闤闣霵霺鞿韡顤飉飆飀饘饖騹騽驆驄驂驁騺 +0xf6a0: 騿髍鬕鬗鬘鬖鬺魒鰫鰝鰜鰬鰣鰨鰩鰤鰡鶷鶶鶼鷁鷇鷊鷏鶾鷅鷃鶻鶵鷎鶹 +0xf6c0: 鶺鶬鷈鶱鶭鷌鶳鷍鶲鹺麜黫黮黭鼛鼘鼚鼱齎齥齤龒亹囆囅囋奱孋孌巕巑廲 +0xf6e0: 攡攠攦攢欋欈欉氍灕灖灗灒爞爟犩獿瓘瓕瓙瓗癭皭礵禴穰穱籗籜籙籛籚 +0xf740: 糴糱纑罏羇臞艫蘴蘵蘳蘬蘲蘶蠬蠨蠦蠪蠥襱覿覾觻譾讄讂讆讅譿贕躕躔躚 +0xf760: 躒躐躖躗轠轢酇鑌鑐鑊鑋鑏鑇鑅鑈鑉鑆霿韣顪顩飋饔饛驎驓驔驌驏驈驊 +0xf7a0: 驉驒驐髐鬙鬫鬻魖魕鱆鱈鰿鱄鰹鰳鱁鰼鰷鰴鰲鰽鰶鷛鷒鷞鷚鷋鷐鷜鷑鷟 +0xf7c0: 鷩鷙鷘鷖鷵鷕鷝麶黰鼵鼳鼲齂齫龕龢儽劙壨壧奲孍巘蠯彏戁戃戄攩攥斖曫 +0xf7e0: 欑欒欏毊灛灚爢玂玁玃癰矔籧籦纕艬蘺虀蘹蘼蘱蘻蘾蠰蠲蠮蠳襶襴襳觾 +0xf840: 讌讎讋讈豅贙躘轤轣醼鑢鑕鑝鑗鑞韄韅頀驖驙鬞鬟鬠鱒鱘鱐鱊鱍鱋鱕鱙鱌 +0xf860: 鱎鷻鷷鷯鷣鷫鷸鷤鷶鷡鷮鷦鷲鷰鷢鷬鷴鷳鷨鷭黂黐黲黳鼆鼜鼸鼷鼶齃齏 +0xf8a0: 齱齰齮齯囓囍孎屭攭曭曮欓灟灡灝灠爣瓛瓥矕礸禷禶籪纗羉艭虃蠸蠷蠵 +0xf8c0: 衋讔讕躞躟躠躝醾醽釂鑫鑨鑩雥靆靃靇韇韥驞髕魙鱣鱧鱦鱢鱞鱠鸂鷾鸇鸃 +0xf8e0: 鸆鸅鸀鸁鸉鷿鷽鸄麠鼞齆齴齵齶囔攮斸欘欙欗欚灢爦犪矘矙礹籩籫糶纚 +0xf940: 纘纛纙臠臡虆虇虈襹襺襼襻觿讘讙躥躤躣鑮鑭鑯鑱鑳靉顲饟鱨鱮鱭鸋鸍鸐 +0xf960: 鸏鸒鸑麡黵鼉齇齸齻齺齹圞灦籯蠼趲躦釃鑴鑸鑶鑵驠鱴鱳鱱鱵鸔鸓黶鼊 +0xf9a0: 龤灨灥糷虪蠾蠽蠿讞貜躩軉靋顳顴飌饡馫驤驦驧鬤鸕鸗齈戇欞爧虌躨钂 +0xf9c0: 钀钁驩驨鬮鸙爩虋讟钃鱹麷癵驫鱺鸝灩灪麤齾齉龘碁銹裏墻恒粧嫺╔╦╗ +0xf9e0: ╠╬╣╚╩╝╒╤╕╞╪╡╘╧╛╓╥╖╟╫╢╙╨╜║═ ■ +0xfa40: 鋛蕌䊵珯况㙉鍄苮砼杄拟侫倈 +0xfa60: 徤滛 儁㑺儎顬㝃萖兠兪宂蝽冲冸 +0xfaa0: 鴴凉减凑㳜凓决凢卂凭菍椾彻刋刦刼劵剗劔効勅簕蕂勠蘍 啉 +0xfac0: 滙匳 
泋栛珕恊㺪㣌燝䒢卭却卾 矦厓厠厫厮玧 +0xfae0: 㽙玜叁叅汉义埾叙㪫叠叶吓灹唫晗浛呭啝咏咤䞦㶴 +0xfb40: 啇䳭启琗喆喩 䕒暳嘷曍暤暭噍噏磱囱鞇叾圀囯园㘣 +0xfb60: 坆汮炋坂㚱埦堃堦塜墪㕡壠壜壻寿坃鏓㖡够梦㛃湙 +0xfba0: 娤啓蔅姉姙浱姹媫婣㛦 㜈媖瑥嫓㶅 +0xfbc0: 㜲広勐孶斈孼䀄䡝寕慠寳宝䴐尅尓珎尔屉䣝岅峩峯 +0xfbe0: 嶋崐崘嵆岺巗苼㠭芇㠶㯂帮檊 幺厦亷 厨帉廴 +0xfc40: 廹廻㢠廼栾鐛弍㫞䢮强 彣鞽彲鍀徧嶶㵟釖 +0xfc60: 怱暅㥣㷇㘹垐祱㹀悞 悳璤僡媠慤萤慂憁凴憇宪 +0xfca0: 懓懐㤲怣慜攞掋担拕捬㨗搸揸 澊頔擡 +0xfcc0: 擥鑻㩦携㩗敍漖斅敭敟斵䬷旑䃘无旣忟昘晄晋晧 +0xfce0: 晳 矅馤朂㬫槺 杧杢柗䓩栢湐鈼栁桝 +0xfd40: 槡樋楳棃椁椀㴲㨁㮀枬楡䋼椶榘㮡荣傐槹橅檝㯳枱櫈 +0xfd60: 㰍欝惞欵歴溵㝀吡毡毜氷汚舦汹䓅 +0xfda0: 㛥㳫鮃羏样涖浜湼漄蔳凇 萮 瑓秌 +0xfdc0: 湏媑濸㜍澝滺䕕鏰潄潜㵎潴㴻澟濓凟 +0xfde0: 灋灾炧炁烌烕烖烟䄄㷨熴熖焫煅媈煊 岜煏鍢焬熺炽爎 +0xfe40: 鑂爕夑鑃爤鍁爮牀梽牕牗㹕栍漽犂 猫䣭猨献珏玪珉瑉 +0xfe60: 昣㛅珷琕椃琹㻗 瑠瑇珤瑶莹瑬㜰瑴鏱樬璂䥓 +0xfea0: 孆瓈甎 甞寗鎅畍畊畧畮㼄疎瑝疞疴瘂瘬癑癏 +0xfec0: 癯癶皐臯㟸皡皥皷盌葢眞眦着撯睘瞯矴  +0xfee0: 棊碯磇磓隥礮磗礴碱辸袄禆褀椂禀禝礼禩渪㺨秆秔 diff --git a/t/cow.t b/t/cow.t new file mode 100644 index 0000000..9932e9d --- /dev/null +++ b/t/cow.t @@ -0,0 +1,27 @@ +# +# $Id: cow.t,v 1.2 2016/08/04 03:15:58 dankogai Exp $ +# +use strict; +use Encode (); +use Test::More tests => 4; + + +my %a = ( "L\x{c3}\x{a9}on" => "acme" ); +my ($k) = ( keys %a ); +Encode::_utf8_on($k); +my %h = ( $k => "acme" ); +is $h{"L\x{e9}on"} => 'acme'; +($k) = ( keys %h ); +Encode::_utf8_off($k); +%a = ( $k => "acme" ); +is $h{"L\x{e9}on"} => 'acme'; +# use Devel::Peek; +# Dump(\%h); + +{ # invalid input to encode/decode/from_to should not affect COW-shared scalars + my $x = Encode::decode('UTF-8', "\303\244" x 4); + my $orig = "$x"; # non-COW copy + is($x, $orig, "copy of original string matches"); + { my $y = $x; Encode::from_to($y, "UTF-8", "iso-8859-1"); } + is($x, $orig, "original scalar unmodified after from_to() call"); +} diff --git a/t/decode.t b/t/decode.t new file mode 100644 index 0000000..93c992c --- /dev/null +++ b/t/decode.t @@ -0,0 +1,88 @@ +# +# $Id: decode.t,v 1.4 2017/10/06 22:21:53 dankogai Exp $ +# +use strict; +use Encode 
qw(decode_utf8 FB_CROAK find_encoding decode); +use Test::More tests => 17; +use Test::Builder; + +sub croak_ok(&) { + local $Test::Builder::Level = $Test::Builder::Level + 1; + my $code = shift; + eval { $code->() }; + like $@, qr/does not map/; +} + +my $bytes = "L\x{e9}on"; +my $pad = "\x{30C9}"; + +my $orig = $bytes; +croak_ok { Encode::decode_utf8($orig, FB_CROAK) }; + +my $orig2 = $bytes; +croak_ok { Encode::decode('utf-8', $orig2, FB_CROAK) }; + +chop(my $new = $bytes . $pad); +croak_ok { Encode::decode_utf8($new, FB_CROAK) }; + +my $latin1 = find_encoding('latin1'); +$orig = "\N{U+0080}"; +$orig =~ /(.)/; +is($latin1->decode($1), $orig, '[cpan #115168] passing magic regex globals to decode'); +SKIP: { + skip "Perl Version ($]) is older than v5.16", 1 if $] < 5.016; + *a = $orig; + is($latin1->decode(*a), '*main::'.$orig, '[cpan #115168] passing typeglobs to decode'); +} + +$orig = "\x80"; +$orig =~ /(.)/; +is($latin1->decode($1), "\N{U+0080}", 'passing magic regex to latin1 decode'); + +$orig = "\x80"; +*a = $orig; +is($latin1->decode(*a), "*main::\N{U+0080}", 'passing typeglob to latin1 decode'); + +$orig = "\N{U+0080}"; +$orig =~ /(.)/; +is($latin1->encode($1), "\x80", 'passing magic regex to latin1 encode'); + +$orig = "\xC3\x80"; +$orig =~ /(..)/; +is(Encode::decode_utf8($1), "\N{U+C0}", 'passing magic regex to Encode::decode_utf8'); + +$orig = "\xC3\x80"; +*a = $orig; +is(Encode::decode_utf8(*a), "*main::\N{U+C0}", 'passing typeglob to Encode::decode_utf8'); + +$orig = "\N{U+C0}"; +$orig =~ /(.)/; +is(Encode::encode_utf8($1), "\xC3\x80", 'passing magic regex to Encode::encode_utf8'); + +$orig = "\xC3\x80"; +$orig =~ /(..)/; +is(Encode::decode('utf-8', $1), "\N{U+C0}", 'passing magic regex to UTF-8 decode'); + +$orig = "\xC3\x80"; +*a = $orig; +is(Encode::decode('utf-8', *a), "*main::\N{U+C0}", 'passing typeglob to UTF-8 decode'); + +$orig = "\N{U+C0}"; +$orig =~ /(.)/; +is(Encode::encode('utf-8', $1), "\xC3\x80", 'passing magic regex to UTF-8 encode'); 
+ +SKIP: { + skip "Perl Version ($]) is older than v5.16", 3 if $] < 5.016; + + $orig = "\N{U+0080}"; + *a = $orig; + is($latin1->encode(*a), "*main::\x80", 'passing typeglob to latin1 encode'); + + $orig = "\N{U+C0}"; + *a = $orig; + is(Encode::encode_utf8(*a), "*main::\xC3\x80", 'passing typeglob to Encode::encode_utf8'); + + $orig = "\N{U+C0}"; + *a = $orig; + is(Encode::encode('utf-8', *a), "*main::\xC3\x80", 'passing typeglob to UTF-8 encode'); +} diff --git a/t/enc_data.t b/t/enc_data.t new file mode 100644 index 0000000..e610b0d --- /dev/null +++ b/t/enc_data.t @@ -0,0 +1,49 @@ +# $Id: enc_data.t,v 2.5 2016/11/29 23:29:23 dankogai Exp $ + +BEGIN { + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + unless (find PerlIO::Layer 'perlio') { + print "1..0 # Skip: PerlIO was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: encoding pragma does not support EBCDIC platforms\n"; + exit(0); + } + if ($] >= 5.025 and !$Config{usecperl}) { + print "1..0 # Skip: encoding pragma not supported in Perl 5.26\n"; + exit(0); + } + if ($] <= 5.008 and !$Config{perl_patchlevel}){ + print "1..0 # Skip: Perl 5.8.1 or later required\n"; + exit 0; + } +} + + +use strict; +no warnings "deprecated"; +use encoding 'euc-jp'; +use Test::More tests => 4; + +my @a; + +while () { + chomp; + tr/��-��-��/��-��-��/; + push @a, $_; +} + +is(scalar @a, 3); +is($a[0], "�����DATA�դ�����Ϥ�ɤ�ΤƤ��ȥǥ���"); +is($a[1], "���ܸ쥬�������Ѵ��ǥ��륫"); +is($a[2], "�ɥ����ΤƤ��ȥ򥷥ƥ��ޥ���"); + +__DATA__ +�����DATA�ե�����ϥ�ɥ�Υƥ��ȤǤ��� +���ܸ줬�������Ѵ��Ǥ��뤫 +�ɤ����Υƥ��Ȥ򤷤Ƥ��ޤ��� diff --git a/t/enc_eucjp.t b/t/enc_eucjp.t new file mode 100644 index 0000000..fc0af3c --- /dev/null +++ b/t/enc_eucjp.t @@ -0,0 +1,101 @@ +# $Id: enc_eucjp.t,v 2.5 2017/06/10 17:23:50 dankogai Exp $ +# This is the twin of enc_utf8.t . 
+ +BEGIN { + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + unless (find PerlIO::Layer 'perlio') { + print "1..0 # Skip: PerlIO was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # encoding pragma does not support EBCDIC platforms\n"; + exit(0); + } + if ($] <= 5.008 and !$Config{perl_patchlevel}){ + print "1..0 # Skip: Perl 5.8.1 or later required\n"; + exit 0; + } + if ($] >= 5.025003 and !$Config{usecperl}){ + print "1..0 # Skip: Perl <=5.25.2 or cperl required\n"; + exit 0; + } +} + +no warnings "deprecated"; +use encoding 'euc-jp'; + +my @c = (127, 128, 255, 256); + +print "1.." . (scalar @c + 2) . "\n"; + +my @f; + +for my $i (0..$#c) { + no warnings 'pack'; + my $file = filename("f$i"); + push @f, $file; + open(F, ">$file") or die "$0: failed to open '$file' for writing: $!"; + binmode(F, ":utf8"); + print F chr($c[$i]); + print F pack("C" => $c[$i]); + close F; +} + +my $t = 1; + +for my $i (0..$#c) { + my $file = filename("f$i"); + open(F, "<$file") or die "$0: failed to open '$file' for reading: $!"; + binmode(F, ":utf8"); + my $c = ; + my $o = ord($c); + print $o == $c[$i] ? "ok $t - utf8 I/O $c[$i]\n" : "not ok $t - utf8 I/O $c[$i]: $o != $c[$i]\n"; + $t++; +} + +my $f = filename("f" . @f); + +push @f, $f; +open(F, ">$f") or die "$0: failed to open '$f' for writing: $!"; +binmode(F, ":raw"); # Output raw bytes. +print F chr(128); # Output illegal UTF-8. +close F; +open(F, $f) or die "$0: failed to open '$f' for reading: $!"; +binmode(F, ":encoding(UTF-8)"); +{ + local $^W = 1; + local $SIG{__WARN__} = sub { $a = shift }; + eval { }; # This should get caught. +} +close F; +print $a =~ qr{^UTF-8 "\\x80" does not map to Unicode} ? + "ok $t - illegal UTF-8 input\n" : "not ok $t - illegal UTF-8 input: a = " . unpack("H*", $a) . 
"\n"; +$t++; + +open(F, $f) or die "$0: failed to open '$f' for reading: $!"; +binmode(F, ":encoding(utf8)"); +{ + local $^W = 1; + local $SIG{__WARN__} = sub { $a = shift }; + eval { }; # This should get caught. +} +close F; +print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ? + "ok $t - illegal utf8 input\n" : "not ok $t - illegal utf8 input: a = " . unpack("H*", $a) . "\n"; +$t++; + +# On VMS temporary file names like "f0." may be more readable than "f0" since +# "f0" could be a logical name pointing elsewhere. +sub filename { + my $name = shift; + $name .= '.' if $^O eq 'VMS'; + return $name; +} + +END { + 1 while unlink @f; +} diff --git a/t/enc_module.enc b/t/enc_module.enc new file mode 100644 index 0000000..688aa1d --- /dev/null +++ b/t/enc_module.enc @@ -0,0 +1,2 @@ +���ʸ���� +�ƥ���ʸ���� diff --git a/t/enc_module.t b/t/enc_module.t new file mode 100644 index 0000000..fd6e6dc --- /dev/null +++ b/t/enc_module.t @@ -0,0 +1,68 @@ +# $Id: enc_module.t,v 2.5 2016/11/29 23:29:23 dankogai Exp $ +# This file is in euc-jp +BEGIN { + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + unless (find PerlIO::Layer 'perlio') { + print "1..0 # Skip: PerlIO was not built\n"; + exit 0; + } + if (defined ${^UNICODE} and ${^UNICODE} != 0){ + print "1..0 # Skip: \${^UNICODE} == ${^UNICODE}\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: encoding pragma does not support EBCDIC platforms\n"; + exit(0); + } + if ($] >= 5.025 and !$Config{usecperl}) { + print "1..0 # Skip: encoding pragma not supported in Perl 5.26\n"; + exit(0); + } +} +use lib qw(t ext/Encode/t ../ext/Encode/t); # latter 2 for perl core +use Mod_EUCJP; +no warnings "deprecated"; +use encoding "euc-jp"; +use Test::More tests => 3; +use File::Basename; +use File::Spec; +use File::Compare qw(compare_text); + +my $DEBUG = shift || 0; +my $dir = dirname(__FILE__); +my $file0 = 
File::Spec->catfile($dir,"enc_module.enc"); +my $file1 = File::Spec->catfile($dir,"$$.enc"); + +my $obj = Mod_EUCJP->new; +local $SIG{__WARN__} = sub{ $DEBUG and print STDERR @_ }; +# to silence reopening STD(IN|OUT) w/o closing unless $DEBUG + +open STDOUT, ">", $file1 or die "$file1:$!"; +print $obj->str, "\n"; +$obj->set("�ƥ���ʸ����"); +print $obj->str, "\n"; + +# Please do not move this to a point after the comparison -- Craig Berry +# and "unless $^O eq 'freebsd'" is needed for FreeBSD (toy-)?thread +# -- dankogai +close STDOUT unless $^O eq 'freebsd'; + +my $cmp = compare_text($file0, $file1); +is($cmp, 0, "encoding vs. STDOUT"); + +my @cmp = qw/���ʸ���� �ƥ���ʸ����/; +open STDIN, "<", $file0 or die "$file0:$!"; +$obj = Mod_EUCJP->new; +my $i = 0; +while(){ + s/\r?\n\z//; + is ($cmp[$i++], $_, "encoding vs. STDIN - $i"); +} + +unlink $file1 unless $cmp; +__END__ + diff --git a/t/enc_utf8.t b/t/enc_utf8.t new file mode 100644 index 0000000..be7d487 --- /dev/null +++ b/t/enc_utf8.t @@ -0,0 +1,95 @@ +# $Id: enc_utf8.t,v 2.5 2017/06/10 17:23:50 dankogai Exp $ +# This is the twin of enc_eucjp.t . + +BEGIN { + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + unless (find PerlIO::Layer 'perlio') { + print "1..0 # Skip: PerlIO was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # encoding pragma does not support EBCDIC platforms\n"; + exit(0); + } + if ($] >= 5.025003 and !$Config{usecperl}){ + print "1..0 # Skip: Perl <=5.25.2 or cperl required\n"; + exit 0; + } +} + +no warnings "deprecated"; +use encoding 'utf8'; + +my @c = (127, 128, 255, 256); + +print "1.." . (scalar @c + 2) . 
"\n"; + +my @f; + +for my $i (0..$#c) { + my $file = filename("f$i"); + push @f, $file; + open(F, ">$file") or die "$0: failed to open '$file' for writing: $!"; + binmode(F, ":utf8"); + print F chr($c[$i]); + close F; +} + +my $t = 1; + +for my $i (0..$#c) { + my $file = filename("f$i"); + open(F, "<$file") or die "$0: failed to open '$file' for reading: $!"; + binmode(F, ":utf8"); + my $c = ; + my $o = ord($c); + print $o == $c[$i] ? "ok $t - utf8 I/O $c[$i]\n" : "not ok $t - utf8 I/O $c[$i]: $o != $c[$i]\n"; + $t++; +} + +my $f = filename("f" . @f); + +push @f, $f; +open(F, ">$f") or die "$0: failed to open '$f' for writing: $!"; +binmode(F, ":raw"); # Output raw bytes. +print F chr(128); # Output illegal UTF-8. +close F; +open(F, $f) or die "$0: failed to open '$f' for reading: $!"; +binmode(F, ":encoding(UTF-8)"); +{ + local $^W = 1; + local $SIG{__WARN__} = sub { $a = shift }; + eval { }; # This should get caught. +} +close F; +print $a =~ qr{^UTF-8 "\\x80" does not map to Unicode} ? + "ok $t - illegal UTF-8 input\n" : "not ok $t - illegal UTF-8 input: a = " . unpack("H*", $a) . "\n"; +$t++; + +open(F, $f) or die "$0: failed to open '$f' for reading: $!"; +binmode(F, ":encoding(utf8)"); +{ + local $^W = 1; + local $SIG{__WARN__} = sub { $a = shift }; + eval { }; # This should get caught. +} +close F; +print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ? + "ok $t - illegal utf8 input\n" : "not ok $t - illegal utf8 input: a = " . unpack("H*", $a) . "\n"; +$t++; + +# On VMS temporary file names like "f0." may be more readable than "f0" since +# "f0" could be a logical name pointing elsewhere. +sub filename { + my $name = shift; + $name .= '.' 
if $^O eq 'VMS'; + return $name; +} + +END { + 1 while unlink @f; +} diff --git a/t/encoding-locale.t b/t/encoding-locale.t new file mode 100644 index 0000000..87e7ecb --- /dev/null +++ b/t/encoding-locale.t @@ -0,0 +1,26 @@ +# +# This test aims to detect (using CPAN Testers) platforms where the locale +# encoding detection doesn't work. +# + +use strict; +use warnings; + +use Test::More tests => 3; + +use encoding (); +use Encode qw; + +my $locale_encoding = encoding::_get_locale_encoding; + +SKIP: { + defined $locale_encoding or skip 'no locale encoding found', 3; + + is(ref $locale_encoding, '', '_get_locale_encoding returns a scalar value'); + + my $enc = find_encoding($locale_encoding); + ok(defined $enc, 'encoding returned is supported') + or diag("Encoding: ", explain($locale_encoding)); + isa_ok($enc, 'Encode::Encoding'); + eval { note($locale_encoding, ' => ', $enc->name); }; +} diff --git a/t/encoding.t b/t/encoding.t new file mode 100644 index 0000000..33010e7 --- /dev/null +++ b/t/encoding.t @@ -0,0 +1,219 @@ +BEGIN { + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + unless (find PerlIO::Layer 'perlio') { + print "1..0 # Skip: PerlIO was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: encoding pragma does not support EBCDIC platforms\n"; + exit(0); + } + if ($] >= 5.025 and !$Config{usecperl}) { + print "1..0 # Skip: encoding pragma not supported in Perl 5.26\n"; + exit(0); + } +} + +print "1..33\n"; + + +no warnings "deprecated"; +use encoding "latin1"; # ignored (overwritten by the next line) +use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...) 
+ +# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is +# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS), +# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S) + +$a = "\xDF"; +$b = "\x{100}"; + +print "not " unless ord($a) == 0x3af; +print "ok 1\n"; + +print "not " unless ord($b) == 0x100; +print "ok 2\n"; + +my $c; + +$c = $a . $b; + +print "not " unless ord($c) == 0x3af; +print "ok 3\n"; + +print "not " unless length($c) == 2; +print "ok 4\n"; + +print "not " unless ord(substr($c, 1, 1)) == 0x100; +print "ok 5\n"; + +print "not " unless ord(chr(0xdf)) == 0x3af; # spooky +print "ok 6\n"; + +print "not " unless ord(pack("C", 0xdf)) == 0x3af; +print "ok 7\n"; + +# we didn't break pack/unpack, I hope + +print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf; +print "ok 8\n"; + +# the first octet of UTF-8 encoded 0x3af +print "not " unless unpack("U0 C", chr(0xdf)) == 0xce; +print "ok 9\n"; + +print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf; +print "ok 10\n"; + +print "not " unless unpack("U", chr(0xdf)) == 0x3af; +print "ok 11\n"; + +# charnames must still work +use charnames ':full'; +print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf; +print "ok 12\n"; + +# combine + +$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . 
chr(0xdf); + +print "not " unless ord($c) == 0x3af; +print "ok 13\n"; + +print "not " unless ord(substr($c, 1, 1)) == 0xdf; +print "ok 14\n"; + +print "not " unless ord(substr($c, 2, 1)) == 0x3af; +print "ok 15\n"; + +# regex literals + +print "not " unless "\xDF" =~ /\x{3AF}/; +print "ok 16\n"; + +print "not " unless "\x{3AF}" =~ /\xDF/; +print "ok 17\n"; + +print "not " unless "\xDF" =~ /\xDF/; +print "ok 18\n"; + +print "not " unless "\x{3AF}" =~ /\x{3AF}/; +print "ok 19\n"; + +# eq, cmp + +my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = ( + pack("C*", 0xDF ), # byte + pack("C*", 0xDF, 0x20), # ($bytes cmp $U) > 0 + pack("U*", 0x3AF), # $U eq $byte + pack("U*", 0xDF ), # $Ub would eq $byte w/o use encoding + pack("U*", 0x3B1), # ($g1 cmp $byte) > 0; === chr(0xe1) + pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0; + pack("U*", 0x3AB), # ($l cmp $byte) < 0; === chr(0xdb) +); + +# all the tests in this section that compare a byte encoded string +# to a UTF-8 encoded one are run in all possible variants +# all of the eq, ne, cmp operations tested, +# $v z $u tested as well as $u z $v + +sub alleq($$){ + my ($a,$b) = (shift, shift); + $a eq $b && $b eq $a && + !( $a ne $b ) && !( $b ne $a ) && + ( $a cmp $b ) == 0 && ( $b cmp $a ) == 0; +} + +sub anyeq($$){ + my ($a,$b) = (shift, shift); + $a eq $b || $b eq $a || + !( $a ne $b ) || !( $b ne $a ) || + ( $a cmp $b ) == 0 || ( $b cmp $a ) == 0; +} + +sub allgt($$){ + my ($a,$b) = (shift, shift); + ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1; +} +#match the correct UTF-8 string +print "not " unless alleq($byte, $U); +print "ok 20\n"; + +#do not match a wrong UTF-8 string +print "not " if anyeq($byte, $Ub); +print "ok 21\n"; + +#string ordering +print "not " unless allgt ( $g1, $byte ) && + allgt ( $g2, $byte ) && + allgt ( $byte, $l ) && + allgt ( $bytes, $U ); +print "ok 22\n"; + +# upgrade, downgrade + +my ($u,$v,$v2); +$u = $v = $v2 = pack("C*", 0xDF); +utf8::upgrade($v); #explicit upgrade +$v2 = substr( $v2."\x{410}", 0, -1); 
#implicit upgrade + +# implicit upgrade === explicit upgrade +print "not " if do{{use bytes; $v ne $v2}} || $v ne $v2; +print "ok 23\n"; + +# utf8::upgrade is transparent and does not break equality +print "not " unless alleq( $u, $v ); +print "ok 24\n"; + +$u = $v = pack("C*", 0xDF); +utf8::upgrade($v); +#test for a roundtrip, we should get back from where we left +eval {utf8::downgrade( $v )}; +print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v; +print "ok 25\n"; + +# some more eq, cmp + +$byte=pack("C*", 0xDF); + +print "not " unless pack("U*", 0x3AF) eq $byte; +print "ok 26\n"; + +print "not " if chr(0xDF) cmp $byte; +print "ok 27\n"; + +print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) && + ((pack("U*", 0x3AE) cmp $byte) == -1) && + ((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) && + ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1); +print "ok 28\n"; + + +{ + # Used to core dump in 5.7.3 + no warnings; # so test goes noiselessly + print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n"; +} + +{ + my %h1; + my %h2; + $h1{"\xdf"} = 41; + $h2{"\x{3af}"} = 42; + print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n"; + print $h2{"\xdf"} == 42 ? 
"ok 31\n" : "not ok 31\n"; +} + +# Order of finding the above-Latin1 code point should not matter: both should +# assume Latin1/Unicode encoding +{ + use bytes; + print "not " if "\xDF\x{100}" =~ /\x{3af}\x{100}/; + print "ok 32\n"; + print "not " if "\x{100}\xDF" =~ /\x{100}\x{3af}/; + print "ok 33\n"; +} diff --git a/t/fallback.t b/t/fallback.t new file mode 100644 index 0000000..011c86d --- /dev/null +++ b/t/fallback.t @@ -0,0 +1,212 @@ +BEGIN { + if ($ENV{'PERL_CORE'}){ + chdir 't'; + unshift @INC, '../lib'; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + $| = 1; +} + +use strict; +#use Test::More qw(no_plan); +use Test::More tests => 58; +use Encode q(:all); + +my $uo = ''; +my $nf = ''; +my ($af, $aq, $ap, $ah, $ax, $uf, $uq, $up, $uh, $ux, $ac, $uc); +for my $i (0x20..0x7e){ + $uo .= chr($i); +} +$af = $aq = $ap = $ah = $ax = $ac = +$uf = $uq = $up = $uh = $ux = $uc = +$nf = $uo; + +my $residue = ''; +for my $i (0x80..0xff){ + $uo .= chr($i); + $residue .= chr($i); + $af .= '?'; + $uf .= "\x{FFFD}"; + $ap .= sprintf("\\x{%04x}", $i); + $up .= sprintf("\\x%02X", $i); + $ah .= sprintf("&#%d;", $i); + $uh .= sprintf("\\x%02X", $i); + $ax .= sprintf("&#x%x;", $i); + $ux .= sprintf("\\x%02X", $i); + $ac .= sprintf("", $i); + $uc .= sprintf("[%02X]", $i); +} + +my $ao = $uo; +utf8::upgrade($uo); + +my $ascii = find_encoding('ascii'); +my $latin1 = find_encoding('latin1'); +my $utf8 = find_encoding('utf8'); + +my $src = $uo; +my $dst = $ascii->encode($src, FB_DEFAULT); +is($dst, $af, "FB_DEFAULT ascii"); +is($src, $uo, "FB_DEFAULT residue ascii"); + +$src = $ao; +$dst = $utf8->decode($src, FB_DEFAULT); +is($dst, $uf, "FB_DEFAULT utf8"); +is($src, $ao, "FB_DEFAULT residue utf8"); + +$src = $uo; +eval{ $dst = $ascii->encode($src, FB_CROAK) }; +like($@, qr/does not map to ascii/o, "FB_CROAK ascii"); 
+is($src, $uo, "FB_CROAK residue ascii"); + +$src = $ao; +eval{ $dst = $utf8->decode($src, FB_CROAK) }; +like($@, qr/does not map to Unicode/o, "FB_CROAK utf8"); +is($src, $ao, "FB_CROAK residue utf8"); + +$src = $nf; +eval{ $dst = $ascii->encode($src, FB_CROAK) }; +is($@, '', "FB_CROAK on success ascii"); +is($src, '', "FB_CROAK on success residue ascii"); + +$src = $nf; +eval{ $dst = $utf8->decode($src, FB_CROAK) }; +is($@, '', "FB_CROAK on success utf8"); +is($src, '', "FB_CROAK on success residue utf8"); + +$src = $uo; +$dst = $ascii->encode($src, FB_QUIET); +is($dst, $aq, "FB_QUIET ascii"); +is($src, $residue, "FB_QUIET residue ascii"); + +$src = $ao; +$dst = $utf8->decode($src, FB_QUIET); +is($dst, $uq, "FB_QUIET utf8"); +is($src, $residue, "FB_QUIET residue utf8"); + +{ + my $message = ''; + local $SIG{__WARN__} = sub { $message = $_[0] }; + + $src = $uo; + $dst = $ascii->encode($src, FB_WARN); + is($dst, $aq, "FB_WARN ascii"); + is($src, $residue, "FB_WARN residue ascii"); + like($message, qr/does not map to ascii/o, "FB_WARN message ascii"); + + $message = ''; + $src = $ao; + $dst = $utf8->decode($src, FB_WARN); + is($dst, $uq, "FB_WARN utf8"); + is($src, $residue, "FB_WARN residue utf8"); + like($message, qr/does not map to Unicode/o, "FB_WARN message utf8"); + + $message = ''; + $src = $uo; + $dst = $ascii->encode($src, WARN_ON_ERR); + is($dst, $af, "WARN_ON_ERR ascii"); + is($src, '', "WARN_ON_ERR residue ascii"); + like($message, qr/does not map to ascii/o, "WARN_ON_ERR message ascii"); + + $message = ''; + $src = $ao; + $dst = $utf8->decode($src, WARN_ON_ERR); + is($dst, $uf, "WARN_ON_ERR utf8"); + is($src, '', "WARN_ON_ERR residue utf8"); + like($message, qr/does not map to Unicode/o, "WARN_ON_ERR message ascii"); +} + +$src = $uo; +$dst = $ascii->encode($src, FB_PERLQQ); +is($dst, $ap, "FB_PERLQQ encode"); +is($src, $uo, "FB_PERLQQ residue encode"); + +$src = $ao; +$dst = $ascii->decode($src, FB_PERLQQ); +is($dst, $up, "FB_PERLQQ decode"); +is($src, 
$ao, "FB_PERLQQ residue decode"); + +$src = $uo; +$dst = $ascii->encode($src, FB_HTMLCREF); +is($dst, $ah, "FB_HTMLCREF encode"); +is($src, $uo, "FB_HTMLCREF residue encode"); + +$src = $ao; +$dst = $ascii->decode($src, FB_HTMLCREF); +is($dst, $uh, "FB_HTMLCREF decode"); +is($src, $ao, "FB_HTMLCREF residue decode"); + +$src = $uo; +$dst = $ascii->encode($src, FB_XMLCREF); +is($dst, $ax, "FB_XMLCREF encode"); +is($src, $uo, "FB_XMLCREF residue encode"); + +$src = $ao; +$dst = $ascii->decode($src, FB_XMLCREF); +is($dst, $ux, "FB_XMLCREF decode"); +is($src, $ao, "FB_XMLCREF residue decode"); + +$src = $uo; +$dst = $ascii->encode($src, sub{ sprintf "", shift }); +is($dst, $ac, "coderef encode"); +is($src, $uo, "coderef residue encode"); + +$src = $ao; +$dst = $ascii->decode($src, sub{ sprintf "[%02X]", shift }); +is($dst, $uc, "coderef decode"); +is($src, $ao, "coderef residue decode"); + +$src = "\x{3000}"; +$dst = $ascii->encode($src, sub{ $_[0] }); +is $dst, 0x3000."", q{$ascii->encode($src, sub{ $_[0] } )}; +$dst = encode("ascii", "\x{3000}", sub{ $_[0] }); +is $dst, 0x3000."", q{encode("ascii", "\x{3000}", sub{ $_[0] })}; + +$src = pack "C*", 0xFF; +$dst = $ascii->decode($src, sub{ $_[0] }); +is $dst, 0xFF."", q{$ascii->encode($src, sub{ $_[0] } )}; +$dst = decode("ascii", (pack "C*", 0xFF), sub{ $_[0] }); +is $dst, 0xFF."", q{decode("ascii", (pack "C*", 0xFF), sub{ $_[0] })}; + + +$src = pack "C*", 0x80; +$dst = $utf8->decode($src, sub{ $_[0] }); +is $dst, 0x80."", q{$utf8->encode($src, sub{ $_[0] } )}; +$dst = decode("utf8", $src, sub{ $_[0] }); +is $dst, 0x80."", q{decode("utf8", (pack "C*", 0x80), sub{ $_[0] })}; + +$src = "\x{3000}"; +$dst = $latin1->encode($src, sub { "\N{U+FF}" }); +is $dst, "\x{ff}", q{$latin1->encode($src, sub { "\N{U+FF}" })}; +$dst = encode("latin1", $src, sub { "\N{U+FF}" }); +is $dst, "\x{ff}", q{encode("latin1", $src, sub { "\N{U+FF}" })}; + +$src = "\x{3000}"; +$dst = $latin1->encode($src, sub { utf8::upgrade(my $r = "\x{ff}"); $r 
}); +is $dst, "\x{ff}", q{$latin1->encode($src, sub { utf8::upgrade(my $r = "\x{ff}"); $r })}; +$dst = encode("latin1", $src, sub { utf8::upgrade(my $r = "\x{ff}"); $r }); +is $dst, "\x{ff}", q{encode("latin1", $src, sub { utf8::upgrade(my $r = "\x{ff}"); $r })}; + +$src = "\x{ff}"; +$dst = $utf8->decode($src, sub { chr($_[0]) }); +is $dst, "\x{ff}", q{$utf8->decode($src, sub { chr($_[0]) })}; +$dst = decode("utf8", $src, sub { chr($_[0]) }); +is $dst, "\x{ff}", q{decode("utf8", $src, sub { chr($_[0]) })}; + +{ + use charnames ':full'; + $src = "\x{ff}"; + $dst = $utf8->decode($src, sub { utf8::downgrade(my $r = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"); $r }); + is $dst, "\N{LATIN SMALL LETTER Y WITH DIAERESIS}", q{$utf8->decode($src, sub { utf8::downgrade(my $r = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"); $r })}; + $dst = decode("utf8", $src, sub { utf8::downgrade(my $r = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"); $r }); + is $dst, "\N{LATIN SMALL LETTER Y WITH DIAERESIS}", q{decode("utf8", $src, sub { utf8::downgrade(my $r = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"); $r })}; +} diff --git a/t/from_to.t b/t/from_to.t new file mode 100644 index 0000000..dd1d9ec --- /dev/null +++ b/t/from_to.t @@ -0,0 +1,12 @@ +# $Id: from_to.t,v 1.1 2006/01/15 15:06:36 dankogai Exp $ +use strict; +use Test::More tests => 3; +use Encode qw(encode from_to); + +my $foo = encode("utf-8", "\x{5abe}"); +from_to($foo, "utf-8" => "latin1", Encode::FB_HTMLCREF); +ok !Encode::is_utf8($foo); +is $foo, '媾'; + +my $bar = encode("latin-1", "\x{5abe}", Encode::FB_HTMLCREF); +is $bar, '媾'; diff --git a/t/gb2312.enc b/t/gb2312.enc new file mode 100644 index 0000000..141476f --- /dev/null +++ b/t/gb2312.enc @@ -0,0 +1,242 @@ +0x2120: �������������������������������������������������������������� +0x2140: �����¡áġšơǡȡɡʡˡ̡͡ΡϡСѡҡӡԡա֡סء١ڡۡܡݡޡ� +0x2160: ������������������������������������������� +0x2220: ������������������������������ +0x2240: �����¢âĢŢƢǢȢɢʢˢ̢͢΢ϢТѢҢӢԢբ֢עآ٢ڢۢܢݢޢ� +0x2260: ���� 
������������ ��������������������� +0x2320: �������������������������������������������������������������� +0x2340: �����£ãģţƣǣȣɣʣˣ̣ͣΣϣУѣңӣԣգ֣ףأ٣ڣۣܣݣޣ� +0x2360: ������������������������������������������� +0x2420: �������������������������������������������������������������� +0x2440: �����¤äĤŤƤǤȤɤʤˤ̤ͤΤϤФѤҤӤԤդ֤פؤ٤ڤۤܤݤޤ� +0x2460: ���������������������� +0x2520: �������������������������������������������������������������� +0x2540: �����¥åĥťƥǥȥɥʥ˥̥ͥΥϥХѥҥӥԥե֥ץإ٥ڥۥܥݥޥ� +0x2560: ��������������������������� +0x2620: ������������������������������������������������ +0x2640: ���¦æĦŦƦǦȦɦʦ˦̦ͦΦϦЦѦҦӦԦզ֦צ� +0x2720: �������������������������������������������������������������� +0x2740: ���� �ѧҧӧԧէ֧קا٧ڧۧܧݧާ� +0x2760: �������������������� +0x2820: ���������������������������������������������������� +0x2840: �ŨƨǨȨɨʨ˨̨ͨΨϨШѨҨӨԨը֨רب٨ڨۨܨݨި� +0x2860: ����������� +0x2920: �������������������������������������������������������� +0x2940: �����©éĩũƩǩȩɩʩ˩̩ͩΩϩЩѩҩөԩթ֩שة٩ک۩ܩݩީ� +0x2960: ������������������ +0x3020: �������������������������������������������������������������� +0x3040: �����°ðİŰưǰȰɰʰ˰̰ͰΰϰаѰҰӰ԰հְװذٰڰ۰ܰݰް� +0x3060: ������������������������������������������� +0x3120: �������������������������������������������������������������� +0x3140: �����±ñıűƱDZȱɱʱ˱̱ͱαϱбѱұӱԱձֱױرٱڱ۱ܱݱޱ� +0x3160: ������������������������������������������� +0x3220: �������������������������������������������������������������� +0x3240: �����²òIJŲƲDzȲɲʲ˲̲ͲβϲвѲҲӲԲղֲײزٲڲ۲ܲݲ޲� +0x3260: ������������������������������������������� +0x3320: �������������������������������������������������������������� +0x3340: �����³óijųƳdzȳɳʳ˳̳ͳγϳгѳҳӳԳճֳ׳سٳڳ۳ܳݳ޳� +0x3360: ������������������������������������������� +0x3420: �������������������������������������������������������������� +0x3440: �����´ôĴŴƴǴȴɴʴ˴̴ʹδϴдѴҴӴԴմִ״شٴڴ۴ܴݴ޴� +0x3460: ������������������������������������������� +0x3520: �������������������������������������������������������������� +0x3540: �����µõĵŵƵǵȵɵʵ˵̵͵εϵеѵҵӵԵյֵ׵صٵڵ۵ܵݵ޵� +0x3560: 
������������������������������������������� +0x3620: �������������������������������������������������������������� +0x3640: �����¶öĶŶƶǶȶɶʶ˶̶Ͷζ϶жѶҶӶԶնֶ׶ضٶڶ۶ܶݶ޶� +0x3660: ������������������������������������������� +0x3720: �������������������������������������������������������������� +0x3740: �����·÷ķŷƷǷȷɷʷ˷̷ͷηϷзѷҷӷԷշַ׷طٷڷ۷ܷݷ޷� +0x3760: ������������������������������������������� +0x3820: �������������������������������������������������������������� +0x3840: �����¸øĸŸƸǸȸɸʸ˸̸͸θϸиѸҸӸԸոָ׸ظٸڸ۸ܸݸ޸� +0x3860: ������������������������������������������� +0x3920: �������������������������������������������������������������� +0x3940: �����¹ùĹŹƹǹȹɹʹ˹̹͹ιϹйѹҹӹԹչֹ׹عٹڹ۹ܹݹ޹� +0x3960: ������������������������������������������� +0x3a20: �������������������������������������������������������������� +0x3a40: �����ºúĺźƺǺȺɺʺ˺̺ͺκϺкѺҺӺԺպֺ׺غٺںۺܺݺ޺� +0x3a60: ������������������������������������������� +0x3b20: �������������������������������������������������������������� +0x3b40: �����»ûĻŻƻǻȻɻʻ˻̻ͻλϻлѻһӻԻջֻ׻ػٻڻۻܻݻ޻� +0x3b60: ������������������������������������������� +0x3c20: �������������������������������������������������������������� +0x3c40: �����¼üļżƼǼȼɼʼ˼̼ͼμϼмѼҼӼԼռּ׼ؼټڼۼܼݼ޼� +0x3c60: ������������������������������������������� +0x3d20: �������������������������������������������������������������� +0x3d40: �����½ýĽŽƽǽȽɽʽ˽̽ͽνϽнѽҽӽԽսֽ׽ؽٽڽ۽ܽݽ޽� +0x3d60: ������������������������������������������� +0x3e20: �������������������������������������������������������������� +0x3e40: �����¾þľžƾǾȾɾʾ˾̾;ξϾоѾҾӾԾվ־׾ؾپھ۾ܾݾ޾� +0x3e60: ������������������������������������������� +0x3f20: �������������������������������������������������������������� +0x3f40: �����¿ÿĿſƿǿȿɿʿ˿̿ͿοϿпѿҿӿԿտֿ׿ؿٿڿۿܿݿ޿� +0x3f60: ������������������������������������������� +0x4020: �������������������������������������������������������������� +0x4040: ���������������������������������������������������������������� +0x4060: �������������������������������������������������������������� +0x4120: 
�������������������������������������������������������������� +0x4140: ���������������������������������������������������������������� +0x4160: �������������������������������������������������������������� +0x4220: ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ +0x4240: ���������������������������������������������������������������� +0x4260: �������������������������������������������������������������� +0x4320: áâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ +0x4340: ���������������������������������������������������������������� +0x4360: �������������������������������������������������������������� +0x4420: ġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿ +0x4440: ���������������������������������������������������������������� +0x4460: �������������������������������������������������������������� +0x4520: šŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſ +0x4540: ���������������������������������������������������������������� +0x4560: �������������������������������������������������������������� +0x4620: ơƢƣƤƥƦƧƨƩƪƫƬƭƮƯưƱƲƳƴƵƶƷƸƹƺƻƼƽƾƿ +0x4640: ���������������������������������������������������������������� +0x4660: �������������������������������������������������������������� +0x4720: ǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸǹǺǻǼǽǾǿ +0x4740: ���������������������������������������������������������������� +0x4760: �������������������������������������������������������������� +0x4820: ȡȢȣȤȥȦȧȨȩȪȫȬȭȮȯȰȱȲȳȴȵȶȷȸȹȺȻȼȽȾȿ +0x4840: ���������������������������������������������������������������� +0x4860: �������������������������������������������������������������� +0x4920: ɡɢɣɤɥɦɧɨɩɪɫɬɭɮɯɰɱɲɳɴɵɶɷɸɹɺɻɼɽɾɿ +0x4940: ���������������������������������������������������������������� +0x4960: �������������������������������������������������������������� +0x4a20: ʡʢʣʤʥʦʧʨʩʪʫʬʭʮʯʰʱʲʳʴʵʶʷʸʹʺʻʼʽʾʿ +0x4a40: ���������������������������������������������������������������� +0x4a60: �������������������������������������������������������������� +0x4b20: ˡˢˣˤ˥˦˧˨˩˪˫ˬ˭ˮ˯˰˱˲˳˴˵˶˷˸˹˺˻˼˽˾˿ +0x4b40: 
���������������������������������������������������������������� +0x4b60: �������������������������������������������������������������� +0x4c20: ̴̵̶̷̸̡̢̧̨̣̤̥̦̩̪̫̬̭̮̯̰̱̲̳̹̺̻̼̽̾̿ +0x4c40: ���������������������������������������������������������������� +0x4c60: �������������������������������������������������������������� +0x4d20: ͣͤͥͦͧͨͩͪͫͬͭͮͯ͢͡ͰͱͲͳʹ͵Ͷͷ͸͹ͺͻͼͽ;Ϳ +0x4d40: ���������������������������������������������������������������� +0x4d60: �������������������������������������������������������������� +0x4e20: Ρ΢ΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξο +0x4e40: ���������������������������������������������������������������� +0x4e60: �������������������������������������������������������������� +0x4f20: ϡϢϣϤϥϦϧϨϩϪϫϬϭϮϯϰϱϲϳϴϵ϶ϷϸϹϺϻϼϽϾϿ +0x4f40: ���������������������������������������������������������������� +0x4f60: �������������������������������������������������������������� +0x5020: СТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп +0x5040: ���������������������������������������������������������������� +0x5060: �������������������������������������������������������������� +0x5120: ѡѢѣѤѥѦѧѨѩѪѫѬѭѮѯѰѱѲѳѴѵѶѷѸѹѺѻѼѽѾѿ +0x5140: ���������������������������������������������������������������� +0x5160: �������������������������������������������������������������� +0x5220: ҡҢңҤҥҦҧҨҩҪҫҬҭҮүҰұҲҳҴҵҶҷҸҹҺһҼҽҾҿ +0x5240: ���������������������������������������������������������������� +0x5260: �������������������������������������������������������������� +0x5320: ӡӢӣӤӥӦӧӨөӪӫӬӭӮӯӰӱӲӳӴӵӶӷӸӹӺӻӼӽӾӿ +0x5340: ���������������������������������������������������������������� +0x5360: �������������������������������������������������������������� +0x5420: ԡԢԣԤԥԦԧԨԩԪԫԬԭԮԯ԰ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿ +0x5440: ���������������������������������������������������������������� +0x5460: �������������������������������������������������������������� +0x5520: աբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտ +0x5540: ���������������������������������������������������������������� +0x5560: 
�������������������������������������������������������������� +0x5620: ְֱֲֳִֵֶַָֹֺֻּֽ֢֣֤֥֦֧֪֭֮֡֨֩֫֬֯־ֿ +0x5640: ���������������������������������������������������������������� +0x5660: �������������������������������������������������������������� +0x5720: סעףפץצקרשת׫׬׭׮ׯװױײ׳״׵׶׷׸׹׺׻׼׽׾׿ +0x5740: ���������������������������������������������������������������� +0x5760: ���������������������������������������������������� +0x5820: ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿ +0x5840: ���������������������������������������������������������������� +0x5860: �������������������������������������������������������������� +0x5920: ١٢٣٤٥٦٧٨٩٪٫٬٭ٮٯٰٱٲٳٴٵٶٷٸٹٺٻټٽپٿ +0x5940: ���������������������������������������������������������������� +0x5960: �������������������������������������������������������������� +0x5a20: ڡڢڣڤڥڦڧڨکڪګڬڭڮگڰڱڲڳڴڵڶڷڸڹںڻڼڽھڿ +0x5a40: ���������������������������������������������������������������� +0x5a60: �������������������������������������������������������������� +0x5b20: ۣۡۢۤۥۦۧۨ۩۪ۭ۫۬ۮۯ۰۱۲۳۴۵۶۷۸۹ۺۻۼ۽۾ۿ +0x5b40: ���������������������������������������������������������������� +0x5b60: �������������������������������������������������������������� +0x5c20: ܡܢܣܤܥܦܧܨܩܪܫܬܭܮܯܱܴܷܸܹܻܼܾܰܲܳܵܶܺܽܿ +0x5c40: ���������������������������������������������������������������� +0x5c60: �������������������������������������������������������������� +0x5d20: ݡݢݣݤݥݦݧݨݩݪݫݬݭݮݯݰݱݲݳݴݵݶݷݸݹݺݻݼݽݾݿ +0x5d40: ���������������������������������������������������������������� +0x5d60: �������������������������������������������������������������� +0x5e20: ޡޢޣޤޥަާިީުޫެޭޮޯްޱ޲޳޴޵޶޷޸޹޺޻޼޽޾޿ +0x5e40: ���������������������������������������������������������������� +0x5e60: �������������������������������������������������������������� +0x5f20: ߡߢߣߤߥߦߧߨߩߪ߲߫߬߭߮߯߰߱߳ߴߵ߶߷߸߹ߺ߻߼߽߾߿ +0x5f40: ���������������������������������������������������������������� +0x5f60: �������������������������������������������������������������� +0x6020: ������������������������������� +0x6040: 
���������������������������������������������������������������� +0x6060: �������������������������������������������������������������� +0x6120: ������������������������������� +0x6140: ���������������������������������������������������������������� +0x6160: �������������������������������������������������������������� +0x6220: ������������������������������� +0x6240: ���������������������������������������������������������������� +0x6260: �������������������������������������������������������������� +0x6320: ������������������������������� +0x6340: ���������������������������������������������������������������� +0x6360: �������������������������������������������������������������� +0x6420: ������������������������������� +0x6440: ���������������������������������������������������������������� +0x6460: �������������������������������������������������������������� +0x6520: ������������������������������� +0x6540: ���������������������������������������������������������������� +0x6560: �������������������������������������������������������������� +0x6620: ������������������������������� +0x6640: ���������������������������������������������������������������� +0x6660: �������������������������������������������������������������� +0x6720: ������������������������������� +0x6740: ���������������������������������������������������������������� +0x6760: �������������������������������������������������������������� +0x6820: ������������������������������� +0x6840: ���������������������������������������������������������������� +0x6860: �������������������������������������������������������������� +0x6920: ������������������������������� +0x6940: ���������������������������������������������������������������� +0x6960: �������������������������������������������������������������� +0x6a20: ������������������������������� +0x6a40: ���������������������������������������������������������������� +0x6a60: 
�������������������������������������������������������������� +0x6b20: ������������������������������� +0x6b40: ���������������������������������������������������������������� +0x6b60: �������������������������������������������������������������� +0x6c20: ������������������������������� +0x6c40: ���������������������������������������������������������������� +0x6c60: �������������������������������������������������������������� +0x6d20: �������������������������������������������������������������� +0x6d40: ���������������������������������������������������������������� +0x6d60: �������������������������������������������������������������� +0x6e20: ������������������������������� +0x6e40: ���������������������������������������������������������������� +0x6e60: �������������������������������������������������������������� +0x6f20: ������������������������������� +0x6f40: ���������������������������������������������������������������� +0x6f60: �������������������������������������������������������������� +0x7020: ������������������������������� +0x7040: ���������������������������������������������������������������� +0x7060: �������������������������������������������������������������� +0x7120: ������������������������������� +0x7140: ���������������������������������������������������������������� +0x7160: �������������������������������������������������������������� +0x7220: ������������������������������� +0x7240: ���������������������������������������������������������������� +0x7260: �������������������������������������������������������������� +0x7320: ������������������������������� +0x7340: ���������������������������������������������������������������� +0x7360: �������������������������������������������������������������� +0x7420: �������������������������������������������������������������� +0x7440: ���������������������������������������������������������������� +0x7460: �������������������������������������������������������������� 
+0x7520: �������������������������������������������������������������� +0x7540: ���������������������������������������������������������������� +0x7560: �������������������������������������������������������������� +0x7620: �������������������������������������������������������������� +0x7640: ���������������������������������������������������������������� +0x7660: �������������������������������������������������������������� +0x7720: �������������������������������������������������������������� +0x7740: ���������������������������������������������������������������� +0x7760: �������������������������������������������������������������� diff --git a/t/gb2312.utf b/t/gb2312.utf new file mode 100644 index 0000000..8c50b31 --- /dev/null +++ b/t/gb2312.utf @@ -0,0 +1,242 @@ +0x2120:  、。・ˉˇ¨〃々―~‖…‘’“”〔〕〈〉《》「」『』〖〗【】 +0x2140: ±×÷∶∧∨∑∏∪∩∈∷√⊥∥∠⌒⊙∫∮≡≌≈∽∝≠≮≯≤≥∞∵ +0x2160: ∴♂♀°′″℃$¤¢£‰§№☆★○●◎◇◆□■△▲※→←↑↓〓 +0x2220: ⒈⒉⒊⒋⒌⒍⒎⒏⒐⒑⒒⒓⒔⒕⒖ +0x2240: ⒗⒘⒙⒚⒛⑴⑵⑶⑷⑸⑹⑺⑻⑼⑽⑾⑿⒀⒁⒂⒃⒄⒅⒆⒇①②③④⑤⑥⑦ +0x2260: ⑧⑨⑩ ㈠㈡㈢㈣㈤㈥㈦㈧㈨㈩ ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ +0x2320: !"#¥%&'()*+,-./0123456789:;<=>? 
+0x2340: @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +0x2360: `abcdefghijklmnopqrstuvwxyz{|} ̄ +0x2420: ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞた +0x2440: だちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみ +0x2460: むめもゃやゅゆょよらりるれろゎわゐゑをん +0x2520: ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタ +0x2540: ダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミ +0x2560: ムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶ +0x2620: ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ +0x2640: αβγδεζηθικλμνξοπρστυφχψω +0x2720: АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭ +0x2740: ЮЯ абвгдеёжзийклмн +0x2760: опрстуфхцчшщъыьэюя +0x2820: āáǎàēéěèīíǐìōóǒòūúǔùǖǘǚǜüê +0x2840: ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟ +0x2860: ㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩ +0x2920: ─━│┃┄┅┆┇┈┉┊┋┌┍┎┏┐┑┒┓└┕┖┗┘┙┚┛ +0x2940: ├┝┞┟┠┡┢┣┤┥┦┧┨┩┪┫┬┭┮┯┰┱┲┳┴┵┶┷┸┹┺┻ +0x2960: ┼┽┾┿╀╁╂╃╄╅╆╇╈╉╊╋ +0x3020: 啊阿埃挨哎唉哀皑癌蔼矮艾碍爱隘鞍氨安俺按暗岸胺案肮昂盎凹敖熬翱 +0x3040: 袄傲奥懊澳芭捌扒叭吧笆八疤巴拔跋靶把耙坝霸罢爸白柏百摆佰败拜稗斑 +0x3060: 班搬扳般颁板版扮拌伴瓣半办绊邦帮梆榜膀绑棒磅蚌镑傍谤苞胞包褒剥 +0x3120: 薄雹保堡饱宝抱报暴豹鲍爆杯碑悲卑北辈背贝钡倍狈备惫焙被奔苯本笨 +0x3140: 崩绷甭泵蹦迸逼鼻比鄙笔彼碧蓖蔽毕毙毖币庇痹闭敝弊必辟壁臂避陛鞭边 +0x3160: 编贬扁便变卞辨辩辫遍标彪膘表鳖憋别瘪彬斌濒滨宾摈兵冰柄丙秉饼炳 +0x3220: 病并玻菠播拨钵波博勃搏铂箔伯帛舶脖膊渤泊驳捕卜哺补埠不布步簿部 +0x3240: 怖擦猜裁材才财睬踩采彩菜蔡餐参蚕残惭惨灿苍舱仓沧藏操糙槽曹草厕策 +0x3260: 侧册测层蹭插叉茬茶查碴搽察岔差诧拆柴豺搀掺蝉馋谗缠铲产阐颤昌猖 +0x3320: 场尝常长偿肠厂敞畅唱倡超抄钞朝嘲潮巢吵炒车扯撤掣彻澈郴臣辰尘晨 +0x3340: 忱沉陈趁衬撑称城橙成呈乘程惩澄诚承逞骋秤吃痴持匙池迟弛驰耻齿侈尺 +0x3360: 赤翅斥炽充冲虫崇宠抽酬畴踌稠愁筹仇绸瞅丑臭初出橱厨躇锄雏滁除楚 +0x3420: 础储矗搐触处揣川穿椽传船喘串疮窗幢床闯创吹炊捶锤垂春椿醇唇淳纯 +0x3440: 蠢戳绰疵茨磁雌辞慈瓷词此刺赐次聪葱囱匆从丛凑粗醋簇促蹿篡窜摧崔催 +0x3460: 脆瘁粹淬翠村存寸磋撮搓措挫错搭达答瘩打大呆歹傣戴带殆代贷袋待逮 +0x3520: 怠耽担丹单郸掸胆旦氮但惮淡诞弹蛋当挡党荡档刀捣蹈倒岛祷导到稻悼 +0x3540: 道盗德得的蹬灯登等瞪凳邓堤低滴迪敌笛狄涤翟嫡抵底地蒂第帝弟递缔颠 +0x3560: 掂滇碘点典靛垫电佃甸店惦奠淀殿碉叼雕凋刁掉吊钓调跌爹碟蝶迭谍叠 +0x3620: 丁盯叮钉顶鼎锭定订丢东冬董懂动栋侗恫冻洞兜抖斗陡豆逗痘都督毒犊 +0x3640: 独读堵睹赌杜镀肚度渡妒端短锻段断缎堆兑队对墩吨蹲敦顿囤钝盾遁掇哆 +0x3660: 多夺垛躲朵跺舵剁惰堕蛾峨鹅俄额讹娥恶厄扼遏鄂饿恩而儿耳尔饵洱二 +0x3720: 贰发罚筏伐乏阀法珐藩帆番翻樊矾钒繁凡烦反返范贩犯饭泛坊芳方肪房 +0x3740: 防妨仿访纺放菲非啡飞肥匪诽吠肺废沸费芬酚吩氛分纷坟焚汾粉奋份忿愤 +0x3760: 粪丰封枫蜂峰锋风疯烽逢冯缝讽奉凤佛否夫敷肤孵扶拂辐幅氟符伏俘服 +0x3820: 浮涪福袱弗甫抚辅俯釜斧脯腑府腐赴副覆赋复傅付阜父腹负富讣附妇缚 +0x3840: 咐噶嘎该改概钙盖溉干甘杆柑竿肝赶感秆敢赣冈刚钢缸肛纲岗港杠篙皋高 +0x3860: 膏羔糕搞镐稿告哥歌搁戈鸽胳疙割革葛格蛤阁隔铬个各给根跟耕更庚羹 +0x3920: 埂耿梗工攻功恭龚供躬公宫弓巩汞拱贡共钩勾沟苟狗垢构购够辜菇咕箍 +0x3940: 估沽孤姑鼓古蛊骨谷股故顾固雇刮瓜剐寡挂褂乖拐怪棺关官冠观管馆罐惯 +0x3960: 灌贯光广逛瑰规圭硅归龟闺轨鬼诡癸桂柜跪贵刽辊滚棍锅郭国果裹过哈 +0x3a20: 骸孩海氦亥害骇酣憨邯韩含涵寒函喊罕翰撼捍旱憾悍焊汗汉夯杭航壕嚎 +0x3a40: 豪毫郝好耗号浩呵喝荷菏核禾和何合盒貉阂河涸赫褐鹤贺嘿黑痕很狠恨哼 
+0x3a60: 亨横衡恒轰哄烘虹鸿洪宏弘红喉侯猴吼厚候后呼乎忽瑚壶葫胡蝴狐糊湖 +0x3b20: 弧虎唬护互沪户花哗华猾滑画划化话槐徊怀淮坏欢环桓还缓换患唤痪豢 +0x3b40: 焕涣宦幻荒慌黄磺蝗簧皇凰惶煌晃幌恍谎灰挥辉徽恢蛔回毁悔慧卉惠晦贿 +0x3b60: 秽会烩汇讳诲绘荤昏婚魂浑混豁活伙火获或惑霍货祸击圾基机畸稽积箕 +0x3c20: 肌饥迹激讥鸡姬绩缉吉极棘辑籍集及急疾汲即嫉级挤几脊己蓟技冀季伎 +0x3c40: 祭剂悸济寄寂计记既忌际妓继纪嘉枷夹佳家加荚颊贾甲钾假稼价架驾嫁歼 +0x3c60: 监坚尖笺间煎兼肩艰奸缄茧检柬碱硷拣捡简俭剪减荐槛鉴践贱见键箭件 +0x3d20: 健舰剑饯渐溅涧建僵姜将浆江疆蒋桨奖讲匠酱降蕉椒礁焦胶交郊浇骄娇 +0x3d40: 嚼搅铰矫侥脚狡角饺缴绞剿教酵轿较叫窖揭接皆秸街阶截劫节桔杰捷睫竭 +0x3d60: 洁结解姐戒藉芥界借介疥诫届巾筋斤金今津襟紧锦仅谨进靳晋禁近烬浸 +0x3e20: 尽劲荆兢茎睛晶鲸京惊精粳经井警景颈静境敬镜径痉靖竟竞净炯窘揪究 +0x3e40: 纠玖韭久灸九酒厩救旧臼舅咎就疚鞠拘狙疽居驹菊局咀矩举沮聚拒据巨具 +0x3e60: 距踞锯俱句惧炬剧捐鹃娟倦眷卷绢撅攫抉掘倔爵觉决诀绝均菌钧军君峻 +0x3f20: 俊竣浚郡骏喀咖卡咯开揩楷凯慨刊堪勘坎砍看康慷糠扛抗亢炕考拷烤靠 +0x3f40: 坷苛柯棵磕颗科壳咳可渴克刻客课肯啃垦恳坑吭空恐孔控抠口扣寇枯哭窟 +0x3f60: 苦酷库裤夸垮挎跨胯块筷侩快宽款匡筐狂框矿眶旷况亏盔岿窥葵奎魁傀 +0x4020: 馈愧溃坤昆捆困括扩廓阔垃拉喇蜡腊辣啦莱来赖蓝婪栏拦篮阑兰澜谰揽 +0x4040: 览懒缆烂滥琅榔狼廊郎朗浪捞劳牢老佬姥酪烙涝勒乐雷镭蕾磊累儡垒擂肋 +0x4060: 类泪棱楞冷厘梨犁黎篱狸离漓理李里鲤礼莉荔吏栗丽厉励砾历利傈例俐 +0x4120: 痢立粒沥隶力璃哩俩联莲连镰廉怜涟帘敛脸链恋炼练粮凉梁粱良两辆量 +0x4140: 晾亮谅撩聊僚疗燎寥辽潦了撂镣廖料列裂烈劣猎琳林磷霖临邻鳞淋凛赁吝 +0x4160: 拎玲菱零龄铃伶羚凌灵陵岭领另令溜琉榴硫馏留刘瘤流柳六龙聋咙笼窿 +0x4220: 隆垄拢陇楼娄搂篓漏陋芦卢颅庐炉掳卤虏鲁麓碌露路赂鹿潞禄录陆戮驴 +0x4240: 吕铝侣旅履屡缕虑氯律率滤绿峦挛孪滦卵乱掠略抡轮伦仑沦纶论萝螺罗逻 +0x4260: 锣箩骡裸落洛骆络妈麻玛码蚂马骂嘛吗埋买麦卖迈脉瞒馒蛮满蔓曼慢漫 +0x4320: 谩芒茫盲氓忙莽猫茅锚毛矛铆卯茂冒帽貌贸么玫枚梅酶霉煤没眉媒镁每 +0x4340: 美昧寐妹媚门闷们萌蒙檬盟锰猛梦孟眯醚靡糜迷谜弥米秘觅泌蜜密幂棉眠 +0x4360: 绵冕免勉娩缅面苗描瞄藐秒渺庙妙蔑灭民抿皿敏悯闽明螟鸣铭名命谬摸 +0x4420: 摹蘑模膜磨摩魔抹末莫墨默沫漠寞陌谋牟某拇牡亩姆母墓暮幕募慕木目 +0x4440: 睦牧穆拿哪呐钠那娜纳氖乃奶耐奈南男难囊挠脑恼闹淖呢馁内嫩能妮霓倪 +0x4460: 泥尼拟你匿腻逆溺蔫拈年碾撵捻念娘酿鸟尿捏聂孽啮镊镍涅您柠狞凝宁 +0x4520: 拧泞牛扭钮纽脓浓农弄奴努怒女暖虐疟挪懦糯诺哦欧鸥殴藕呕偶沤啪趴 +0x4540: 爬帕怕琶拍排牌徘湃派攀潘盘磐盼畔判叛乓庞旁耪胖抛咆刨炮袍跑泡呸胚 +0x4560: 培裴赔陪配佩沛喷盆砰抨烹澎彭蓬棚硼篷膨朋鹏捧碰坯砒霹批披劈琵毗 +0x4620: 啤脾疲皮匹痞僻屁譬篇偏片骗飘漂瓢票撇瞥拼频贫品聘乒坪苹萍平凭瓶 +0x4640: 评屏坡泼颇婆破魄迫粕剖扑铺仆莆葡菩蒲埔朴圃普浦谱曝瀑期欺栖戚妻七 +0x4660: 凄漆柒沏其棋奇歧畦崎脐齐旗祈祁骑起岂乞企启契砌器气迄弃汽泣讫掐 +0x4720: 恰洽牵扦钎铅千迁签仟谦乾黔钱钳前潜遣浅谴堑嵌欠歉枪呛腔羌墙蔷强 +0x4740: 抢橇锹敲悄桥瞧乔侨巧鞘撬翘峭俏窍切茄且怯窃钦侵亲秦琴勤芹擒禽寝沁 +0x4760: 青轻氢倾卿清擎晴氰情顷请庆琼穷秋丘邱球求囚酋泅趋区蛆曲躯屈驱渠 +0x4820: 取娶龋趣去圈颧权醛泉全痊拳犬券劝缺炔瘸却鹊榷确雀裙群然燃冉染瓤 +0x4840: 壤攘嚷让饶扰绕惹热壬仁人忍韧任认刃妊纫扔仍日戎茸蓉荣融熔溶容绒冗 +0x4860: 揉柔肉茹蠕儒孺如辱乳汝入褥软阮蕊瑞锐闰润若弱撒洒萨腮鳃塞赛三叁 +0x4920: 伞散桑嗓丧搔骚扫嫂瑟色涩森僧莎砂杀刹沙纱傻啥煞筛晒珊苫杉山删煽 +0x4940: 衫闪陕擅赡膳善汕扇缮墒伤商赏晌上尚裳梢捎稍烧芍勺韶少哨邵绍奢赊蛇 +0x4960: 舌舍赦摄射慑涉社设砷申呻伸身深娠绅神沈审婶甚肾慎渗声生甥牲升绳 +0x4a20: 省盛剩胜圣师失狮施湿诗尸虱十石拾时什食蚀实识史矢使屎驶始式示士 +0x4a40: 世柿事拭誓逝势是嗜噬适仕侍释饰氏市恃室视试收手首守寿授售受瘦兽蔬 +0x4a60: 
枢梳殊抒输叔舒淑疏书赎孰熟薯暑曙署蜀黍鼠属术述树束戍竖墅庶数漱 +0x4b20: 恕刷耍摔衰甩帅栓拴霜双爽谁水睡税吮瞬顺舜说硕朔烁斯撕嘶思私司丝 +0x4b40: 死肆寺嗣四伺似饲巳松耸怂颂送宋讼诵搜艘擞嗽苏酥俗素速粟僳塑溯宿诉 +0x4b60: 肃酸蒜算虽隋随绥髓碎岁穗遂隧祟孙损笋蓑梭唆缩琐索锁所塌他它她塔 +0x4c20: 獭挞蹋踏胎苔抬台泰酞太态汰坍摊贪瘫滩坛檀痰潭谭谈坦毯袒碳探叹炭 +0x4c40: 汤塘搪堂棠膛唐糖倘躺淌趟烫掏涛滔绦萄桃逃淘陶讨套特藤腾疼誊梯剔踢 +0x4c60: 锑提题蹄啼体替嚏惕涕剃屉天添填田甜恬舔腆挑条迢眺跳贴铁帖厅听烃 +0x4d20: 汀廷停亭庭挺艇通桐酮瞳同铜彤童桶捅筒统痛偷投头透凸秃突图徒途涂 +0x4d40: 屠土吐兔湍团推颓腿蜕褪退吞屯臀拖托脱鸵陀驮驼椭妥拓唾挖哇蛙洼娃瓦 +0x4d60: 袜歪外豌弯湾玩顽丸烷完碗挽晚皖惋宛婉万腕汪王亡枉网往旺望忘妄威 +0x4e20: 巍微危韦违桅围唯惟为潍维苇萎委伟伪尾纬未蔚味畏胃喂魏位渭谓尉慰 +0x4e40: 卫瘟温蚊文闻纹吻稳紊问嗡翁瓮挝蜗涡窝我斡卧握沃巫呜钨乌污诬屋无芜 +0x4e60: 梧吾吴毋武五捂午舞伍侮坞戊雾晤物勿务悟误昔熙析西硒矽晰嘻吸锡牺 +0x4f20: 稀息希悉膝夕惜熄烯溪汐犀檄袭席习媳喜铣洗系隙戏细瞎虾匣霞辖暇峡 +0x4f40: 侠狭下厦夏吓掀锨先仙鲜纤咸贤衔舷闲涎弦嫌显险现献县腺馅羡宪陷限线 +0x4f60: 相厢镶香箱襄湘乡翔祥详想响享项巷橡像向象萧硝霄削哮嚣销消宵淆晓 +0x5020: 小孝校肖啸笑效楔些歇蝎鞋协挟携邪斜胁谐写械卸蟹懈泄泻谢屑薪芯锌 +0x5040: 欣辛新忻心信衅星腥猩惺兴刑型形邢行醒幸杏性姓兄凶胸匈汹雄熊休修羞 +0x5060: 朽嗅锈秀袖绣墟戌需虚嘘须徐许蓄酗叙旭序畜恤絮婿绪续轩喧宣悬旋玄 +0x5120: 选癣眩绚靴薛学穴雪血勋熏循旬询寻驯巡殉汛训讯逊迅压押鸦鸭呀丫芽 +0x5140: 牙蚜崖衙涯雅哑亚讶焉咽阉烟淹盐严研蜒岩延言颜阎炎沿奄掩眼衍演艳堰 +0x5160: 燕厌砚雁唁彦焰宴谚验殃央鸯秧杨扬佯疡羊洋阳氧仰痒养样漾邀腰妖瑶 +0x5220: 摇尧遥窑谣姚咬舀药要耀椰噎耶爷野冶也页掖业叶曳腋夜液一壹医揖铱 +0x5240: 依伊衣颐夷遗移仪胰疑沂宜姨彝椅蚁倚已乙矣以艺抑易邑屹亿役臆逸肄疫 +0x5260: 亦裔意毅忆义益溢诣议谊译异翼翌绎茵荫因殷音阴姻吟银淫寅饮尹引隐 +0x5320: 印英樱婴鹰应缨莹萤营荧蝇迎赢盈影颖硬映哟拥佣臃痈庸雍踊蛹咏泳涌 +0x5340: 永恿勇用幽优悠忧尤由邮铀犹油游酉有友右佑釉诱又幼迂淤于盂榆虞愚舆 +0x5360: 余俞逾鱼愉渝渔隅予娱雨与屿禹宇语羽玉域芋郁吁遇喻峪御愈欲狱育誉 +0x5420: 浴寓裕预豫驭鸳渊冤元垣袁原援辕园员圆猿源缘远苑愿怨院曰约越跃钥 +0x5440: 岳粤月悦阅耘云郧匀陨允运蕴酝晕韵孕匝砸杂栽哉灾宰载再在咱攒暂赞赃 +0x5460: 脏葬遭糟凿藻枣早澡蚤躁噪造皂灶燥责择则泽贼怎增憎曾赠扎喳渣札轧 +0x5520: 铡闸眨栅榨咋乍炸诈摘斋宅窄债寨瞻毡詹粘沾盏斩辗崭展蘸栈占战站湛 +0x5540: 绽樟章彰漳张掌涨杖丈帐账仗胀瘴障招昭找沼赵照罩兆肇召遮折哲蛰辙者 +0x5560: 锗蔗这浙珍斟真甄砧臻贞针侦枕疹诊震振镇阵蒸挣睁征狰争怔整拯正政 +0x5620: 帧症郑证芝枝支吱蜘知肢脂汁之织职直植殖执值侄址指止趾只旨纸志挚 +0x5640: 掷至致置帜峙制智秩稚质炙痔滞治窒中盅忠钟衷终种肿重仲众舟周州洲诌 +0x5660: 粥轴肘帚咒皱宙昼骤珠株蛛朱猪诸诛逐竹烛煮拄瞩嘱主著柱助蛀贮铸筑 +0x5720: 住注祝驻抓爪拽专砖转撰赚篆桩庄装妆撞壮状椎锥追赘坠缀谆准捉拙卓 +0x5740: 桌琢茁酌啄着灼浊兹咨资姿滋淄孜紫仔籽滓子自渍字鬃棕踪宗综总纵邹走 +0x5760: 奏揍租足卒族祖诅阻组钻纂嘴醉最罪尊遵昨左佐柞做作坐座 +0x5820: 亍丌兀丐廿卅丕亘丞鬲孬噩丨禺丿匕乇夭爻卮氐囟胤馗毓睾鼗丶亟鼐乜 +0x5840: 乩亓芈孛啬嘏仄厍厝厣厥厮靥赝匚叵匦匮匾赜卦卣刂刈刎刭刳刿剀剌剞剡 +0x5860: 剜蒯剽劂劁劐劓冂罔亻仃仉仂仨仡仫仞伛仳伢佤仵伥伧伉伫佞佧攸佚佝 +0x5920: 佟佗伲伽佶佴侑侉侃侏佾佻侪佼侬侔俦俨俪俅俚俣俜俑俟俸倩偌俳倬倏 +0x5940: 倮倭俾倜倌倥倨偾偃偕偈偎偬偻傥傧傩傺僖儆僭僬僦僮儇儋仝氽佘佥俎龠 +0x5960: 汆籴兮巽黉馘冁夔勹匍訇匐凫夙兕亠兖亳衮袤亵脔裒禀嬴蠃羸冫冱冽冼 +0x5a20: 凇冖冢冥讠讦讧讪讴讵讷诂诃诋诏诎诒诓诔诖诘诙诜诟诠诤诨诩诮诰诳 +0x5a40: 诶诹诼诿谀谂谄谇谌谏谑谒谔谕谖谙谛谘谝谟谠谡谥谧谪谫谮谯谲谳谵谶 +0x5a60: 
卩卺阝阢阡阱阪阽阼陂陉陔陟陧陬陲陴隈隍隗隰邗邛邝邙邬邡邴邳邶邺 +0x5b20: 邸邰郏郅邾郐郄郇郓郦郢郜郗郛郫郯郾鄄鄢鄞鄣鄱鄯鄹酃酆刍奂劢劬劭 +0x5b40: 劾哿勐勖勰叟燮矍廴凵凼鬯厶弁畚巯坌垩垡塾墼壅壑圩圬圪圳圹圮圯坜圻 +0x5b60: 坂坩垅坫垆坼坻坨坭坶坳垭垤垌垲埏垧垴垓垠埕埘埚埙埒垸埴埯埸埤埝 +0x5c20: 堋堍埽埭堀堞堙塄堠塥塬墁墉墚墀馨鼙懿艹艽艿芏芊芨芄芎芑芗芙芫芸 +0x5c40: 芾芰苈苊苣芘芷芮苋苌苁芩芴芡芪芟苄苎芤苡茉苷苤茏茇苜苴苒苘茌苻苓 +0x5c60: 茑茚茆茔茕苠苕茜荑荛荜茈莒茼茴茱莛荞茯荏荇荃荟荀茗荠茭茺茳荦荥 +0x5d20: 荨茛荩荬荪荭荮莰荸莳莴莠莪莓莜莅荼莶莩荽莸荻莘莞莨莺莼菁萁菥菘 +0x5d40: 堇萘萋菝菽菖萜萸萑萆菔菟萏萃菸菹菪菅菀萦菰菡葜葑葚葙葳蒇蒈葺蒉葸 +0x5d60: 萼葆葩葶蒌蒎萱葭蓁蓍蓐蓦蒽蓓蓊蒿蒺蓠蒡蒹蒴蒗蓥蓣蔌甍蔸蓰蔹蔟蔺 +0x5e20: 蕖蔻蓿蓼蕙蕈蕨蕤蕞蕺瞢蕃蕲蕻薤薨薇薏蕹薮薜薅薹薷薰藓藁藜藿蘧蘅 +0x5e40: 蘩蘖蘼廾弈夼奁耷奕奚奘匏尢尥尬尴扌扪抟抻拊拚拗拮挢拶挹捋捃掭揶捱 +0x5e60: 捺掎掴捭掬掊捩掮掼揲揸揠揿揄揞揎摒揆掾摅摁搋搛搠搌搦搡摞撄摭撖 +0x5f20: 摺撷撸撙撺擀擐擗擤擢攉攥攮弋忒甙弑卟叱叽叩叨叻吒吖吆呋呒呓呔呖 +0x5f40: 呃吡呗呙吣吲咂咔呷呱呤咚咛咄呶呦咝哐咭哂咴哒咧咦哓哔呲咣哕咻咿哌 +0x5f60: 哙哚哜咩咪咤哝哏哞唛哧唠哽唔哳唢唣唏唑唧唪啧喏喵啉啭啁啕唿啐唼 +0x6020: 唷啖啵啶啷唳唰啜喋嗒喃喱喹喈喁喟啾嗖喑啻嗟喽喾喔喙嗪嗷嗉嘟嗑嗫 +0x6040: 嗬嗔嗦嗝嗄嗯嗥嗲嗳嗌嗍嗨嗵嗤辔嘞嘈嘌嘁嘤嘣嗾嘀嘧嘭噘嘹噗嘬噍噢噙 +0x6060: 噜噌噔嚆噤噱噫噻噼嚅嚓嚯囔囗囝囡囵囫囹囿圄圊圉圜帏帙帔帑帱帻帼 +0x6120: 帷幄幔幛幞幡岌屺岍岐岖岈岘岙岑岚岜岵岢岽岬岫岱岣峁岷峄峒峤峋峥 +0x6140: 崂崃崧崦崮崤崞崆崛嵘崾崴崽嵬嵛嵯嵝嵫嵋嵊嵩嵴嶂嶙嶝豳嶷巅彳彷徂徇 +0x6160: 徉後徕徙徜徨徭徵徼衢彡犭犰犴犷犸狃狁狎狍狒狨狯狩狲狴狷猁狳猃狺 +0x6220: 狻猗猓猡猊猞猝猕猢猹猥猬猸猱獐獍獗獠獬獯獾舛夥飧夤夂饣饧饨饩饪 +0x6240: 饫饬饴饷饽馀馄馇馊馍馐馑馓馔馕庀庑庋庖庥庠庹庵庾庳赓廒廑廛廨廪膺 +0x6260: 忄忉忖忏怃忮怄忡忤忾怅怆忪忭忸怙怵怦怛怏怍怩怫怊怿怡恸恹恻恺恂 +0x6320: 恪恽悖悚悭悝悃悒悌悛惬悻悱惝惘惆惚悴愠愦愕愣惴愀愎愫慊慵憬憔憧 +0x6340: 憷懔懵忝隳闩闫闱闳闵闶闼闾阃阄阆阈阊阋阌阍阏阒阕阖阗阙阚丬爿戕氵 +0x6360: 汔汜汊沣沅沐沔沌汨汩汴汶沆沩泐泔沭泷泸泱泗沲泠泖泺泫泮沱泓泯泾 +0x6420: 洹洧洌浃浈洇洄洙洎洫浍洮洵洚浏浒浔洳涑浯涞涠浞涓涔浜浠浼浣渚淇 +0x6440: 淅淞渎涿淠渑淦淝淙渖涫渌涮渫湮湎湫溲湟溆湓湔渲渥湄滟溱溘滠漭滢溥 +0x6460: 溧溽溻溷滗溴滏溏滂溟潢潆潇漤漕滹漯漶潋潴漪漉漩澉澍澌潸潲潼潺濑 +0x6520: 濉澧澹澶濂濡濮濞濠濯瀚瀣瀛瀹瀵灏灞宀宄宕宓宥宸甯骞搴寤寮褰寰蹇 +0x6540: 謇辶迓迕迥迮迤迩迦迳迨逅逄逋逦逑逍逖逡逵逶逭逯遄遑遒遐遨遘遢遛暹 +0x6560: 遴遽邂邈邃邋彐彗彖彘尻咫屐屙孱屣屦羼弪弩弭艴弼鬻屮妁妃妍妩妪妣 +0x6620: 妗姊妫妞妤姒妲妯姗妾娅娆姝娈姣姘姹娌娉娲娴娑娣娓婀婧婊婕娼婢婵 +0x6640: 胬媪媛婷婺媾嫫媲嫒嫔媸嫠嫣嫱嫖嫦嫘嫜嬉嬗嬖嬲嬷孀尕尜孚孥孳孑孓孢 +0x6660: 驵驷驸驺驿驽骀骁骅骈骊骐骒骓骖骘骛骜骝骟骠骢骣骥骧纟纡纣纥纨纩 +0x6720: 纭纰纾绀绁绂绉绋绌绐绔绗绛绠绡绨绫绮绯绱绲缍绶绺绻绾缁缂缃缇缈 +0x6740: 缋缌缏缑缒缗缙缜缛缟缡缢缣缤缥缦缧缪缫缬缭缯缰缱缲缳缵幺畿巛甾邕 +0x6760: 玎玑玮玢玟珏珂珑玷玳珀珉珈珥珙顼琊珩珧珞玺珲琏琪瑛琦琥琨琰琮琬 +0x6820: 琛琚瑁瑜瑗瑕瑙瑷瑭瑾璜璎璀璁璇璋璞璨璩璐璧瓒璺韪韫韬杌杓杞杈杩 +0x6840: 枥枇杪杳枘枧杵枨枞枭枋杷杼柰栉柘栊柩枰栌柙枵柚枳柝栀柃枸柢栎柁柽 +0x6860: 栲栳桠桡桎桢桄桤梃栝桕桦桁桧桀栾桊桉栩梵梏桴桷梓桫棂楮棼椟椠棹 +0x6920: 椤棰椋椁楗棣椐楱椹楠楂楝榄楫榀榘楸椴槌榇榈槎榉楦楣楹榛榧榻榫榭 +0x6940: 槔榱槁槊槟榕槠榍槿樯槭樗樘橥槲橄樾檠橐橛樵檎橹樽樨橘橼檑檐檩檗檫 +0x6960: 猷獒殁殂殇殄殒殓殍殚殛殡殪轫轭轱轲轳轵轶轸轷轹轺轼轾辁辂辄辇辋 +0x6a20: 辍辎辏辘辚軎戋戗戛戟戢戡戥戤戬臧瓯瓴瓿甏甑甓攴旮旯旰昊昙杲昃昕 +0x6a40: 昀炅曷昝昴昱昶昵耆晟晔晁晏晖晡晗晷暄暌暧暝暾曛曜曦曩贲贳贶贻贽赀 +0x6a60: 
赅赆赈赉赇赍赕赙觇觊觋觌觎觏觐觑牮犟牝牦牯牾牿犄犋犍犏犒挈挲掰 +0x6b20: 搿擘耄毪毳毽毵毹氅氇氆氍氕氘氙氚氡氩氤氪氲攵敕敫牍牒牖爰虢刖肟 +0x6b40: 肜肓肼朊肽肱肫肭肴肷胧胨胩胪胛胂胄胙胍胗朐胝胫胱胴胭脍脎胲胼朕脒 +0x6b60: 豚脶脞脬脘脲腈腌腓腴腙腚腱腠腩腼腽腭腧塍媵膈膂膑滕膣膪臌朦臊膻 +0x6c20: 臁膦欤欷欹歃歆歙飑飒飓飕飙飚殳彀毂觳斐齑斓於旆旄旃旌旎旒旖炀炜 +0x6c40: 炖炝炻烀炷炫炱烨烊焐焓焖焯焱煳煜煨煅煲煊煸煺熘熳熵熨熠燠燔燧燹爝 +0x6c60: 爨灬焘煦熹戾戽扃扈扉礻祀祆祉祛祜祓祚祢祗祠祯祧祺禅禊禚禧禳忑忐 +0x6d20: 怼恝恚恧恁恙恣悫愆愍慝憩憝懋懑戆肀聿沓泶淼矶矸砀砉砗砘砑斫砭砜 +0x6d40: 砝砹砺砻砟砼砥砬砣砩硎硭硖硗砦硐硇硌硪碛碓碚碇碜碡碣碲碹碥磔磙磉 +0x6d60: 磬磲礅磴礓礤礞礴龛黹黻黼盱眄眍盹眇眈眚眢眙眭眦眵眸睐睑睇睃睚睨 +0x6e20: 睢睥睿瞍睽瞀瞌瞑瞟瞠瞰瞵瞽町畀畎畋畈畛畲畹疃罘罡罟詈罨罴罱罹羁 +0x6e40: 罾盍盥蠲钅钆钇钋钊钌钍钏钐钔钗钕钚钛钜钣钤钫钪钭钬钯钰钲钴钶钷钸 +0x6e60: 钹钺钼钽钿铄铈铉铊铋铌铍铎铐铑铒铕铖铗铙铘铛铞铟铠铢铤铥铧铨铪 +0x6f20: 铩铫铮铯铳铴铵铷铹铼铽铿锃锂锆锇锉锊锍锎锏锒锓锔锕锖锘锛锝锞锟 +0x6f40: 锢锪锫锩锬锱锲锴锶锷锸锼锾锿镂锵镄镅镆镉镌镎镏镒镓镔镖镗镘镙镛镞 +0x6f60: 镟镝镡镢镤镥镦镧镨镩镪镫镬镯镱镲镳锺矧矬雉秕秭秣秫稆嵇稃稂稞稔 +0x7020: 稹稷穑黏馥穰皈皎皓皙皤瓞瓠甬鸠鸢鸨鸩鸪鸫鸬鸲鸱鸶鸸鸷鸹鸺鸾鹁鹂 +0x7040: 鹄鹆鹇鹈鹉鹋鹌鹎鹑鹕鹗鹚鹛鹜鹞鹣鹦鹧鹨鹩鹪鹫鹬鹱鹭鹳疒疔疖疠疝疬 +0x7060: 疣疳疴疸痄疱疰痃痂痖痍痣痨痦痤痫痧瘃痱痼痿瘐瘀瘅瘌瘗瘊瘥瘘瘕瘙 +0x7120: 瘛瘼瘢瘠癀瘭瘰瘿瘵癃瘾瘳癍癞癔癜癖癫癯翊竦穸穹窀窆窈窕窦窠窬窨 +0x7140: 窭窳衤衩衲衽衿袂袢裆袷袼裉裢裎裣裥裱褚裼裨裾裰褡褙褓褛褊褴褫褶襁 +0x7160: 襦襻疋胥皲皴矜耒耔耖耜耠耢耥耦耧耩耨耱耋耵聃聆聍聒聩聱覃顸颀颃 +0x7220: 颉颌颍颏颔颚颛颞颟颡颢颥颦虍虔虬虮虿虺虼虻蚨蚍蚋蚬蚝蚧蚣蚪蚓蚩 +0x7240: 蚶蛄蚵蛎蚰蚺蚱蚯蛉蛏蚴蛩蛱蛲蛭蛳蛐蜓蛞蛴蛟蛘蛑蜃蜇蛸蜈蜊蜍蜉蜣蜻 +0x7260: 蜞蜥蜮蜚蜾蝈蜴蜱蜩蜷蜿螂蜢蝽蝾蝻蝠蝰蝌蝮螋蝓蝣蝼蝤蝙蝥螓螯螨蟒 +0x7320: 蟆螈螅螭螗螃螫蟥螬螵螳蟋蟓螽蟑蟀蟊蟛蟪蟠蟮蠖蠓蟾蠊蠛蠡蠹蠼缶罂 +0x7340: 罄罅舐竺竽笈笃笄笕笊笫笏筇笸笪笙笮笱笠笥笤笳笾笞筘筚筅筵筌筝筠筮 +0x7360: 筻筢筲筱箐箦箧箸箬箝箨箅箪箜箢箫箴篑篁篌篝篚篥篦篪簌篾篼簏簖簋 +0x7420: 簟簪簦簸籁籀臾舁舂舄臬衄舡舢舣舭舯舨舫舸舻舳舴舾艄艉艋艏艚艟艨 +0x7440: 衾袅袈裘裟襞羝羟羧羯羰羲籼敉粑粝粜粞粢粲粼粽糁糇糌糍糈糅糗糨艮暨 +0x7460: 羿翎翕翥翡翦翩翮翳糸絷綦綮繇纛麸麴赳趄趔趑趱赧赭豇豉酊酐酎酏酤 +0x7520: 酢酡酰酩酯酽酾酲酴酹醌醅醐醍醑醢醣醪醭醮醯醵醴醺豕鹾趸跫踅蹙蹩 +0x7540: 趵趿趼趺跄跖跗跚跞跎跏跛跆跬跷跸跣跹跻跤踉跽踔踝踟踬踮踣踯踺蹀踹 +0x7560: 踵踽踱蹉蹁蹂蹑蹒蹊蹰蹶蹼蹯蹴躅躏躔躐躜躞豸貂貊貅貘貔斛觖觞觚觜 +0x7620: 觥觫觯訾謦靓雩雳雯霆霁霈霏霎霪霭霰霾龀龃龅龆龇龈龉龊龌黾鼋鼍隹 +0x7640: 隼隽雎雒瞿雠銎銮鋈錾鍪鏊鎏鐾鑫鱿鲂鲅鲆鲇鲈稣鲋鲎鲐鲑鲒鲔鲕鲚鲛鲞 +0x7660: 鲟鲠鲡鲢鲣鲥鲦鲧鲨鲩鲫鲭鲮鲰鲱鲲鲳鲴鲵鲶鲷鲺鲻鲼鲽鳄鳅鳆鳇鳊鳋 +0x7720: 鳌鳍鳎鳏鳐鳓鳔鳕鳗鳘鳙鳜鳝鳟鳢靼鞅鞑鞒鞔鞯鞫鞣鞲鞴骱骰骷鹘骶骺 +0x7740: 骼髁髀髅髂髋髌髑魅魃魇魉魈魍魑飨餍餮饕饔髟髡髦髯髫髻髭髹鬈鬏鬓鬟 +0x7760: 鬣麽麾縻麂麇麈麋麒鏖麝麟黛黜黝黠黟黢黩黧黥黪黯鼢鼬鼯鼹鼷鼽鼾齄 diff --git a/t/grow.t b/t/grow.t new file mode 100644 index 0000000..e6b35fc --- /dev/null +++ b/t/grow.t @@ -0,0 +1,39 @@ +#!../perl +our $POWER; +BEGIN { + if ($ENV{'PERL_CORE'}){ + chdir 't'; + unshift @INC, '../lib'; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + 
exit 0; + } + $POWER = 12; # up to 1 MB. You may adjust the figure here +} + +use strict; +use Encode; + +my $seed = ""; +for my $i (0x00..0xff){ + my $c = chr($i); + $seed .= ($c =~ /^\p{IsPrint}/o) ? $c : " "; +} + +use Test::More tests => $POWER*2; +my $octs = $seed; +use bytes (); +for my $i (1..$POWER){ + $octs .= $octs; + my $len = bytes::length($octs); + my $utf8 = Encode::decode('latin1', $octs); + ok(1, "decode $len bytes"); + is($octs, + Encode::encode('latin1', $utf8), + "encode $len bytes"); +} +__END__ + + diff --git a/t/gsm0338.t b/t/gsm0338.t new file mode 100644 index 0000000..127604b --- /dev/null +++ b/t/gsm0338.t @@ -0,0 +1,139 @@ +BEGIN { + if ($ENV{'PERL_CORE'}){ + chdir 't'; + unshift @INC, '../lib'; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + $| = 1; +} + +use strict; +use utf8; +use Test::More tests => 780; +use Encode; +use Encode::GSM0338; + +# The specification of GSM 03.38 is not awfully clear. +# (http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT) +# The various combinations of 0x00 and 0x1B as leading bytes +# are unclear, as is the semantics of those bytes as standalone +# or as final single bytes. + + +my $chk = Encode::LEAVE_SRC(); + +# escapes +# see http://www.csoft.co.uk/sms/character_sets/gsm.htm +my %esc_seq = ( + "\x{20ac}" => "\x1b\x65", + "\x0c" => "\x1b\x0A", + "[" => "\x1b\x3C", + "\\" => "\x1b\x2F", + "]" => "\x1b\x3E", + "^" => "\x1b\x14", + "{" => "\x1b\x28", + "|" => "\x1b\x40", + "}" => "\x1b\x29", + "~" => "\x1b\x3D", +); + +my %unesc_seq = reverse %esc_seq; + + +sub eu{ + $_[0] =~ /[\x00-\x1f]/ ? + sprintf("\\x{%04X}", ord($_[0])) : encode_utf8($_[0]); + +} + +for my $c ( map { chr } 0 .. 127 ) { + my $u = $Encode::GSM0338::GSM2UNI{$c}; + + # default character set + is decode( "gsm0338", $c, $chk ), $u, + sprintf( "decode \\x%02X", ord($c) ); + eval { decode( "gsm0338", $c . 
"\xff", $chk ) }; + ok( $@, $@ ); + is encode( "gsm0338", $u, $chk ), $c, sprintf( "encode %s", eu($u) ); + eval { encode( "gsm0338", $u . "\x{3000}", $chk ) }; + ok( $@, $@ ); + + # nasty atmark + if ( $c eq "\x00" ) { + is decode( "gsm0338", "\x00" . $c, $chk ), "\x00", + sprintf( '@@ =>: \x00+\x%02X', ord($c) ); + } + else { + is decode( "gsm0338", "\x00" . $c ), '@' . decode( "gsm0338", $c ), + sprintf( '@: decode \x00+\x%02X', ord($c) ); + } + + # escape seq. + my $ecs = "\x1b" . $c; + if ( $unesc_seq{$ecs} ) { + is decode( "gsm0338", $ecs, $chk ), $unesc_seq{$ecs}, + sprintf( "ESC: decode ESC+\\x%02X", ord($c) ); + is encode( "gsm0338", $unesc_seq{$ecs}, $chk ), $ecs, + sprintf( "ESC: encode %s ", eu( $unesc_seq{$ecs} ) ); + } + else { + is decode( "gsm0338", $ecs, $chk ), + "\xA0" . decode( "gsm0338", $c ), + sprintf( "decode ESC+\\x%02X", ord($c) ); + } +} + +# https://rt.cpan.org/Ticket/Display.html?id=75670 +is decode("gsm0338", "\x09") => chr(0xC7), 'RT75670: decode'; +is encode("gsm0338", chr(0xC7)) => "\x09", 'RT75670: encode'; + +__END__ +for my $c (map { chr } 0..127){ + my $b = "\x1b$c"; + my $u = $Encode::GSM0338::GSM2UNI{$b}; + next unless $u; + $u ||= "\xA0" . $Encode::GSM0338::GSM2UNI{$c}; + is decode("gsm0338", $b), $u, sprintf("decode ESC+\\x%02X", ord($c) ); +} + +__END__ +# old test follows +ub t { is(decode("gsm0338", my $t = $_[0]), $_[1]) } + +# t("\x00", "\x00"); # ??? + +# "Round-trip". +t("\x41", "\x41"); + +t("\x01", "\xA3"); +t("\x02", "\x24"); +t("\x03", "\xA5"); +t("\x09", "\xE7"); + +t("\x00\x00", "\x00\x00"); # Maybe? +t("\x00\x1B", "\x40\xA0"); # Maybe? +t("\x00\x41", "\x40\x41"); + +# t("\x1B", "\x1B"); # ??? + +# Escape with no special second byte is just a NBSP. +t("\x1B\x41", "\xA0\x41"); + +t("\x1B\x00", "\xA0\x40"); # Maybe? + +# Special escape characters. 
+t("\x1B\x0A", "\x0C"); +t("\x1B\x14", "\x5E"); +t("\x1B\x28", "\x7B"); +t("\x1B\x29", "\x7D"); +t("\x1B\x2F", "\x5C"); +t("\x1B\x3C", "\x5B"); +t("\x1B\x3D", "\x7E"); +t("\x1B\x3E", "\x5D"); +t("\x1B\x40", "\x7C"); +t("\x1B\x40", "\x7C"); +t("\x1B\x65", "\x{20AC}"); diff --git a/t/guess.t b/t/guess.t new file mode 100644 index 0000000..896028b --- /dev/null +++ b/t/guess.t @@ -0,0 +1,120 @@ +BEGIN { + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + $| = 1; +} + +use strict; +use File::Basename; +use File::Spec; +use Encode qw(decode encode find_encoding _utf8_off); + +#use Test::More qw(no_plan); +use Test::More tests => 32; +BEGIN { use_ok("Encode::Guess") } + +my $ascii = join('' => map {chr($_)}(0x21..0x7e)); +my $latin1 = join('' => map {chr($_)}(0xa1..0xfe)); +my $utf8on = join('' => map {chr($_)}(0x3000..0x30fe)); +my $utf8off = $utf8on; _utf8_off($utf8off); +my $utf16 = encode('UTF-16', $utf8on); +my $utf32 = encode('UTF-32', $utf8on); + +like(guess_encoding(''), qr/empty string/io, 'empty string'); +is(guess_encoding($ascii)->name, 'ascii', 'ascii'); +like(guess_encoding($latin1), qr/No appropriate encoding/io, 'no ascii'); +is(guess_encoding($latin1, 'latin1')->name, 'iso-8859-1', 'iso-8859-1'); +is(guess_encoding($utf8on)->name, 'utf8', 'utf8 w/ flag'); +is(guess_encoding($utf8off)->name, 'utf8', 'utf8 w/o flag'); +is(guess_encoding($utf16)->name, 'UTF-16', 'UTF-16'); +is(guess_encoding($utf32)->name, 'UTF-32', 'UTF-32'); + +my $jisx0201 = File::Spec->catfile(dirname(__FILE__), 'jisx0201.utf'); +my $jisx0208 = File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'); +my $jisx0212 = File::Spec->catfile(dirname(__FILE__), 'jisx0212.utf'); + +open my $fh, $jisx0208 or die "$jisx0208: $!"; +binmode($fh); +$utf8off = join('' => <$fh>); +close $fh; +$utf8on = decode('utf8', $utf8off); + +my @jp = 
qw(7bit-jis shiftjis euc-jp); + +Encode::Guess->set_suspects(@jp); + +for my $jp (@jp){ + my $test = encode($jp, $utf8on); + is(guess_encoding($test)->name, $jp, "JP:$jp"); +} + +is (decode('Guess', encode('euc-jp', $utf8on)), $utf8on, "decode('Guess')"); +eval{ encode('Guess', $utf8on) }; +like($@, qr/not defined/io, "no encode()"); + +{ + my $warning; + local $SIG{__WARN__} = sub { $warning = shift }; + my $euc_jp = my $euc_jp_clone = encode('euc-jp', $utf8on); + Encode::from_to($euc_jp, 'Guess', 'euc-jp'); + is $euc_jp_clone, $euc_jp, "from_to(..., 'Guess')"; + ok !$warning, "no warning"; + diag $warning if $warning; +} + +my %CJKT = + ( + 'euc-cn' => File::Spec->catfile(dirname(__FILE__), 'gb2312.utf'), + 'euc-jp' => File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'), + 'euc-kr' => File::Spec->catfile(dirname(__FILE__), 'ksc5601.utf'), + 'big5-eten' => File::Spec->catfile(dirname(__FILE__), 'big5-eten.utf'), +); + +Encode::Guess->set_suspects(keys %CJKT); + +for my $name (keys %CJKT){ + open my $fh, $CJKT{$name} or die "$CJKT{$name}: $!"; + binmode($fh); + $utf8off = join('' => <$fh>); + close $fh; + + my $test = encode($name, decode('utf8', $utf8off)); + is(guess_encoding($test)->name, $name, "CJKT:$name"); +} + +my $ambiguous = "\x{5c0f}\x{98fc}\x{5f3e}"; +my $english = "The quick brown fox jumps over the black lazy dog."; +for my $utf (qw/UTF-16 UTF-32/){ + for my $bl (qw/BE LE/){ + my $test = encode("$utf$bl" => $english); + is(guess_encoding($test)->name, "$utf$bl", "$utf$bl"); + } +} +for my $bl (qw/BE LE/){ + my $test = encode("UTF-16$bl" => $ambiguous); + my $result = guess_encoding($test); + ok(! ref($result), "UTF-16$bl:$result"); +} + + + +Encode::Guess->set_suspects(); +for my $jp (@jp){ + # intentionally set $1 a priori -- see Changes + my $test = "English"; + '$1' =~ m/^(.*)/o; + is(guess_encoding($test, ($jp))->name, 'ascii', + "ascii vs $jp (\$1 messed)"); + $test = encode($jp, $test . 
"\n\x{65e5}\x{672c}\x{8a9e}"); + is(guess_encoding($test, ($jp))->name, + $jp, "$jp vs ascii (\$1 messed)"); +} + +__END__; diff --git a/t/isa.t b/t/isa.t new file mode 100644 index 0000000..84703a5 --- /dev/null +++ b/t/isa.t @@ -0,0 +1,10 @@ +# +# $Id: isa.t,v 1.1 2015/04/02 12:08:24 dankogai Exp $ +# +use strict; +use Encode qw/find_encoding/; +use Test::More; +my @enc = Encode->encodings(":all"); +plan tests => 0+@enc; +isa_ok find_encoding($_), "Encode::Encoding" for @enc; + diff --git a/t/jis7-fallback.t b/t/jis7-fallback.t new file mode 100644 index 0000000..b1e57b9 --- /dev/null +++ b/t/jis7-fallback.t @@ -0,0 +1,21 @@ +use strict; +use Test::More 'no_plan'; +use Encode ':fallbacks'; + +my $str = "\x{0647}"; +my @data = grep length, map { chomp; $_ } ; + +while (my($in, $out) = splice(@data, 0, 2)) { + my $copy = $str; + is Encode::encode("iso-2022-jp", $copy, eval $in), $out; +} + +__DATA__ +FB_PERLQQ +\x{0647} + +FB_HTMLCREF +ه + +FB_XMLCREF +ه diff --git a/t/jisx0201.enc b/t/jisx0201.enc new file mode 100644 index 0000000..66a7cbb --- /dev/null +++ b/t/jisx0201.enc @@ -0,0 +1,2 @@ +0x00a0: �������������������������������������������������������������� +0x00c0: �����ŽÎĎŎƎǎȎɎʎˎ͎̎ΎώЎюҎӎԎՎ֎׎؎َڎێ܎ݎގ� diff --git a/t/jisx0201.utf b/t/jisx0201.utf new file mode 100644 index 0000000..5b898e3 --- /dev/null +++ b/t/jisx0201.utf @@ -0,0 +1,2 @@ +0x00a0: 。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソ +0x00c0: タチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚ diff --git a/t/jisx0208.enc b/t/jisx0208.enc new file mode 100644 index 0000000..479f3e3 --- /dev/null +++ b/t/jisx0208.enc @@ -0,0 +1,226 @@ +0x2120: �������������������������������������������������������������� +0x2140: �����¡áġšơǡȡɡʡˡ̡͡ΡϡСѡҡӡԡա֡סء١ڡۡܡݡޡ� +0x2160: ������������������������������������������� +0x2220: ���������������������������� ������������ +0x2240: ���� �ʢˢ̢͢΢Ϣ� �ܢݢޢ� +0x2260: ������������ �������������� �� +0x2320: �������������������� +0x2340: ���£ãģţƣǣȣɣʣˣ̣ͣΣϣУѣңӣԣգ֣ףأ٣� +0x2360: ���������������������������������� 
+0x2420: �������������������������������������������������������������� +0x2440: �����¤äĤŤƤǤȤɤʤˤ̤ͤΤϤФѤҤӤԤդ֤פؤ٤ڤۤܤݤޤ� +0x2460: ���������������������� +0x2520: �������������������������������������������������������������� +0x2540: �����¥åĥťƥǥȥɥʥ˥̥ͥΥϥХѥҥӥԥե֥ץإ٥ڥۥܥݥޥ� +0x2560: ��������������������������� +0x2620: ������������������������������������������������ +0x2640: ���¦æĦŦƦǦȦɦʦ˦̦ͦΦϦЦѦҦӦԦզ֦צ� +0x2720: �������������������������������������������������������������� +0x2740: ���� �ѧҧӧԧէ֧קا٧ڧۧܧݧާ� +0x2760: �������������������� +0x2820: �������������������������������������������������������������� +0x2840: �� +0x3020: �������������������������������������������������������������� +0x3040: �����°ðİŰưǰȰɰʰ˰̰ͰΰϰаѰҰӰ԰հְװذٰڰ۰ܰݰް� +0x3060: ������������������������������������������� +0x3120: �������������������������������������������������������������� +0x3140: �����±ñıűƱDZȱɱʱ˱̱ͱαϱбѱұӱԱձֱױرٱڱ۱ܱݱޱ� +0x3160: ������������������������������������������� +0x3220: �������������������������������������������������������������� +0x3240: �����²òIJŲƲDzȲɲʲ˲̲ͲβϲвѲҲӲԲղֲײزٲڲ۲ܲݲ޲� +0x3260: ������������������������������������������� +0x3320: �������������������������������������������������������������� +0x3340: �����³óijųƳdzȳɳʳ˳̳ͳγϳгѳҳӳԳճֳ׳سٳڳ۳ܳݳ޳� +0x3360: ������������������������������������������� +0x3420: �������������������������������������������������������������� +0x3440: �����´ôĴŴƴǴȴɴʴ˴̴ʹδϴдѴҴӴԴմִ״شٴڴ۴ܴݴ޴� +0x3460: ������������������������������������������� +0x3520: �������������������������������������������������������������� +0x3540: �����µõĵŵƵǵȵɵʵ˵̵͵εϵеѵҵӵԵյֵ׵صٵڵ۵ܵݵ޵� +0x3560: ������������������������������������������� +0x3620: �������������������������������������������������������������� +0x3640: �����¶öĶŶƶǶȶɶʶ˶̶Ͷζ϶жѶҶӶԶնֶ׶ضٶڶ۶ܶݶ޶� +0x3660: ������������������������������������������� +0x3720: �������������������������������������������������������������� +0x3740: �����·÷ķŷƷǷȷɷʷ˷̷ͷηϷзѷҷӷԷշַ׷طٷڷ۷ܷݷ޷� +0x3760: ������������������������������������������� +0x3820: 
�������������������������������������������������������������� +0x3840: �����¸øĸŸƸǸȸɸʸ˸̸͸θϸиѸҸӸԸոָ׸ظٸڸ۸ܸݸ޸� +0x3860: ������������������������������������������� +0x3920: �������������������������������������������������������������� +0x3940: �����¹ùĹŹƹǹȹɹʹ˹̹͹ιϹйѹҹӹԹչֹ׹عٹڹ۹ܹݹ޹� +0x3960: ������������������������������������������� +0x3a20: �������������������������������������������������������������� +0x3a40: �����ºúĺźƺǺȺɺʺ˺̺ͺκϺкѺҺӺԺպֺ׺غٺںۺܺݺ޺� +0x3a60: ������������������������������������������� +0x3b20: �������������������������������������������������������������� +0x3b40: �����»ûĻŻƻǻȻɻʻ˻̻ͻλϻлѻһӻԻջֻ׻ػٻڻۻܻݻ޻� +0x3b60: ������������������������������������������� +0x3c20: �������������������������������������������������������������� +0x3c40: �����¼üļżƼǼȼɼʼ˼̼ͼμϼмѼҼӼԼռּ׼ؼټڼۼܼݼ޼� +0x3c60: ������������������������������������������� +0x3d20: �������������������������������������������������������������� +0x3d40: �����½ýĽŽƽǽȽɽʽ˽̽ͽνϽнѽҽӽԽսֽ׽ؽٽڽ۽ܽݽ޽� +0x3d60: ������������������������������������������� +0x3e20: �������������������������������������������������������������� +0x3e40: �����¾þľžƾǾȾɾʾ˾̾;ξϾоѾҾӾԾվ־׾ؾپھ۾ܾݾ޾� +0x3e60: ������������������������������������������� +0x3f20: �������������������������������������������������������������� +0x3f40: �����¿ÿĿſƿǿȿɿʿ˿̿ͿοϿпѿҿӿԿտֿ׿ؿٿڿۿܿݿ޿� +0x3f60: ������������������������������������������� +0x4020: �������������������������������������������������������������� +0x4040: ���������������������������������������������������������������� +0x4060: �������������������������������������������������������������� +0x4120: �������������������������������������������������������������� +0x4140: ���������������������������������������������������������������� +0x4160: �������������������������������������������������������������� +0x4220: ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ +0x4240: ���������������������������������������������������������������� +0x4260: �������������������������������������������������������������� +0x4320: 
áâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ +0x4340: ���������������������������������������������������������������� +0x4360: �������������������������������������������������������������� +0x4420: ġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿ +0x4440: ���������������������������������������������������������������� +0x4460: �������������������������������������������������������������� +0x4520: šŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſ +0x4540: ���������������������������������������������������������������� +0x4560: �������������������������������������������������������������� +0x4620: ơƢƣƤƥƦƧƨƩƪƫƬƭƮƯưƱƲƳƴƵƶƷƸƹƺƻƼƽƾƿ +0x4640: ���������������������������������������������������������������� +0x4660: �������������������������������������������������������������� +0x4720: ǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸǹǺǻǼǽǾǿ +0x4740: ���������������������������������������������������������������� +0x4760: �������������������������������������������������������������� +0x4820: ȡȢȣȤȥȦȧȨȩȪȫȬȭȮȯȰȱȲȳȴȵȶȷȸȹȺȻȼȽȾȿ +0x4840: ���������������������������������������������������������������� +0x4860: �������������������������������������������������������������� +0x4920: ɡɢɣɤɥɦɧɨɩɪɫɬɭɮɯɰɱɲɳɴɵɶɷɸɹɺɻɼɽɾɿ +0x4940: ���������������������������������������������������������������� +0x4960: �������������������������������������������������������������� +0x4a20: ʡʢʣʤʥʦʧʨʩʪʫʬʭʮʯʰʱʲʳʴʵʶʷʸʹʺʻʼʽʾʿ +0x4a40: ���������������������������������������������������������������� +0x4a60: �������������������������������������������������������������� +0x4b20: ˡˢˣˤ˥˦˧˨˩˪˫ˬ˭ˮ˯˰˱˲˳˴˵˶˷˸˹˺˻˼˽˾˿ +0x4b40: ���������������������������������������������������������������� +0x4b60: �������������������������������������������������������������� +0x4c20: ̴̵̶̷̸̡̢̧̨̣̤̥̦̩̪̫̬̭̮̯̰̱̲̳̹̺̻̼̽̾̿ +0x4c40: ���������������������������������������������������������������� +0x4c60: �������������������������������������������������������������� +0x4d20: ͣͤͥͦͧͨͩͪͫͬͭͮͯ͢͡ͰͱͲͳʹ͵Ͷͷ͸͹ͺͻͼͽ;Ϳ +0x4d40: ���������������������������������������������������������������� +0x4d60: 
�������������������������������������������������������������� +0x4e20: Ρ΢ΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξο +0x4e40: ���������������������������������������������������������������� +0x4e60: �������������������������������������������������������������� +0x4f20: ϡϢϣϤϥϦϧϨϩϪϫϬϭϮϯϰϱϲϳϴϵ϶ϷϸϹϺϻϼϽϾϿ +0x4f40: ���������������������������������������� +0x5020: СТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп +0x5040: ���������������������������������������������������������������� +0x5060: �������������������������������������������������������������� +0x5120: ѡѢѣѤѥѦѧѨѩѪѫѬѭѮѯѰѱѲѳѴѵѶѷѸѹѺѻѼѽѾѿ +0x5140: ���������������������������������������������������������������� +0x5160: �������������������������������������������������������������� +0x5220: ҡҢңҤҥҦҧҨҩҪҫҬҭҮүҰұҲҳҴҵҶҷҸҹҺһҼҽҾҿ +0x5240: ���������������������������������������������������������������� +0x5260: �������������������������������������������������������������� +0x5320: ӡӢӣӤӥӦӧӨөӪӫӬӭӮӯӰӱӲӳӴӵӶӷӸӹӺӻӼӽӾӿ +0x5340: ���������������������������������������������������������������� +0x5360: �������������������������������������������������������������� +0x5420: ԡԢԣԤԥԦԧԨԩԪԫԬԭԮԯ԰ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿ +0x5440: ���������������������������������������������������������������� +0x5460: �������������������������������������������������������������� +0x5520: աբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտ +0x5540: ���������������������������������������������������������������� +0x5560: �������������������������������������������������������������� +0x5620: ְֱֲֳִֵֶַָֹֺֻּֽ֢֣֤֥֦֧֪֭֮֡֨֩֫֬֯־ֿ +0x5640: ���������������������������������������������������������������� +0x5660: �������������������������������������������������������������� +0x5720: סעףפץצקרשת׫׬׭׮ׯװױײ׳״׵׶׷׸׹׺׻׼׽׾׿ +0x5740: ���������������������������������������������������������������� +0x5760: �������������������������������������������������������������� +0x5820: ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿ +0x5840: ���������������������������������������������������������������� +0x5860: 
�������������������������������������������������������������� +0x5920: ١٢٣٤٥٦٧٨٩٪٫٬٭ٮٯٰٱٲٳٴٵٶٷٸٹٺٻټٽپٿ +0x5940: ���������������������������������������������������������������� +0x5960: �������������������������������������������������������������� +0x5a20: ڡڢڣڤڥڦڧڨکڪګڬڭڮگڰڱڲڳڴڵڶڷڸڹںڻڼڽھڿ +0x5a40: ���������������������������������������������������������������� +0x5a60: �������������������������������������������������������������� +0x5b20: ۣۡۢۤۥۦۧۨ۩۪ۭ۫۬ۮۯ۰۱۲۳۴۵۶۷۸۹ۺۻۼ۽۾ۿ +0x5b40: ���������������������������������������������������������������� +0x5b60: �������������������������������������������������������������� +0x5c20: ܡܢܣܤܥܦܧܨܩܪܫܬܭܮܯܱܴܷܸܹܻܼܾܰܲܳܵܶܺܽܿ +0x5c40: ���������������������������������������������������������������� +0x5c60: �������������������������������������������������������������� +0x5d20: ݡݢݣݤݥݦݧݨݩݪݫݬݭݮݯݰݱݲݳݴݵݶݷݸݹݺݻݼݽݾݿ +0x5d40: ���������������������������������������������������������������� +0x5d60: �������������������������������������������������������������� +0x5e20: ޡޢޣޤޥަާިީުޫެޭޮޯްޱ޲޳޴޵޶޷޸޹޺޻޼޽޾޿ +0x5e40: ���������������������������������������������������������������� +0x5e60: �������������������������������������������������������������� +0x5f20: ߡߢߣߤߥߦߧߨߩߪ߲߫߬߭߮߯߰߱߳ߴߵ߶߷߸߹ߺ߻߼߽߾߿ +0x5f40: ���������������������������������������������������������������� +0x5f60: �������������������������������������������������������������� +0x6020: ������������������������������� +0x6040: ���������������������������������������������������������������� +0x6060: �������������������������������������������������������������� +0x6120: ������������������������������� +0x6140: ���������������������������������������������������������������� +0x6160: �������������������������������������������������������������� +0x6220: ������������������������������� +0x6240: ���������������������������������������������������������������� +0x6260: �������������������������������������������������������������� +0x6320: ������������������������������� +0x6340: 
���������������������������������������������������������������� +0x6360: �������������������������������������������������������������� +0x6420: ������������������������������� +0x6440: ���������������������������������������������������������������� +0x6460: �������������������������������������������������������������� +0x6520: ������������������������������� +0x6540: ���������������������������������������������������������������� +0x6560: �������������������������������������������������������������� +0x6620: ������������������������������� +0x6640: ���������������������������������������������������������������� +0x6660: �������������������������������������������������������������� +0x6720: ������������������������������� +0x6740: ���������������������������������������������������������������� +0x6760: �������������������������������������������������������������� +0x6820: ������������������������������� +0x6840: ���������������������������������������������������������������� +0x6860: �������������������������������������������������������������� +0x6920: ������������������������������� +0x6940: ���������������������������������������������������������������� +0x6960: �������������������������������������������������������������� +0x6a20: ������������������������������� +0x6a40: ���������������������������������������������������������������� +0x6a60: �������������������������������������������������������������� +0x6b20: ������������������������������� +0x6b40: ���������������������������������������������������������������� +0x6b60: �������������������������������������������������������������� +0x6c20: ������������������������������� +0x6c40: ���������������������������������������������������������������� +0x6c60: �������������������������������������������������������������� +0x6d20: �������������������������������������������������������������� +0x6d40: ���������������������������������������������������������������� +0x6d60: 
�������������������������������������������������������������� +0x6e20: ������������������������������� +0x6e40: ���������������������������������������������������������������� +0x6e60: �������������������������������������������������������������� +0x6f20: ������������������������������� +0x6f40: ���������������������������������������������������������������� +0x6f60: �������������������������������������������������������������� +0x7020: ������������������������������� +0x7040: ���������������������������������������������������������������� +0x7060: �������������������������������������������������������������� +0x7120: ������������������������������� +0x7140: ���������������������������������������������������������������� +0x7160: �������������������������������������������������������������� +0x7220: ������������������������������� +0x7240: ���������������������������������������������������������������� +0x7260: �������������������������������������������������������������� +0x7320: ������������������������������� +0x7340: ���������������������������������������������������������������� +0x7360: �������������������������������������������������������������� +0x7420: ������������ diff --git a/t/jisx0208.utf b/t/jisx0208.utf new file mode 100644 index 0000000..3a51feb --- /dev/null +++ b/t/jisx0208.utf @@ -0,0 +1,226 @@ +0x2120:  、。,.・:;?!゛゜´`¨^ ̄_ヽヾゝゞ〃仝々〆〇ー―‐/ +0x2140: \〜‖|…‥‘’“”()〔〕[]{}〈〉《》「」『』【】+−±× +0x2160: ÷=≠<>≦≧∞∴♂♀°′″℃¥$¢£%#&*@§☆★○●◎◇ +0x2220: ◆□■△▲▽▼※〒→←↑↓〓 ∈∋⊆⊇⊂⊃ +0x2240: ∪∩ ∧∨¬⇒⇔∀∃ ∠⊥⌒∂ +0x2260: ∇≡≒≪≫√∽∝∵∫∬ ʼn♯♭♪†‡¶ ◯ +0x2320: 0123456789 +0x2340: ABCDEFGHIJKLMNOPQRSTUVWXYZ +0x2360: abcdefghijklmnopqrstuvwxyz +0x2420: ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞた +0x2440: だちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみ +0x2460: むめもゃやゅゆょよらりるれろゎわゐゑをん +0x2520: ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタ +0x2540: ダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミ +0x2560: ムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶ +0x2620: ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ +0x2640: αβγδεζηθικλμνξοπρστυφχψω +0x2720: АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭ 
+0x2740: ЮЯ абвгдеёжзийклмн +0x2760: опрстуфхцчшщъыьэюя +0x2820: ─│┌┐┘└├┬┤┴┼━┃┏┓┛┗┣┳┫┻╋┠┯┨┷┿┝┰┥┸ +0x2840: ╂ +0x3020: 亜唖娃阿哀愛挨姶逢葵茜穐悪握渥旭葦芦鯵梓圧斡扱宛姐虻飴絢綾鮎或 +0x3040: 粟袷安庵按暗案闇鞍杏以伊位依偉囲夷委威尉惟意慰易椅為畏異移維緯胃 +0x3060: 萎衣謂違遺医井亥域育郁磯一壱溢逸稲茨芋鰯允印咽員因姻引飲淫胤蔭 +0x3120: 院陰隠韻吋右宇烏羽迂雨卯鵜窺丑碓臼渦嘘唄欝蔚鰻姥厩浦瓜閏噂云運 +0x3140: 雲荏餌叡営嬰影映曳栄永泳洩瑛盈穎頴英衛詠鋭液疫益駅悦謁越閲榎厭円 +0x3160: 園堰奄宴延怨掩援沿演炎焔煙燕猿縁艶苑薗遠鉛鴛塩於汚甥凹央奥往応 +0x3220: 押旺横欧殴王翁襖鴬鴎黄岡沖荻億屋憶臆桶牡乙俺卸恩温穏音下化仮何 +0x3240: 伽価佳加可嘉夏嫁家寡科暇果架歌河火珂禍禾稼箇花苛茄荷華菓蝦課嘩貨 +0x3260: 迦過霞蚊俄峨我牙画臥芽蛾賀雅餓駕介会解回塊壊廻快怪悔恢懐戒拐改 +0x3320: 魁晦械海灰界皆絵芥蟹開階貝凱劾外咳害崖慨概涯碍蓋街該鎧骸浬馨蛙 +0x3340: 垣柿蛎鈎劃嚇各廓拡撹格核殻獲確穫覚角赫較郭閣隔革学岳楽額顎掛笠樫 +0x3360: 橿梶鰍潟割喝恰括活渇滑葛褐轄且鰹叶椛樺鞄株兜竃蒲釜鎌噛鴨栢茅萱 +0x3420: 粥刈苅瓦乾侃冠寒刊勘勧巻喚堪姦完官寛干幹患感慣憾換敢柑桓棺款歓 +0x3440: 汗漢澗潅環甘監看竿管簡緩缶翰肝艦莞観諌貫還鑑間閑関陥韓館舘丸含岸 +0x3460: 巌玩癌眼岩翫贋雁頑顔願企伎危喜器基奇嬉寄岐希幾忌揮机旗既期棋棄 +0x3520: 機帰毅気汽畿祈季稀紀徽規記貴起軌輝飢騎鬼亀偽儀妓宜戯技擬欺犠疑 +0x3540: 祇義蟻誼議掬菊鞠吉吃喫桔橘詰砧杵黍却客脚虐逆丘久仇休及吸宮弓急救 +0x3560: 朽求汲泣灸球究窮笈級糾給旧牛去居巨拒拠挙渠虚許距鋸漁禦魚亨享京 +0x3620: 供侠僑兇競共凶協匡卿叫喬境峡強彊怯恐恭挟教橋況狂狭矯胸脅興蕎郷 +0x3640: 鏡響饗驚仰凝尭暁業局曲極玉桐粁僅勤均巾錦斤欣欽琴禁禽筋緊芹菌衿襟 +0x3660: 謹近金吟銀九倶句区狗玖矩苦躯駆駈駒具愚虞喰空偶寓遇隅串櫛釧屑屈 +0x3720: 掘窟沓靴轡窪熊隈粂栗繰桑鍬勲君薫訓群軍郡卦袈祁係傾刑兄啓圭珪型 +0x3740: 契形径恵慶慧憩掲携敬景桂渓畦稽系経継繋罫茎荊蛍計詣警軽頚鶏芸迎鯨 +0x3760: 劇戟撃激隙桁傑欠決潔穴結血訣月件倹倦健兼券剣喧圏堅嫌建憲懸拳捲 +0x3820: 検権牽犬献研硯絹県肩見謙賢軒遣鍵険顕験鹸元原厳幻弦減源玄現絃舷 +0x3840: 言諺限乎個古呼固姑孤己庫弧戸故枯湖狐糊袴股胡菰虎誇跨鈷雇顧鼓五互 +0x3860: 伍午呉吾娯後御悟梧檎瑚碁語誤護醐乞鯉交佼侯候倖光公功効勾厚口向 +0x3920: 后喉坑垢好孔孝宏工巧巷幸広庚康弘恒慌抗拘控攻昂晃更杭校梗構江洪 +0x3940: 浩港溝甲皇硬稿糠紅紘絞綱耕考肯肱腔膏航荒行衡講貢購郊酵鉱砿鋼閤降 +0x3960: 項香高鴻剛劫号合壕拷濠豪轟麹克刻告国穀酷鵠黒獄漉腰甑忽惚骨狛込 +0x3a20: 此頃今困坤墾婚恨懇昏昆根梱混痕紺艮魂些佐叉唆嵯左差査沙瑳砂詐鎖 +0x3a40: 裟坐座挫債催再最哉塞妻宰彩才採栽歳済災采犀砕砦祭斎細菜裁載際剤在 +0x3a60: 材罪財冴坂阪堺榊肴咲崎埼碕鷺作削咋搾昨朔柵窄策索錯桜鮭笹匙冊刷 +0x3b20: 察拶撮擦札殺薩雑皐鯖捌錆鮫皿晒三傘参山惨撒散桟燦珊産算纂蚕讃賛 +0x3b40: 酸餐斬暫残仕仔伺使刺司史嗣四士始姉姿子屍市師志思指支孜斯施旨枝止 +0x3b60: 死氏獅祉私糸紙紫肢脂至視詞詩試誌諮資賜雌飼歯事似侍児字寺慈持時 +0x3c20: 次滋治爾璽痔磁示而耳自蒔辞汐鹿式識鴫竺軸宍雫七叱執失嫉室悉湿漆 +0x3c40: 疾質実蔀篠偲柴芝屡蕊縞舎写射捨赦斜煮社紗者謝車遮蛇邪借勺尺杓灼爵 +0x3c60: 酌釈錫若寂弱惹主取守手朱殊狩珠種腫趣酒首儒受呪寿授樹綬需囚収周 +0x3d20: 宗就州修愁拾洲秀秋終繍習臭舟蒐衆襲讐蹴輯週酋酬集醜什住充十従戎 +0x3d40: 柔汁渋獣縦重銃叔夙宿淑祝縮粛塾熟出術述俊峻春瞬竣舜駿准循旬楯殉淳 +0x3d60: 準潤盾純巡遵醇順処初所暑曙渚庶緒署書薯藷諸助叙女序徐恕鋤除傷償 +0x3e20: 勝匠升召哨商唱嘗奨妾娼宵将小少尚庄床廠彰承抄招掌捷昇昌昭晶松梢 +0x3e40: 樟樵沼消渉湘焼焦照症省硝礁祥称章笑粧紹肖菖蒋蕉衝裳訟証詔詳象賞醤 +0x3e60: 鉦鍾鐘障鞘上丈丞乗冗剰城場壌嬢常情擾条杖浄状畳穣蒸譲醸錠嘱埴飾 +0x3f20: 
拭植殖燭織職色触食蝕辱尻伸信侵唇娠寝審心慎振新晋森榛浸深申疹真 +0x3f40: 神秦紳臣芯薪親診身辛進針震人仁刃塵壬尋甚尽腎訊迅陣靭笥諏須酢図厨 +0x3f60: 逗吹垂帥推水炊睡粋翠衰遂酔錐錘随瑞髄崇嵩数枢趨雛据杉椙菅頗雀裾 +0x4020: 澄摺寸世瀬畝是凄制勢姓征性成政整星晴棲栖正清牲生盛精聖声製西誠 +0x4040: 誓請逝醒青静斉税脆隻席惜戚斥昔析石積籍績脊責赤跡蹟碩切拙接摂折設 +0x4060: 窃節説雪絶舌蝉仙先千占宣専尖川戦扇撰栓栴泉浅洗染潜煎煽旋穿箭線 +0x4120: 繊羨腺舛船薦詮賎践選遷銭銑閃鮮前善漸然全禅繕膳糎噌塑岨措曾曽楚 +0x4140: 狙疏疎礎祖租粗素組蘇訴阻遡鼠僧創双叢倉喪壮奏爽宋層匝惣想捜掃挿掻 +0x4160: 操早曹巣槍槽漕燥争痩相窓糟総綜聡草荘葬蒼藻装走送遭鎗霜騒像増憎 +0x4220: 臓蔵贈造促側則即息捉束測足速俗属賊族続卒袖其揃存孫尊損村遜他多 +0x4240: 太汰詑唾堕妥惰打柁舵楕陀駄騨体堆対耐岱帯待怠態戴替泰滞胎腿苔袋貸 +0x4260: 退逮隊黛鯛代台大第醍題鷹滝瀧卓啄宅托択拓沢濯琢託鐸濁諾茸凧蛸只 +0x4320: 叩但達辰奪脱巽竪辿棚谷狸鱈樽誰丹単嘆坦担探旦歎淡湛炭短端箪綻耽 +0x4340: 胆蛋誕鍛団壇弾断暖檀段男談値知地弛恥智池痴稚置致蜘遅馳築畜竹筑蓄 +0x4360: 逐秩窒茶嫡着中仲宙忠抽昼柱注虫衷註酎鋳駐樗瀦猪苧著貯丁兆凋喋寵 +0x4420: 帖帳庁弔張彫徴懲挑暢朝潮牒町眺聴脹腸蝶調諜超跳銚長頂鳥勅捗直朕 +0x4440: 沈珍賃鎮陳津墜椎槌追鎚痛通塚栂掴槻佃漬柘辻蔦綴鍔椿潰坪壷嬬紬爪吊 +0x4460: 釣鶴亭低停偵剃貞呈堤定帝底庭廷弟悌抵挺提梯汀碇禎程締艇訂諦蹄逓 +0x4520: 邸鄭釘鼎泥摘擢敵滴的笛適鏑溺哲徹撤轍迭鉄典填天展店添纏甜貼転顛 +0x4540: 点伝殿澱田電兎吐堵塗妬屠徒斗杜渡登菟賭途都鍍砥砺努度土奴怒倒党冬 +0x4560: 凍刀唐塔塘套宕島嶋悼投搭東桃梼棟盗淘湯涛灯燈当痘祷等答筒糖統到 +0x4620: 董蕩藤討謄豆踏逃透鐙陶頭騰闘働動同堂導憧撞洞瞳童胴萄道銅峠鴇匿 +0x4640: 得徳涜特督禿篤毒独読栃橡凸突椴届鳶苫寅酉瀞噸屯惇敦沌豚遁頓呑曇鈍 +0x4660: 奈那内乍凪薙謎灘捺鍋楢馴縄畷南楠軟難汝二尼弐迩匂賑肉虹廿日乳入 +0x4720: 如尿韮任妊忍認濡禰祢寧葱猫熱年念捻撚燃粘乃廼之埜嚢悩濃納能脳膿 +0x4740: 農覗蚤巴把播覇杷波派琶破婆罵芭馬俳廃拝排敗杯盃牌背肺輩配倍培媒梅 +0x4760: 楳煤狽買売賠陪這蝿秤矧萩伯剥博拍柏泊白箔粕舶薄迫曝漠爆縛莫駁麦 +0x4820: 函箱硲箸肇筈櫨幡肌畑畠八鉢溌発醗髪伐罰抜筏閥鳩噺塙蛤隼伴判半反 +0x4840: 叛帆搬斑板氾汎版犯班畔繁般藩販範釆煩頒飯挽晩番盤磐蕃蛮匪卑否妃庇 +0x4860: 彼悲扉批披斐比泌疲皮碑秘緋罷肥被誹費避非飛樋簸備尾微枇毘琵眉美 +0x4920: 鼻柊稗匹疋髭彦膝菱肘弼必畢筆逼桧姫媛紐百謬俵彪標氷漂瓢票表評豹 +0x4940: 廟描病秒苗錨鋲蒜蛭鰭品彬斌浜瀕貧賓頻敏瓶不付埠夫婦富冨布府怖扶敷 +0x4960: 斧普浮父符腐膚芙譜負賦赴阜附侮撫武舞葡蕪部封楓風葺蕗伏副復幅服 +0x4a20: 福腹複覆淵弗払沸仏物鮒分吻噴墳憤扮焚奮粉糞紛雰文聞丙併兵塀幣平 +0x4a40: 弊柄並蔽閉陛米頁僻壁癖碧別瞥蔑箆偏変片篇編辺返遍便勉娩弁鞭保舗鋪 +0x4a60: 圃捕歩甫補輔穂募墓慕戊暮母簿菩倣俸包呆報奉宝峰峯崩庖抱捧放方朋 +0x4b20: 法泡烹砲縫胞芳萌蓬蜂褒訪豊邦鋒飽鳳鵬乏亡傍剖坊妨帽忘忙房暴望某 +0x4b40: 棒冒紡肪膨謀貌貿鉾防吠頬北僕卜墨撲朴牧睦穆釦勃没殆堀幌奔本翻凡盆 +0x4b60: 摩磨魔麻埋妹昧枚毎哩槙幕膜枕鮪柾鱒桝亦俣又抹末沫迄侭繭麿万慢満 +0x4c20: 漫蔓味未魅巳箕岬密蜜湊蓑稔脈妙粍民眠務夢無牟矛霧鵡椋婿娘冥名命 +0x4c40: 明盟迷銘鳴姪牝滅免棉綿緬面麺摸模茂妄孟毛猛盲網耗蒙儲木黙目杢勿餅 +0x4c60: 尤戻籾貰問悶紋門匁也冶夜爺耶野弥矢厄役約薬訳躍靖柳薮鑓愉愈油癒 +0x4d20: 諭輸唯佑優勇友宥幽悠憂揖有柚湧涌猶猷由祐裕誘遊邑郵雄融夕予余与 +0x4d40: 誉輿預傭幼妖容庸揚揺擁曜楊様洋溶熔用窯羊耀葉蓉要謡踊遥陽養慾抑欲 +0x4d60: 沃浴翌翼淀羅螺裸来莱頼雷洛絡落酪乱卵嵐欄濫藍蘭覧利吏履李梨理璃 +0x4e20: 痢裏裡里離陸律率立葎掠略劉流溜琉留硫粒隆竜龍侶慮旅虜了亮僚両凌 +0x4e40: 寮料梁涼猟療瞭稜糧良諒遼量陵領力緑倫厘林淋燐琳臨輪隣鱗麟瑠塁涙累 +0x4e60: 類令伶例冷励嶺怜玲礼苓鈴隷零霊麗齢暦歴列劣烈裂廉恋憐漣煉簾練聯 +0x4f20: 
蓮連錬呂魯櫓炉賂路露労婁廊弄朗楼榔浪漏牢狼篭老聾蝋郎六麓禄肋録 +0x4f40: 論倭和話歪賄脇惑枠鷲亙亘鰐詫藁蕨椀湾碗腕 +0x5020: 弌丐丕个丱丶丼丿乂乖乘亂亅豫亊舒弍于亞亟亠亢亰亳亶从仍仄仆仂仗 +0x5040: 仞仭仟价伉佚估佛佝佗佇佶侈侏侘佻佩佰侑佯來侖儘俔俟俎俘俛俑俚俐俤 +0x5060: 俥倚倨倔倪倥倅伜俶倡倩倬俾俯們倆偃假會偕偐偈做偖偬偸傀傚傅傴傲 +0x5120: 僉僊傳僂僖僞僥僭僣僮價僵儉儁儂儖儕儔儚儡儺儷儼儻儿兀兒兌兔兢竸 +0x5140: 兩兪兮冀冂囘册冉冏冑冓冕冖冤冦冢冩冪冫决冱冲冰况冽凅凉凛几處凩凭 +0x5160: 凰凵凾刄刋刔刎刧刪刮刳刹剏剄剋剌剞剔剪剴剩剳剿剽劍劔劒剱劈劑辨 +0x5220: 辧劬劭劼劵勁勍勗勞勣勦飭勠勳勵勸勹匆匈甸匍匐匏匕匚匣匯匱匳匸區 +0x5240: 卆卅丗卉卍凖卞卩卮夘卻卷厂厖厠厦厥厮厰厶參簒雙叟曼燮叮叨叭叺吁吽 +0x5260: 呀听吭吼吮吶吩吝呎咏呵咎呟呱呷呰咒呻咀呶咄咐咆哇咢咸咥咬哄哈咨 +0x5320: 咫哂咤咾咼哘哥哦唏唔哽哮哭哺哢唹啀啣啌售啜啅啖啗唸唳啝喙喀咯喊 +0x5340: 喟啻啾喘喞單啼喃喩喇喨嗚嗅嗟嗄嗜嗤嗔嘔嗷嘖嗾嗽嘛嗹噎噐營嘴嘶嘲嘸 +0x5360: 噫噤嘯噬噪嚆嚀嚊嚠嚔嚏嚥嚮嚶嚴囂嚼囁囃囀囈囎囑囓囗囮囹圀囿圄圉 +0x5420: 圈國圍圓團圖嗇圜圦圷圸坎圻址坏坩埀垈坡坿垉垓垠垳垤垪垰埃埆埔埒 +0x5440: 埓堊埖埣堋堙堝塲堡塢塋塰毀塒堽塹墅墹墟墫墺壞墻墸墮壅壓壑壗壙壘壥 +0x5460: 壜壤壟壯壺壹壻壼壽夂夊夐夛梦夥夬夭夲夸夾竒奕奐奎奚奘奢奠奧奬奩 +0x5520: 奸妁妝佞侫妣妲姆姨姜妍姙姚娥娟娑娜娉娚婀婬婉娵娶婢婪媚媼媾嫋嫂 +0x5540: 媽嫣嫗嫦嫩嫖嫺嫻嬌嬋嬖嬲嫐嬪嬶嬾孃孅孀孑孕孚孛孥孩孰孳孵學斈孺宀 +0x5560: 它宦宸寃寇寉寔寐寤實寢寞寥寫寰寶寳尅將專對尓尠尢尨尸尹屁屆屎屓 +0x5620: 屐屏孱屬屮乢屶屹岌岑岔妛岫岻岶岼岷峅岾峇峙峩峽峺峭嶌峪崋崕崗嵜 +0x5640: 崟崛崑崔崢崚崙崘嵌嵒嵎嵋嵬嵳嵶嶇嶄嶂嶢嶝嶬嶮嶽嶐嶷嶼巉巍巓巒巖巛 +0x5660: 巫已巵帋帚帙帑帛帶帷幄幃幀幎幗幔幟幢幤幇幵并幺麼广庠廁廂廈廐廏 +0x5720: 廖廣廝廚廛廢廡廨廩廬廱廳廰廴廸廾弃弉彝彜弋弑弖弩弭弸彁彈彌彎弯 +0x5740: 彑彖彗彙彡彭彳彷徃徂彿徊很徑徇從徙徘徠徨徭徼忖忻忤忸忱忝悳忿怡恠 +0x5760: 怙怐怩怎怱怛怕怫怦怏怺恚恁恪恷恟恊恆恍恣恃恤恂恬恫恙悁悍惧悃悚 +0x5820: 悄悛悖悗悒悧悋惡悸惠惓悴忰悽惆悵惘慍愕愆惶惷愀惴惺愃愡惻惱愍愎 +0x5840: 慇愾愨愧慊愿愼愬愴愽慂慄慳慷慘慙慚慫慴慯慥慱慟慝慓慵憙憖憇憬憔憚 +0x5860: 憊憑憫憮懌懊應懷懈懃懆憺懋罹懍懦懣懶懺懴懿懽懼懾戀戈戉戍戌戔戛 +0x5920: 戞戡截戮戰戲戳扁扎扞扣扛扠扨扼抂抉找抒抓抖拔抃抔拗拑抻拏拿拆擔 +0x5940: 拈拜拌拊拂拇抛拉挌拮拱挧挂挈拯拵捐挾捍搜捏掖掎掀掫捶掣掏掉掟掵捫 +0x5960: 捩掾揩揀揆揣揉插揶揄搖搴搆搓搦搶攝搗搨搏摧摯摶摎攪撕撓撥撩撈撼 +0x5a20: 據擒擅擇撻擘擂擱擧舉擠擡抬擣擯攬擶擴擲擺攀擽攘攜攅攤攣攫攴攵攷 +0x5a40: 收攸畋效敖敕敍敘敞敝敲數斂斃變斛斟斫斷旃旆旁旄旌旒旛旙无旡旱杲昊 +0x5a60: 昃旻杳昵昶昴昜晏晄晉晁晞晝晤晧晨晟晢晰暃暈暎暉暄暘暝曁暹曉暾暼 +0x5b20: 曄暸曖曚曠昿曦曩曰曵曷朏朖朞朦朧霸朮朿朶杁朸朷杆杞杠杙杣杤枉杰 +0x5b40: 枩杼杪枌枋枦枡枅枷柯枴柬枳柩枸柤柞柝柢柮枹柎柆柧檜栞框栩桀桍栲桎 +0x5b60: 梳栫桙档桷桿梟梏梭梔條梛梃檮梹桴梵梠梺椏梍桾椁棊椈棘椢椦棡椌棍 +0x5c20: 棔棧棕椶椒椄棗棣椥棹棠棯椨椪椚椣椡棆楹楷楜楸楫楔楾楮椹楴椽楙椰 +0x5c40: 楡楞楝榁楪榲榮槐榿槁槓榾槎寨槊槝榻槃榧樮榑榠榜榕榴槞槨樂樛槿權槹 +0x5c60: 槲槧樅榱樞槭樔槫樊樒櫁樣樓橄樌橲樶橸橇橢橙橦橈樸樢檐檍檠檄檢檣 +0x5d20: 檗蘗檻櫃櫂檸檳檬櫞櫑櫟檪櫚櫪櫻欅蘖櫺欒欖鬱欟欸欷盜欹飮歇歃歉歐 +0x5d40: 歙歔歛歟歡歸歹歿殀殄殃殍殘殕殞殤殪殫殯殲殱殳殷殼毆毋毓毟毬毫毳毯 +0x5d60: 麾氈氓气氛氤氣汞汕汢汪沂沍沚沁沛汾汨汳沒沐泄泱泓沽泗泅泝沮沱沾 +0x5e20: 沺泛泯泙泪洟衍洶洫洽洸洙洵洳洒洌浣涓浤浚浹浙涎涕濤涅淹渕渊涵淇 +0x5e40: 淦涸淆淬淞淌淨淒淅淺淙淤淕淪淮渭湮渮渙湲湟渾渣湫渫湶湍渟湃渺湎渤 +0x5e60: 滿渝游溂溪溘滉溷滓溽溯滄溲滔滕溏溥滂溟潁漑灌滬滸滾漿滲漱滯漲滌 +0x5f20: 漾漓滷澆潺潸澁澀潯潛濳潭澂潼潘澎澑濂潦澳澣澡澤澹濆澪濟濕濬濔濘 +0x5f40: 
濱濮濛瀉瀋濺瀑瀁瀏濾瀛瀚潴瀝瀘瀟瀰瀾瀲灑灣炙炒炯烱炬炸炳炮烟烋烝 +0x5f60: 烙焉烽焜焙煥煕熈煦煢煌煖煬熏燻熄熕熨熬燗熹熾燒燉燔燎燠燬燧燵燼 +0x6020: 燹燿爍爐爛爨爭爬爰爲爻爼爿牀牆牋牘牴牾犂犁犇犒犖犢犧犹犲狃狆狄 +0x6040: 狎狒狢狠狡狹狷倏猗猊猜猖猝猴猯猩猥猾獎獏默獗獪獨獰獸獵獻獺珈玳珎 +0x6060: 玻珀珥珮珞璢琅瑯琥珸琲琺瑕琿瑟瑙瑁瑜瑩瑰瑣瑪瑶瑾璋璞璧瓊瓏瓔珱 +0x6120: 瓠瓣瓧瓩瓮瓲瓰瓱瓸瓷甄甃甅甌甎甍甕甓甞甦甬甼畄畍畊畉畛畆畚畩畤 +0x6140: 畧畫畭畸當疆疇畴疊疉疂疔疚疝疥疣痂疳痃疵疽疸疼疱痍痊痒痙痣痞痾痿 +0x6160: 痼瘁痰痺痲痳瘋瘍瘉瘟瘧瘠瘡瘢瘤瘴瘰瘻癇癈癆癜癘癡癢癨癩癪癧癬癰 +0x6220: 癲癶癸發皀皃皈皋皎皖皓皙皚皰皴皸皹皺盂盍盖盒盞盡盥盧盪蘯盻眈眇 +0x6240: 眄眩眤眞眥眦眛眷眸睇睚睨睫睛睥睿睾睹瞎瞋瞑瞠瞞瞰瞶瞹瞿瞼瞽瞻矇矍 +0x6260: 矗矚矜矣矮矼砌砒礦砠礪硅碎硴碆硼碚碌碣碵碪碯磑磆磋磔碾碼磅磊磬 +0x6320: 磧磚磽磴礇礒礑礙礬礫祀祠祗祟祚祕祓祺祿禊禝禧齋禪禮禳禹禺秉秕秧 +0x6340: 秬秡秣稈稍稘稙稠稟禀稱稻稾稷穃穗穉穡穢穩龝穰穹穽窈窗窕窘窖窩竈窰 +0x6360: 窶竅竄窿邃竇竊竍竏竕竓站竚竝竡竢竦竭竰笂笏笊笆笳笘笙笞笵笨笶筐 +0x6420: 筺笄筍笋筌筅筵筥筴筧筰筱筬筮箝箘箟箍箜箚箋箒箏筝箙篋篁篌篏箴篆 +0x6440: 篝篩簑簔篦篥籠簀簇簓篳篷簗簍篶簣簧簪簟簷簫簽籌籃籔籏籀籐籘籟籤籖 +0x6460: 籥籬籵粃粐粤粭粢粫粡粨粳粲粱粮粹粽糀糅糂糘糒糜糢鬻糯糲糴糶糺紆 +0x6520: 紂紜紕紊絅絋紮紲紿紵絆絳絖絎絲絨絮絏絣經綉絛綏絽綛綺綮綣綵緇綽 +0x6540: 綫總綢綯緜綸綟綰緘緝緤緞緻緲緡縅縊縣縡縒縱縟縉縋縢繆繦縻縵縹繃縷 +0x6560: 縲縺繧繝繖繞繙繚繹繪繩繼繻纃緕繽辮繿纈纉續纒纐纓纔纖纎纛纜缸缺 +0x6620: 罅罌罍罎罐网罕罔罘罟罠罨罩罧罸羂羆羃羈羇羌羔羞羝羚羣羯羲羹羮羶 +0x6640: 羸譱翅翆翊翕翔翡翦翩翳翹飜耆耄耋耒耘耙耜耡耨耿耻聊聆聒聘聚聟聢聨 +0x6660: 聳聲聰聶聹聽聿肄肆肅肛肓肚肭冐肬胛胥胙胝胄胚胖脉胯胱脛脩脣脯腋 +0x6720: 隋腆脾腓腑胼腱腮腥腦腴膃膈膊膀膂膠膕膤膣腟膓膩膰膵膾膸膽臀臂膺 +0x6740: 臉臍臑臙臘臈臚臟臠臧臺臻臾舁舂舅與舊舍舐舖舩舫舸舳艀艙艘艝艚艟艤 +0x6760: 艢艨艪艫舮艱艷艸艾芍芒芫芟芻芬苡苣苟苒苴苳苺莓范苻苹苞茆苜茉苙 +0x6820: 茵茴茖茲茱荀茹荐荅茯茫茗茘莅莚莪莟莢莖茣莎莇莊荼莵荳荵莠莉莨菴 +0x6840: 萓菫菎菽萃菘萋菁菷萇菠菲萍萢萠莽萸蔆菻葭萪萼蕚蒄葷葫蒭葮蒂葩葆萬 +0x6860: 葯葹萵蓊葢蒹蒿蒟蓙蓍蒻蓚蓐蓁蓆蓖蒡蔡蓿蓴蔗蔘蔬蔟蔕蔔蓼蕀蕣蕘蕈 +0x6920: 蕁蘂蕋蕕薀薤薈薑薊薨蕭薔薛藪薇薜蕷蕾薐藉薺藏薹藐藕藝藥藜藹蘊蘓 +0x6940: 蘋藾藺蘆蘢蘚蘰蘿虍乕虔號虧虱蚓蚣蚩蚪蚋蚌蚶蚯蛄蛆蚰蛉蠣蚫蛔蛞蛩蛬 +0x6960: 蛟蛛蛯蜒蜆蜈蜀蜃蛻蜑蜉蜍蛹蜊蜴蜿蜷蜻蜥蜩蜚蝠蝟蝸蝌蝎蝴蝗蝨蝮蝙 +0x6a20: 蝓蝣蝪蠅螢螟螂螯蟋螽蟀蟐雖螫蟄螳蟇蟆螻蟯蟲蟠蠏蠍蟾蟶蟷蠎蟒蠑蠖 +0x6a40: 蠕蠢蠡蠱蠶蠹蠧蠻衄衂衒衙衞衢衫袁衾袞衵衽袵衲袂袗袒袮袙袢袍袤袰袿 +0x6a60: 袱裃裄裔裘裙裝裹褂裼裴裨裲褄褌褊褓襃褞褥褪褫襁襄褻褶褸襌褝襠襞 +0x6b20: 襦襤襭襪襯襴襷襾覃覈覊覓覘覡覩覦覬覯覲覺覽覿觀觚觜觝觧觴觸訃訖 +0x6b40: 訐訌訛訝訥訶詁詛詒詆詈詼詭詬詢誅誂誄誨誡誑誥誦誚誣諄諍諂諚諫諳諧 +0x6b60: 諤諱謔諠諢諷諞諛謌謇謚諡謖謐謗謠謳鞫謦謫謾謨譁譌譏譎證譖譛譚譫 +0x6c20: 譟譬譯譴譽讀讌讎讒讓讖讙讚谺豁谿豈豌豎豐豕豢豬豸豺貂貉貅貊貍貎 +0x6c40: 貔豼貘戝貭貪貽貲貳貮貶賈賁賤賣賚賽賺賻贄贅贊贇贏贍贐齎贓賍贔贖赧 +0x6c60: 赭赱赳趁趙跂趾趺跏跚跖跌跛跋跪跫跟跣跼踈踉跿踝踞踐踟蹂踵踰踴蹊 +0x6d20: 蹇蹉蹌蹐蹈蹙蹤蹠踪蹣蹕蹶蹲蹼躁躇躅躄躋躊躓躑躔躙躪躡躬躰軆躱躾 +0x6d40: 軅軈軋軛軣軼軻軫軾輊輅輕輒輙輓輜輟輛輌輦輳輻輹轅轂輾轌轉轆轎轗轜 +0x6d60: 轢轣轤辜辟辣辭辯辷迚迥迢迪迯邇迴逅迹迺逑逕逡逍逞逖逋逧逶逵逹迸 +0x6e20: 遏遐遑遒逎遉逾遖遘遞遨遯遶隨遲邂遽邁邀邊邉邏邨邯邱邵郢郤扈郛鄂 +0x6e40: 鄒鄙鄲鄰酊酖酘酣酥酩酳酲醋醉醂醢醫醯醪醵醴醺釀釁釉釋釐釖釟釡釛釼 +0x6e60: 釵釶鈞釿鈔鈬鈕鈑鉞鉗鉅鉉鉤鉈銕鈿鉋鉐銜銖銓銛鉚鋏銹銷鋩錏鋺鍄錮 +0x6f20: 錙錢錚錣錺錵錻鍜鍠鍼鍮鍖鎰鎬鎭鎔鎹鏖鏗鏨鏥鏘鏃鏝鏐鏈鏤鐚鐔鐓鐃 +0x6f40: 
鐇鐐鐶鐫鐵鐡鐺鑁鑒鑄鑛鑠鑢鑞鑪鈩鑰鑵鑷鑽鑚鑼鑾钁鑿閂閇閊閔閖閘閙 +0x6f60: 閠閨閧閭閼閻閹閾闊濶闃闍闌闕闔闖關闡闥闢阡阨阮阯陂陌陏陋陷陜陞 +0x7020: 陝陟陦陲陬隍隘隕隗險隧隱隲隰隴隶隸隹雎雋雉雍襍雜霍雕雹霄霆霈霓 +0x7040: 霎霑霏霖霙霤霪霰霹霽霾靄靆靈靂靉靜靠靤靦靨勒靫靱靹鞅靼鞁靺鞆鞋鞏 +0x7060: 鞐鞜鞨鞦鞣鞳鞴韃韆韈韋韜韭齏韲竟韶韵頏頌頸頤頡頷頽顆顏顋顫顯顰 +0x7120: 顱顴顳颪颯颱颶飄飃飆飩飫餃餉餒餔餘餡餝餞餤餠餬餮餽餾饂饉饅饐饋 +0x7140: 饑饒饌饕馗馘馥馭馮馼駟駛駝駘駑駭駮駱駲駻駸騁騏騅駢騙騫騷驅驂驀驃 +0x7160: 騾驕驍驛驗驟驢驥驤驩驫驪骭骰骼髀髏髑髓體髞髟髢髣髦髯髫髮髴髱髷 +0x7220: 髻鬆鬘鬚鬟鬢鬣鬥鬧鬨鬩鬪鬮鬯鬲魄魃魏魍魎魑魘魴鮓鮃鮑鮖鮗鮟鮠鮨 +0x7240: 鮴鯀鯊鮹鯆鯏鯑鯒鯣鯢鯤鯔鯡鰺鯲鯱鯰鰕鰔鰉鰓鰌鰆鰈鰒鰊鰄鰮鰛鰥鰤鰡 +0x7260: 鰰鱇鰲鱆鰾鱚鱠鱧鱶鱸鳧鳬鳰鴉鴈鳫鴃鴆鴪鴦鶯鴣鴟鵄鴕鴒鵁鴿鴾鵆鵈 +0x7320: 鵝鵞鵤鵑鵐鵙鵲鶉鶇鶫鵯鵺鶚鶤鶩鶲鷄鷁鶻鶸鶺鷆鷏鷂鷙鷓鷸鷦鷭鷯鷽 +0x7340: 鸚鸛鸞鹵鹹鹽麁麈麋麌麒麕麑麝麥麩麸麪麭靡黌黎黏黐黔黜點黝黠黥黨黯 +0x7360: 黴黶黷黹黻黼黽鼇鼈皷鼕鼡鼬鼾齊齒齔齣齟齠齡齦齧齬齪齷齲齶龕龜龠 +0x7420: 堯槇遙瑤凜熙 diff --git a/t/jisx0212.enc b/t/jisx0212.enc new file mode 100644 index 0000000..f6910c3 --- /dev/null +++ b/t/jisx0212.enc @@ -0,0 +1,200 @@ +0x2220: ������������������������~������ +0x2240: ���Ï�� +0x2260: ��돢쏢폢���� +0x2660: ��Ꮶ⏦㏦䏦� ��� ��鏦� ��� ���������������������������� +0x2740: ���Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�� +0x2760: ��������������������������������� +0x2920: ������ ��� ��� ������ ��������� ������ +0x2940: ������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�� +0x2a20: ������������������������������������������������������������������������ ������������������ +0x2a40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x2a60: �����Ꮺ⏪㏪䏪只揪珪菪鏪ꏪ돪쏪폪�������������� +0x2b20: ��������������������������������������������������������������������������������� ��������� +0x2b40: ���������� ��ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x2b60: �����Ꮻ⏫㏫䏫叫揫珫菫鏫ꏫ돫쏫폫�������������� +0x3020: ��������������������������������������������������������������������������������������������� +0x3040: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3060: �����Ᏸ⏰㏰䏰台揰珰菰鏰ꏰ돰쏰폰����������������������������������� +0x3120: ��������������������������������������������������������������������������������������������� +0x3140: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3160: �����Ᏹ⏱㏱䏱叱揱珱菱鏱ꏱ돱쏱폱����������������������������������� +0x3220: 
��������������������������������������������������������������������������������������������� +0x3240: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3260: �����Ᏺ⏲㏲䏲史揲珲菲鏲ꏲ돲쏲폲����������������������������������� +0x3320: ��������������������������������������������������������������������������������������������� +0x3340: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3360: �����Ᏻ⏳㏳䏳右揳珳菳鏳ꏳ돳쏳폳����������������������������������� +0x3420: ��������������������������������������������������������������������������������������������� +0x3440: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3460: �����Ᏼ⏴㏴䏴叴援珴菴鏴ꏴ돴쏴폴����������������������������������� +0x3520: ��������������������������������������������������������������������������������������������� +0x3540: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3560: �����Ᏽ⏵㏵䏵叵揵珵菵鏵ꏵ돵쏵폵����������������������������������� +0x3620: ��������������������������������������������������������������������������������������������� +0x3640: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3660: �����᏶⏶㏶䏶叶揶珶菶鏶ꏶ돶쏶폶����������������������������������� +0x3720: ��������������������������������������������������������������������������������������������� +0x3740: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3760: �����᏷⏷㏷䏷号揷珷菷鏷ꏷ돷쏷폷����������������������������������� +0x3820: ��������������������������������������������������������������������������������������������� +0x3840: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3860: �����ᏸ⏸㏸䏸司揸珸菸鏸ꏸ돸쏸폸����������������������������������� +0x3920: ��������������������������������������������������������������������������������������������� +0x3940: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3960: �����ᏹ⏹㏹䏹叹揹珹菹鏹ꏹ돹쏹폹����������������������������������� +0x3a20: 
��������������������������������������������������������������������������������������������� +0x3a40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3a60: �����ᏺ⏺㏺䏺叺揺珺菺鏺ꏺ돺쏺폺����������������������������������� +0x3b20: ��������������������������������������������������������������������������������������������� +0x3b40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3b60: �����ᏻ⏻㏻䏻叻揻珻菻鏻ꏻ돻쏻폻����������������������������������� +0x3c20: ��������������������������������������������������������������������������������������������� +0x3c40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3c60: �����ᏼ⏼㏼䏼叼揼珼菼鏼ꏼ돼쏼폼����������������������������������� +0x3d20: ��������������������������������������������������������������������������������������������� +0x3d40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3d60: �����ᏽ⏽㏽䏽叽揽珽菽鏽ꏽ돽쏽폽����������������������������������� +0x3e20: ��������������������������������������������������������������������������������������������� +0x3e40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3e60: �����᏾⏾㏾䏾叾揾現菾鏾ꏾ돾쏾폾����������������������������������� +0x3f20: ��������������������������������������������������������������������������������������������� +0x3f40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x3f60: �����᏿⏿㏿䏿叿揿珿菿鏿ꏿ돿쏿폿����������������������������������� +0x4020: ��������������������������������������������������������������������������������������������� +0x4040: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4060: �������������������������������������������������������������������������� +0x4120: ��������������������������������������������������������������������������������������������� +0x4140: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4160: �������������������������������������������������������������������������� +0x4220: 
�¡�¢�£�¤�¥�¦�§�¨�©�ª�«�¬�­�®�¯�°�±�²�³�´�µ�¶�·�¸�¹�º�»�¼�½�¾�¿ +0x4240: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4260: �������������������������������������������������������������������������� +0x4320: �á�â�ã�ä�å�æ�ç�è�é�ê�ë�ì�í�î�ï�ð�ñ�ò�ó�ô�õ�ö�÷�ø�ù�ú�û�ü�ý�þ�ÿ +0x4340: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4360: �������������������������������������������������������������������������� +0x4420: �ġ�Ģ�ģ�Ĥ�ĥ�Ħ�ħ�Ĩ�ĩ�Ī�ī�Ĭ�ĭ�Į�į�İ�ı�IJ�ij�Ĵ�ĵ�Ķ�ķ�ĸ�Ĺ�ĺ�Ļ�ļ�Ľ�ľ�Ŀ +0x4440: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4460: �������������������������������������������������������������������������� +0x4520: �š�Ţ�ţ�Ť�ť�Ŧ�ŧ�Ũ�ũ�Ū�ū�Ŭ�ŭ�Ů�ů�Ű�ű�Ų�ų�Ŵ�ŵ�Ŷ�ŷ�Ÿ�Ź�ź�Ż�ż�Ž�ž�ſ +0x4540: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4560: �������������������������������������������������������������������������� +0x4620: �ơ�Ƣ�ƣ�Ƥ�ƥ�Ʀ�Ƨ�ƨ�Ʃ�ƪ�ƫ�Ƭ�ƭ�Ʈ�Ư�ư�Ʊ�Ʋ�Ƴ�ƴ�Ƶ�ƶ�Ʒ�Ƹ�ƹ�ƺ�ƻ�Ƽ�ƽ�ƾ�ƿ +0x4640: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4660: �������������������������������������������������������������������������� +0x4720: �ǡ�Ǣ�ǣ�Ǥ�ǥ�Ǧ�ǧ�Ǩ�ǩ�Ǫ�ǫ�Ǭ�ǭ�Ǯ�ǯ�ǰ�DZ�Dz�dz�Ǵ�ǵ�Ƕ�Ƿ�Ǹ�ǹ�Ǻ�ǻ�Ǽ�ǽ�Ǿ�ǿ +0x4740: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4760: �������������������������������������������������������������������������� +0x4820: �ȡ�Ȣ�ȣ�Ȥ�ȥ�Ȧ�ȧ�Ȩ�ȩ�Ȫ�ȫ�Ȭ�ȭ�Ȯ�ȯ�Ȱ�ȱ�Ȳ�ȳ�ȴ�ȵ�ȶ�ȷ�ȸ�ȹ�Ⱥ�Ȼ�ȼ�Ƚ�Ⱦ�ȿ +0x4840: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4860: �������������������������������������������������������������������������� +0x4920: �ɡ�ɢ�ɣ�ɤ�ɥ�ɦ�ɧ�ɨ�ɩ�ɪ�ɫ�ɬ�ɭ�ɮ�ɯ�ɰ�ɱ�ɲ�ɳ�ɴ�ɵ�ɶ�ɷ�ɸ�ɹ�ɺ�ɻ�ɼ�ɽ�ɾ�ɿ +0x4940: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4960: �������������������������������������������������������������������������� +0x4a20: �ʡ�ʢ�ʣ�ʤ�ʥ�ʦ�ʧ�ʨ�ʩ�ʪ�ʫ�ʬ�ʭ�ʮ�ʯ�ʰ�ʱ�ʲ�ʳ�ʴ�ʵ�ʶ�ʷ�ʸ�ʹ�ʺ�ʻ�ʼ�ʽ�ʾ�ʿ +0x4a40: 
���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4a60: �������������������������������������������������������������������������� +0x4b20: �ˡ�ˢ�ˣ�ˤ�˥�˦�˧�˨�˩�˪�˫�ˬ�˭�ˮ�˯�˰�˱�˲�˳�˴�˵�˶�˷�˸�˹�˺�˻�˼�˽�˾�˿ +0x4b40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4b60: �������������������������������������������������������������������������� +0x4c20: �̡�̢�̣�̤�̥�̦�̧�̨�̩�̪�̫�̬�̭�̮�̯�̰�̱�̲�̳�̴�̵�̶�̷�̸�̹�̺�̻�̼�̽�̾�̿ +0x4c40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4c60: �������������������������������������������������������������������������� +0x4d20: �͡�͢�ͣ�ͤ�ͥ�ͦ�ͧ�ͨ�ͩ�ͪ�ͫ�ͬ�ͭ�ͮ�ͯ�Ͱ�ͱ�Ͳ�ͳ�ʹ�͵�Ͷ�ͷ�͸�͹�ͺ�ͻ�ͼ�ͽ�;�Ϳ +0x4d40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4d60: �������������������������������������������������������������������������� +0x4e20: �Ρ�΢�Σ�Τ�Υ�Φ�Χ�Ψ�Ω�Ϊ�Ϋ�ά�έ�ή�ί�ΰ�α�β�γ�δ�ε�ζ�η�θ�ι�κ�λ�μ�ν�ξ�ο +0x4e40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4e60: �������������������������������������������������������������������������� +0x4f20: �ϡ�Ϣ�ϣ�Ϥ�ϥ�Ϧ�ϧ�Ϩ�ϩ�Ϫ�ϫ�Ϭ�ϭ�Ϯ�ϯ�ϰ�ϱ�ϲ�ϳ�ϴ�ϵ�϶�Ϸ�ϸ�Ϲ�Ϻ�ϻ�ϼ�Ͻ�Ͼ�Ͽ +0x4f40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x4f60: �������������������������������������������������������������������������� +0x5020: �С�Т�У�Ф�Х�Ц�Ч�Ш�Щ�Ъ�Ы�Ь�Э�Ю�Я�а�б�в�г�д�е�ж�з�и�й�к�л�м�н�о�п +0x5040: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5060: �������������������������������������������������������������������������� +0x5120: �ѡ�Ѣ�ѣ�Ѥ�ѥ�Ѧ�ѧ�Ѩ�ѩ�Ѫ�ѫ�Ѭ�ѭ�Ѯ�ѯ�Ѱ�ѱ�Ѳ�ѳ�Ѵ�ѵ�Ѷ�ѷ�Ѹ�ѹ�Ѻ�ѻ�Ѽ�ѽ�Ѿ�ѿ +0x5140: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5160: �������������������������������������������������������������������������� +0x5220: �ҡ�Ң�ң�Ҥ�ҥ�Ҧ�ҧ�Ҩ�ҩ�Ҫ�ҫ�Ҭ�ҭ�Ү�ү�Ұ�ұ�Ҳ�ҳ�Ҵ�ҵ�Ҷ�ҷ�Ҹ�ҹ�Һ�һ�Ҽ�ҽ�Ҿ�ҿ +0x5240: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5260: 
�������������������������������������������������������������������������� +0x5320: �ӡ�Ӣ�ӣ�Ӥ�ӥ�Ӧ�ӧ�Ө�ө�Ӫ�ӫ�Ӭ�ӭ�Ӯ�ӯ�Ӱ�ӱ�Ӳ�ӳ�Ӵ�ӵ�Ӷ�ӷ�Ӹ�ӹ�Ӻ�ӻ�Ӽ�ӽ�Ӿ�ӿ +0x5340: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5360: �������������������������������������������������������������������������� +0x5420: �ԡ�Ԣ�ԣ�Ԥ�ԥ�Ԧ�ԧ�Ԩ�ԩ�Ԫ�ԫ�Ԭ�ԭ�Ԯ�ԯ�԰�Ա�Բ�Գ�Դ�Ե�Զ�Է�Ը�Թ�Ժ�Ի�Լ�Խ�Ծ�Կ +0x5440: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5460: �������������������������������������������������������������������������� +0x5520: �ա�բ�գ�դ�ե�զ�է�ը�թ�ժ�ի�լ�խ�ծ�կ�հ�ձ�ղ�ճ�մ�յ�ն�շ�ո�չ�պ�ջ�ռ�ս�վ�տ +0x5540: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5560: �������������������������������������������������������������������������� +0x5620: �֡�֢�֣�֤�֥�֦�֧�֨�֩�֪�֫�֬�֭�֮�֯�ְ�ֱ�ֲ�ֳ�ִ�ֵ�ֶ�ַ�ָ�ֹ�ֺ�ֻ�ּ�ֽ�־�ֿ +0x5640: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5660: �������������������������������������������������������������������������� +0x5720: �ס�ע�ף�פ�ץ�צ�ק�ר�ש�ת�׫�׬�׭�׮�ׯ�װ�ױ�ײ�׳�״�׵�׶�׷�׸�׹�׺�׻�׼�׽�׾�׿ +0x5740: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5760: �������������������������������������������������������������������������� +0x5820: �ء�آ�أ�ؤ�إ�ئ�ا�ب�ة�ت�ث�ج�ح�خ�د�ذ�ر�ز�س�ش�ص�ض�ط�ظ�ع�غ�ػ�ؼ�ؽ�ؾ�ؿ +0x5840: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5860: �������������������������������������������������������������������������� +0x5920: �١�٢�٣�٤�٥�٦�٧�٨�٩�٪�٫�٬�٭�ٮ�ٯ�ٰ�ٱ�ٲ�ٳ�ٴ�ٵ�ٶ�ٷ�ٸ�ٹ�ٺ�ٻ�ټ�ٽ�پ�ٿ +0x5940: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5960: �������������������������������������������������������������������������� +0x5a20: �ڡ�ڢ�ڣ�ڤ�ڥ�ڦ�ڧ�ڨ�ک�ڪ�ګ�ڬ�ڭ�ڮ�گ�ڰ�ڱ�ڲ�ڳ�ڴ�ڵ�ڶ�ڷ�ڸ�ڹ�ں�ڻ�ڼ�ڽ�ھ�ڿ +0x5a40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5a60: �������������������������������������������������������������������������� +0x5b20: 
�ۡ�ۢ�ۣ�ۤ�ۥ�ۦ�ۧ�ۨ�۩�۪�۫�۬�ۭ�ۮ�ۯ�۰�۱�۲�۳�۴�۵�۶�۷�۸�۹�ۺ�ۻ�ۼ�۽�۾�ۿ +0x5b40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5b60: �������������������������������������������������������������������������� +0x5c20: �ܡ�ܢ�ܣ�ܤ�ܥ�ܦ�ܧ�ܨ�ܩ�ܪ�ܫ�ܬ�ܭ�ܮ�ܯ�ܰ�ܱ�ܲ�ܳ�ܴ�ܵ�ܶ�ܷ�ܸ�ܹ�ܺ�ܻ�ܼ�ܽ�ܾ�ܿ +0x5c40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5c60: �������������������������������������������������������������������������� +0x5d20: �ݡ�ݢ�ݣ�ݤ�ݥ�ݦ�ݧ�ݨ�ݩ�ݪ�ݫ�ݬ�ݭ�ݮ�ݯ�ݰ�ݱ�ݲ�ݳ�ݴ�ݵ�ݶ�ݷ�ݸ�ݹ�ݺ�ݻ�ݼ�ݽ�ݾ�ݿ +0x5d40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5d60: �������������������������������������������������������������������������� +0x5e20: �ޡ�ޢ�ޣ�ޤ�ޥ�ަ�ާ�ި�ީ�ު�ޫ�ެ�ޭ�ޮ�ޯ�ް�ޱ�޲�޳�޴�޵�޶�޷�޸�޹�޺�޻�޼�޽�޾�޿ +0x5e40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5e60: �������������������������������������������������������������������������� +0x5f20: �ߡ�ߢ�ߣ�ߤ�ߥ�ߦ�ߧ�ߨ�ߩ�ߪ�߫�߬�߭�߮�߯�߰�߱�߲�߳�ߴ�ߵ�߶�߷�߸�߹�ߺ�߻�߼�߽�߾�߿ +0x5f40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x5f60: �������������������������������������������������������������������������� +0x6020: �ࡏ࢏࣏एॏএ৏ਏ੏એ૏ଏ୏ஏ௏ఏ౏ಏ೏ഏ൏ඏාฏ๏ຏ໏༏ཏྏ� +0x6040: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x6060: �������������������������������������������������������������������������� +0x6120: �ᡏᢏᣏᤏ᥏ᦏ᧏ᨏᩏ᪏᫏ᬏ᭏ᮏᯏᰏᱏ᲏᳏ᴏᵏᶏ᷏ḏṏẏỏἏ὏ᾏ� +0x6140: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x6160: �������������������������������������������������������������������������� +0x6220: �⡏⢏⣏⤏⥏⦏⧏⨏⩏⪏⫏⬏⭏⮏⯏Ⰿⱏⲏⳏⴏⵏⶏ⷏⸏⹏⺏⻏⼏⽏⾏� +0x6240: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x6260: �������������������������������������������������������������������������� +0x6320: �㡏㢏㣏㤏㥏㦏㧏㨏㩏㪏㫏㬏㭏㮏㯏㰏㱏㲏㳏㴏㵏㶏㷏㸏㹏㺏㻏㼏㽏㾏� +0x6340: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x6360: �������������������������������������������������������������������������� +0x6420: 
�䡏䢏䣏䤏䥏䦏䧏䨏䩏䪏䫏䬏䭏䮏䯏䰏䱏䲏䳏䴏䵏䶏䷏丏乏亏仏伏住侏� +0x6440: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x6460: �������������������������������������������������������������������������� +0x6520: �塏墏壏夏奏妏姏娏婏媏嫏嬏孏宏寏小屏岏峏崏嵏嶏巏帏幏序廏式彏徏� +0x6540: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x6560: �������������������������������������������������������������������������� +0x6620: �桏梏棏椏楏榏槏樏橏檏櫏欏歏殏每氏汏沏泏洏浏涏淏渏湏溏滏漏潏澏� +0x6640: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x6660: �������������������������������������������������������������������������� +0x6720: �硏碏磏礏祏福秏稏穏窏竏笏筏箏篏簏籏粏糏紏絏綏総縏繏纏经缏罏羏� +0x6740: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x6760: �������������������������������������������������������������������������� +0x6820: �衏袏裏褏襏規觏訏詏誏諏謏譏讏诏谏豏貏賏贏赏趏跏踏蹏躏軏輏轏辏� +0x6840: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x6860: �������������������������������������������������������������������������� +0x6920: �顏颏飏餏饏馏駏騏驏骏髏鬏魏鮏鯏鰏鱏鲏鳏鴏鵏鶏鷏鸏鹏麏黏鼏齏龏� +0x6940: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x6960: �������������������������������������������������������������������������� +0x6a20: �ꡏꢏ꣏ꤏꥏꦏꧏꨏ꩏ꪏ꫏꬏ꭏꮏꯏ갏걏겏곏괏굏궏귏긏깏꺏껏꼏꽏꾏� +0x6a40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x6a60: �������������������������������������������������������������������������� +0x6b20: �롏뢏룏뤏륏릏맏먏멏몏뫏묏뭏뮏믏및뱏벏볏봏뵏붏뷏븏빏뺏뻏뼏뽏뾏� +0x6b40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x6b60: �������������������������������������������������������������������������� +0x6c20: �졏좏죏줏쥏즏짏쨏쩏쪏쫏쬏쭏쮏쯏찏챏첏쳏촏쵏춏췏츏칏캏컏켏콏쾏� +0x6c40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� +0x6c60: �������������������������������������������������������������������������� +0x6d20: ��������������������������������������������������������������������������������������������� +0x6d40: ���������Ï�ď�ŏ�Ə�Ǐ�ȏ�ɏ�ʏ�ˏ�̏�͏�Ώ�Ϗ�Џ�я�ҏ�ӏ�ԏ�Տ�֏�׏�؏�ُ�ڏ�ۏ�܏�ݏ�ޏ�� 
+0x6d60: ���������� diff --git a/t/jisx0212.utf b/t/jisx0212.utf new file mode 100644 index 0000000..84d221c --- /dev/null +++ b/t/jisx0212.utf @@ -0,0 +1,200 @@ +0x2220: ˘ˇ¸˙˝¯˛˚~΄΅ +0x2240: ¡¦¿ +0x2260: ºª©®™¤№ +0x2660: ΆΈΉΊΪ Ό ΎΫ Ώ άέήίϊΐόςύϋΰώ +0x2740: ЂЃЄЅІЇЈЉЊЋЌЎЏ +0x2760: ђѓєѕіїјљњћќўџ +0x2920: ÆĐ Ħ IJ ŁĿ ŊØŒ ŦÞ +0x2940: æđðħıijĸłŀʼnŋøœßŧþ +0x2a20: ÁÀÄÂĂǍĀĄÅÃĆĈČÇĊĎÉÈËÊĚĖĒĘ ĜĞĢĠĤÍ +0x2a40: ÌÏÎǏİĪĮĨĴĶĹĽĻŃŇŅÑÓÒÖÔǑŐŌÕŔŘŖŚŜŠŞ +0x2a60: ŤŢÚÙÜÛŬǓŰŪŲŮŨǗǛǙǕŴÝŸŶŹŽŻ +0x2b20: áàäâăǎāąåãćĉčçċďéèëêěėēęǵĝğ ġĥí +0x2b40: ìïîǐ īįĩĵķĺľļńňņñóòöôǒőōõŕřŗśŝšş +0x2b60: ťţúùüûŭǔűūųůũǘǜǚǖŵýÿŷźžż +0x3020: 丂丄丅丌丒丟丣两丨丫丮丯丰丵乀乁乄乇乑乚乜乣乨乩乴乵乹乿亍亖亗 +0x3040: 亝亯亹仃仐仚仛仠仡仢仨仯仱仳仵份仾仿伀伂伃伈伋伌伒伕伖众伙伮伱你 +0x3060: 伳伵伷伹伻伾佀佂佈佉佋佌佒佔佖佘佟佣佪佬佮佱佷佸佹佺佽佾侁侂侄 +0x3120: 侅侉侊侌侎侐侒侓侔侗侙侚侞侟侲侷侹侻侼侽侾俀俁俅俆俈俉俋俌俍俏 +0x3140: 俒俜俠俢俰俲俼俽俿倀倁倄倇倊倌倎倐倓倗倘倛倜倝倞倢倧倮倰倲倳倵偀 +0x3160: 偁偂偅偆偊偌偎偑偒偓偗偙偟偠偢偣偦偧偪偭偰偱倻傁傃傄傆傊傎傏傐 +0x3220: 傒傓傔傖傛傜傞傟傠傡傢傪傯傰傹傺傽僀僃僄僇僌僎僐僓僔僘僜僝僟僢 +0x3240: 僤僦僨僩僯僱僶僺僾儃儆儇儈儋儌儍儎僲儐儗儙儛儜儝儞儣儧儨儬儭儯儱 +0x3260: 儳儴儵儸儹兂兊兏兓兕兗兘兟兤兦兾冃冄冋冎冘冝冡冣冭冸冺冼冾冿凂 +0x3320: 凈减凑凒凓凕凘凞凢凥凮凲凳凴凷刁刂刅划刓刕刖刘刢刨刱刲刵刼剅剉 +0x3340: 剕剗剘剚剜剟剠剡剦剮剷剸剹劀劂劅劊劌劓劕劖劗劘劚劜劤劥劦劧劯劰劶 +0x3360: 劷劸劺劻劽勀勄勆勈勌勏勑勔勖勛勜勡勥勨勩勪勬勰勱勴勶勷匀匃匊匋 +0x3420: 匌匑匓匘匛匜匞匟匥匧匨匩匫匬匭匰匲匵匼匽匾卂卌卋卙卛卡卣卥卬卭 +0x3440: 卲卹卾厃厇厈厎厓厔厙厝厡厤厪厫厯厲厴厵厷厸厺厽叀叅叏叒叓叕叚叝叞 +0x3460: 叠另叧叵吂吓吚吡吧吨吪启吱吴吵呃呄呇呍呏呞呢呤呦呧呩呫呭呮呴呿 +0x3520: 咁咃咅咈咉咍咑咕咖咜咟咡咦咧咩咪咭咮咱咷咹咺咻咿哆哊响哎哠哪哬 +0x3540: 哯哶哼哾哿唀唁唅唈唉唌唍唎唕唪唫唲唵唶唻唼唽啁啇啉啊啍啐啑啘啚啛 +0x3560: 啞啠啡啤啦啿喁喂喆喈喎喏喑喒喓喔喗喣喤喭喲喿嗁嗃嗆嗉嗋嗌嗎嗑嗒 +0x3620: 嗓嗗嗘嗛嗞嗢嗩嗶嗿嘅嘈嘊嘍嘎嘏嘐嘑嘒嘙嘬嘰嘳嘵嘷嘹嘻嘼嘽嘿噀噁 +0x3640: 噃噄噆噉噋噍噏噔噞噠噡噢噣噦噩噭噯噱噲噵嚄嚅嚈嚋嚌嚕嚙嚚嚝嚞嚟嚦 +0x3660: 嚧嚨嚩嚫嚬嚭嚱嚳嚷嚾囅囉囊囋囏囐囌囍囙囜囝囟囡囤囥囦囧囨囱囫园 +0x3720: 囶囷圁圂圇圊圌圑圕圚圛圝圠圢圣圤圥圩圪圬圮圯圳圴圽圾圿坅坆坌坍 +0x3740: 坒坢坥坧坨坫坭坮坯坰坱坳坴坵坷坹坺坻坼坾垁垃垌垔垗垙垚垜垝垞垟垡 +0x3760: 垕垧垨垩垬垸垽埇埈埌埏埕埝埞埤埦埧埩埭埰埵埶埸埽埾埿堃堄堈堉埡 +0x3820: 堌堍堛堞堟堠堦堧堭堲堹堿塉塌塍塏塐塕塟塡塤塧塨塸塼塿墀墁墇墈墉 +0x3840: 墊墌墍墏墐墔墖墝墠墡墢墦墩墱墲壄墼壂壈壍壎壐壒壔壖壚壝壡壢壩壳夅 +0x3860: 夆夋夌夒夓夔虁夝夡夣夤夨夯夰夳夵夶夿奃奆奒奓奙奛奝奞奟奡奣奫奭 +0x3920: 奯奲奵奶她奻奼妋妌妎妒妕妗妟妤妧妭妮妯妰妳妷妺妼姁姃姄姈姊姍姒 +0x3940: 姝姞姟姣姤姧姮姯姱姲姴姷娀娄娌娍娎娒娓娞娣娤娧娨娪娭娰婄婅婇婈婌 +0x3960: 婐婕婞婣婥婧婭婷婺婻婾媋媐媓媖媙媜媞媟媠媢媧媬媱媲媳媵媸媺媻媿 +0x3a20: 嫄嫆嫈嫏嫚嫜嫠嫥嫪嫮嫵嫶嫽嬀嬁嬈嬗嬴嬙嬛嬝嬡嬥嬭嬸孁孋孌孒孖孞 +0x3a40: 孨孮孯孼孽孾孿宁宄宆宊宎宐宑宓宔宖宨宩宬宭宯宱宲宷宺宼寀寁寍寏寖 +0x3a60: 寗寘寙寚寠寯寱寴寽尌尗尞尟尣尦尩尫尬尮尰尲尵尶屙屚屜屢屣屧屨屩 +0x3b20: 屭屰屴屵屺屻屼屽岇岈岊岏岒岝岟岠岢岣岦岪岲岴岵岺峉峋峒峝峗峮峱 +0x3b40: 
峲峴崁崆崍崒崫崣崤崦崧崱崴崹崽崿嵂嵃嵆嵈嵕嵑嵙嵊嵟嵠嵡嵢嵤嵪嵭嵰 +0x3b60: 嵹嵺嵾嵿嶁嶃嶈嶊嶒嶓嶔嶕嶙嶛嶟嶠嶧嶫嶰嶴嶸嶹巃巇巋巐巎巘巙巠巤 +0x3c20: 巩巸巹帀帇帍帒帔帕帘帟帠帮帨帲帵帾幋幐幉幑幖幘幛幜幞幨幪幫幬幭 +0x3c40: 幮幰庀庋庎庢庤庥庨庪庬庱庳庽庾庿廆廌廋廎廑廒廔廕廜廞廥廫异弆弇弈 +0x3c60: 弎弙弜弝弡弢弣弤弨弫弬弮弰弴弶弻弽弿彀彄彅彇彍彐彔彘彛彠彣彤彧 +0x3d20: 彯彲彴彵彸彺彽彾徉徍徏徖徜徝徢徧徫徤徬徯徰徱徸忄忇忈忉忋忐忑忒 +0x3d40: 忓忔忞忡忢忨忩忪忬忭忮忯忲忳忶忺忼怇怊怍怓怔怗怘怚怟怤怭怳怵恀恇 +0x3d60: 恈恉恌恑恔恖恗恝恡恧恱恾恿悂悆悈悊悎悑悓悕悘悝悞悢悤悥您悰悱悷 +0x3e20: 悻悾惂惄惈惉惊惋惎惏惔惕惙惛惝惞惢惥惲惵惸惼惽愂愇愊愌愐愑愒愓 +0x3e40: 愔愖愗愙愜愞愢愪愫愰愱愵愶愷愹慁慅慆慉慞慠慬慲慸慻慼慿憀憁憃憄憋 +0x3e60: 憍憒憓憗憘憜憝憟憠憥憨憪憭憸憹憼懀懁懂懎懏懕懜懝懞懟懡懢懧懩懥 +0x3f20: 懬懭懯戁戃戄戇戓戕戜戠戢戣戧戩戫戹戽扂扃扄扆扌扐扑扒扔扖扚扜扤 +0x3f40: 扭扯扳扺扽抍抎抏抐抦抨抳抶抷抺抾抿拄拎拕拖拚拪拲拴拼拽挃挄挊挋挍 +0x3f60: 挐挓挖挘挩挪挭挵挶挹挼捁捂捃捄捆捊捋捎捒捓捔捘捛捥捦捬捭捱捴捵 +0x4020: 捸捼捽捿掂掄掇掊掐掔掕掙掚掞掤掦掭掮掯掽揁揅揈揎揑揓揔揕揜揠揥 +0x4040: 揪揬揲揳揵揸揹搉搊搐搒搔搘搞搠搢搤搥搩搪搯搰搵搽搿摋摏摑摒摓摔摚 +0x4060: 摛摜摝摟摠摡摣摭摳摴摻摽撅撇撏撐撑撘撙撛撝撟撡撣撦撨撬撳撽撾撿 +0x4120: 擄擉擊擋擌擎擐擑擕擗擤擥擩擪擭擰擵擷擻擿攁攄攈攉攊攏攓攔攖攙攛 +0x4140: 攞攟攢攦攩攮攱攺攼攽敃敇敉敐敒敔敟敠敧敫敺敽斁斅斊斒斕斘斝斠斣斦 +0x4160: 斮斲斳斴斿旂旈旉旎旐旔旖旘旟旰旲旴旵旹旾旿昀昄昈昉昍昑昒昕昖昝 +0x4220: 昞昡昢昣昤昦昩昪昫昬昮昰昱昳昹昷晀晅晆晊晌晑晎晗晘晙晛晜晠晡曻 +0x4240: 晪晫晬晾晳晵晿晷晸晹晻暀晼暋暌暍暐暒暙暚暛暜暟暠暤暭暱暲暵暻暿曀 +0x4260: 曂曃曈曌曎曏曔曛曟曨曫曬曮曺朅朇朎朓朙朜朠朢朳朾杅杇杈杌杔杕杝 +0x4320: 杦杬杮杴杶杻极构枎枏枑枓枖枘枙枛枰枱枲枵枻枼枽柹柀柂柃柅柈柉柒 +0x4340: 柗柙柜柡柦柰柲柶柷桒栔栙栝栟栨栧栬栭栯栰栱栳栻栿桄桅桊桌桕桗桘桛 +0x4360: 桫桮桯桰桱桲桵桹桺桻桼梂梄梆梈梖梘梚梜梡梣梥梩梪梮梲梻棅棈棌棏 +0x4420: 棐棑棓棖棙棜棝棥棨棪棫棬棭棰棱棵棶棻棼棽椆椉椊椐椑椓椖椗椱椳椵 +0x4440: 椸椻楂楅楉楎楗楛楣楤楥楦楨楩楬楰楱楲楺楻楿榀榍榒榖榘榡榥榦榨榫榭 +0x4460: 榯榷榸榺榼槅槈槑槖槗槢槥槮槯槱槳槵槾樀樁樃樏樑樕樚樝樠樤樨樰樲 +0x4520: 樴樷樻樾樿橅橆橉橊橎橐橑橒橕橖橛橤橧橪橱橳橾檁檃檆檇檉檋檑檛檝 +0x4540: 檞檟檥檫檯檰檱檴檽檾檿櫆櫉櫈櫌櫐櫔櫕櫖櫜櫝櫤櫧櫬櫰櫱櫲櫼櫽欂欃欆 +0x4560: 欇欉欏欐欑欗欛欞欤欨欫欬欯欵欶欻欿歆歊歍歒歖歘歝歠歧歫歮歰歵歽 +0x4620: 歾殂殅殗殛殟殠殢殣殨殩殬殭殮殰殸殹殽殾毃毄毉毌毖毚毡毣毦毧毮毱 +0x4640: 毷毹毿氂氄氅氉氍氎氐氒氙氟氦氧氨氬氮氳氵氶氺氻氿汊汋汍汏汒汔汙汛 +0x4660: 汜汫汭汯汴汶汸汹汻沅沆沇沉沔沕沗沘沜沟沰沲沴泂泆泍泏泐泑泒泔泖 +0x4720: 泚泜泠泧泩泫泬泮泲泴洄洇洊洎洏洑洓洚洦洧洨汧洮洯洱洹洼洿浗浞浟 +0x4740: 浡浥浧浯浰浼涂涇涑涒涔涖涗涘涪涬涴涷涹涽涿淄淈淊淎淏淖淛淝淟淠淢 +0x4760: 淥淩淯淰淴淶淼渀渄渞渢渧渲渶渹渻渼湄湅湈湉湋湏湑湒湓湔湗湜湝湞 +0x4820: 湢湣湨湳湻湽溍溓溙溠溧溭溮溱溳溻溿滀滁滃滇滈滊滍滎滏滫滭滮滹滻 +0x4840: 滽漄漈漊漌漍漖漘漚漛漦漩漪漯漰漳漶漻漼漭潏潑潒潓潗潙潚潝潞潡潢潨 +0x4860: 潬潽潾澃澇澈澋澌澍澐澒澓澔澖澚澟澠澥澦澧澨澮澯澰澵澶澼濅濇濈濊 +0x4920: 濚濞濨濩濰濵濹濼濽瀀瀅瀆瀇瀍瀗瀠瀣瀯瀴瀷瀹瀼灃灄灈灉灊灋灔灕灝 +0x4940: 灞灎灤灥灬灮灵灶灾炁炅炆炔炕炖炗炘炛炤炫炰炱炴炷烊烑烓烔烕烖烘烜 +0x4960: 烤烺焃焄焅焆焇焋焌焏焞焠焫焭焯焰焱焸煁煅煆煇煊煋煐煒煗煚煜煞煠 +0x4a20: 煨煹熀熅熇熌熒熚熛熠熢熯熰熲熳熺熿燀燁燄燋燌燓燖燙燚燜燸燾爀爇 +0x4a40: 爈爉爓爗爚爝爟爤爫爯爴爸爹牁牂牃牅牎牏牐牓牕牖牚牜牞牠牣牨牫牮牯 +0x4a60: 牱牷牸牻牼牿犄犉犍犎犓犛犨犭犮犱犴犾狁狇狉狌狕狖狘狟狥狳狴狺狻 +0x4b20: 狾猂猄猅猇猋猍猒猓猘猙猞猢猤猧猨猬猱猲猵猺猻猽獃獍獐獒獖獘獝獞 +0x4b40: 
獟獠獦獧獩獫獬獮獯獱獷獹獼玀玁玃玅玆玎玐玓玕玗玘玜玞玟玠玢玥玦玪 +0x4b60: 玫玭玵玷玹玼玽玿珅珆珉珋珌珏珒珓珖珙珝珡珣珦珧珩珴珵珷珹珺珻珽 +0x4c20: 珿琀琁琄琇琊琑琚琛琤琦琨琩琪琫琬琭琮琯琰琱琹瑀瑃瑄瑆瑇瑋瑍瑑瑒 +0x4c40: 瑗瑝瑢瑦瑧瑨瑫瑭瑮瑱瑲璀璁璅璆璇璉璏璐璑璒璘璙璚璜璟璠璡璣璦璨璩 +0x4c60: 璪璫璮璯璱璲璵璹璻璿瓈瓉瓌瓐瓓瓘瓚瓛瓞瓟瓤瓨瓪瓫瓯瓴瓺瓻瓼瓿甆 +0x4d20: 甒甖甗甠甡甤甧甩甪甯甶甹甽甾甿畀畃畇畈畎畐畒畗畞畟畡畯畱畹畺畻 +0x4d40: 畼畽畾疁疅疐疒疓疕疙疜疢疤疴疺疿痀痁痄痆痌痎痏痗痜痟痠痡痤痧痬痮 +0x4d60: 痯痱痹瘀瘂瘃瘄瘇瘈瘊瘌瘏瘒瘓瘕瘖瘙瘛瘜瘝瘞瘣瘥瘦瘩瘭瘲瘳瘵瘸瘹 +0x4e20: 瘺瘼癊癀癁癃癄癅癉癋癕癙癟癤癥癭癮癯癱癴皁皅皌皍皕皛皜皝皟皠皢 +0x4e40: 皣皤皥皦皧皨皪皭皽盁盅盉盋盌盎盔盙盠盦盨盬盰盱盶盹盼眀眆眊眎眒眔 +0x4e60: 眕眗眙眚眜眢眨眭眮眯眴眵眶眹眽眾睂睅睆睊睍睎睏睒睖睗睜睞睟睠睢 +0x4f20: 睤睧睪睬睰睲睳睴睺睽瞀瞄瞌瞍瞔瞕瞖瞚瞟瞢瞧瞪瞮瞯瞱瞵瞾矃矉矑矒 +0x4f40: 矕矙矞矟矠矤矦矪矬矰