|
Packit |
3d039e |
package Encode::EUCJPASCII;
|
|
Packit |
3d039e |
use strict;
|
|
Packit |
3d039e |
use warnings;
|
|
Packit |
3d039e |
our $VERSION = "0.03";
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
use Encode qw(:fallbacks);
|
|
Packit |
3d039e |
use XSLoader;
|
|
Packit |
3d039e |
XSLoader::load(__PACKAGE__,$VERSION);
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
Encode::define_alias(qr/\beuc-?jp(-?open)?(-?19970715)?-?ascii$/i
|
|
Packit |
3d039e |
=> '"eucJP-ascii"');
|
|
Packit |
3d039e |
Encode::define_alias(qr/\b(x-)?iso-?2022-?jp-?ascii$/i
|
|
Packit |
3d039e |
=> '"x-iso2022jp-ascii"');
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
my $name = 'x-iso2022jp-ascii';
|
|
Packit |
3d039e |
$Encode::Encoding{$name} = bless { Name => $name } => __PACKAGE__;
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
use base qw(Encode::Encoding);
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
# we override this to 1 so PerlIO works
|
|
Packit |
3d039e |
sub needs_lines { 1 }
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
use Encode::CJKConstants qw(:all);
|
|
Packit |
3d039e |
use Encode::JP::JIS7;
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
# 26 row-cell pairs swapped between JIS C 6226-1978 and JIS X 0208-1983.
|
|
Packit |
3d039e |
# cf. JIS X 0208:1997 Annex 2 Table 1.
|
|
Packit |
3d039e |
my @swap1978 = ("\x30\x33" => "\x72\x4D", "\x32\x29" => "\x72\x74",
|
|
Packit |
3d039e |
"\x33\x42" => "\x69\x5a", "\x33\x49" => "\x59\x78",
|
|
Packit |
3d039e |
"\x33\x76" => "\x63\x5e", "\x34\x43" => "\x5e\x75",
|
|
Packit |
3d039e |
"\x34\x52" => "\x6b\x5d", "\x37\x5b" => "\x70\x74",
|
|
Packit |
3d039e |
"\x39\x5c" => "\x62\x68", "\x3c\x49" => "\x69\x22",
|
|
Packit |
3d039e |
"\x3F\x59" => "\x70\x57", "\x41\x28" => "\x6c\x4d",
|
|
Packit |
3d039e |
"\x44\x5B" => "\x54\x64", "\x45\x57" => "\x62\x6a",
|
|
Packit |
3d039e |
"\x45\x6e" => "\x5b\x6d", "\x45\x73" => "\x5e\x39",
|
|
Packit |
3d039e |
"\x46\x76" => "\x6d\x6e", "\x47\x68" => "\x6a\x24",
|
|
Packit |
3d039e |
"\x49\x30" => "\x5B\x58", "\x4b\x79" => "\x50\x56",
|
|
Packit |
3d039e |
"\x4c\x79" => "\x69\x2e", "\x4F\x36" => "\x64\x46",
|
|
Packit |
3d039e |
"\x36\x46" => "\x74\x21", "\x4B\x6A" => "\x74\x22",
|
|
Packit |
3d039e |
"\x4D\x5A" => "\x74\x23", "\x60\x76" => "\x74\x24",
|
|
Packit |
3d039e |
);
|
|
Packit |
3d039e |
my %swap1978 = (@swap1978, reverse @swap1978);
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
sub decode($$;$) {
|
|
Packit |
3d039e |
my ( $obj, $str, $chk ) = @_;
|
|
Packit |
3d039e |
my $residue = '';
|
|
Packit |
3d039e |
if ($chk) {
|
|
Packit |
3d039e |
$str =~ s/([^\x00-\x7f].*)$//so and $residue = $1;
|
|
Packit |
3d039e |
}
|
|
Packit |
3d039e |
# Handle JIS X 0201 sequences.
|
|
Packit |
3d039e |
$str =~ s{\e\(J ([^\e]*) (?:\e\(B)?}{
|
|
Packit |
3d039e |
my $s = $1;
|
|
Packit |
3d039e |
$s =~ s{([\x5C\x7E]+)}{
|
|
Packit |
3d039e |
my $c = $1;
|
|
Packit |
3d039e |
$c =~ s/\x5C/\x21\x6F/g;
|
|
Packit |
3d039e |
$c =~ s/\x7E/\x21\x31/g;
|
|
Packit |
3d039e |
"\e\$B".$c."\e(B";
|
|
Packit |
3d039e |
}eg;
|
|
Packit |
3d039e |
($s =~ /^\e/? "\e(B": '').$s;
|
|
Packit |
3d039e |
}egsx;
|
|
Packit |
3d039e |
# Handle JIS C 6226-1978 sequences.
|
|
Packit |
3d039e |
$str =~ s{\e\$\@ ([^\e]*) (?:\e\$B)?}{
|
|
Packit |
3d039e |
my $s = $1;
|
|
Packit |
3d039e |
$s =~ s{([\x21-\x7E]{2})}{$swap1978{$1} || $1}eg;
|
|
Packit |
3d039e |
"\e\$B".$s;
|
|
Packit |
3d039e |
}egsx;
|
|
Packit |
3d039e |
$residue .= Encode::JP::JIS7::jis_euc( \$str );
|
|
Packit |
3d039e |
$_[1] = $residue if $chk;
|
|
Packit |
3d039e |
return Encode::decode( 'eucJP-ascii', $str, $chk );
|
|
Packit |
3d039e |
}
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
sub encode($$;$) {
|
|
Packit |
3d039e |
my ( $obj, $utf8, $chk ) = @_;
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
# empty the input string in the stack so perlio is ok
|
|
Packit |
3d039e |
$_[1] = '' if $chk;
|
|
Packit |
3d039e |
my $octet = Encode::encode( 'eucJP-ascii', $utf8, $chk );
|
|
Packit |
3d039e |
Encode::JP::JIS7::euc_jis( \$octet, 1 );
|
|
Packit |
3d039e |
return $octet;
|
|
Packit |
3d039e |
}
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
#
|
|
Packit |
3d039e |
# cat_decode
|
|
Packit |
3d039e |
#
|
|
Packit |
3d039e |
my $re_scan_jis_g = qr{
|
|
Packit |
3d039e |
\G ( ($RE{JIS_0212}) | (\e\$\@) | $RE{JIS_0208} |
|
|
Packit |
3d039e |
(\e\(J) | ($RE{ISO_ASC}) | ($RE{JIS_KANA}) | )
|
|
Packit |
3d039e |
([^\e]*)
|
|
Packit |
3d039e |
}x;
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
sub cat_decode { # ($obj, $dst, $src, $pos, $trm, $chk)
|
|
Packit |
3d039e |
my ( $obj, undef, undef, $pos, $trm ) = @_; # currently ignores $chk
|
|
Packit |
3d039e |
my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ];
|
|
Packit |
3d039e |
local ${^ENCODING};
|
|
Packit |
3d039e |
use bytes;
|
|
Packit |
3d039e |
my $opos = pos($$rsrc);
|
|
Packit |
3d039e |
pos($$rsrc) = $pos;
|
|
Packit |
3d039e |
while ( $$rsrc =~ /$re_scan_jis_g/gc ) {
|
|
Packit |
3d039e |
my ( $esc, $esc_0212, $esc_0208_1978, $esc_0201, $esc_asc, $esc_kana, $chunk ) =
|
|
Packit |
3d039e |
( $1, $2, $3, $4, $5, $6, $7 );
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
unless ($chunk) { $esc or last; next; }
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
if ( $esc && !$esc_asc && !$esc_0208_1978 && !$esc_0201 ) {
|
|
Packit |
3d039e |
$chunk =~ tr/\x21-\x7e/\xa1-\xfe/;
|
|
Packit |
3d039e |
if ($esc_kana) {
|
|
Packit |
3d039e |
$chunk =~ s/([\xa1-\xdf])/\x8e$1/og;
|
|
Packit |
3d039e |
}
|
|
Packit |
3d039e |
elsif ($esc_0212) {
|
|
Packit |
3d039e |
$chunk =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og;
|
|
Packit |
3d039e |
}
|
|
Packit |
3d039e |
$chunk = Encode::decode( 'eucJP-ascii', $chunk, 0 );
|
|
Packit |
3d039e |
}
|
|
Packit |
3d039e |
elsif ( $esc_0208_1978 ) {
|
|
Packit |
3d039e |
$chunk =~ s{([\x21-\x7E]{2})}{$swap1978{$1} || $1}eg;
|
|
Packit |
3d039e |
$chunk =~ tr/\x21-\x7e/\xa1-\xfe/;
|
|
Packit |
3d039e |
$chunk = Encode::decode( 'eucJP-ascii', $chunk, 0 );
|
|
Packit |
3d039e |
}
|
|
Packit |
3d039e |
elsif ( $esc_0201 ) {
|
|
Packit |
3d039e |
$chunk =~ s/\x5C/\xA1\xEF/og;
|
|
Packit |
3d039e |
$chunk =~ s/\x7E/\xA1\xB1/og;
|
|
Packit |
3d039e |
$chunk = Encode::decode( 'eucJP-ascii', $chunk, 0 );
|
|
Packit |
3d039e |
}
|
|
Packit |
3d039e |
elsif ( ( my $npos = index( $chunk, $trm ) ) >= 0 ) {
|
|
Packit |
3d039e |
$$rdst .= substr( $chunk, 0, $npos + length($trm) );
|
|
Packit |
3d039e |
$$rpos += length($esc) + $npos + length($trm);
|
|
Packit |
3d039e |
pos($$rsrc) = $opos;
|
|
Packit |
3d039e |
return 1;
|
|
Packit |
3d039e |
}
|
|
Packit |
3d039e |
$$rdst .= $chunk;
|
|
Packit |
3d039e |
$$rpos = pos($$rsrc);
|
|
Packit |
3d039e |
}
|
|
Packit |
3d039e |
$$rpos = pos($$rsrc);
|
|
Packit |
3d039e |
pos($$rsrc) = $opos;
|
|
Packit |
3d039e |
return '';
|
|
Packit |
3d039e |
}
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
1;
|
|
Packit |
3d039e |
__END__
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
=head1 NAME
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
Encode::EUCJPASCII - eucJP-ascii - An eucJP-open mapping
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
=head1 SYNOPSIS
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
use Encode::EUCJPASCII;
|
|
Packit |
3d039e |
use Encode qw/encode decode/;
|
|
Packit |
3d039e |
$eucjp = encode("eucJP-ascii", $utf8);
|
|
Packit |
3d039e |
$utf8 = decode("eucJP-ascii", $eucjp);
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
=head1 DESCRIPTION
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
This module provides eucJP-ascii, one of eucJP-open mappings,
|
|
Packit |
3d039e |
and its derivative.
|
|
Packit |
3d039e |
Following encodings are supported.
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
Canonical Alias Description
|
|
Packit |
3d039e |
--------------------------------------------------------------
|
|
Packit |
3d039e |
eucJP-ascii eucJP-ascii
|
|
Packit |
3d039e |
qr/\beuc-?jp(-?open)?(-?19970715)?-?ascii$/i
|
|
Packit |
3d039e |
x-iso2022jp-ascii 7-bit counterpart
|
|
Packit |
3d039e |
qr/\b(x-)?iso-?2022-?jp-?ascii$/i
|
|
Packit |
3d039e |
--------------------------------------------------------------
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
B<Note>: C<x-iso2022jp-ascii> is unofficial encoding name:
|
|
Packit |
3d039e |
It had never been registered by any standards bodies.
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
=head1 SEE ALSO
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
L<Encode>, L<Encode::JP>, L<Encode::EUCJPMS>
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
TOG/JVC CDE/Motif Technical WG (Oct. 1996).
|
|
Packit |
3d039e |
I<Problems and Solutions for Unicode and User/Vendor Defined Characters>.
|
|
Packit |
3d039e |
Revision at Jul. 15 1997.
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
=head1 AUTHOR
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
Hatuka*nezumi - IKEDA Soji <hatuka(at)nezumi.nu>
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
=head1 COPYRIGHT
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
Copyright (C) 2009 Hatuka*nezumi - IKEDA Soji.
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
This program is free software; you can redistribute it and/or modify it
|
|
Packit |
3d039e |
under the same terms as Perl itself.
|
|
Packit |
3d039e |
|
|
Packit |
3d039e |
=cut
|