|
Packit |
d0f5c2 |
#
|
|
Packit |
d0f5c2 |
# $Id: UTF7.pm,v 2.10 2017/06/10 17:23:50 dankogai Exp $
|
|
Packit |
d0f5c2 |
#
|
|
Packit |
d0f5c2 |
package Encode::Unicode::UTF7;
|
|
Packit |
d0f5c2 |
use strict;
|
|
Packit |
d0f5c2 |
use warnings;
|
|
Packit |
d0f5c2 |
use parent qw(Encode::Encoding);
|
|
Packit |
d0f5c2 |
__PACKAGE__->Define('UTF-7');
|
|
Packit |
d0f5c2 |
our $VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
|
|
Packit |
d0f5c2 |
use MIME::Base64;
|
|
Packit |
d0f5c2 |
use Encode qw(find_encoding);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
#
|
|
Packit |
d0f5c2 |
# Algorithms taken from Unicode::String by Gisle Aas
|
|
Packit |
d0f5c2 |
#
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
our $OPTIONAL_DIRECT_CHARS = 1;
|
|
Packit |
d0f5c2 |
my $specials = quotemeta "\'(),-./:?";
|
|
Packit |
d0f5c2 |
$OPTIONAL_DIRECT_CHARS
|
|
Packit |
d0f5c2 |
and $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}";
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
# \s will not work because it matches U+3000 DEOGRAPHIC SPACE
|
|
Packit |
d0f5c2 |
# We use qr/[\n\r\t\ ] instead
|
|
Packit |
d0f5c2 |
my $re_asis = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/;
|
|
Packit |
d0f5c2 |
my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/;
|
|
Packit |
d0f5c2 |
my $e_utf16 = find_encoding("UTF-16BE");
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
sub needs_lines { 1 }
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
sub encode($$;$) {
|
|
Packit |
d0f5c2 |
my ( $obj, $str, $chk ) = @_;
|
|
Packit |
d0f5c2 |
return undef unless defined $str;
|
|
Packit |
d0f5c2 |
my $len = length($str);
|
|
Packit |
d0f5c2 |
pos($str) = 0;
|
|
Packit |
d0f5c2 |
my $bytes = substr($str, 0, 0); # to propagate taintedness
|
|
Packit |
d0f5c2 |
while ( pos($str) < $len ) {
|
|
Packit |
d0f5c2 |
if ( $str =~ /\G($re_asis+)/ogc ) {
|
|
Packit |
d0f5c2 |
my $octets = $1;
|
|
Packit |
d0f5c2 |
utf8::downgrade($octets);
|
|
Packit |
d0f5c2 |
$bytes .= $octets;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
elsif ( $str =~ /\G($re_encoded+)/ogsc ) {
|
|
Packit |
d0f5c2 |
if ( $1 eq "+" ) {
|
|
Packit |
d0f5c2 |
$bytes .= "+-";
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
else {
|
|
Packit |
d0f5c2 |
my $s = $1;
|
|
Packit |
d0f5c2 |
my $base64 = encode_base64( $e_utf16->encode($s), '' );
|
|
Packit |
d0f5c2 |
$base64 =~ s/=+$//;
|
|
Packit |
d0f5c2 |
$bytes .= "+$base64-";
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
else {
|
|
Packit |
d0f5c2 |
die "This should not happen! (pos=" . pos($str) . ")";
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
$_[1] = '' if $chk;
|
|
Packit |
d0f5c2 |
return $bytes;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
sub decode($$;$) {
|
|
Packit |
d0f5c2 |
use re 'taint';
|
|
Packit |
d0f5c2 |
my ( $obj, $bytes, $chk ) = @_;
|
|
Packit |
d0f5c2 |
return undef unless defined $bytes;
|
|
Packit |
d0f5c2 |
my $len = length($bytes);
|
|
Packit |
d0f5c2 |
my $str = substr($bytes, 0, 0); # to propagate taintedness;
|
|
Packit |
d0f5c2 |
pos($bytes) = 0;
|
|
Packit |
d0f5c2 |
no warnings 'uninitialized';
|
|
Packit |
d0f5c2 |
while ( pos($bytes) < $len ) {
|
|
Packit |
d0f5c2 |
if ( $bytes =~ /\G([^+]+)/ogc ) {
|
|
Packit |
d0f5c2 |
$str .= $1;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
elsif ( $bytes =~ /\G\+-/ogc ) {
|
|
Packit |
d0f5c2 |
$str .= "+";
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
elsif ( $bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc ) {
|
|
Packit |
d0f5c2 |
my $base64 = $1;
|
|
Packit |
d0f5c2 |
my $pad = length($base64) % 4;
|
|
Packit |
d0f5c2 |
$base64 .= "=" x ( 4 - $pad ) if $pad;
|
|
Packit |
d0f5c2 |
$str .= $e_utf16->decode( decode_base64($base64) );
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
elsif ( $bytes =~ /\G\+/ogc ) {
|
|
Packit |
d0f5c2 |
$^W and warn "Bad UTF7 data escape";
|
|
Packit |
d0f5c2 |
$str .= "+";
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
else {
|
|
Packit |
d0f5c2 |
die "This should not happen " . pos($bytes);
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
$_[1] = '' if $chk;
|
|
Packit |
d0f5c2 |
return $str;
|
|
Packit |
d0f5c2 |
}
|
|
Packit |
d0f5c2 |
1;
|
|
Packit |
d0f5c2 |
__END__
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 NAME
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
Encode::Unicode::UTF7 -- UTF-7 encoding
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 SYNOPSIS
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
use Encode qw/encode decode/;
|
|
Packit |
d0f5c2 |
$utf7 = encode("UTF-7", $utf8);
|
|
Packit |
d0f5c2 |
$utf8 = decode("UTF-7", $ucs2);
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 ABSTRACT
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
This module implements UTF-7 encoding documented in RFC 2152. UTF-7,
|
|
Packit |
d0f5c2 |
as its name suggests, is a 7-bit re-encoded version of UTF-16BE. It
|
|
Packit |
d0f5c2 |
is designed to be MTA-safe and expected to be a standard way to
|
|
Packit |
d0f5c2 |
exchange Unicoded mails via mails. But with the advent of UTF-8 and
|
|
Packit |
d0f5c2 |
8-bit compliant MTAs, UTF-7 is hardly ever used.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
UTF-7 was not supported by Encode until version 1.95 because of that.
|
|
Packit |
d0f5c2 |
But Unicode::String, a module by Gisle Aas which adds Unicode supports
|
|
Packit |
d0f5c2 |
to non-utf8-savvy perl did support UTF-7, the UTF-7 support was added
|
|
Packit |
d0f5c2 |
so Encode can supersede Unicode::String 100%.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 In Practice
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
When you want to encode Unicode for mails and web pages, however, do
|
|
Packit |
d0f5c2 |
not use UTF-7 unless you are sure your recipients and readers can
|
|
Packit |
d0f5c2 |
handle it. Very few MUAs and WWW Browsers support these days (only
|
|
Packit |
d0f5c2 |
Mozilla seems to support one). For general cases, use UTF-8 for
|
|
Packit |
d0f5c2 |
message body and MIME-Header for header instead.
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=head1 SEE ALSO
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
L<Encode>, L<Encode::Unicode>, L<Unicode::String>
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
RFC 2781 L<http://www.ietf.org/rfc/rfc2152.txt>
|
|
Packit |
d0f5c2 |
|
|
Packit |
d0f5c2 |
=cut
|