Blame lib/Encode/MIME/Header.pm

Packit d0f5c2
package Encode::MIME::Header;
Packit d0f5c2
use strict;
Packit d0f5c2
use warnings;
Packit d0f5c2
Packit d0f5c2
our $VERSION = do { my @r = ( q$Revision: 2.28 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
Packit d0f5c2
Packit d0f5c2
use Carp ();
Packit d0f5c2
use Encode ();
Packit d0f5c2
use MIME::Base64 ();
Packit d0f5c2
Packit d0f5c2
# Default settings shared by every encoding object registered below.
my %seed = (
    decode_b => 1,       # decodes 'B' encoding ?
    decode_q => 1,       # decodes 'Q' encoding ?
    encode   => 'B',     # encode with 'B' or 'Q' ?
    charset  => 'UTF-8', # encode charset
    bpl      => 75,      # bytes per line
);

# One blessed hash per encoding name; each lists only the settings in which
# it differs from %seed (later keys override the copied defaults).
my @objs;

# MIME-Header: decodes both 'B' and 'Q' words, encodes with 'B'.
push @objs, bless {
    %seed,
    Name     => 'MIME-Header',
} => __PACKAGE__;

# MIME-B: decodes only 'B' words, encodes with 'B'.
push @objs, bless {
    %seed,
    decode_q => 0,
    Name     => 'MIME-B',
} => __PACKAGE__;

# MIME-Q: decodes only 'Q' words, encodes with 'Q'.
push @objs, bless {
    %seed,
    decode_b => 0,
    encode   => 'Q',
    Name     => 'MIME-Q',
} => __PACKAGE__;

# Register every object with Encode under its own Name.
Encode::define_encoding($_, $_->{Name}) foreach @objs;
Packit d0f5c2
Packit d0f5c2
use parent qw(Encode::Encoding);
Packit d0f5c2
Packit d0f5c2
# Constant true flag.  NOTE(review): presumably the Encode::Encoding hook that
# marks this encoding as line-oriented -- confirm against the parent class.
sub needs_lines {
    return 1;
}
Packit d0f5c2
# Constant false flag.  NOTE(review): presumably the Encode::Encoding hook
# that reports whether the encoding may be used as a PerlIO layer -- confirm.
sub perlio_ok {
    return 0;
}
Packit d0f5c2
Packit d0f5c2
# RFC 2047 and RFC 2231 grammar
#
# An encoded word has the form  =?charset[*language]?encoding?encoded-text?=
my $re_charset = qr/[!"#\$%&'+\-0-9A-Z\\\^_`a-z\{\|\}~]+/;
my $re_language = qr/[A-Za-z]{1,8}(?:-[0-9A-Za-z]{1,8})*/;
my $re_encoding = qr/[QqBb]/;
my $re_encoded_text = qr/[^\?]*/;
my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/;
# captures: (charset) (*language or empty) (encoding?encoded-text)
my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding\?$re_encoded_text)\?=/;
# captures: (charset) (*language or empty) (encoding) (encoded-text)
my $re_capture_encoded_word_split = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/;

# in strict mode check also for valid base64 characters and also for valid quoted printable codes
my $re_encoding_strict_b = qr/[Bb]/;
my $re_encoding_strict_q = qr/[Qq]/;
my $re_encoded_text_strict_b = qr/[0-9A-Za-z\+\/]*={0,2}/;
my $re_encoded_text_strict_q = qr/(?:[\x21-\x3C\x3E\x40-\x7E]|=[0-9A-Fa-f]{2})*/; # NOTE: first part are printable US-ASCII except ?, =, SPACE and TAB
my $re_encoded_word_strict = qr/=\?$re_charset(?:\*$re_language)?\?(?:$re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
# captures: (charset) (*language or empty) (encoding?encoded-text)
my $re_capture_encoded_word_strict = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;

# CRLF, or a lone CR or LF
my $re_newline = qr/(?:\r\n|[\r\n])/;

# in strict mode encoded words must be always separated by spaces or tabs (or folded newline)
# except in comments when separator between words and comment round brackets can be omitted
my $re_word_begin_strict = qr/(?:(?:[ \t]|\A)\(?|(?:[^\\]|\A)\)\()/;
my $re_word_sep_strict = qr/(?:$re_newline?[ \t])+/;
my $re_word_end_strict = qr/(?:\)\(|\)?(?:$re_newline?[ \t]|\z))/;

# A run of consecutive encoded words.  Both variants expose the same capture
# layout to decode(): (text consumed before the run) (the run itself); the
# non-strict variant pads with empty groups to keep the numbering aligned.
my $re_match = qr/()((?:$re_encoded_word\s*)*$re_encoded_word)()/;
my $re_match_strict = qr/($re_word_begin_strict)((?:$re_encoded_word_strict$re_word_sep_strict)*$re_encoded_word_strict)(?=$re_word_end_strict)/;

# One encoded word plus the separator that follows it.
my $re_capture = qr/$re_capture_encoded_word(?:\s*)?/;
my $re_capture_strict = qr/$re_capture_encoded_word_strict$re_word_sep_strict?/;

# Set to a true value (e.g. via local) to make decode() use the strict
# patterns above instead of the lenient ones.
our $STRICT_DECODE = 0;
Packit d0f5c2
Packit d0f5c2
# Decode the MIME encoded-words found in $str and return the resulting string.
#
#   $obj - encoding object; its decode_b / decode_q flags select which of the
#          'B' and 'Q' encodings this object is willing to decode
#   $str - input text; undef yields undef
#   $chk - optional Encode CHECK value; DIE_ON_ERR, WARN_ON_ERR and
#          RETURN_ON_ERR are honoured here for unknown charsets and for
#          unsupported encodings, and the value is passed on to the charset
#          decoder
#
# When $chk is a true non-reference value without LEAVE_SRC, the caller's
# variable ($_[1]) is overwritten with whatever part of the input was left
# unprocessed.  $Encode::MIME::Header::STRICT_DECODE selects the strict or the
# lenient set of patterns.
sub decode($$;$) {
    my ($obj, $str, $chk) = @_;
    return undef unless defined $str;

    my $re_match_decode = $STRICT_DECODE ? $re_match_strict : $re_match;
    my $re_capture_decode = $STRICT_DECODE ? $re_capture_strict : $re_capture;

    # set once processing must end; the unprocessed text then stays in $str
    my $stop = 0;
    my $output = substr($str, 0, 0); # to propagate taintedness

    # decode each line separately, match whole continuous folded line at one call
    1 while not $stop and $str =~ s{^((?:[^\r\n]*(?:$re_newline[ \t])?)*)($re_newline)?}{

        my $line = $1;
        my $sep = defined $2 ? $2 : '';

        # an empty match means the whole input has been consumed
        $stop = 1 unless length($line) or length($sep);

        # NOTE: this code partially could break $chk support
        # in non strict mode concat consecutive encoded mime words with same charset, language and encoding
        # fixes breaking inside multi-byte characters
        1 while not $STRICT_DECODE and $line =~ s/$re_capture_encoded_word_split\s*=\?\1\2\?\3\?($re_encoded_text)\?=/=\?$1$2\?$3\?$4$5\?=/so;

        # process sequence of encoded MIME words at once
        # captures: $1 text before the run, $2 text consumed by the begin pattern, $3 the run of words
        1 while not $stop and $line =~ s{^(.*?)$re_match_decode}{

            my $begin = $1 . $2;
            my $words = $3;

            $begin =~ tr/\r\n//d;
            $output .= $begin;

            # decode one MIME word
            # captures: $1 text before the word, $2 word plus separator, $3 charset, $5 encoding?text
            1 while not $stop and $words =~ s{^(.*?)($re_capture_decode)}{

                $output .= $1;
                my $orig = $2;
                my $charset = $3;
                my ($mime_enc, $text) = split /\?/, $5;

                $text =~ tr/\r\n//d;

                my $enc = Encode::find_mime_encoding($charset);

                # in non strict mode allow also perl encoding aliases
                if ( not defined $enc and not $STRICT_DECODE ) {
                    # make sure that decoded string will be always strict UTF-8
                    $charset = 'UTF-8' if lc($charset) eq 'utf8';
                    $enc = Encode::find_encoding($charset);
                }

                # each branch ends with the replacement value: the original word
                # is put back when stopping, otherwise the word is consumed
                if ( not defined $enc ) {
                    Carp::croak qq(Unknown charset "$charset") if not ref $chk and $chk and $chk & Encode::DIE_ON_ERR;
                    Carp::carp qq(Unknown charset "$charset") if not ref $chk and $chk and $chk & Encode::WARN_ON_ERR;
                    $stop = 1 if not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
                    $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
                    $stop ? $orig : '';
                } else {
                    if ( uc($mime_enc) eq 'B' and $obj->{decode_b} ) {
                        my $decoded = _decode_b($enc, $text, $chk);
                        $stop = 1 if not defined $decoded and not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
                        # fall back to the still encoded text when decoding failed
                        $output .= (defined $decoded ? $decoded : $text) unless $stop;
                        $stop ? $orig : '';
                    } elsif ( uc($mime_enc) eq 'Q' and $obj->{decode_q} ) {
                        my $decoded = _decode_q($enc, $text, $chk);
                        $stop = 1 if not defined $decoded and not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
                        # fall back to the still encoded text when decoding failed
                        $output .= (defined $decoded ? $decoded : $text) unless $stop;
                        $stop ? $orig : '';
                    } else {
                        # the encoding is one this object has been configured not to decode
                        Carp::croak qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk and $chk & Encode::DIE_ON_ERR;
                        Carp::carp qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk and $chk & Encode::WARN_ON_ERR;
                        $stop = 1 if not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
                        $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
                        $stop ? $orig : '';
                    }
                }

            }se;

            # text after the last word of the run is copied verbatim
            if ( not $stop ) {
                $output .= $words;
                $words = '';
            }

            # anything still in $words goes back into $line as unprocessed input
            $words;

        }se;

        # remainder of the line has no more encoded words
        if ( not $stop ) {
            $line =~ tr/\r\n//d;
            $output .= $line . $sep;
            $line = '';
            $sep = '';
        }

        # anything still here goes back into $str as unprocessed input
        $line . $sep;

    }se;

    $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
    return $output;
}
Packit d0f5c2
Packit d0f5c2
# Decode the Base64 payload of a 'B' encoded word.
#
#   $enc  - encoding object for the word's charset
#   $text - Base64 text of the word
#   $chk  - Encode CHECK value, handed on to _decode_octets
#
# Returns whatever _decode_octets returns for the decoded octets.
sub _decode_b {
    my ($enc, $text, $chk) = @_;
    # MIME::Base64::decode ignores everything after a '=' padding character
    # in non strict mode split string after each sequence of padding characters and decode each substring
    my $octets = $STRICT_DECODE ?
        MIME::Base64::decode($text) :
        join('', map { MIME::Base64::decode($_) } split /(?<==)(?=[^=])/, $text);
    return _decode_octets($enc, $octets, $chk);
}
Packit d0f5c2
Packit d0f5c2
# Decode the payload of a 'Q' encoded word.
#
#   $enc  - encoding object for the word's charset
#   $text - 'Q' encoded text of the word
#   $chk  - Encode CHECK value, handed on to _decode_octets
#
# Returns whatever _decode_octets returns for the decoded octets.
sub _decode_q {
    my ($enc, $text, $chk) = @_;
    # an underscore stands for a space
    $text =~ tr/_/ /;
    # =XX is the octet with hexadecimal value XX
    $text =~ s/=([0-9A-Fa-f]{2})/pack('C', hex($1))/eg;
    return _decode_octets($enc, $text, $chk);
}
Packit d0f5c2
Packit d0f5c2
# Decode raw octets with the given charset decoder.
#
#   $enc    - encoding object providing a decode method
#   $octets - octets to decode
#   $chk    - Encode CHECK value (undef is treated as 0)
#
# Returns the decoded string, or undef when $chk is a true non-reference
# value and the decoder left part of $octets unconsumed.
sub _decode_octets {
    my ($enc, $octets, $chk) = @_;
    $chk = 0 unless defined $chk;
    # LEAVE_SRC is cleared so that the decoder consumes $octets in place;
    # the leftover test below relies on that
    $chk &= ~Encode::LEAVE_SRC if not ref $chk and $chk;
    my $output = $enc->decode($octets, $chk);
    return undef if not ref $chk and $chk and $octets ne '';
    return $output;
}
Packit d0f5c2
Packit d0f5c2
# Encode $str as MIME encoded-words and fold the result into lines.
#
#   $obj - encoding object (provides charset, encode and bpl settings)
#   $str - string to encode; undef yields undef
#   $chk - optional Encode CHECK value, passed to _encode_string
#
# When $chk is a true non-reference value without LEAVE_SRC, the caller's
# variable ($_[1]) is overwritten with the part of the input that
# _encode_string left unprocessed.
sub encode($$;$) {
    my ($obj, $str, $chk) = @_;
    return undef unless defined $str;
    # _encode_string may shorten $str through its own alias
    my $output = $obj->_fold_line($obj->_encode_string($str, $chk));
    $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
    return $output . substr($str, 0, 0); # to propagate taintedness
}
Packit d0f5c2
Packit d0f5c2
# Fold $line so that, where whitespace allows, no output line is longer than
# the object's bpl setting.  A fold is made by inserting CRLF in front of a
# whitespace character of the input; that character is kept and starts the
# next output line.
#
#   $obj  - encoding object; only its bpl value is read
#   $line - text to fold
#
# Returns the folded text.
sub _fold_line {
    my ($obj, $line) = @_;
    my $bpl = $obj->{bpl};
    my $output = '';

    while ( length($line) ) {
        if ( $line =~ s/^(.{0,$bpl})(\s|\z)// ) {
            # longest prefix of at most $bpl characters that ends at whitespace
            # or at the end of the text
            my ($chunk, $space) = ($1, $2);
            $output .= $chunk;
            $output .= "\r\n" . $space if length($line);
        } elsif ( $line =~ s/(\s)(.*)$// ) {
            # no break point within $bpl characters: emit the over-long chunk
            # up to the first whitespace and continue with the rest
            my ($space, $rest) = ($1, $2);
            $output .= $line;
            $line = $rest;
            $output .= "\r\n" . $space if length($line);
        } else {
            # no whitespace left at all, nothing more can be folded
            $output .= $line;
            last;
        }
    }

    return $output;
}
Packit d0f5c2
Packit d0f5c2
# Turn $str into a sequence of MIME encoded-words joined by single spaces.
#
#   $obj - encoding object (charset and bpl are read here)
#   $str - string to encode
#   $chk - optional Encode CHECK value
#
# The string is consumed one character at a time so that a character's octet
# sequence is never split across two encoded words.  Processing ends at the
# first character the charset encoder turns into an empty sequence; that
# character and everything after it stay in $str.  When $chk is a true
# non-reference value without LEAVE_SRC, the remainder is written back to the
# caller's variable ($_[1]).
sub _encode_string {
    my ($obj, $str, $chk) = @_;
    # an encoded word is limited to bpl characters, and never more than 76
    my $wordlen = $obj->{bpl} > 76 ? 76 : $obj->{bpl};
    my $enc = Encode::find_mime_encoding($obj->{charset});
    my $enc_chk = $chk;
    $enc_chk = 0 unless defined $enc_chk;
    # LEAVE_SRC is added to the flags given to the per-character encoder
    $enc_chk |= Encode::LEAVE_SRC if not ref $enc_chk and $enc_chk;
    my @result = ();
    my $octets = '';
    while ( length( my $chr = substr($str, 0, 1, '') ) ) {
        my $seq = $enc->encode($chr, $enc_chk);
        if ( not length($seq) ) {
            # put the character back and stop
            substr($str, 0, 0, $chr);
            last;
        }
        # start a new word when adding this character would exceed the limit
        if ( $obj->_encoded_word_len($octets . $seq) > $wordlen ) {
            push @result, $obj->_encode_word($octets);
            $octets = '';
        }
        $octets .= $seq;
    }
    length($octets) and push @result, $obj->_encode_word($octets);
    $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
    return join(' ', @result);
}
Packit d0f5c2
Packit d0f5c2
# Build one MIME encoded-word, =?charset?encode?text?=, from raw octets.
#
#   $obj    - encoding object; its charset and encode settings are used
#   $octets - octets to place in the word
#
# The 'B' variant is used when the encode setting is 'B', otherwise 'Q'.
sub _encode_word {
    my ($obj, $octets) = @_;
    my $charset = $obj->{charset};
    my $encode = $obj->{encode};
    my $text = $encode eq 'B' ? _encode_b($octets) : _encode_q($octets);
    return "=?$charset?$encode?$text?=";
}
Packit d0f5c2
Packit d0f5c2
# Compute the length of the encoded word _encode_word would build for
# $octets, without building it.
#
#   $obj    - encoding object; its charset and encode settings are used
#   $octets - octets that would be placed in the word
#
# Returns the length of the fixed =?charset?encode??= frame plus the length
# of the encoded text.
sub _encoded_word_len {
    my ($obj, $octets) = @_;
    my $charset = $obj->{charset};
    my $encode = $obj->{encode};
    my $text_len = $encode eq 'B' ? _encoded_b_len($octets) : _encoded_q_len($octets);
    return length("=?$charset?$encode??=") + $text_len;
}
Packit d0f5c2
Packit d0f5c2
# Base64-encode $octets for a 'B' encoded word.  The empty second argument
# keeps MIME::Base64 from appending its line ending to the result.
sub _encode_b {
    my ($octets) = @_;
    return MIME::Base64::encode($octets, '');
}
Packit d0f5c2
Packit d0f5c2
# Length of the Base64 text _encode_b produces for $octets: four characters
# for every started group of three octets.
sub _encoded_b_len {
    my ($octets) = @_;
    # int() is required: Perl's "/" is floating-point division, so without it
    # the result is fractional and too large for lengths not of the form 3n+1
    return int( ( length($octets) + 2 ) / 3 ) * 4;
}
Packit d0f5c2
Packit d0f5c2
# Any character that must be written as =XX inside a 'Q' encoded word, i.e.
# everything except letters, digits, space and ! * + - /
my $re_invalid_q_char = qr/[^0-9A-Za-z !*+\-\/]/;

# Encode $octets for a 'Q' encoded word: every character matched by
# $re_invalid_q_char becomes =XX (upper-case hexadecimal) and every space
# becomes an underscore.
sub _encode_q {
    my ($octets) = @_;
    $octets =~ s{($re_invalid_q_char)}{
        join('', map { sprintf('=%02X', $_) } unpack('C*', $1))
    }egx;
    # done after the escaping above, which has already turned literal
    # underscores into =5F
    $octets =~ s/ /_/g;
    return $octets;
}
Packit d0f5c2
Packit d0f5c2
# Length of the text _encode_q produces for $octets: three characters for
# each character matched by $re_invalid_q_char, one for every other.
sub _encoded_q_len {
    my ($octets) = @_;
    # list assignment in scalar context yields the number of matches
    my $invalid_count = () = $octets =~ /$re_invalid_q_char/sg;
    return ( $invalid_count * 3 ) + ( length($octets) - $invalid_count );
}
Packit d0f5c2
Packit d0f5c2
1;
Packit d0f5c2
__END__
Packit d0f5c2
Packit d0f5c2
=head1 NAME
Packit d0f5c2
Packit d0f5c2
Encode::MIME::Header -- MIME encoding for an unstructured email header
Packit d0f5c2
Packit d0f5c2
=head1 SYNOPSIS
Packit d0f5c2
Packit d0f5c2
    use Encode qw(encode decode);
Packit d0f5c2
Packit d0f5c2
    my $mime_str = encode("MIME-Header", "Sample:Text \N{U+263A}");
Packit d0f5c2
    # $mime_str is "=?UTF-8?B?U2FtcGxlOlRleHQg4pi6?="
Packit d0f5c2
Packit d0f5c2
    my $mime_q_str = encode("MIME-Q", "Sample:Text \N{U+263A}");
Packit d0f5c2
    # $mime_q_str is "=?UTF-8?Q?Sample=3AText_=E2=98=BA?="
Packit d0f5c2
Packit d0f5c2
    my $str = decode("MIME-Header",
Packit d0f5c2
        "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\r\n " .
Packit d0f5c2
        "=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?="
Packit d0f5c2
    );
Packit d0f5c2
    # $str is "If you can read this you understand the example."
Packit d0f5c2
Packit d0f5c2
    use Encode qw(decode :fallbacks);
Packit d0f5c2
    use Encode::MIME::Header;
Packit d0f5c2
    local $Encode::MIME::Header::STRICT_DECODE = 1;
Packit d0f5c2
    my $strict_string = decode("MIME-Header", $mime_string, FB_CROAK);
Packit d0f5c2
    # use strict decoding and croak on errors
Packit d0f5c2
Packit d0f5c2
=head1 ABSTRACT
Packit d0f5c2
Packit d0f5c2
This module implements L<RFC 2047|https://tools.ietf.org/html/rfc2047> MIME
Packit d0f5c2
encoding for an unstructured field body of the email header.  It can also be
Packit d0f5c2
used for L<RFC 822|https://tools.ietf.org/html/rfc822> 'text' token.  However,
Packit d0f5c2
it cannot be used directly for the whole header with the field name or for the
Packit d0f5c2
structured header fields like From, To, Cc, Message-Id, etc...  There are 3
Packit d0f5c2
encoding names supported by this module: C<MIME-Header>, C<MIME-B> and
Packit d0f5c2
C<MIME-Q>.
Packit d0f5c2
Packit d0f5c2
=head1 DESCRIPTION
Packit d0f5c2
Packit d0f5c2
Decode method takes an unstructured field body of the email header (or
Packit d0f5c2
L<RFC 822|https://tools.ietf.org/html/rfc822> 'text' token) as its input and
Packit d0f5c2
decodes each MIME encoded-word from input string to a sequence of bytes
Packit d0f5c2
according to L<RFC 2047|https://tools.ietf.org/html/rfc2047> and
Packit d0f5c2
L<RFC 2231|https://tools.ietf.org/html/rfc2231>.  Subsequently, each sequence
Packit d0f5c2
of bytes with the corresponding MIME charset is decoded with
Packit d0f5c2
L<the Encode module|Encode> and finally, one output string is returned.  Text
Packit d0f5c2
parts of the input string which do not contain MIME encoded-word stay
Packit d0f5c2
unmodified in the output string.  Folded newlines between two consecutive MIME
Packit d0f5c2
encoded-words are discarded, others are preserved in the output string.
Packit d0f5c2
C<MIME-B> can decode Base64 variant, C<MIME-Q> can decode Quoted-Printable
Packit d0f5c2
variant and C<MIME-Header> can decode both of them.  If L<Encode module|Encode>
Packit d0f5c2
does not support particular MIME charset or chosen variant then an action based
Packit d0f5c2
on L<CHECK flags|Encode/Handling Malformed Data> is performed (by default, the
Packit d0f5c2
MIME encoded-word is not decoded).
Packit d0f5c2
Packit d0f5c2
Encode method takes a scalar string as its input and uses
Packit d0f5c2
L<strict UTF-8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder for encoding it to UTF-8
Packit d0f5c2
bytes.  Then a sequence of UTF-8 bytes is encoded into MIME encoded-words
Packit d0f5c2
(C<MIME-Header> and C<MIME-B> use a Base64 variant while C<MIME-Q> uses a
Packit d0f5c2
Quoted-Printable variant) where each MIME encoded-word is limited to 75
Packit d0f5c2
characters.  MIME encoded-words are separated by C<CRLF SPACE> and joined to
Packit d0f5c2
one output string.  Output string is suitable for unstructured field body of
Packit d0f5c2
the email header.
Packit d0f5c2
Packit d0f5c2
Both encode and decode methods propagate
Packit d0f5c2
L<CHECK flags|Encode/Handling Malformed Data> when encoding and decoding the
Packit d0f5c2
MIME charset.
Packit d0f5c2
Packit d0f5c2
=head1 BUGS
Packit d0f5c2
Packit d0f5c2
Versions prior to 2.22 (part of Encode 2.83) have a malfunctioning decoder
Packit d0f5c2
and encoder.  The MIME encoder infamously inserted additional spaces or
Packit d0f5c2
discarded white spaces between consecutive MIME encoded-words, which led to
Packit d0f5c2
invalid MIME headers produced by this module.  The MIME decoder had a tendency
Packit d0f5c2
to discard white spaces, incorrectly interpret data or attempt to decode Base64
Packit d0f5c2
MIME encoded-words as Quoted-Printable.  These problems were fixed in version
Packit d0f5c2
2.22.  It is highly recommended not to use any version prior to 2.22!
Packit d0f5c2
Packit d0f5c2
Versions prior to 2.24 (part of Encode 2.87) ignored
Packit d0f5c2
L<CHECK flags|Encode/Handling Malformed Data>.  The MIME encoder used
Packit d0f5c2
L<not strict utf8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder for input Unicode
Packit d0f5c2
strings which could lead to invalid UTF-8 sequences.  The MIME decoder also used
Packit d0f5c2
L<not strict utf8|Encode/UTF-8 vs. utf8 vs. UTF8> decoder and additionally
Packit d0f5c2
called the decode method with a C<Encode::FB_PERLQQ> flag (thus user-specified
Packit d0f5c2
L<CHECK flags|Encode/Handling Malformed Data> were ignored).  Moreover, it
Packit d0f5c2
automatically croaked when a MIME encoded-word contained an unknown encoding.
Packit d0f5c2
Since version 2.24, this module uses
Packit d0f5c2
L<strict UTF-8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder and decoder.  And
Packit d0f5c2
L<CHECK flags|Encode/Handling Malformed Data> are correctly propagated.
Packit d0f5c2
Packit d0f5c2
Since version 2.22 (part of Encode 2.83), the MIME encoder should be fully
Packit d0f5c2
compliant to L<RFC 2047|https://tools.ietf.org/html/rfc2047> and
Packit d0f5c2
L<RFC 2231|https://tools.ietf.org/html/rfc2231>.  Due to the aforementioned
Packit d0f5c2
bugs in previous versions of the MIME encoder, there is a I<less strict>
Packit d0f5c2
compatible mode for the MIME decoder which is used by default.  It should be
Packit d0f5c2
able to decode MIME encoded-words encoded by pre-2.22 versions of this module.
Packit d0f5c2
However, note that this is not correct according to
Packit d0f5c2
L<RFC 2047|https://tools.ietf.org/html/rfc2047>.
Packit d0f5c2
Packit d0f5c2
In the default I<not strict> mode the MIME decoder attempts to decode every substring
Packit d0f5c2
which looks like a MIME encoded-word.  Therefore, the MIME encoded-words do not
Packit d0f5c2
need to be separated by white space.  To enforce a correct I<strict> mode, set
Packit d0f5c2
variable C<$Encode::MIME::Header::STRICT_DECODE> to 1 e.g. by localizing:
Packit d0f5c2
Packit d0f5c2
  use Encode::MIME::Header;
Packit d0f5c2
  local $Encode::MIME::Header::STRICT_DECODE = 1;
Packit d0f5c2
Packit d0f5c2
=head1 AUTHORS
Packit d0f5c2
Packit d0f5c2
Pali E<lt>pali@cpan.orgE<gt>
Packit d0f5c2
Packit d0f5c2
=head1 SEE ALSO
Packit d0f5c2
Packit d0f5c2
L<Encode>,
Packit d0f5c2
L<RFC 822|https://tools.ietf.org/html/rfc822>,
Packit d0f5c2
L<RFC 2047|https://tools.ietf.org/html/rfc2047>,
Packit d0f5c2
L<RFC 2231|https://tools.ietf.org/html/rfc2231>
Packit d0f5c2
Packit d0f5c2
=cut