Tree - source-git/perl-HTTP-Message

source-git / perl-HTTP-Message

Files

Commit: a09cf70a7aff5f1a94912bb2658f1fe692897e6d
Blob Blame History Raw
package HTTP::Message;

use strict;
use warnings;

our $VERSION = '6.18';

require HTTP::Headers;
require Carp;

my $CRLF = "\015\012";   # "\r\n" is not portable
unless ($HTTP::URI_CLASS) {
    if ($ENV{PERL_HTTP_URI_CLASS}
    &&  $ENV{PERL_HTTP_URI_CLASS} =~ /^([\w:]+)$/) {
        $HTTP::URI_CLASS = $1;
    } else {
        $HTTP::URI_CLASS = "URI";
    }
}
eval "require $HTTP::URI_CLASS"; die $@ if $@;

*_utf8_downgrade = defined(&utf8::downgrade) ?
    sub {
        utf8::downgrade($_[0], 1) or
            Carp::croak("HTTP::Message content must be bytes")
    }
    :
    sub {
    };

sub new
{
    my($class, $header, $content) = @_;
    if (defined $header) {
	Carp::croak("Bad header argument") unless ref $header;
        if (ref($header) eq "ARRAY") {
	    $header = HTTP::Headers->new(@$header);
	}
	else {
	    $header = $header->clone;
	}
    }
    else {
	$header = HTTP::Headers->new;
    }
    if (defined $content) {
        _utf8_downgrade($content);
    }
    else {
        $content = '';
    }

    bless {
	'_headers' => $header,
	'_content' => $content,
    }, $class;
}


sub parse
{
    my($class, $str) = @_;

    my @hdr;
    while (1) {
	if ($str =~ s/^([^\s:]+)[ \t]*: ?(.*)\n?//) {
	    push(@hdr, $1, $2);
	    $hdr[-1] =~ s/\r\z//;
	}
	elsif (@hdr && $str =~ s/^([ \t].*)\n?//) {
	    $hdr[-1] .= "\n$1";
	    $hdr[-1] =~ s/\r\z//;
	}
	else {
	    $str =~ s/^\r?\n//;
	    last;
	}
    }
    local $HTTP::Headers::TRANSLATE_UNDERSCORE;
    new($class, \@hdr, $str);
}


sub clone
{
    my $self  = shift;
    my $clone = HTTP::Message->new($self->headers,
				   $self->content);
    $clone->protocol($self->protocol);
    $clone;
}


sub clear {
    my $self = shift;
    $self->{_headers}->clear;
    $self->content("");
    delete $self->{_parts};
    return;
}


sub protocol {
    shift->_elem('_protocol',  @_);
}

sub headers {
    my $self = shift;

    # recalculation of _content might change headers, so we
    # need to force it now
    $self->_content unless exists $self->{_content};

    $self->{_headers};
}

sub headers_as_string {
    shift->headers->as_string(@_);
}


sub content  {

    my $self = $_[0];
    if (defined(wantarray)) {
	$self->_content unless exists $self->{_content};
	my $old = $self->{_content};
	$old = $$old if ref($old) eq "SCALAR";
	&_set_content if @_ > 1;
	return $old;
    }

    if (@_ > 1) {
	&_set_content;
    }
    else {
	Carp::carp("Useless content call in void context") if $^W;
    }
}


sub _set_content {
    my $self = $_[0];
    _utf8_downgrade($_[1]);
    if (!ref($_[1]) && ref($self->{_content}) eq "SCALAR") {
	${$self->{_content}} = defined( $_[1] ) ? $_[1] : '';
    }
    else {
	die "Can't set content to be a scalar reference" if ref($_[1]) eq "SCALAR";
	$self->{_content} = defined( $_[1] ) ? $_[1] : '';
	delete $self->{_content_ref};
    }
    delete $self->{_parts} unless $_[2];
}


sub add_content
{
    my $self = shift;
    $self->_content unless exists $self->{_content};
    my $chunkref = \$_[0];
    $chunkref = $$chunkref if ref($$chunkref);  # legacy

    _utf8_downgrade($$chunkref);

    my $ref = ref($self->{_content});
    if (!$ref) {
	$self->{_content} .= $$chunkref;
    }
    elsif ($ref eq "SCALAR") {
	${$self->{_content}} .= $$chunkref;
    }
    else {
	Carp::croak("Can't append to $ref content");
    }
    delete $self->{_parts};
}

sub add_content_utf8 {
    my($self, $buf)  = @_;
    utf8::upgrade($buf);
    utf8::encode($buf);
    $self->add_content($buf);
}

sub content_ref
{
    my $self = shift;
    $self->_content unless exists $self->{_content};
    delete $self->{_parts};
    my $old = \$self->{_content};
    my $old_cref = $self->{_content_ref};
    if (@_) {
	my $new = shift;
	Carp::croak("Setting content_ref to a non-ref") unless ref($new);
	delete $self->{_content};  # avoid modifying $$old
	$self->{_content} = $new;
	$self->{_content_ref}++;
    }
    $old = $$old if $old_cref;
    return $old;
}


sub content_charset
{
    my $self = shift;
    if (my $charset = $self->content_type_charset) {
	return $charset;
    }

    # time to start guessing
    my $cref = $self->decoded_content(ref => 1, charset => "none");

    # Unicode BOM
    for ($$cref) {
	return "UTF-8"     if /^\xEF\xBB\xBF/;
	return "UTF-32LE" if /^\xFF\xFE\x00\x00/;
	return "UTF-32BE" if /^\x00\x00\xFE\xFF/;
	return "UTF-16LE" if /^\xFF\xFE/;
	return "UTF-16BE" if /^\xFE\xFF/;
    }

    if ($self->content_is_xml) {
	# http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing
	# XML entity not accompanied by external encoding information and not
	# in UTF-8 or UTF-16 encoding must begin with an XML encoding declaration,
	# in which the first characters must be '<?xml'
	for ($$cref) {
	    return "UTF-32BE" if /^\x00\x00\x00</;
	    return "UTF-32LE" if /^<\x00\x00\x00/;
	    return "UTF-16BE" if /^(?:\x00\s)*\x00</;
	    return "UTF-16LE" if /^(?:\s\x00)*<\x00/;
	    if (/^\s*(<\?xml[^\x00]*?\?>)/) {
		if ($1 =~ /\sencoding\s*=\s*(["'])(.*?)\1/) {
		    my $enc = $2;
		    $enc =~ s/^\s+//; $enc =~ s/\s+\z//;
		    return $enc if $enc;
		}
	    }
	}
	return "UTF-8";
    }
    elsif ($self->content_is_html) {
	# look for <META charset="..."> or <META content="...">
	# http://dev.w3.org/html5/spec/Overview.html#determining-the-character-encoding
	require IO::HTML;
	# Use relaxed search to match previous versions of HTTP::Message:
	my $encoding = IO::HTML::find_charset_in($$cref, { encoding    => 1,
	                                                   need_pragma => 0 });
	return $encoding->mime_name if $encoding;
    }
    elsif ($self->content_type eq "application/json") {
	for ($$cref) {
	    # RFC 4627, ch 3
	    return "UTF-32BE" if /^\x00\x00\x00./s;
	    return "UTF-32LE" if /^.\x00\x00\x00/s;
	    return "UTF-16BE" if /^\x00.\x00./s;
	    return "UTF-16LE" if /^.\x00.\x00/s;
	    return "UTF-8";
	}
    }
    if ($self->content_type =~ /^text\//) {
	for ($$cref) {
	    if (length) {
		return "US-ASCII" unless /[\x80-\xFF]/;
		require Encode;
		eval {
		    Encode::decode_utf8($_, Encode::FB_CROAK() | Encode::LEAVE_SRC());
		};
		return "UTF-8" unless $@;
		return "ISO-8859-1";
	    }
	}
    }

    return undef;
}


sub decoded_content
{
    my($self, %opt) = @_;
    my $content_ref;
    my $content_ref_iscopy;

    eval {
	$content_ref = $self->content_ref;
	die "Can't decode ref content" if ref($content_ref) ne "SCALAR";

	if (my $h = $self->header("Content-Encoding")) {
	    $h =~ s/^\s+//;
	    $h =~ s/\s+$//;
	    for my $ce (reverse split(/\s*,\s*/, lc($h))) {
		next unless $ce;
		next if $ce eq "identity" || $ce eq "none";
		if ($ce eq "gzip" || $ce eq "x-gzip") {
		    require IO::Uncompress::Gunzip;
		    my $output;
		    IO::Uncompress::Gunzip::gunzip($content_ref, \$output, Transparent => 0)
			or die "Can't gunzip content: $IO::Uncompress::Gunzip::GunzipError";
		    $content_ref = \$output;
		    $content_ref_iscopy++;
		}
		elsif ($ce eq "x-bzip2" or $ce eq "bzip2") {
		    require IO::Uncompress::Bunzip2;
		    my $output;
		    IO::Uncompress::Bunzip2::bunzip2($content_ref, \$output, Transparent => 0)
			or die "Can't bunzip content: $IO::Uncompress::Bunzip2::Bunzip2Error";
		    $content_ref = \$output;
		    $content_ref_iscopy++;
		}
		elsif ($ce eq "deflate") {
		    require IO::Uncompress::Inflate;
		    my $output;
		    my $status = IO::Uncompress::Inflate::inflate($content_ref, \$output, Transparent => 0);
		    my $error = $IO::Uncompress::Inflate::InflateError;
		    unless ($status) {
			# "Content-Encoding: deflate" is supposed to mean the
			# "zlib" format of RFC 1950, but Microsoft got that
			# wrong, so some servers sends the raw compressed
			# "deflate" data.  This tries to inflate this format.
			$output = undef;
			require IO::Uncompress::RawInflate;
			unless (IO::Uncompress::RawInflate::rawinflate($content_ref, \$output)) {
			    $self->push_header("Client-Warning" =>
				"Could not raw inflate content: $IO::Uncompress::RawInflate::RawInflateError");
			    $output = undef;
			}
		    }
		    die "Can't inflate content: $error" unless defined $output;
		    $content_ref = \$output;
		    $content_ref_iscopy++;
		}
		elsif ($ce eq "compress" || $ce eq "x-compress") {
		    die "Can't uncompress content";
		}
		elsif ($ce eq "base64") {  # not really C-T-E, but should be harmless
		    require MIME::Base64;
		    $content_ref = \MIME::Base64::decode($$content_ref);
		    $content_ref_iscopy++;
		}
		elsif ($ce eq "quoted-printable") { # not really C-T-E, but should be harmless
		    require MIME::QuotedPrint;
		    $content_ref = \MIME::QuotedPrint::decode($$content_ref);
		    $content_ref_iscopy++;
		}
		else {
		    die "Don't know how to decode Content-Encoding '$ce'";
		}
	    }
	}

	if ($self->content_is_text || (my $is_xml = $self->content_is_xml)) {
	    my $charset = lc(
	        $opt{charset} ||
		$self->content_type_charset ||
		$opt{default_charset} ||
		$self->content_charset ||
		"ISO-8859-1"
	    );
	    if ($charset eq "none") {
		# leave it as is
	    }
	    elsif ($charset eq "us-ascii" || $charset eq "iso-8859-1") {
		if ($$content_ref =~ /[^\x00-\x7F]/ && defined &utf8::upgrade) {
		    unless ($content_ref_iscopy) {
			my $copy = $$content_ref;
			$content_ref = \$copy;
			$content_ref_iscopy++;
		    }
		    utf8::upgrade($$content_ref);
		}
	    }
	    else {
		require Encode;
		eval {
		    $content_ref = \Encode::decode($charset, $$content_ref,
			 ($opt{charset_strict} ? Encode::FB_CROAK() : 0) | Encode::LEAVE_SRC());
		};
		if ($@) {
		    my $retried;
		    if ($@ =~ /^Unknown encoding/) {
			my $alt_charset = lc($opt{alt_charset} || "");
			if ($alt_charset && $charset ne $alt_charset) {
			    # Retry decoding with the alternative charset
			    $content_ref = \Encode::decode($alt_charset, $$content_ref,
				 ($opt{charset_strict} ? Encode::FB_CROAK() : 0) | Encode::LEAVE_SRC())
			        unless $alt_charset eq "none";
			    $retried++;
			}
		    }
		    die unless $retried;
		}
		die "Encode::decode() returned undef improperly" unless defined $$content_ref;
		if ($is_xml) {
		    # Get rid of the XML encoding declaration if present
		    $$content_ref =~ s/^\x{FEFF}//;
		    if ($$content_ref =~ /^(\s*<\?xml[^\x00]*?\?>)/) {
			substr($$content_ref, 0, length($1)) =~ s/\sencoding\s*=\s*(["']).*?\1//;
		    }
		}
	    }
	}
    };
    if ($@) {
	Carp::croak($@) if $opt{raise_error};
	return undef;
    }

    return $opt{ref} ? $content_ref : $$content_ref;
}


sub decodable
{
    # should match the Content-Encoding values that decoded_content can deal with
    my $self = shift;
    my @enc;
    # XXX preferably we should determine if the modules are available without loading
    # them here
    eval {
        require IO::Uncompress::Gunzip;
        push(@enc, "gzip", "x-gzip");
    };
    eval {
        require IO::Uncompress::Inflate;
        require IO::Uncompress::RawInflate;
        push(@enc, "deflate");
    };
    eval {
        require IO::Uncompress::Bunzip2;
        push(@enc, "x-bzip2", "bzip2");
    };
    # we don't care about announcing the 'identity', 'base64' and
    # 'quoted-printable' stuff
    return wantarray ? @enc : join(", ", @enc);
}


sub decode
{
    my $self = shift;
    return 1 unless $self->header("Content-Encoding");
    if (defined(my $content = $self->decoded_content(charset => "none"))) {
	$self->remove_header("Content-Encoding", "Content-Length", "Content-MD5");
	$self->content($content);
	return 1;
    }
    return 0;
}


sub encode
{
    my($self, @enc) = @_;

    Carp::croak("Can't encode multipart/* messages") if $self->content_type =~ m,^multipart/,;
    Carp::croak("Can't encode message/* messages") if $self->content_type =~ m,^message/,;

    return 1 unless @enc;  # nothing to do

    my $content = $self->content;
    for my $encoding (@enc) {
	if ($encoding eq "identity" || $encoding eq "none") {
	    # nothing to do
	}
	elsif ($encoding eq "base64") {
	    require MIME::Base64;
	    $content = MIME::Base64::encode($content);
	}
	elsif ($encoding eq "gzip" || $encoding eq "x-gzip") {
	    require IO::Compress::Gzip;
	    my $output;
	    IO::Compress::Gzip::gzip(\$content, \$output, Minimal => 1)
		or die "Can't gzip content: $IO::Compress::Gzip::GzipError";
	    $content = $output;
	}
	elsif ($encoding eq "deflate") {
	    require IO::Compress::Deflate;
	    my $output;
	    IO::Compress::Deflate::deflate(\$content, \$output)
		or die "Can't deflate content: $IO::Compress::Deflate::DeflateError";
	    $content = $output;
	}
	elsif ($encoding eq "x-bzip2" || $encoding eq "bzip2") {
	    require IO::Compress::Bzip2;
	    my $output;
	    IO::Compress::Bzip2::bzip2(\$content, \$output)
		or die "Can't bzip2 content: $IO::Compress::Bzip2::Bzip2Error";
	    $content = $output;
	}
	elsif ($encoding eq "rot13") {  # for the fun of it
	    $content =~ tr/A-Za-z/N-ZA-Mn-za-m/;
	}
	else {
	    return 0;
	}
    }
    my $h = $self->header("Content-Encoding");
    unshift(@enc, $h) if $h;
    $self->header("Content-Encoding", join(", ", @enc));
    $self->remove_header("Content-Length", "Content-MD5");
    $self->content($content);
    return 1;
}


sub as_string
{
    my($self, $eol) = @_;
    $eol = "\n" unless defined $eol;

    # The calculation of content might update the headers
    # so we need to do that first.
    my $content = $self->content;

    return join("", $self->{'_headers'}->as_string($eol),
		    $eol,
		    $content,
		    (@_ == 1 && length($content) &&
		     $content !~ /\n\z/) ? "\n" : "",
		);
}


sub dump
{
    my($self, %opt) = @_;
    my $content = $self->content;
    my $chopped = 0;
    if (!ref($content)) {
	my $maxlen = $opt{maxlength};
	$maxlen = 512 unless defined($maxlen);
	if ($maxlen && length($content) > $maxlen * 1.1 + 3) {
	    $chopped = length($content) - $maxlen;
	    $content = substr($content, 0, $maxlen) . "...";
	}

	$content =~ s/\\/\\\\/g;
	$content =~ s/\t/\\t/g;
	$content =~ s/\r/\\r/g;

	# no need for 3 digits in escape for these
	$content =~ s/([\0-\11\13-\037])(?!\d)/sprintf('\\%o',ord($1))/eg;

	$content =~ s/([\0-\11\13-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg;
	$content =~ s/([^\12\040-\176])/sprintf('\\x{%X}',ord($1))/eg;

	# remaining whitespace
	$content =~ s/( +)\n/("\\40" x length($1)) . "\n"/eg;
	$content =~ s/(\n+)\n/("\\n" x length($1)) . "\n"/eg;
	$content =~ s/\n\z/\\n/;

	my $no_content = $opt{no_content};
	$no_content = "(no content)" unless defined $no_content;
	if ($content eq $no_content) {
	    # escape our $no_content marker
	    $content =~ s/^(.)/sprintf('\\x%02X',ord($1))/eg;
	}
	elsif ($content eq "") {
	    $content = $no_content;
	}
    }

    my @dump;
    push(@dump, $opt{preheader}) if $opt{preheader};
    push(@dump, $self->{_headers}->as_string, $content);
    push(@dump, "(+ $chopped more bytes not shown)") if $chopped;

    my $dump = join("\n", @dump, "");
    $dump =~ s/^/$opt{prefix}/gm if $opt{prefix};

    print $dump unless defined wantarray;
    return $dump;
}

# allow subclasses to override what will handle individual parts
sub _part_class {
    return __PACKAGE__;
}

sub parts {
    my $self = shift;
    if (defined(wantarray) && (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR")) {
	$self->_parts;
    }
    my $old = $self->{_parts};
    if (@_) {
	my @parts = map { ref($_) eq 'ARRAY' ? @$_ : $_ } @_;
	my $ct = $self->content_type || "";
	if ($ct =~ m,^message/,) {
	    Carp::croak("Only one part allowed for $ct content")
		if @parts > 1;
	}
	elsif ($ct !~ m,^multipart/,) {
	    $self->remove_content_headers;
	    $self->content_type("multipart/mixed");
	}
	$self->{_parts} = \@parts;
	_stale_content($self);
    }
    return @$old if wantarray;
    return $old->[0];
}

sub add_part {
    my $self = shift;
    if (($self->content_type || "") !~ m,^multipart/,) {
	my $p = $self->_part_class->new(
	    $self->remove_content_headers,
	    $self->content(""),
	);
	$self->content_type("multipart/mixed");
	$self->{_parts} = [];
        if ($p->headers->header_field_names || $p->content ne "") {
            push(@{$self->{_parts}}, $p);
        }
    }
    elsif (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR") {
	$self->_parts;
    }

    push(@{$self->{_parts}}, @_);
    _stale_content($self);
    return;
}

sub _stale_content {
    my $self = shift;
    if (ref($self->{_content}) eq "SCALAR") {
	# must recalculate now
	$self->_content;
    }
    else {
	# just invalidate cache
	delete $self->{_content};
	delete $self->{_content_ref};
    }
}


# delegate all other method calls to the headers object.
our $AUTOLOAD;
sub AUTOLOAD
{
    my $method = substr($AUTOLOAD, rindex($AUTOLOAD, '::')+2);

    # We create the function here so that it will not need to be
    # autoloaded the next time.
    no strict 'refs';
    *$method = sub { local $Carp::Internal{+__PACKAGE__} = 1; shift->headers->$method(@_) };
    goto &$method;
}


sub DESTROY {}  # avoid AUTOLOADing it


# Private method to access members in %$self
sub _elem
{
    my $self = shift;
    my $elem = shift;
    my $old = $self->{$elem};
    $self->{$elem} = $_[0] if @_;
    return $old;
}


# Create private _parts attribute from current _content
sub _parts {
    my $self = shift;
    my $ct = $self->content_type;
    if ($ct =~ m,^multipart/,) {
	require HTTP::Headers::Util;
	my @h = HTTP::Headers::Util::split_header_words($self->header("Content-Type"));
	die "Assert" unless @h;
	my %h = @{$h[0]};
	if (defined(my $b = $h{boundary})) {
	    my $str = $self->content;
	    $str =~ s/\r?\n--\Q$b\E--.*//s;
	    if ($str =~ s/(^|.*?\r?\n)--\Q$b\E\r?\n//s) {
		$self->{_parts} = [map $self->_part_class->parse($_),
				   split(/\r?\n--\Q$b\E\r?\n/, $str)]
	    }
	}
    }
    elsif ($ct eq "message/http") {
	require HTTP::Request;
	require HTTP::Response;
	my $content = $self->content;
	my $class = ($content =~ m,^(HTTP/.*)\n,) ?
	    "HTTP::Response" : "HTTP::Request";
	$self->{_parts} = [$class->parse($content)];
    }
    elsif ($ct =~ m,^message/,) {
	$self->{_parts} = [ $self->_part_class->parse($self->content) ];
    }

    $self->{_parts} ||= [];
}


# Create private _content attribute from current _parts
sub _content {
    my $self = shift;
    my $ct = $self->{_headers}->header("Content-Type") || "multipart/mixed";
    if ($ct =~ m,^\s*message/,i) {
	_set_content($self, $self->{_parts}[0]->as_string($CRLF), 1);
	return;
    }

    require HTTP::Headers::Util;
    my @v = HTTP::Headers::Util::split_header_words($ct);
    Carp::carp("Multiple Content-Type headers") if @v > 1;
    @v = @{$v[0]};

    my $boundary;
    my $boundary_index;
    for (my @tmp = @v; @tmp;) {
	my($k, $v) = splice(@tmp, 0, 2);
	if ($k eq "boundary") {
	    $boundary = $v;
	    $boundary_index = @v - @tmp - 1;
	    last;
	}
    }

    my @parts = map $_->as_string($CRLF), @{$self->{_parts}};

    my $bno = 0;
    $boundary = _boundary() unless defined $boundary;
 CHECK_BOUNDARY:
    {
	for (@parts) {
	    if (index($_, $boundary) >= 0) {
		# must have a better boundary
		$boundary = _boundary(++$bno);
		redo CHECK_BOUNDARY;
	    }
	}
    }

    if ($boundary_index) {
	$v[$boundary_index] = $boundary;
    }
    else {
	push(@v, boundary => $boundary);
    }

    $ct = HTTP::Headers::Util::join_header_words(@v);
    $self->{_headers}->header("Content-Type", $ct);

    _set_content($self, "--$boundary$CRLF" .
	                join("$CRLF--$boundary$CRLF", @parts) .
			"$CRLF--$boundary--$CRLF",
                        1);
}


sub _boundary
{
    my $size = shift || return "xYzZY";
    require MIME::Base64;
    my $b = MIME::Base64::encode(join("", map chr(rand(256)), 1..$size*3), "");
    $b =~ s/[\W]/X/g;  # ensure alnum only
    $b;
}


1;

=pod

=encoding UTF-8

=head1 NAME

HTTP::Message - HTTP style message (base class)

=head1 VERSION

version 6.18

=head1 SYNOPSIS

 use base 'HTTP::Message';

=head1 DESCRIPTION

An C<HTTP::Message> object contains some headers and a content body.
The following methods are available:

=over 4

=item $mess = HTTP::Message->new

=item $mess = HTTP::Message->new( $headers )

=item $mess = HTTP::Message->new( $headers, $content )

This constructs a new message object.  Normally you would want
construct C<HTTP::Request> or C<HTTP::Response> objects instead.

The optional $header argument should be a reference to an
C<HTTP::Headers> object or a plain array reference of key/value pairs.
If an C<HTTP::Headers> object is provided then a copy of it will be
embedded into the constructed message, i.e. it will not be owned and
can be modified afterwards without affecting the message.

The optional $content argument should be a string of bytes.

=item $mess = HTTP::Message->parse( $str )

This constructs a new message object by parsing the given string.

=item $mess->headers

Returns the embedded C<HTTP::Headers> object.

=item $mess->headers_as_string

=item $mess->headers_as_string( $eol )

Call the as_string() method for the headers in the
message.  This will be the same as

    $mess->headers->as_string

but it will make your program a whole character shorter :-)

=item $mess->content

=item $mess->content( $bytes )

The content() method sets the raw content if an argument is given.  If no
argument is given the content is not touched.  In either case the
original raw content is returned.

If the C<undef> argument is given, the content is reset to its default value,
which is an empty string.

Note that the content should be a string of bytes.  Strings in perl
can contain characters outside the range of a byte.  The C<Encode>
module can be used to turn such strings into a string of bytes.

=item $mess->add_content( $bytes )

The add_content() methods appends more data bytes to the end of the
current content buffer.

=item $mess->add_content_utf8( $string )

The add_content_utf8() method appends the UTF-8 bytes representing the
string to the end of the current content buffer.

=item $mess->content_ref

=item $mess->content_ref( \$bytes )

The content_ref() method will return a reference to content buffer string.
It can be more efficient to access the content this way if the content
is huge, and it can even be used for direct manipulation of the content,
for instance:

  ${$res->content_ref} =~ s/\bfoo\b/bar/g;

This example would modify the content buffer in-place.

If an argument is passed it will setup the content to reference some
external source.  The content() and add_content() methods
will automatically dereference scalar references passed this way.  For
other references content() will return the reference itself and
add_content() will refuse to do anything.

=item $mess->content_charset

This returns the charset used by the content in the message.  The
charset is either found as the charset attribute of the
C<Content-Type> header or by guessing.

See L<http://www.w3.org/TR/REC-html40/charset.html#spec-char-encoding>
for details about how charset is determined.

=item $mess->decoded_content( %options )

Returns the content with any C<Content-Encoding> undone and for textual content
the raw content encoded to Perl's Unicode strings.  If the C<Content-Encoding>
or C<charset> of the message is unknown this method will fail by returning
C<undef>.

The following options can be specified.

=over

=item C<charset>

This override the charset parameter for text content.  The value
C<none> can used to suppress decoding of the charset.

=item C<default_charset>

This override the default charset guessed by content_charset() or
if that fails "ISO-8859-1".

=item C<alt_charset>

If decoding fails because the charset specified in the Content-Type header
isn't recognized by Perl's Encode module, then try decoding using this charset
instead of failing.  The C<alt_charset> might be specified as C<none> to simply
return the string without any decoding of charset as alternative.

=item C<charset_strict>

Abort decoding if malformed characters is found in the content.  By
default you get the substitution character ("\x{FFFD}") in place of
malformed characters.

=item C<raise_error>

If TRUE then raise an exception if not able to decode content.  Reason
might be that the specified C<Content-Encoding> or C<charset> is not
supported.  If this option is FALSE, then decoded_content() will return
C<undef> on errors, but will still set $@.

=item C<ref>

If TRUE then a reference to decoded content is returned.  This might
be more efficient in cases where the decoded content is identical to
the raw content as no data copying is required in this case.

=back

=item $mess->decodable

=item HTTP::Message::decodable()

This returns the encoding identifiers that decoded_content() can
process.  In scalar context returns a comma separated string of
identifiers.

This value is suitable for initializing the C<Accept-Encoding> request
header field.

=item $mess->decode

This method tries to replace the content of the message with the
decoded version and removes the C<Content-Encoding> header.  Returns
TRUE if successful and FALSE if not.

If the message does not have a C<Content-Encoding> header this method
does nothing and returns TRUE.

Note that the content of the message is still bytes after this method
has been called and you still need to call decoded_content() if you
want to process its content as a string.

=item $mess->encode( $encoding, ... )

Apply the given encodings to the content of the message.  Returns TRUE
if successful. The "identity" (non-)encoding is always supported; other
currently supported encodings, subject to availability of required
additional modules, are "gzip", "deflate", "x-bzip2" and "base64".

A successful call to this function will set the C<Content-Encoding>
header.

Note that C<multipart/*> or C<message/*> messages can't be encoded and
this method will croak if you try.

=item $mess->parts

=item $mess->parts( @parts )

=item $mess->parts( \@parts )

Messages can be composite, i.e. contain other messages.  The composite
messages have a content type of C<multipart/*> or C<message/*>.  This
method give access to the contained messages.

The argumentless form will return a list of C<HTTP::Message> objects.
If the content type of $msg is not C<multipart/*> or C<message/*> then
this will return the empty list.  In scalar context only the first
object is returned.  The returned message parts should be regarded as
read-only (future versions of this library might make it possible
to modify the parent by modifying the parts).

If the content type of $msg is C<message/*> then there will only be
one part returned.

If the content type is C<message/http>, then the return value will be
either an C<HTTP::Request> or an C<HTTP::Response> object.

If a @parts argument is given, then the content of the message will be
modified. The array reference form is provided so that an empty list
can be provided.  The @parts array should contain C<HTTP::Message>
objects.  The @parts objects are owned by $mess after this call and
should not be modified or made part of other messages.

When updating the message with this method and the old content type of
$mess is not C<multipart/*> or C<message/*>, then the content type is
set to C<multipart/mixed> and all other content headers are cleared.

This method will croak if the content type is C<message/*> and more
than one part is provided.

=item $mess->add_part( $part )

This will add a part to a message.  The $part argument should be
another C<HTTP::Message> object.  If the previous content type of
$mess is not C<multipart/*> then the old content (together with all
content headers) will be made part #1 and the content type made
C<multipart/mixed> before the new part is added.  The $part object is
owned by $mess after this call and should not be modified or made part
of other messages.

There is no return value.

=item $mess->clear

Will clear the headers and set the content to the empty string.  There
is no return value

=item $mess->protocol

=item $mess->protocol( $proto )

Sets the HTTP protocol used for the message.  The protocol() is a string
like C<HTTP/1.0> or C<HTTP/1.1>.

=item $mess->clone

Returns a copy of the message object.

=item $mess->as_string

=item $mess->as_string( $eol )

Returns the message formatted as a single string.

The optional $eol parameter specifies the line ending sequence to use.
The default is "\n".  If no $eol is given then as_string will ensure
that the returned string is newline terminated (even when the message
content is not).  No extra newline is appended if an explicit $eol is
passed.

=item $mess->dump( %opt )

Returns the message formatted as a string.  In void context print the string.

This differs from C<< $mess->as_string >> in that it escapes the bytes
of the content so that it's safe to print them and it limits how much
content to print.  The escapes syntax used is the same as for Perl's
double quoted strings.  If there is no content the string "(no
content)" is shown in its place.

Options to influence the output can be passed as key/value pairs. The
following options are recognized:

=over

=item maxlength => $num

How much of the content to show.  The default is 512.  Set this to 0
for unlimited.

If the content is longer then the string is chopped at the limit and
the string "...\n(### more bytes not shown)" appended.

=item no_content => $str

Replaces the "(no content)" marker.

=item prefix => $str

A string that will be prefixed to each line of the dump.

=back

=back

All methods unknown to C<HTTP::Message> itself are delegated to the
C<HTTP::Headers> object that is part of every message.  This allows
convenient access to these methods.  Refer to L<HTTP::Headers> for
details of these methods:

    $mess->header( $field => $val )
    $mess->push_header( $field => $val )
    $mess->init_header( $field => $val )
    $mess->remove_header( $field )
    $mess->remove_content_headers
    $mess->header_field_names
    $mess->scan( \&doit )

    $mess->date
    $mess->expires
    $mess->if_modified_since
    $mess->if_unmodified_since
    $mess->last_modified
    $mess->content_type
    $mess->content_encoding
    $mess->content_length
    $mess->content_language
    $mess->title
    $mess->user_agent
    $mess->server
    $mess->from
    $mess->referer
    $mess->www_authenticate
    $mess->authorization
    $mess->proxy_authorization
    $mess->authorization_basic
    $mess->proxy_authorization_basic

=head1 AUTHOR

Gisle Aas <gisle@activestate.com>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 1994-2017 by Gisle Aas.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut

__END__


#ABSTRACT: HTTP style message (base class)
source-git / perl-HTTP-Message

Source Code

Files