Blame lib/HTTP/Headers/Util.pm

Packit a09cf7
package HTTP::Headers::Util;
Packit a09cf7
Packit a09cf7
use strict;
Packit a09cf7
use warnings;
Packit a09cf7
Packit a09cf7
our $VERSION = '6.18';
Packit a09cf7
Packit a09cf7
use base 'Exporter';
Packit a09cf7
Packit a09cf7
our @EXPORT_OK=qw(split_header_words _split_header_words join_header_words);
Packit a09cf7
Packit a09cf7
Packit a09cf7
sub split_header_words {
Packit a09cf7
    my @res = &_split_header_words;
Packit a09cf7
    for my $arr (@res) {
Packit a09cf7
	for (my $i = @$arr - 2; $i >= 0; $i -= 2) {
Packit a09cf7
	    $arr->[$i] = lc($arr->[$i]);
Packit a09cf7
	}
Packit a09cf7
    }
Packit a09cf7
    return @res;
Packit a09cf7
}
Packit a09cf7
Packit a09cf7
sub _split_header_words
Packit a09cf7
{
Packit a09cf7
    my(@val) = @_;
Packit a09cf7
    my @res;
Packit a09cf7
    for (@val) {
Packit a09cf7
	my @cur;
Packit a09cf7
	while (length) {
Packit a09cf7
	    if (s/^\s*(=*[^\s=;,]+)//) {  # 'token' or parameter 'attribute'
Packit a09cf7
		push(@cur, $1);
Packit a09cf7
		# a quoted value
Packit a09cf7
		if (s/^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"//) {
Packit a09cf7
		    my $val = $1;
Packit a09cf7
		    $val =~ s/\\(.)/$1/g;
Packit a09cf7
		    push(@cur, $val);
Packit a09cf7
		# some unquoted value
Packit a09cf7
		}
Packit a09cf7
		elsif (s/^\s*=\s*([^;,\s]*)//) {
Packit a09cf7
		    my $val = $1;
Packit a09cf7
		    $val =~ s/\s+$//;
Packit a09cf7
		    push(@cur, $val);
Packit a09cf7
		# no value, a lone token
Packit a09cf7
		}
Packit a09cf7
		else {
Packit a09cf7
		    push(@cur, undef);
Packit a09cf7
		}
Packit a09cf7
	    }
Packit a09cf7
	    elsif (s/^\s*,//) {
Packit a09cf7
		push(@res, [@cur]) if @cur;
Packit a09cf7
		@cur = ();
Packit a09cf7
	    }
Packit a09cf7
	    elsif (s/^\s*;// || s/^\s+//) {
Packit a09cf7
		# continue
Packit a09cf7
	    }
Packit a09cf7
	    else {
Packit a09cf7
		die "This should not happen: '$_'";
Packit a09cf7
	    }
Packit a09cf7
	}
Packit a09cf7
	push(@res, \@cur) if @cur;
Packit a09cf7
    }
Packit a09cf7
    @res;
Packit a09cf7
}
Packit a09cf7
Packit a09cf7
Packit a09cf7
sub join_header_words
Packit a09cf7
{
Packit a09cf7
    @_ = ([@_]) if @_ && !ref($_[0]);
Packit a09cf7
    my @res;
Packit a09cf7
    for (@_) {
Packit a09cf7
	my @cur = @$_;
Packit a09cf7
	my @attr;
Packit a09cf7
	while (@cur) {
Packit a09cf7
	    my $k = shift @cur;
Packit a09cf7
	    my $v = shift @cur;
Packit a09cf7
	    if (defined $v) {
Packit a09cf7
		if ($v =~ /[\x00-\x20()<>@,;:\\\"\/\[\]?={}\x7F-\xFF]/ || !length($v)) {
Packit a09cf7
		    $v =~ s/([\"\\])/\\$1/g;  # escape " and \
Packit a09cf7
		    $k .= qq(="$v");
Packit a09cf7
		}
Packit a09cf7
		else {
Packit a09cf7
		    # token
Packit a09cf7
		    $k .= "=$v";
Packit a09cf7
		}
Packit a09cf7
	    }
Packit a09cf7
	    push(@attr, $k);
Packit a09cf7
	}
Packit a09cf7
	push(@res, join("; ", @attr)) if @attr;
Packit a09cf7
    }
Packit a09cf7
    join(", ", @res);
Packit a09cf7
}
Packit a09cf7
Packit a09cf7
Packit a09cf7
1;
Packit a09cf7
Packit a09cf7
=pod
Packit a09cf7
Packit a09cf7
=encoding UTF-8
Packit a09cf7
Packit a09cf7
=head1 NAME
Packit a09cf7
Packit a09cf7
HTTP::Headers::Util - Header value parsing utility functions
Packit a09cf7
Packit a09cf7
=head1 VERSION
Packit a09cf7
Packit a09cf7
version 6.18
Packit a09cf7
Packit a09cf7
=head1 SYNOPSIS
Packit a09cf7
Packit a09cf7
  use HTTP::Headers::Util qw(split_header_words);
Packit a09cf7
  @values = split_header_words($h->header("Content-Type"));
Packit a09cf7
Packit a09cf7
=head1 DESCRIPTION
Packit a09cf7
Packit a09cf7
This module provides a few functions that helps parsing and
Packit a09cf7
construction of valid HTTP header values.  None of the functions are
Packit a09cf7
exported by default.
Packit a09cf7
Packit a09cf7
The following functions are available:
Packit a09cf7
Packit a09cf7
=over 4
Packit a09cf7
Packit a09cf7
=item split_header_words( @header_values )
Packit a09cf7
Packit a09cf7
This function will parse the header values given as argument into a
Packit a09cf7
list of anonymous arrays containing key/value pairs.  The function
Packit a09cf7
knows how to deal with ",", ";" and "=" as well as quoted values after
Packit a09cf7
"=".  A list of space separated tokens are parsed as if they were
Packit a09cf7
separated by ";".
Packit a09cf7
Packit a09cf7
If the @header_values passed as argument contains multiple values,
Packit a09cf7
then they are treated as if they were a single value separated by
Packit a09cf7
comma ",".
Packit a09cf7
Packit a09cf7
This means that this function is useful for parsing header fields that
Packit a09cf7
follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
Packit a09cf7
the requirement for tokens).
Packit a09cf7
Packit a09cf7
  headers           = #header
Packit a09cf7
  header            = (token | parameter) *( [";"] (token | parameter))
Packit a09cf7
Packit a09cf7
  token             = 1*<any CHAR except CTLs or separators>
Packit a09cf7
  separators        = "(" | ")" | "<" | ">" | "@"
Packit a09cf7
                    | "," | ";" | ":" | "\" | <">
Packit a09cf7
                    | "/" | "[" | "]" | "?" | "="
Packit a09cf7
                    | "{" | "}" | SP | HT
Packit a09cf7
Packit a09cf7
  quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
Packit a09cf7
  qdtext            = <any TEXT except <">>
Packit a09cf7
  quoted-pair       = "\" CHAR
Packit a09cf7
Packit a09cf7
  parameter         = attribute "=" value
Packit a09cf7
  attribute         = token
Packit a09cf7
  value             = token | quoted-string
Packit a09cf7
Packit a09cf7
Each I<header> is represented by an anonymous array of key/value
Packit a09cf7
pairs.  The keys will be all be forced to lower case.
Packit a09cf7
The value for a simple token (not part of a parameter) is C<undef>.
Packit a09cf7
Syntactically incorrect headers will not necessarily be parsed as you
Packit a09cf7
would want.
Packit a09cf7
Packit a09cf7
This is easier to describe with some examples:
Packit a09cf7
Packit a09cf7
   split_header_words('foo="bar"; port="80,81"; DISCARD, BAR=baz');
Packit a09cf7
   split_header_words('text/html; charset="iso-8859-1"');
Packit a09cf7
   split_header_words('Basic realm="\\"foo\\\\bar\\""');
Packit a09cf7
Packit a09cf7
will return
Packit a09cf7
Packit a09cf7
   [foo=>'bar', port=>'80,81', discard=> undef], [bar=>'baz' ]
Packit a09cf7
   ['text/html' => undef, charset => 'iso-8859-1']
Packit a09cf7
   [basic => undef, realm => "\"foo\\bar\""]
Packit a09cf7
Packit a09cf7
If you don't want the function to convert tokens and attribute keys to
Packit a09cf7
lower case you can call it as C<_split_header_words> instead (with a
Packit a09cf7
leading underscore).
Packit a09cf7
Packit a09cf7
=item join_header_words( @arrays )
Packit a09cf7
Packit a09cf7
This will do the opposite of the conversion done by split_header_words().
Packit a09cf7
It takes a list of anonymous arrays as arguments (or a list of
Packit a09cf7
key/value pairs) and produces a single header value.  Attribute values
Packit a09cf7
are quoted if needed.
Packit a09cf7
Packit a09cf7
Example:
Packit a09cf7
Packit a09cf7
   join_header_words(["text/plain" => undef, charset => "iso-8859/1"]);
Packit a09cf7
   join_header_words("text/plain" => undef, charset => "iso-8859/1");
Packit a09cf7
Packit a09cf7
will both return the string:
Packit a09cf7
Packit a09cf7
   text/plain; charset="iso-8859/1"
Packit a09cf7
Packit a09cf7
=back
Packit a09cf7
Packit a09cf7
=head1 AUTHOR
Packit a09cf7
Packit a09cf7
Gisle Aas <gisle@activestate.com>
Packit a09cf7
Packit a09cf7
=head1 COPYRIGHT AND LICENSE
Packit a09cf7
Packit a09cf7
This software is copyright (c) 1994-2017 by Gisle Aas.
Packit a09cf7
Packit a09cf7
This is free software; you can redistribute it and/or modify it under
Packit a09cf7
the same terms as the Perl 5 programming language system itself.
Packit a09cf7
Packit a09cf7
=cut
Packit a09cf7
Packit a09cf7
__END__
Packit a09cf7
Packit a09cf7
Packit a09cf7
#ABSTRACT: Header value parsing utility functions
Packit a09cf7