| package HTTP::Headers::Util;
use strict;
use warnings;
our $VERSION = '6.18';
use base 'Exporter';
our @EXPORT_OK=qw(split_header_words _split_header_words join_header_words);
sub split_header_words {
    my @res = &_split_header_words;
    for my $arr (@res) {
	for (my $i = @$arr - 2; $i >= 0; $i -= 2) {
	    $arr->[$i] = lc($arr->[$i]);
	}
    }
    return @res;
}
sub _split_header_words
{
    my(@val) = @_;
    my @res;
    for (@val) {
	my @cur;
	while (length) {
	    if (s/^\s*(=*[^\s=;,]+)//) {  # 'token' or parameter 'attribute'
		push(@cur, $1);
		# a quoted value
		if (s/^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"//) {
		    my $val = $1;
		    $val =~ s/\\(.)/$1/g;
		    push(@cur, $val);
		# some unquoted value
		}
		elsif (s/^\s*=\s*([^;,\s]*)//) {
		    my $val = $1;
		    $val =~ s/\s+$//;
		    push(@cur, $val);
		# no value, a lone token
		}
		else {
		    push(@cur, undef);
		}
	    }
	    elsif (s/^\s*,//) {
		push(@res, [@cur]) if @cur;
		@cur = ();
	    }
	    elsif (s/^\s*;// || s/^\s+//) {
		# continue
	    }
	    else {
		die "This should not happen: '$_'";
	    }
	}
	push(@res, \@cur) if @cur;
    }
    @res;
}
sub join_header_words
{
    @_ = ([@_]) if @_ && !ref($_[0]);
    my @res;
    for (@_) {
	my @cur = @$_;
	my @attr;
	while (@cur) {
	    my $k = shift @cur;
	    my $v = shift @cur;
	    if (defined $v) {
		if ($v =~ /[\x00-\x20()<>@,;:\\\"\/\[\]?={}\x7F-\xFF]/ || !length($v)) {
		    $v =~ s/([\"\\])/\\$1/g;  # escape " and \
		    $k .= qq(="$v");
		}
		else {
		    # token
		    $k .= "=$v";
		}
	    }
	    push(@attr, $k);
	}
	push(@res, join("; ", @attr)) if @attr;
    }
    join(", ", @res);
}
1;
=pod
=encoding UTF-8
=head1 NAME
HTTP::Headers::Util - Header value parsing utility functions
=head1 VERSION
version 6.18
=head1 SYNOPSIS
  use HTTP::Headers::Util qw(split_header_words);
  @values = split_header_words($h->header("Content-Type"));
=head1 DESCRIPTION
This module provides a few functions that helps parsing and
construction of valid HTTP header values.  None of the functions are
exported by default.
The following functions are available:
=over 4
=item split_header_words( @header_values )
This function will parse the header values given as argument into a
list of anonymous arrays containing key/value pairs.  The function
knows how to deal with ",", ";" and "=" as well as quoted values after
"=".  A list of space separated tokens are parsed as if they were
separated by ";".
If the @header_values passed as argument contains multiple values,
then they are treated as if they were a single value separated by
comma ",".
This means that this function is useful for parsing header fields that
follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
the requirement for tokens).
  headers           = #header
  header            = (token | parameter) *( [";"] (token | parameter))
  token             = 1*<any CHAR except CTLs or separators>
  separators        = "(" | ")" | "<" | ">" | "@"
                    | "," | ";" | ":" | "\" | <">
                    | "/" | "[" | "]" | "?" | "="
                    | "{" | "}" | SP | HT
  quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
  qdtext            = <any TEXT except <">>
  quoted-pair       = "\" CHAR
  parameter         = attribute "=" value
  attribute         = token
  value             = token | quoted-string
Each I<header> is represented by an anonymous array of key/value
pairs.  The keys will be all be forced to lower case.
The value for a simple token (not part of a parameter) is C<undef>.
Syntactically incorrect headers will not necessarily be parsed as you
would want.
This is easier to describe with some examples:
   split_header_words('foo="bar"; port="80,81"; DISCARD, BAR=baz');
   split_header_words('text/html; charset="iso-8859-1"');
   split_header_words('Basic realm="\\"foo\\\\bar\\""');
will return
   [foo=>'bar', port=>'80,81', discard=> undef], [bar=>'baz' ]
   ['text/html' => undef, charset => 'iso-8859-1']
   [basic => undef, realm => "\"foo\\bar\""]
If you don't want the function to convert tokens and attribute keys to
lower case you can call it as C<_split_header_words> instead (with a
leading underscore).
=item join_header_words( @arrays )
This will do the opposite of the conversion done by split_header_words().
It takes a list of anonymous arrays as arguments (or a list of
key/value pairs) and produces a single header value.  Attribute values
are quoted if needed.
Example:
   join_header_words(["text/plain" => undef, charset => "iso-8859/1"]);
   join_header_words("text/plain" => undef, charset => "iso-8859/1");
will both return the string:
   text/plain; charset="iso-8859/1"
=back
=head1 AUTHOR
Gisle Aas <gisle@activestate.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 1994-2017 by Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
__END__
#ABSTRACT: Header value parsing utility functions
 |