package HTTP::Headers::Util; use strict; use warnings; our $VERSION = '6.16'; use base 'Exporter'; our @EXPORT_OK=qw(split_header_words _split_header_words join_header_words); sub split_header_words { my @res = &_split_header_words; for my $arr (@res) { for (my $i = @$arr - 2; $i >= 0; $i -= 2) { $arr->[$i] = lc($arr->[$i]); } } return @res; } sub _split_header_words { my(@val) = @_; my @res; for (@val) { my @cur; while (length) { if (s/^\s*(=*[^\s=;,]+)//) { # 'token' or parameter 'attribute' push(@cur, $1); # a quoted value if (s/^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"//) { my $val = $1; $val =~ s/\\(.)/$1/g; push(@cur, $val); # some unquoted value } elsif (s/^\s*=\s*([^;,\s]*)//) { my $val = $1; $val =~ s/\s+$//; push(@cur, $val); # no value, a lone token } else { push(@cur, undef); } } elsif (s/^\s*,//) { push(@res, [@cur]) if @cur; @cur = (); } elsif (s/^\s*;// || s/^\s+//) { # continue } else { die "This should not happen: '$_'"; } } push(@res, \@cur) if @cur; } @res; } sub join_header_words { @_ = ([@_]) if @_ && !ref($_[0]); my @res; for (@_) { my @cur = @$_; my @attr; while (@cur) { my $k = shift @cur; my $v = shift @cur; if (defined $v) { if ($v =~ /[\x00-\x20()<>@,;:\\\"\/\[\]?={}\x7F-\xFF]/ || !length($v)) { $v =~ s/([\"\\])/\\$1/g; # escape " and \ $k .= qq(="$v"); } else { # token $k .= "=$v"; } } push(@attr, $k); } push(@res, join("; ", @attr)) if @attr; } join(", ", @res); } 1; =pod =encoding UTF-8 =head1 NAME HTTP::Headers::Util - Header value parsing utility functions =head1 VERSION version 6.16 =head1 SYNOPSIS use HTTP::Headers::Util qw(split_header_words); @values = split_header_words($h->header("Content-Type")); =head1 DESCRIPTION This module provides a few functions that helps parsing and construction of valid HTTP header values. None of the functions are exported by default. The following functions are available: =over 4 =item split_header_words( @header_values ) This function will parse the header values given as argument into a list of anonymous arrays containing key/value pairs. The function knows how to deal with ",", ";" and "=" as well as quoted values after "=". A list of space separated tokens are parsed as if they were separated by ";". If the @header_values passed as argument contains multiple values, then they are treated as if they were a single value separated by comma ",". This means that this function is useful for parsing header fields that follow this syntax (BNF as from the HTTP/1.1 specification, but we relax the requirement for tokens). headers = #header header = (token | parameter) *( [";"] (token | parameter)) token = 1* separators = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\" | <"> | "/" | "[" | "]" | "?" | "=" | "{" | "}" | SP | HT quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) qdtext = > quoted-pair = "\" CHAR parameter = attribute "=" value attribute = token value = token | quoted-string Each I
is represented by an anonymous array of key/value pairs. The keys will be all be forced to lower case. The value for a simple token (not part of a parameter) is C. Syntactically incorrect headers will not necessarily be parsed as you would want. This is easier to describe with some examples: split_header_words('foo="bar"; port="80,81"; DISCARD, BAR=baz'); split_header_words('text/html; charset="iso-8859-1"'); split_header_words('Basic realm="\\"foo\\\\bar\\""'); will return [foo=>'bar', port=>'80,81', discard=> undef], [bar=>'baz' ] ['text/html' => undef, charset => 'iso-8859-1'] [basic => undef, realm => "\"foo\\bar\""] If you don't want the function to convert tokens and attribute keys to lower case you can call it as C<_split_header_words> instead (with a leading underscore). =item join_header_words( @arrays ) This will do the opposite of the conversion done by split_header_words(). It takes a list of anonymous arrays as arguments (or a list of key/value pairs) and produces a single header value. Attribute values are quoted if needed. Example: join_header_words(["text/plain" => undef, charset => "iso-8859/1"]); join_header_words("text/plain" => undef, charset => "iso-8859/1"); will both return the string: text/plain; charset="iso-8859/1" =back =head1 AUTHOR Gisle Aas =head1 COPYRIGHT AND LICENSE This software is copyright (c) 1994-2017 by Gisle Aas. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut __END__ #ABSTRACT: Header value parsing utility functions