1 files changed, 197 insertions, 0 deletions
diff --git a/lib/HTTP/Headers/Util.pm b/lib/HTTP/Headers/Util.pm
new file mode 100644
index 0000000..6e90eaf
--- /dev/null
+++ b/lib/HTTP/Headers/Util.pm
@@ -0,0 +1,197 @@
+package HTTP::Headers::Util;
+
+use strict;
+use warnings;
+
+our $VERSION = "6.10";
+
+use base 'Exporter';
+
+our @EXPORT_OK=qw(split_header_words _split_header_words join_header_words);
+
+
+sub split_header_words {
+    my @res = &_split_header_words;
+    for my $arr (@res) {
+	for (my $i = @$arr - 2; $i >= 0; $i -= 2) {
+	    $arr->[$i] = lc($arr->[$i]);
+	}
+    }
+    return @res;
+}
+
+sub _split_header_words
+{
+    my(@val) = @_;
+    my @res;
+    for (@val) {
+	my @cur;
+	while (length) {
+	    if (s/^\s*(=*[^\s=;,]+)//) {  # 'token' or parameter 'attribute'
+		push(@cur, $1);
+		# a quoted value
+		if (s/^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"//) {
+		    my $val = $1;
+		    $val =~ s/\\(.)/$1/g;
+		    push(@cur, $val);
+		# some unquoted value
+		}
+		elsif (s/^\s*=\s*([^;,\s]*)//) {
+		    my $val = $1;
+		    $val =~ s/\s+$//;
+		    push(@cur, $val);
+		# no value, a lone token
+		}
+		else {
+		    push(@cur, undef);
+		}
+	    }
+	    elsif (s/^\s*,//) {
+		push(@res, [@cur]) if @cur;
+		@cur = ();
+	    }
+	    elsif (s/^\s*;// || s/^\s+//) {
+		# continue
+	    }
+	    else {
+		die "This should not happen: '$_'";
+	    }
+	}
+	push(@res, \@cur) if @cur;
+    }
+    @res;
+}
+
+
+sub join_header_words
+{
+    @_ = ([@_]) if @_ && !ref($_[0]);
+    my @res;
+    for (@_) {
+	my @cur = @$_;
+	my @attr;
+	while (@cur) {
+	    my $k = shift @cur;
+	    my $v = shift @cur;
+	    if (defined $v) {
+		if ($v =~ /[\x00-\x20()<>@,;:\\\"\/\[\]?={}\x7F-\xFF]/ || !length($v)) {
+		    $v =~ s/([\"\\])/\\$1/g;  # escape " and \
+		    $k .= qq(="$v");
+		}
+		else {
+		    # token
+		    $k .= "=$v";
+		}
+	    }
+	    push(@attr, $k);
+	}
+	push(@res, join("; ", @attr)) if @attr;
+    }
+    join(", ", @res);
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+HTTP::Headers::Util - Header value parsing utility functions
+
+=head1 SYNOPSIS
+
+  use HTTP::Headers::Util qw(split_header_words);
+  @values = split_header_words($h->header("Content-Type"));
+
+=head1 DESCRIPTION
+
+This module provides a few functions that helps parsing and
+construction of valid HTTP header values.  None of the functions are
+exported by default.
+
+The following functions are available:
+
+=over 4
+
+
+=item split_header_words( @header_values )
+
+This function will parse the header values given as argument into a
+list of anonymous arrays containing key/value pairs.  The function
+knows how to deal with ",", ";" and "=" as well as quoted values after
+"=".  A list of space separated tokens are parsed as if they were
+separated by ";".
+
+If the @header_values passed as argument contains multiple values,
+then they are treated as if they were a single value separated by
+comma ",".
+
+This means that this function is useful for parsing header fields that
+follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
+the requirement for tokens).
+
+  headers           = #header
+  header            = (token | parameter) *( [";"] (token | parameter))
+
+  token             = 1*<any CHAR except CTLs or separators>
+  separators        = "(" | ")" | "<" | ">" | "@"
+                    | "," | ";" | ":" | "\" | <">
+                    | "/" | "[" | "]" | "?" | "="
+                    | "{" | "}" | SP | HT
+
+  quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
+  qdtext            = <any TEXT except <">>
+  quoted-pair       = "\" CHAR
+
+  parameter         = attribute "=" value
+  attribute         = token
+  value             = token | quoted-string
+
+Each I<header> is represented by an anonymous array of key/value
+pairs.  The keys will be all be forced to lower case.
+The value for a simple token (not part of a parameter) is C<undef>.
+Syntactically incorrect headers will not necessarily be parsed as you
+would want.
+
+This is easier to describe with some examples:
+
+   split_header_words('foo="bar"; port="80,81"; DISCARD, BAR=baz');
+   split_header_words('text/html; charset="iso-8859-1"');
+   split_header_words('Basic realm="\\"foo\\\\bar\\""');
+
+will return
+
+   [foo=>'bar', port=>'80,81', discard=> undef], [bar=>'baz' ]
+   ['text/html' => undef, charset => 'iso-8859-1']
+   [basic => undef, realm => "\"foo\\bar\""]
+
+If you don't want the function to convert tokens and attribute keys to
+lower case you can call it as C<_split_header_words> instead (with a
+leading underscore).
+
+=item join_header_words( @arrays )
+
+This will do the opposite of the conversion done by split_header_words().
+It takes a list of anonymous arrays as arguments (or a list of
+key/value pairs) and produces a single header value.  Attribute values
+are quoted if needed.
+
+Example:
+
+   join_header_words(["text/plain" => undef, charset => "iso-8859/1"]);
+   join_header_words("text/plain" => undef, charset => "iso-8859/1");
+
+will both return the string:
+
+   text/plain; charset="iso-8859/1"
+
+=back
+
+=head1 COPYRIGHT
+
+Copyright 1997-1998, Gisle Aas
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+