diff options
Diffstat (limited to 'lib/HTTP')
-rw-r--r-- | lib/HTTP/Config.pm | 438 | ||||
-rw-r--r-- | lib/HTTP/Headers.pm | 871 | ||||
-rw-r--r-- | lib/HTTP/Headers/Auth.pm | 100 | ||||
-rw-r--r-- | lib/HTTP/Headers/ETag.pm | 96 | ||||
-rw-r--r-- | lib/HTTP/Headers/Util.pm | 197 | ||||
-rw-r--r-- | lib/HTTP/Message.pm | 1114 | ||||
-rw-r--r-- | lib/HTTP/Request.pm | 241 | ||||
-rw-r--r-- | lib/HTTP/Request/Common.pm | 521 | ||||
-rw-r--r-- | lib/HTTP/Response.pm | 644 | ||||
-rw-r--r-- | lib/HTTP/Status.pm | 269 |
10 files changed, 4491 insertions, 0 deletions
diff --git a/lib/HTTP/Config.pm b/lib/HTTP/Config.pm new file mode 100644 index 0000000..e43775d --- /dev/null +++ b/lib/HTTP/Config.pm @@ -0,0 +1,438 @@ +package HTTP::Config; + +use strict; +use warnings; + +use URI; + +our $VERSION = "6.10"; + +sub new { + my $class = shift; + return bless [], $class; +} + +sub entries { + my $self = shift; + @$self; +} + +sub empty { + my $self = shift; + not @$self; +} + +sub add { + if (@_ == 2) { + my $self = shift; + push(@$self, shift); + return; + } + my($self, %spec) = @_; + push(@$self, \%spec); + return; +} + +sub find2 { + my($self, %spec) = @_; + my @found; + my @rest; + ITEM: + for my $item (@$self) { + for my $k (keys %spec) { + no warnings 'uninitialized'; + if (!exists $item->{$k} || $spec{$k} ne $item->{$k}) { + push(@rest, $item); + next ITEM; + } + } + push(@found, $item); + } + return \@found unless wantarray; + return \@found, \@rest; +} + +sub find { + my $self = shift; + my $f = $self->find2(@_); + return @$f if wantarray; + return $f->[0]; +} + +sub remove { + my($self, %spec) = @_; + my($removed, $rest) = $self->find2(%spec); + @$self = @$rest if @$removed; + return @$removed; +} + +my %MATCH = ( + m_scheme => sub { + my($v, $uri) = @_; + return $uri->_scheme eq $v; # URI known to be canonical + }, + m_secure => sub { + my($v, $uri) = @_; + my $secure = $uri->can("secure") ? $uri->secure : $uri->_scheme eq "https"; + return $secure == !!$v; + }, + m_host_port => sub { + my($v, $uri) = @_; + return unless $uri->can("host_port"); + return $uri->host_port eq $v, 7; + }, + m_host => sub { + my($v, $uri) = @_; + return unless $uri->can("host"); + return $uri->host eq $v, 6; + }, + m_port => sub { + my($v, $uri) = @_; + return unless $uri->can("port"); + return $uri->port eq $v; + }, + m_domain => sub { + my($v, $uri) = @_; + return unless $uri->can("host"); + my $h = $uri->host; + $h = "$h.local" unless $h =~ /\./; + $v = ".$v" unless $v =~ /^\./; + return length($v), 5 if substr($h, -length($v)) eq $v; + return 0; + }, + m_path => sub { + my($v, $uri) = @_; + return unless $uri->can("path"); + return $uri->path eq $v, 4; + }, + m_path_prefix => sub { + my($v, $uri) = @_; + return unless $uri->can("path"); + my $path = $uri->path; + my $len = length($v); + return $len, 3 if $path eq $v; + return 0 if length($path) <= $len; + $v .= "/" unless $v =~ m,/\z,,; + return $len, 3 if substr($path, 0, length($v)) eq $v; + return 0; + }, + m_path_match => sub { + my($v, $uri) = @_; + return unless $uri->can("path"); + return $uri->path =~ $v; + }, + m_uri__ => sub { + my($v, $k, $uri) = @_; + return unless $uri->can($k); + return 1 unless defined $v; + return $uri->$k eq $v; + }, + m_method => sub { + my($v, $uri, $request) = @_; + return $request && $request->method eq $v; + }, + m_proxy => sub { + my($v, $uri, $request) = @_; + return $request && ($request->{proxy} || "") eq $v; + }, + m_code => sub { + my($v, $uri, $request, $response) = @_; + $v =~ s/xx\z//; + return unless $response; + return length($v), 2 if substr($response->code, 0, length($v)) eq $v; + }, + m_media_type => sub { # for request too?? + my($v, $uri, $request, $response) = @_; + return unless $response; + return 1, 1 if $v eq "*/*"; + my $ct = $response->content_type; + return 2, 1 if $v =~ s,/\*\z,, && $ct =~ m,^\Q$v\E/,; + return 3, 1 if $v eq "html" && $response->content_is_html; + return 4, 1 if $v eq "xhtml" && $response->content_is_xhtml; + return 10, 1 if $v eq $ct; + return 0; + }, + m_header__ => sub { + my($v, $k, $uri, $request, $response) = @_; + return unless $request; + return 1 if $request->header($k) eq $v; + return 1 if $response && $response->header($k) eq $v; + return 0; + }, + m_response_attr__ => sub { + my($v, $k, $uri, $request, $response) = @_; + return unless $response; + return 1 if !defined($v) && exists $response->{$k}; + return 0 unless exists $response->{$k}; + return 1 if $response->{$k} eq $v; + return 0; + }, +); + +sub matching { + my $self = shift; + if (@_ == 1) { + if ($_[0]->can("request")) { + unshift(@_, $_[0]->request); + unshift(@_, undef) unless defined $_[0]; + } + unshift(@_, $_[0]->uri_canonical) if $_[0] && $_[0]->can("uri_canonical"); + } + my($uri, $request, $response) = @_; + $uri = URI->new($uri) unless ref($uri); + + my @m; + ITEM: + for my $item (@$self) { + my $order; + for my $ikey (keys %$item) { + my $mkey = $ikey; + my $k; + $k = $1 if $mkey =~ s/__(.*)/__/; + if (my $m = $MATCH{$mkey}) { + #print "$ikey $mkey\n"; + my($c, $o); + my @arg = ( + defined($k) ? $k : (), + $uri, $request, $response + ); + my $v = $item->{$ikey}; + $v = [$v] unless ref($v) eq "ARRAY"; + for (@$v) { + ($c, $o) = $m->($_, @arg); + #print " - $_ ==> $c $o\n"; + last if $c; + } + next ITEM unless $c; + $order->[$o || 0] += $c; + } + } + $order->[7] ||= 0; + $item->{_order} = join(".", reverse map sprintf("%03d", $_ || 0), @$order); + push(@m, $item); + } + @m = sort { $b->{_order} cmp $a->{_order} } @m; + delete $_->{_order} for @m; + return @m if wantarray; + return $m[0]; +} + +sub add_item { + my $self = shift; + my $item = shift; + return $self->add(item => $item, @_); +} + +sub remove_items { + my $self = shift; + return map $_->{item}, $self->remove(@_); +} + +sub matching_items { + my $self = shift; + return map $_->{item}, $self->matching(@_); +} + +1; + +__END__ + +=head1 NAME + +HTTP::Config - Configuration for request and response objects + +=head1 SYNOPSIS + + use HTTP::Config; + my $c = HTTP::Config->new; + $c->add(m_domain => ".example.com", m_scheme => "http", verbose => 1); + + use HTTP::Request; + my $request = HTTP::Request->new(GET => "http://www.example.com"); + + if (my @m = $c->matching($request)) { + print "Yadayada\n" if $m[0]->{verbose}; + } + +=head1 DESCRIPTION + +An C<HTTP::Config> object is a list of entries that +can be matched against request or request/response pairs. Its +purpose is to hold configuration data that can be looked up given a +request or response object. + +Each configuration entry is a hash. Some keys specify matching to +occur against attributes of request/response objects. Other keys can +be used to hold user data. + +The following methods are provided: + +=over 4 + +=item $conf = HTTP::Config->new + +Constructs a new empty C<HTTP::Config> object and returns it. + +=item $conf->entries + +Returns the list of entries in the configuration object. +In scalar context returns the number of entries. + +=item $conf->empty + +Return true if there are no entries in the configuration object. +This is just a shorthand for C<< not $conf->entries >>. + +=item $conf->add( %matchspec, %other ) + +=item $conf->add( \%entry ) + +Adds a new entry to the configuration. +You can either pass separate key/value pairs or a hash reference. + +=item $conf->remove( %spec ) + +Removes (and returns) the entries that have matches for all the key/value pairs in %spec. +If %spec is empty this will match all entries; so it will empty the configuation object. + +=item $conf->matching( $uri, $request, $response ) + +=item $conf->matching( $uri ) + +=item $conf->matching( $request ) + +=item $conf->matching( $response ) + +Returns the entries that match the given $uri, $request and $response triplet. + +If called with a single $request object then the $uri is obtained by calling its 'uri_canonical' method. +If called with a single $response object, then the request object is obtained by calling its 'request' method; +and then the $uri is obtained as if a single $request was provided. + +The entries are returned with the most specific matches first. +In scalar context returns the most specific match or C<undef> in none match. + +=item $conf->add_item( $item, %matchspec ) + +=item $conf->remove_items( %spec ) + +=item $conf->matching_items( $uri, $request, $response ) + +Wrappers that hides the entries themselves. + +=back + +=head2 Matching + +The following keys on a configuration entry specify matching. For all +of these you can provide an array of values instead of a single value. +The entry matches if at least one of the values in the array matches. + +Entries that require match against a response object attribute will never match +unless a response object was provided. + +=over + +=item m_scheme => $scheme + +Matches if the URI uses the specified scheme; e.g. "http". + +=item m_secure => $bool + +If $bool is TRUE; matches if the URI uses a secure scheme. If $bool +is FALSE; matches if the URI does not use a secure scheme. An example +of a secure scheme is "https". + +=item m_host_port => "$hostname:$port" + +Matches if the URI's host_port method return the specified value. + +=item m_host => $hostname + +Matches if the URI's host method returns the specified value. + +=item m_port => $port + +Matches if the URI's port method returns the specified value. + +=item m_domain => ".$domain" + +Matches if the URI's host method return a value that within the given +domain. The hostname "www.example.com" will for instance match the +domain ".com". + +=item m_path => $path + +Matches if the URI's path method returns the specified value. + +=item m_path_prefix => $path + +Matches if the URI's path is the specified path or has the specified +path as prefix. + +=item m_path_match => $Regexp + +Matches if the regular expression matches the URI's path. Eg. qr/\.html$/. + +=item m_method => $method + +Matches if the request method matches the specified value. Eg. "GET" or "POST". + +=item m_code => $digit + +=item m_code => $status_code + +Matches if the response status code matches. If a single digit is +specified; matches for all response status codes beginning with that digit. + +=item m_proxy => $url + +Matches if the request is to be sent to the given Proxy server. + +=item m_media_type => "*/*" + +=item m_media_type => "text/*" + +=item m_media_type => "html" + +=item m_media_type => "xhtml" + +=item m_media_type => "text/html" + +Matches if the response media type matches. + +With a value of "html" matches if $response->content_is_html returns TRUE. +With a value of "xhtml" matches if $response->content_is_xhtml returns TRUE. + +=item m_uri__I<$method> => undef + +Matches if the URI object provides the method. + +=item m_uri__I<$method> => $string + +Matches if the URI's $method method returns the given value. + +=item m_header__I<$field> => $string + +Matches if either the request or the response have a header $field with the given value. + +=item m_response_attr__I<$key> => undef + +=item m_response_attr__I<$key> => $string + +Matches if the response object has that key, or the entry has the given value. + +=back + +=head1 SEE ALSO + +L<URI>, L<HTTP::Request>, L<HTTP::Response> + +=head1 COPYRIGHT + +Copyright 2008, Gisle Aas + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=cut diff --git a/lib/HTTP/Headers.pm b/lib/HTTP/Headers.pm new file mode 100644 index 0000000..a53b232 --- /dev/null +++ b/lib/HTTP/Headers.pm @@ -0,0 +1,871 @@ +package HTTP::Headers; + +use strict; +use warnings; + +use Carp (); + +our $VERSION = "6.10"; + +# The $TRANSLATE_UNDERSCORE variable controls whether '_' can be used +# as a replacement for '-' in header field names. +our $TRANSLATE_UNDERSCORE = 1 unless defined $TRANSLATE_UNDERSCORE; + +# "Good Practice" order of HTTP message headers: +# - General-Headers +# - Request-Headers +# - Response-Headers +# - Entity-Headers + +my @general_headers = qw( + Cache-Control Connection Date Pragma Trailer Transfer-Encoding Upgrade + Via Warning +); + +my @request_headers = qw( + Accept Accept-Charset Accept-Encoding Accept-Language + Authorization Expect From Host + If-Match If-Modified-Since If-None-Match If-Range If-Unmodified-Since + Max-Forwards Proxy-Authorization Range Referer TE User-Agent +); + +my @response_headers = qw( + Accept-Ranges Age ETag Location Proxy-Authenticate Retry-After Server + Vary WWW-Authenticate +); + +my @entity_headers = qw( + Allow Content-Encoding Content-Language Content-Length Content-Location + Content-MD5 Content-Range Content-Type Expires Last-Modified +); + +my %entity_header = map { lc($_) => 1 } @entity_headers; + +my @header_order = ( + @general_headers, + @request_headers, + @response_headers, + @entity_headers, +); + +# Make alternative representations of @header_order. This is used +# for sorting and case matching. +my %header_order; +my %standard_case; + +{ + my $i = 0; + for (@header_order) { + my $lc = lc $_; + $header_order{$lc} = ++$i; + $standard_case{$lc} = $_; + } +} + + + +sub new +{ + my($class) = shift; + my $self = bless {}, $class; + $self->header(@_) if @_; # set up initial headers + $self; +} + + +sub header +{ + my $self = shift; + Carp::croak('Usage: $h->header($field, ...)') unless @_; + my(@old); + my %seen; + while (@_) { + my $field = shift; + my $op = @_ ? ($seen{lc($field)}++ ? 'PUSH' : 'SET') : 'GET'; + @old = $self->_header($field, shift, $op); + } + return @old if wantarray; + return $old[0] if @old <= 1; + join(", ", @old); +} + +sub clear +{ + my $self = shift; + %$self = (); +} + + +sub push_header +{ + my $self = shift; + return $self->_header(@_, 'PUSH_H') if @_ == 2; + while (@_) { + $self->_header(splice(@_, 0, 2), 'PUSH_H'); + } +} + + +sub init_header +{ + Carp::croak('Usage: $h->init_header($field, $val)') if @_ != 3; + shift->_header(@_, 'INIT'); +} + + +sub remove_header +{ + my($self, @fields) = @_; + my $field; + my @values; + foreach $field (@fields) { + $field =~ tr/_/-/ if $field !~ /^:/ && $TRANSLATE_UNDERSCORE; + my $v = delete $self->{lc $field}; + push(@values, ref($v) eq 'ARRAY' ? @$v : $v) if defined $v; + } + return @values; +} + +sub remove_content_headers +{ + my $self = shift; + unless (defined(wantarray)) { + # fast branch that does not create return object + delete @$self{grep $entity_header{$_} || /^content-/, keys %$self}; + return; + } + + my $c = ref($self)->new; + for my $f (grep $entity_header{$_} || /^content-/, keys %$self) { + $c->{$f} = delete $self->{$f}; + } + if (exists $self->{'::std_case'}) { + $c->{'::std_case'} = $self->{'::std_case'}; + } + $c; +} + + +sub _header +{ + my($self, $field, $val, $op) = @_; + + Carp::croak("Illegal field name '$field'") + if rindex($field, ':') > 1 || !length($field); + + unless ($field =~ /^:/) { + $field =~ tr/_/-/ if $TRANSLATE_UNDERSCORE; + my $old = $field; + $field = lc $field; + unless($standard_case{$field} || $self->{'::std_case'}{$field}) { + # generate a %std_case entry for this field + $old =~ s/\b(\w)/\u$1/g; + $self->{'::std_case'}{$field} = $old; + } + } + + $op ||= defined($val) ? 'SET' : 'GET'; + if ($op eq 'PUSH_H') { + # Like PUSH but where we don't care about the return value + if (exists $self->{$field}) { + my $h = $self->{$field}; + if (ref($h) eq 'ARRAY') { + push(@$h, ref($val) eq "ARRAY" ? @$val : $val); + } + else { + $self->{$field} = [$h, ref($val) eq "ARRAY" ? @$val : $val] + } + return; + } + $self->{$field} = $val; + return; + } + + my $h = $self->{$field}; + my @old = ref($h) eq 'ARRAY' ? @$h : (defined($h) ? ($h) : ()); + + unless ($op eq 'GET' || ($op eq 'INIT' && @old)) { + if (defined($val)) { + my @new = ($op eq 'PUSH') ? @old : (); + if (ref($val) ne 'ARRAY') { + push(@new, $val); + } + else { + push(@new, @$val); + } + $self->{$field} = @new > 1 ? \@new : $new[0]; + } + elsif ($op ne 'PUSH') { + delete $self->{$field}; + } + } + @old; +} + + +sub _sorted_field_names +{ + my $self = shift; + return [ sort { + ($header_order{$a} || 999) <=> ($header_order{$b} || 999) || + $a cmp $b + } grep !/^::/, keys %$self ]; +} + + +sub header_field_names { + my $self = shift; + return map $standard_case{$_} || $self->{'::std_case'}{$_} || $_, @{ $self->_sorted_field_names }, + if wantarray; + return grep !/^::/, keys %$self; +} + + +sub scan +{ + my($self, $sub) = @_; + my $key; + for $key (@{ $self->_sorted_field_names }) { + my $vals = $self->{$key}; + if (ref($vals) eq 'ARRAY') { + my $val; + for $val (@$vals) { + $sub->($standard_case{$key} || $self->{'::std_case'}{$key} || $key, $val); + } + } + else { + $sub->($standard_case{$key} || $self->{'::std_case'}{$key} || $key, $vals); + } + } +} + +sub flatten { + my($self)=@_; + + ( + map { + my $k = $_; + map { + ( $k => $_ ) + } $self->header($_); + } $self->header_field_names + ); +} + +sub as_string +{ + my($self, $endl) = @_; + $endl = "\n" unless defined $endl; + + my @result = (); + for my $key (@{ $self->_sorted_field_names }) { + next if index($key, '_') == 0; + my $vals = $self->{$key}; + if ( ref($vals) eq 'ARRAY' ) { + for my $val (@$vals) { + my $field = $standard_case{$key} || $self->{'::std_case'}{$key} || $key; + $field =~ s/^://; + if ( index($val, "\n") >= 0 ) { + $val = _process_newline($val, $endl); + } + push @result, $field . ': ' . $val; + } + } + else { + my $field = $standard_case{$key} || $self->{'::std_case'}{$key} || $key; + $field =~ s/^://; + if ( index($vals, "\n") >= 0 ) { + $vals = _process_newline($vals, $endl); + } + push @result, $field . ': ' . $vals; + } + } + + join($endl, @result, ''); +} + +sub _process_newline { + local $_ = shift; + my $endl = shift; + # must handle header values with embedded newlines with care + s/\s+$//; # trailing newlines and space must go + s/\n(\x0d?\n)+/\n/g; # no empty lines + s/\n([^\040\t])/\n $1/g; # initial space for continuation + s/\n/$endl/g; # substitute with requested line ending + $_; +} + + + +if (eval { require Storable; 1 }) { + *clone = \&Storable::dclone; +} else { + *clone = sub { + my $self = shift; + my $clone = HTTP::Headers->new; + $self->scan(sub { $clone->push_header(@_);} ); + $clone; + }; +} + + +sub _date_header +{ + require HTTP::Date; + my($self, $header, $time) = @_; + my($old) = $self->_header($header); + if (defined $time) { + $self->_header($header, HTTP::Date::time2str($time)); + } + $old =~ s/;.*// if defined($old); + HTTP::Date::str2time($old); +} + + +sub date { shift->_date_header('Date', @_); } +sub expires { shift->_date_header('Expires', @_); } +sub if_modified_since { shift->_date_header('If-Modified-Since', @_); } +sub if_unmodified_since { shift->_date_header('If-Unmodified-Since', @_); } +sub last_modified { shift->_date_header('Last-Modified', @_); } + +# This is used as a private LWP extension. The Client-Date header is +# added as a timestamp to a response when it has been received. +sub client_date { shift->_date_header('Client-Date', @_); } + +# The retry_after field is dual format (can also be a expressed as +# number of seconds from now), so we don't provide an easy way to +# access it until we have know how both these interfaces can be +# addressed. One possibility is to return a negative value for +# relative seconds and a positive value for epoch based time values. +#sub retry_after { shift->_date_header('Retry-After', @_); } + +sub content_type { + my $self = shift; + my $ct = $self->{'content-type'}; + $self->{'content-type'} = shift if @_; + $ct = $ct->[0] if ref($ct) eq 'ARRAY'; + return '' unless defined($ct) && length($ct); + my @ct = split(/;\s*/, $ct, 2); + for ($ct[0]) { + s/\s+//g; + $_ = lc($_); + } + wantarray ? @ct : $ct[0]; +} + +sub content_type_charset { + my $self = shift; + require HTTP::Headers::Util; + my $h = $self->{'content-type'}; + $h = $h->[0] if ref($h); + $h = "" unless defined $h; + my @v = HTTP::Headers::Util::split_header_words($h); + if (@v) { + my($ct, undef, %ct_param) = @{$v[0]}; + my $charset = $ct_param{charset}; + if ($ct) { + $ct = lc($ct); + $ct =~ s/\s+//; + } + if ($charset) { + $charset = uc($charset); + $charset =~ s/^\s+//; $charset =~ s/\s+\z//; + undef($charset) if $charset eq ""; + } + return $ct, $charset if wantarray; + return $charset; + } + return undef, undef if wantarray; + return undef; +} + +sub content_is_text { + my $self = shift; + return $self->content_type =~ m,^text/,; +} + +sub content_is_html { + my $self = shift; + return $self->content_type eq 'text/html' || $self->content_is_xhtml; +} + +sub content_is_xhtml { + my $ct = shift->content_type; + return $ct eq "application/xhtml+xml" || + $ct eq "application/vnd.wap.xhtml+xml"; +} + +sub content_is_xml { + my $ct = shift->content_type; + return 1 if $ct eq "text/xml"; + return 1 if $ct eq "application/xml"; + return 1 if $ct =~ /\+xml$/; + return 0; +} + +sub referer { + my $self = shift; + if (@_ && $_[0] =~ /#/) { + # Strip fragment per RFC 2616, section 14.36. + my $uri = shift; + if (ref($uri)) { + $uri = $uri->clone; + $uri->fragment(undef); + } + else { + $uri =~ s/\#.*//; + } + unshift @_, $uri; + } + ($self->_header('Referer', @_))[0]; +} +*referrer = \&referer; # on tchrist's request + +sub title { (shift->_header('Title', @_))[0] } +sub content_encoding { (shift->_header('Content-Encoding', @_))[0] } +sub content_language { (shift->_header('Content-Language', @_))[0] } +sub content_length { (shift->_header('Content-Length', @_))[0] } + +sub user_agent { (shift->_header('User-Agent', @_))[0] } +sub server { (shift->_header('Server', @_))[0] } + +sub from { (shift->_header('From', @_))[0] } +sub warning { (shift->_header('Warning', @_))[0] } + +sub www_authenticate { (shift->_header('WWW-Authenticate', @_))[0] } +sub authorization { (shift->_header('Authorization', @_))[0] } + +sub proxy_authenticate { (shift->_header('Proxy-Authenticate', @_))[0] } +sub proxy_authorization { (shift->_header('Proxy-Authorization', @_))[0] } + +sub authorization_basic { shift->_basic_auth("Authorization", @_) } +sub proxy_authorization_basic { shift->_basic_auth("Proxy-Authorization", @_) } + +sub _basic_auth { + require MIME::Base64; + my($self, $h, $user, $passwd) = @_; + my($old) = $self->_header($h); + if (defined $user) { + Carp::croak("Basic authorization user name can't contain ':'") + if $user =~ /:/; + $passwd = '' unless defined $passwd; + $self->_header($h => 'Basic ' . + MIME::Base64::encode("$user:$passwd", '')); + } + if (defined $old && $old =~ s/^\s*Basic\s+//) { + my $val = MIME::Base64::decode($old); + return $val unless wantarray; + return split(/:/, $val, 2); + } + return; +} + + +1; + +__END__ + +=head1 NAME + +HTTP::Headers - Class encapsulating HTTP Message headers + +=head1 SYNOPSIS + + require HTTP::Headers; + $h = HTTP::Headers->new; + + $h->header('Content-Type' => 'text/plain'); # set + $ct = $h->header('Content-Type'); # get + $h->remove_header('Content-Type'); # delete + +=head1 DESCRIPTION + +The C<HTTP::Headers> class encapsulates HTTP-style message headers. +The headers consist of attribute-value pairs also called fields, which +may be repeated, and which are printed in a particular order. The +field names are cases insensitive. + +Instances of this class are usually created as member variables of the +C<HTTP::Request> and C<HTTP::Response> classes, internal to the +library. + +The following methods are available: + +=over 4 + +=item $h = HTTP::Headers->new + +Constructs a new C<HTTP::Headers> object. You might pass some initial +attribute-value pairs as parameters to the constructor. I<E.g.>: + + $h = HTTP::Headers->new( + Date => 'Thu, 03 Feb 1994 00:00:00 GMT', + Content_Type => 'text/html; version=3.2', + Content_Base => 'http://www.perl.org/'); + +The constructor arguments are passed to the C<header> method which is +described below. + +=item $h->clone + +Returns a copy of this C<HTTP::Headers> object. + +=item $h->header( $field ) + +=item $h->header( $field => $value ) + +=item $h->header( $f1 => $v1, $f2 => $v2, ... ) + +Get or set the value of one or more header fields. The header field +name ($field) is not case sensitive. To make the life easier for perl +users who wants to avoid quoting before the => operator, you can use +'_' as a replacement for '-' in header names. + +The header() method accepts multiple ($field => $value) pairs, which +means that you can update several fields with a single invocation. + +The $value argument may be a plain string or a reference to an array +of strings for a multi-valued field. If the $value is provided as +C<undef> then the field is removed. If the $value is not given, then +that header field will remain unchanged. + +The old value (or values) of the last of the header fields is returned. +If no such field exists C<undef> will be returned. + +A multi-valued field will be returned as separate values in list +context and will be concatenated with ", " as separator in scalar +context. The HTTP spec (RFC 2616) promises that joining multiple +values in this way will not change the semantic of a header field, but +in practice there are cases like old-style Netscape cookies (see +L<HTTP::Cookies>) where "," is used as part of the syntax of a single +field value. + +Examples: + + $header->header(MIME_Version => '1.0', + User_Agent => 'My-Web-Client/0.01'); + $header->header(Accept => "text/html, text/plain, image/*"); + $header->header(Accept => [qw(text/html text/plain image/*)]); + @accepts = $header->header('Accept'); # get multiple values + $accepts = $header->header('Accept'); # get values as a single string + +=item $h->push_header( $field => $value ) + +=item $h->push_header( $f1 => $v1, $f2 => $v2, ... ) + +Add a new field value for the specified header field. Previous values +for the same field are retained. + +As for the header() method, the field name ($field) is not case +sensitive and '_' can be used as a replacement for '-'. + +The $value argument may be a scalar or a reference to a list of +scalars. + + $header->push_header(Accept => 'image/jpeg'); + $header->push_header(Accept => [map "image/$_", qw(gif png tiff)]); + +=item $h->init_header( $field => $value ) + +Set the specified header to the given value, but only if no previous +value for that field is set. + +The header field name ($field) is not case sensitive and '_' +can be used as a replacement for '-'. + +The $value argument may be a scalar or a reference to a list of +scalars. + +=item $h->remove_header( $field, ... ) + +This function removes the header fields with the specified names. + +The header field names ($field) are not case sensitive and '_' +can be used as a replacement for '-'. + +The return value is the values of the fields removed. In scalar +context the number of fields removed is returned. + +Note that if you pass in multiple field names then it is generally not +possible to tell which of the returned values belonged to which field. + +=item $h->remove_content_headers + +This will remove all the header fields used to describe the content of +a message. All header field names prefixed with C<Content-> fall +into this category, as well as C<Allow>, C<Expires> and +C<Last-Modified>. RFC 2616 denotes these fields as I<Entity Header +Fields>. + +The return value is a new C<HTTP::Headers> object that contains the +removed headers only. + +=item $h->clear + +This will remove all header fields. + +=item $h->header_field_names + +Returns the list of distinct names for the fields present in the +header. The field names have case as suggested by HTTP spec, and the +names are returned in the recommended "Good Practice" order. + +In scalar context return the number of distinct field names. + +=item $h->scan( \&process_header_field ) + +Apply a subroutine to each header field in turn. The callback routine +is called with two parameters; the name of the field and a single +value (a string). If a header field is multi-valued, then the +routine is called once for each value. The field name passed to the +callback routine has case as suggested by HTTP spec, and the headers +will be visited in the recommended "Good Practice" order. + +Any return values of the callback routine are ignored. The loop can +be broken by raising an exception (C<die>), but the caller of scan() +would have to trap the exception itself. + +=item $h->flatten() + +Returns the list of pairs of keys and values. + +=item $h->as_string + +=item $h->as_string( $eol ) + +Return the header fields as a formatted MIME header. Since it +internally uses the C<scan> method to build the string, the result +will use case as suggested by HTTP spec, and it will follow +recommended "Good Practice" of ordering the header fields. Long header +values are not folded. + +The optional $eol parameter specifies the line ending sequence to +use. The default is "\n". Embedded "\n" characters in header field +values will be substituted with this line ending sequence. + +=back + +=head1 CONVENIENCE METHODS + +The most frequently used headers can also be accessed through the +following convenience methods. Most of these methods can both be used to read +and to set the value of a header. The header value is set if you pass +an argument to the method. The old header value is always returned. +If the given header did not exist then C<undef> is returned. + +Methods that deal with dates/times always convert their value to system +time (seconds since Jan 1, 1970) and they also expect this kind of +value when the header value is set. + +=over 4 + +=item $h->date + +This header represents the date and time at which the message was +originated. I<E.g.>: + + $h->date(time); # set current date + +=item $h->expires + +This header gives the date and time after which the entity should be +considered stale. + +=item $h->if_modified_since + +=item $h->if_unmodified_since + +These header fields are used to make a request conditional. If the requested +resource has (or has not) been modified since the time specified in this field, +then the server will return a C<304 Not Modified> response instead of +the document itself. + +=item $h->last_modified + +This header indicates the date and time at which the resource was last +modified. I<E.g.>: + + # check if document is more than 1 hour old + if (my $last_mod = $h->last_modified) { + if ($last_mod < time - 60*60) { + ... + } + } + +=item $h->content_type + +The Content-Type header field indicates the media type of the message +content. I<E.g.>: + + $h->content_type('text/html'); + +The value returned will be converted to lower case, and potential +parameters will be chopped off and returned as a separate value if in +an array context. If there is no such header field, then the empty +string is returned. This makes it safe to do the following: + + if ($h->content_type eq 'text/html') { + # we enter this place even if the real header value happens to + # be 'TEXT/HTML; version=3.0' + ... + } + +=item $h->content_type_charset + +Returns the upper-cased charset specified in the Content-Type header. In list +context return the lower-cased bare content type followed by the upper-cased +charset. Both values will be C<undef> if not specified in the header. + +=item $h->content_is_text + +Returns TRUE if the Content-Type header field indicate that the +content is textual. + +=item $h->content_is_html + +Returns TRUE if the Content-Type header field indicate that the +content is some kind of HTML (including XHTML). This method can't be +used to set Content-Type. + +=item $h->content_is_xhtml + +Returns TRUE if the Content-Type header field indicate that the +content is XHTML. This method can't be used to set Content-Type. + +=item $h->content_is_xml + +Returns TRUE if the Content-Type header field indicate that the +content is XML. This method can't be used to set Content-Type. + +=item $h->content_encoding + +The Content-Encoding header field is used as a modifier to the +media type. When present, its value indicates what additional +encoding mechanism has been applied to the resource. + +=item $h->content_length + +A decimal number indicating the size in bytes of the message content. + +=item $h->content_language + +The natural language(s) of the intended audience for the message +content. The value is one or more language tags as defined by RFC +1766. Eg. "no" for some kind of Norwegian and "en-US" for English the +way it is written in the US. + +=item $h->title + +The title of the document. In libwww-perl this header will be +initialized automatically from the E<lt>TITLE>...E<lt>/TITLE> element +of HTML documents. I<This header is no longer part of the HTTP +standard.> + +=item $h->user_agent + +This header field is used in request messages and contains information +about the user agent originating the request. I<E.g.>: + + $h->user_agent('Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 6.0)'); + +=item $h->server + +The server header field contains information about the software being +used by the originating server program handling the request. + +=item $h->from + +This header should contain an Internet e-mail address for the human +user who controls the requesting user agent. The address should be +machine-usable, as defined by RFC822. E.g.: + + $h->from('King Kong <king@kong.com>'); + +I<This header is no longer part of the HTTP standard.> + +=item $h->referer + +Used to specify the address (URI) of the document from which the +requested resource address was obtained. + +The "Free On-line Dictionary of Computing" as this to say about the +word I<referer>: + + <World-Wide Web> A misspelling of "referrer" which + somehow made it into the {HTTP} standard. A given {web + page}'s referer (sic) is the {URL} of whatever web page + contains the link that the user followed to the current + page. Most browsers pass this information as part of a + request. + + (1998-10-19) + +By popular demand C<referrer> exists as an alias for this method so you +can avoid this misspelling in your programs and still send the right +thing on the wire. + +When setting the referrer, this method removes the fragment from the +given URI if it is present, as mandated by RFC2616. Note that +the removal does I<not> happen automatically if using the header(), +push_header() or init_header() methods to set the referrer. + +=item $h->www_authenticate + +This header must be included as part of a C<401 Unauthorized> response. +The field value consist of a challenge that indicates the +authentication scheme and parameters applicable to the requested URI. + +=item $h->proxy_authenticate + +This header must be included in a C<407 Proxy Authentication Required> +response. + +=item $h->authorization + +=item $h->proxy_authorization + +A user agent that wishes to authenticate itself with a server or a +proxy, may do so by including these headers. + +=item $h->authorization_basic + +This method is used to get or set an authorization header that use the +"Basic Authentication Scheme". In array context it will return two +values; the user name and the password. In scalar context it will +return I<"uname:password"> as a single string value. + +When used to set the header value, it expects two arguments. I<E.g.>: + + $h->authorization_basic($uname, $password); + +The method will croak if the $uname contains a colon ':'. + +=item $h->proxy_authorization_basic + +Same as authorization_basic() but will set the "Proxy-Authorization" +header instead. + +=back + +=head1 NON-CANONICALIZED FIELD NAMES + +The header field name spelling is normally canonicalized including the +'_' to '-' translation. There are some application where this is not +appropriate. Prefixing field names with ':' allow you to force a +specific spelling. For example if you really want a header field name +to show up as C<foo_bar> instead of "Foo-Bar", you might set it like +this: + + $h->header(":foo_bar" => 1); + +These field names are returned with the ':' intact for +$h->header_field_names and the $h->scan callback, but the colons do +not show in $h->as_string. + +=head1 COPYRIGHT + +Copyright 1995-2005 Gisle Aas. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + diff --git a/lib/HTTP/Headers/Auth.pm b/lib/HTTP/Headers/Auth.pm new file mode 100644 index 0000000..9af4509 --- /dev/null +++ b/lib/HTTP/Headers/Auth.pm @@ -0,0 +1,100 @@ +package HTTP::Headers::Auth; + +use strict; +use warnings; + +our $VERSION = "6.10"; + +use HTTP::Headers; + +package + HTTP::Headers; + +BEGIN { + # we provide a new (and better) implementations below + undef(&www_authenticate); + undef(&proxy_authenticate); +} + +require HTTP::Headers::Util; + +sub _parse_authenticate +{ + my @ret; + for (HTTP::Headers::Util::split_header_words(@_)) { + if (!defined($_->[1])) { + # this is a new auth scheme + push(@ret, shift(@$_) => {}); + shift @$_; + } + if (@ret) { + # this a new parameter pair for the last auth scheme + while (@$_) { + my $k = shift @$_; + my $v = shift @$_; + $ret[-1]{$k} = $v; + } + } + else { + # something wrong, parameter pair without any scheme seen + # IGNORE + } + } + @ret; +} + +sub _authenticate +{ + my $self = shift; + my $header = shift; + my @old = $self->_header($header); + if (@_) { + $self->remove_header($header); + my @new = @_; + while (@new) { + my $a_scheme = shift(@new); + if ($a_scheme =~ /\s/) { + # assume complete valid value, pass it through + $self->push_header($header, $a_scheme); + } + else { + my @param; + if (@new) { + my $p = $new[0]; + if (ref($p) eq "ARRAY") { + @param = @$p; + shift(@new); + } + elsif (ref($p) eq "HASH") { + @param = %$p; + shift(@new); + } + } + my $val = ucfirst(lc($a_scheme)); + if (@param) { + my $sep = " "; + while (@param) { + my $k = shift @param; + my $v = shift @param; + if ($v =~ /[^0-9a-zA-Z]/ || lc($k) eq "realm") { + # must quote the value + $v =~ s,([\\\"]),\\$1,g; + $v = qq("$v"); + } + $val .= "$sep$k=$v"; + $sep = ", "; + } + } + $self->push_header($header, $val); + } + } + } + return unless defined wantarray; + wantarray ? _parse_authenticate(@old) : join(", ", @old); +} + + +sub www_authenticate { shift->_authenticate("WWW-Authenticate", @_) } +sub proxy_authenticate { shift->_authenticate("Proxy-Authenticate", @_) } + +1; diff --git a/lib/HTTP/Headers/ETag.pm b/lib/HTTP/Headers/ETag.pm new file mode 100644 index 0000000..2531668 --- /dev/null +++ b/lib/HTTP/Headers/ETag.pm @@ -0,0 +1,96 @@ +package HTTP::Headers::ETag; + +use strict; +use warnings; + +our $VERSION = "6.10"; + +require HTTP::Date; + +require HTTP::Headers; +package + HTTP::Headers; + +sub _etags +{ + my $self = shift; + my $header = shift; + my @old = _split_etag_list($self->_header($header)); + if (@_) { + $self->_header($header => join(", ", _split_etag_list(@_))); + } + wantarray ? @old : join(", ", @old); +} + +sub etag { shift->_etags("ETag", @_); } +sub if_match { shift->_etags("If-Match", @_); } +sub if_none_match { shift->_etags("If-None-Match", @_); } + +sub if_range { + # Either a date or an entity-tag + my $self = shift; + my @old = $self->_header("If-Range"); + if (@_) { + my $new = shift; + if (!defined $new) { + $self->remove_header("If-Range"); + } + elsif ($new =~ /^\d+$/) { + $self->_date_header("If-Range", $new); + } + else { + $self->_etags("If-Range", $new); + } + } + return unless defined(wantarray); + for (@old) { + my $t = HTTP::Date::str2time($_); + $_ = $t if $t; + } + wantarray ? @old : join(", ", @old); +} + + +# Split a list of entity tag values. The return value is a list +# consisting of one element per entity tag. Suitable for parsing +# headers like C<If-Match>, C<If-None-Match>. You might even want to +# use it on C<ETag> and C<If-Range> entity tag values, because it will +# normalize them to the common form. +# +# entity-tag = [ weak ] opaque-tag +# weak = "W/" +# opaque-tag = quoted-string + + +sub _split_etag_list +{ + my(@val) = @_; + my @res; + for (@val) { + while (length) { + my $weak = ""; + $weak = "W/" if s,^\s*[wW]/,,; + my $etag = ""; + if (s/^\s*(\"[^\"\\]*(?:\\.[^\"\\]*)*\")//) { + push(@res, "$weak$1"); + } + elsif (s/^\s*,//) { + push(@res, qq(W/"")) if $weak; + } + elsif (s/^\s*([^,\s]+)//) { + $etag = $1; + $etag =~ s/([\"\\])/\\$1/g; + push(@res, qq($weak"$etag")); + } + elsif (s/^\s+// || !length) { + push(@res, qq(W/"")) if $weak; + } + else { + die "This should not happen: '$_'"; + } + } + } + @res; +} + +1; diff --git a/lib/HTTP/Headers/Util.pm b/lib/HTTP/Headers/Util.pm new file mode 100644 index 0000000..6e90eaf --- /dev/null +++ b/lib/HTTP/Headers/Util.pm @@ -0,0 +1,197 @@ +package HTTP::Headers::Util; + +use strict; +use warnings; + +our $VERSION = "6.10"; + +use base 'Exporter'; + +our @EXPORT_OK=qw(split_header_words _split_header_words join_header_words); + + +sub split_header_words { + my @res = &_split_header_words; + for my $arr (@res) { + for (my $i = @$arr - 2; $i >= 0; $i -= 2) { + $arr->[$i] = lc($arr->[$i]); + } + } + return @res; +} + +sub _split_header_words +{ + my(@val) = @_; + my @res; + for (@val) { + my @cur; + while (length) { + if (s/^\s*(=*[^\s=;,]+)//) { # 'token' or parameter 'attribute' + push(@cur, $1); + # a quoted value + if (s/^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"//) { + my $val = $1; + $val =~ s/\\(.)/$1/g; + push(@cur, $val); + # some unquoted value + } + elsif (s/^\s*=\s*([^;,\s]*)//) { + my $val = $1; + $val =~ s/\s+$//; + push(@cur, $val); + # no value, a lone token + } + else { + push(@cur, undef); + } + } + elsif (s/^\s*,//) { + push(@res, [@cur]) if @cur; + @cur = (); + } + elsif (s/^\s*;// || s/^\s+//) { + # continue + } + else { + die "This should not happen: '$_'"; + } + } + push(@res, \@cur) if @cur; + } + @res; +} + + +sub join_header_words +{ + @_ = ([@_]) if @_ && !ref($_[0]); + my @res; + for (@_) { + my @cur = @$_; + my @attr; + while (@cur) { + my $k = shift @cur; + my $v = shift @cur; + if (defined $v) { + if ($v =~ /[\x00-\x20()<>@,;:\\\"\/\[\]?={}\x7F-\xFF]/ || !length($v)) { + $v =~ s/([\"\\])/\\$1/g; # escape " and \ + $k .= qq(="$v"); + } + else { + # token + $k .= "=$v"; + } + } + push(@attr, $k); + } + push(@res, join("; ", @attr)) if @attr; + } + join(", ", @res); +} + + +1; + +__END__ + +=head1 NAME + +HTTP::Headers::Util - Header value parsing utility functions + +=head1 SYNOPSIS + + use HTTP::Headers::Util qw(split_header_words); + @values = split_header_words($h->header("Content-Type")); + +=head1 DESCRIPTION + +This module provides a few functions that helps parsing and +construction of valid HTTP header values. None of the functions are +exported by default. + +The following functions are available: + +=over 4 + + +=item split_header_words( @header_values ) + +This function will parse the header values given as argument into a +list of anonymous arrays containing key/value pairs. The function +knows how to deal with ",", ";" and "=" as well as quoted values after +"=". A list of space separated tokens are parsed as if they were +separated by ";". + +If the @header_values passed as argument contains multiple values, +then they are treated as if they were a single value separated by +comma ",". + +This means that this function is useful for parsing header fields that +follow this syntax (BNF as from the HTTP/1.1 specification, but we relax +the requirement for tokens). + + headers = #header + header = (token | parameter) *( [";"] (token | parameter)) + + token = 1*<any CHAR except CTLs or separators> + separators = "(" | ")" | "<" | ">" | "@" + | "," | ";" | ":" | "\" | <"> + | "/" | "[" | "]" | "?" | "=" + | "{" | "}" | SP | HT + + quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) + qdtext = <any TEXT except <">> + quoted-pair = "\" CHAR + + parameter = attribute "=" value + attribute = token + value = token | quoted-string + +Each I<header> is represented by an anonymous array of key/value +pairs. The keys will be all be forced to lower case. +The value for a simple token (not part of a parameter) is C<undef>. +Syntactically incorrect headers will not necessarily be parsed as you +would want. + +This is easier to describe with some examples: + + split_header_words('foo="bar"; port="80,81"; DISCARD, BAR=baz'); + split_header_words('text/html; charset="iso-8859-1"'); + split_header_words('Basic realm="\\"foo\\\\bar\\""'); + +will return + + [foo=>'bar', port=>'80,81', discard=> undef], [bar=>'baz' ] + ['text/html' => undef, charset => 'iso-8859-1'] + [basic => undef, realm => "\"foo\\bar\""] + +If you don't want the function to convert tokens and attribute keys to +lower case you can call it as C<_split_header_words> instead (with a +leading underscore). + +=item join_header_words( @arrays ) + +This will do the opposite of the conversion done by split_header_words(). +It takes a list of anonymous arrays as arguments (or a list of +key/value pairs) and produces a single header value. Attribute values +are quoted if needed. + +Example: + + join_header_words(["text/plain" => undef, charset => "iso-8859/1"]); + join_header_words("text/plain" => undef, charset => "iso-8859/1"); + +will both return the string: + + text/plain; charset="iso-8859/1" + +=back + +=head1 COPYRIGHT + +Copyright 1997-1998, Gisle Aas + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + diff --git a/lib/HTTP/Message.pm b/lib/HTTP/Message.pm new file mode 100644 index 0000000..5f38324 --- /dev/null +++ b/lib/HTTP/Message.pm @@ -0,0 +1,1114 @@ +package HTTP::Message; + +use strict; +use warnings; + +our $VERSION = "6.10"; + +require HTTP::Headers; +require Carp; + +my $CRLF = "\015\012"; # "\r\n" is not portable +unless ($HTTP::URI_CLASS) { + if ($ENV{PERL_HTTP_URI_CLASS} + && $ENV{PERL_HTTP_URI_CLASS} =~ /^([\w:]+)$/) { + $HTTP::URI_CLASS = $1; + } else { + $HTTP::URI_CLASS = "URI"; + } +} +eval "require $HTTP::URI_CLASS"; die $@ if $@; + +*_utf8_downgrade = defined(&utf8::downgrade) ? + sub { + utf8::downgrade($_[0], 1) or + Carp::croak("HTTP::Message content must be bytes") + } + : + sub { + }; + +sub new +{ + my($class, $header, $content) = @_; + if (defined $header) { + Carp::croak("Bad header argument") unless ref $header; + if (ref($header) eq "ARRAY") { + $header = HTTP::Headers->new(@$header); + } + else { + $header = $header->clone; + } + } + else { + $header = HTTP::Headers->new; + } + if (defined $content) { + _utf8_downgrade($content); + } + else { + $content = ''; + } + + bless { + '_headers' => $header, + '_content' => $content, + }, $class; +} + + +sub parse +{ + my($class, $str) = @_; + + my @hdr; + while (1) { + if ($str =~ s/^([^\s:]+)[ \t]*: ?(.*)\n?//) { + push(@hdr, $1, $2); + $hdr[-1] =~ s/\r\z//; + } + elsif (@hdr && $str =~ s/^([ \t].*)\n?//) { + $hdr[-1] .= "\n$1"; + $hdr[-1] =~ s/\r\z//; + } + else { + $str =~ s/^\r?\n//; + last; + } + } + local $HTTP::Headers::TRANSLATE_UNDERSCORE; + new($class, \@hdr, $str); +} + + +sub clone +{ + my $self = shift; + my $clone = HTTP::Message->new($self->headers, + $self->content); + $clone->protocol($self->protocol); + $clone; +} + + +sub clear { + my $self = shift; + $self->{_headers}->clear; + $self->content(""); + delete $self->{_parts}; + return; +} + + +sub protocol { + shift->_elem('_protocol', @_); +} + +sub headers { + my $self = shift; + + # recalculation of _content might change headers, so we + # need to force it now + $self->_content unless exists $self->{_content}; + + $self->{_headers}; +} + +sub headers_as_string { + shift->headers->as_string(@_); +} + + +sub content { + + my $self = $_[0]; + if (defined(wantarray)) { + $self->_content unless exists $self->{_content}; + my $old = $self->{_content}; + $old = $$old if ref($old) eq "SCALAR"; + &_set_content if @_ > 1; + return $old; + } + + if (@_ > 1) { + &_set_content; + } + else { + Carp::carp("Useless content call in void context") if $^W; + } +} + + +sub _set_content { + my $self = $_[0]; + _utf8_downgrade($_[1]); + if (!ref($_[1]) && ref($self->{_content}) eq "SCALAR") { + ${$self->{_content}} = $_[1]; + } + else { + die "Can't set content to be a scalar reference" if ref($_[1]) eq "SCALAR"; + $self->{_content} = $_[1]; + delete $self->{_content_ref}; + } + delete $self->{_parts} unless $_[2]; +} + + +sub add_content +{ + my $self = shift; + $self->_content unless exists $self->{_content}; + my $chunkref = \$_[0]; + $chunkref = $$chunkref if ref($$chunkref); # legacy + + _utf8_downgrade($$chunkref); + + my $ref = ref($self->{_content}); + if (!$ref) { + $self->{_content} .= $$chunkref; + } + elsif ($ref eq "SCALAR") { + ${$self->{_content}} .= $$chunkref; + } + else { + Carp::croak("Can't append to $ref content"); + } + delete $self->{_parts}; +} + +sub add_content_utf8 { + my($self, $buf) = @_; + utf8::upgrade($buf); + utf8::encode($buf); + $self->add_content($buf); +} + +sub content_ref +{ + my $self = shift; + $self->_content unless exists $self->{_content}; + delete $self->{_parts}; + my $old = \$self->{_content}; + my $old_cref = $self->{_content_ref}; + if (@_) { + my $new = shift; + Carp::croak("Setting content_ref to a non-ref") unless ref($new); + delete $self->{_content}; # avoid modifying $$old + $self->{_content} = $new; + $self->{_content_ref}++; + } + $old = $$old if $old_cref; + return $old; +} + + +sub content_charset +{ + my $self = shift; + if (my $charset = $self->content_type_charset) { + return $charset; + } + + # time to start guessing + my $cref = $self->decoded_content(ref => 1, charset => "none"); + + # Unicode BOM + for ($$cref) { + return "UTF-8" if /^\xEF\xBB\xBF/; + return "UTF-32LE" if /^\xFF\xFE\x00\x00/; + return "UTF-32BE" if /^\x00\x00\xFE\xFF/; + return "UTF-16LE" if /^\xFF\xFE/; + return "UTF-16BE" if /^\xFE\xFF/; + } + + if ($self->content_is_xml) { + # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing + # XML entity not accompanied by external encoding information and not + # in UTF-8 or UTF-16 encoding must begin with an XML encoding declaration, + # in which the first characters must be '<?xml' + for ($$cref) { + return "UTF-32BE" if /^\x00\x00\x00</; + return "UTF-32LE" if /^<\x00\x00\x00/; + return "UTF-16BE" if /^(?:\x00\s)*\x00</; + return "UTF-16LE" if /^(?:\s\x00)*<\x00/; + if (/^\s*(<\?xml[^\x00]*?\?>)/) { + if ($1 =~ /\sencoding\s*=\s*(["'])(.*?)\1/) { + my $enc = $2; + $enc =~ s/^\s+//; $enc =~ s/\s+\z//; + return $enc if $enc; + } + } + } + return "UTF-8"; + } + elsif ($self->content_is_html) { + # look for <META charset="..."> or <META content="..."> + # http://dev.w3.org/html5/spec/Overview.html#determining-the-character-encoding + require IO::HTML; + # Use relaxed search to match previous versions of HTTP::Message: + my $encoding = IO::HTML::find_charset_in($$cref, { encoding => 1, + need_pragma => 0 }); + return $encoding->mime_name if $encoding; + } + elsif ($self->content_type eq "application/json") { + for ($$cref) { + # RFC 4627, ch 3 + return "UTF-32BE" if /^\x00\x00\x00./s; + return "UTF-32LE" if /^.\x00\x00\x00/s; + return "UTF-16BE" if /^\x00.\x00./s; + return "UTF-16LE" if /^.\x00.\x00/s; + return "UTF-8"; + } + } + if ($self->content_type =~ /^text\//) { + for ($$cref) { + if (length) { + return "US-ASCII" unless /[\x80-\xFF]/; + require Encode; + eval { + Encode::decode_utf8($_, Encode::FB_CROAK() | Encode::LEAVE_SRC()); + }; + return "UTF-8" unless $@; + return "ISO-8859-1"; + } + } + } + + return undef; +} + + +sub decoded_content +{ + my($self, %opt) = @_; + my $content_ref; + my $content_ref_iscopy; + + eval { + $content_ref = $self->content_ref; + die "Can't decode ref content" if ref($content_ref) ne "SCALAR"; + + if (my $h = $self->header("Content-Encoding")) { + $h =~ s/^\s+//; + $h =~ s/\s+$//; + for my $ce (reverse split(/\s*,\s*/, lc($h))) { + next unless $ce; + next if $ce eq "identity" || $ce eq "none"; + if ($ce eq "gzip" || $ce eq "x-gzip") { + require IO::Uncompress::Gunzip; + my $output; + IO::Uncompress::Gunzip::gunzip($content_ref, \$output, Transparent => 0) + or die "Can't gunzip content: $IO::Uncompress::Gunzip::GunzipError"; + $content_ref = \$output; + $content_ref_iscopy++; + } + elsif ($ce eq "x-bzip2" or $ce eq "bzip2") { + require IO::Uncompress::Bunzip2; + my $output; + IO::Uncompress::Bunzip2::bunzip2($content_ref, \$output, Transparent => 0) + or die "Can't bunzip content: $IO::Uncompress::Bunzip2::Bunzip2Error"; + $content_ref = \$output; + $content_ref_iscopy++; + } + elsif ($ce eq "deflate") { + require IO::Uncompress::Inflate; + my $output; + my $status = IO::Uncompress::Inflate::inflate($content_ref, \$output, Transparent => 0); + my $error = $IO::Uncompress::Inflate::InflateError; + unless ($status) { + # "Content-Encoding: deflate" is supposed to mean the + # "zlib" format of RFC 1950, but Microsoft got that + # wrong, so some servers sends the raw compressed + # "deflate" data. This tries to inflate this format. + $output = undef; + require IO::Uncompress::RawInflate; + unless (IO::Uncompress::RawInflate::rawinflate($content_ref, \$output)) { + $self->push_header("Client-Warning" => + "Could not raw inflate content: $IO::Uncompress::RawInflate::RawInflateError"); + $output = undef; + } + } + die "Can't inflate content: $error" unless defined $output; + $content_ref = \$output; + $content_ref_iscopy++; + } + elsif ($ce eq "compress" || $ce eq "x-compress") { + die "Can't uncompress content"; + } + elsif ($ce eq "base64") { # not really C-T-E, but should be harmless + require MIME::Base64; + $content_ref = \MIME::Base64::decode($$content_ref); + $content_ref_iscopy++; + } + elsif ($ce eq "quoted-printable") { # not really C-T-E, but should be harmless + require MIME::QuotedPrint; + $content_ref = \MIME::QuotedPrint::decode($$content_ref); + $content_ref_iscopy++; + } + else { + die "Don't know how to decode Content-Encoding '$ce'"; + } + } + } + + if ($self->content_is_text || (my $is_xml = $self->content_is_xml)) { + my $charset = lc( + $opt{charset} || + $self->content_type_charset || + $opt{default_charset} || + $self->content_charset || + "ISO-8859-1" + ); + if ($charset eq "none") { + # leave it as is + } + elsif ($charset eq "us-ascii" || $charset eq "iso-8859-1") { + if ($$content_ref =~ /[^\x00-\x7F]/ && defined &utf8::upgrade) { + unless ($content_ref_iscopy) { + my $copy = $$content_ref; + $content_ref = \$copy; + $content_ref_iscopy++; + } + utf8::upgrade($$content_ref); + } + } + else { + require Encode; + eval { + $content_ref = \Encode::decode($charset, $$content_ref, + ($opt{charset_strict} ? Encode::FB_CROAK() : 0) | Encode::LEAVE_SRC()); + }; + if ($@) { + my $retried; + if ($@ =~ /^Unknown encoding/) { + my $alt_charset = lc($opt{alt_charset} || ""); + if ($alt_charset && $charset ne $alt_charset) { + # Retry decoding with the alternative charset + $content_ref = \Encode::decode($alt_charset, $$content_ref, + ($opt{charset_strict} ? Encode::FB_CROAK() : 0) | Encode::LEAVE_SRC()) + unless $alt_charset eq "none"; + $retried++; + } + } + die unless $retried; + } + die "Encode::decode() returned undef improperly" unless defined $$content_ref; + if ($is_xml) { + # Get rid of the XML encoding declaration if present + $$content_ref =~ s/^\x{FEFF}//; + if ($$content_ref =~ /^(\s*<\?xml[^\x00]*?\?>)/) { + substr($$content_ref, 0, length($1)) =~ s/\sencoding\s*=\s*(["']).*?\1//; + } + } + } + } + }; + if ($@) { + Carp::croak($@) if $opt{raise_error}; + return undef; + } + + return $opt{ref} ? $content_ref : $$content_ref; +} + + +sub decodable +{ + # should match the Content-Encoding values that decoded_content can deal with + my $self = shift; + my @enc; + # XXX preferably we should determine if the modules are available without loading + # them here + eval { + require IO::Uncompress::Gunzip; + push(@enc, "gzip", "x-gzip"); + }; + eval { + require IO::Uncompress::Inflate; + require IO::Uncompress::RawInflate; + push(@enc, "deflate"); + }; + eval { + require IO::Uncompress::Bunzip2; + push(@enc, "x-bzip2"); + }; + # we don't care about announcing the 'identity', 'base64' and + # 'quoted-printable' stuff + return wantarray ? @enc : join(", ", @enc); +} + + +sub decode +{ + my $self = shift; + return 1 unless $self->header("Content-Encoding"); + if (defined(my $content = $self->decoded_content(charset => "none"))) { + $self->remove_header("Content-Encoding", "Content-Length", "Content-MD5"); + $self->content($content); + return 1; + } + return 0; +} + + +sub encode +{ + my($self, @enc) = @_; + + Carp::croak("Can't encode multipart/* messages") if $self->content_type =~ m,^multipart/,; + Carp::croak("Can't encode message/* messages") if $self->content_type =~ m,^message/,; + + return 1 unless @enc; # nothing to do + + my $content = $self->content; + for my $encoding (@enc) { + if ($encoding eq "identity") { + # nothing to do + } + elsif ($encoding eq "base64") { + require MIME::Base64; + $content = MIME::Base64::encode($content); + } + elsif ($encoding eq "gzip" || $encoding eq "x-gzip") { + require IO::Compress::Gzip; + my $output; + IO::Compress::Gzip::gzip(\$content, \$output, Minimal => 1) + or die "Can't gzip content: $IO::Compress::Gzip::GzipError"; + $content = $output; + } + elsif ($encoding eq "deflate") { + require IO::Compress::Deflate; + my $output; + IO::Compress::Deflate::deflate(\$content, \$output) + or die "Can't deflate content: $IO::Compress::Deflate::DeflateError"; + $content = $output; + } + elsif ($encoding eq "x-bzip2") { + require IO::Compress::Bzip2; + my $output; + IO::Compress::Bzip2::bzip2(\$content, \$output) + or die "Can't bzip2 content: $IO::Compress::Bzip2::Bzip2Error"; + $content = $output; + } + elsif ($encoding eq "rot13") { # for the fun of it + $content =~ tr/A-Za-z/N-ZA-Mn-za-m/; + } + else { + return 0; + } + } + my $h = $self->header("Content-Encoding"); + unshift(@enc, $h) if $h; + $self->header("Content-Encoding", join(", ", @enc)); + $self->remove_header("Content-Length", "Content-MD5"); + $self->content($content); + return 1; +} + + +sub as_string +{ + my($self, $eol) = @_; + $eol = "\n" unless defined $eol; + + # The calculation of content might update the headers + # so we need to do that first. + my $content = $self->content; + + return join("", $self->{'_headers'}->as_string($eol), + $eol, + $content, + (@_ == 1 && length($content) && + $content !~ /\n\z/) ? "\n" : "", + ); +} + + +sub dump +{ + my($self, %opt) = @_; + my $content = $self->content; + my $chopped = 0; + if (!ref($content)) { + my $maxlen = $opt{maxlength}; + $maxlen = 512 unless defined($maxlen); + if ($maxlen && length($content) > $maxlen * 1.1 + 3) { + $chopped = length($content) - $maxlen; + $content = substr($content, 0, $maxlen) . "..."; + } + + $content =~ s/\\/\\\\/g; + $content =~ s/\t/\\t/g; + $content =~ s/\r/\\r/g; + + # no need for 3 digits in escape for these + $content =~ s/([\0-\11\13-\037])(?!\d)/sprintf('\\%o',ord($1))/eg; + + $content =~ s/([\0-\11\13-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg; + $content =~ s/([^\12\040-\176])/sprintf('\\x{%X}',ord($1))/eg; + + # remaining whitespace + $content =~ s/( +)\n/("\\40" x length($1)) . "\n"/eg; + $content =~ s/(\n+)\n/("\\n" x length($1)) . "\n"/eg; + $content =~ s/\n\z/\\n/; + + my $no_content = $opt{no_content}; + $no_content = "(no content)" unless defined $no_content; + if ($content eq $no_content) { + # escape our $no_content marker + $content =~ s/^(.)/sprintf('\\x%02X',ord($1))/eg; + } + elsif ($content eq "") { + $content = $no_content; + } + } + + my @dump; + push(@dump, $opt{preheader}) if $opt{preheader}; + push(@dump, $self->{_headers}->as_string, $content); + push(@dump, "(+ $chopped more bytes not shown)") if $chopped; + + my $dump = join("\n", @dump, ""); + $dump =~ s/^/$opt{prefix}/gm if $opt{prefix}; + + print $dump unless defined wantarray; + return $dump; +} + +# allow subclasses to override what will handle individual parts +sub _part_class { + return __PACKAGE__; +} + +sub parts { + my $self = shift; + if (defined(wantarray) && (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR")) { + $self->_parts; + } + my $old = $self->{_parts}; + if (@_) { + my @parts = map { ref($_) eq 'ARRAY' ? @$_ : $_ } @_; + my $ct = $self->content_type || ""; + if ($ct =~ m,^message/,) { + Carp::croak("Only one part allowed for $ct content") + if @parts > 1; + } + elsif ($ct !~ m,^multipart/,) { + $self->remove_content_headers; + $self->content_type("multipart/mixed"); + } + $self->{_parts} = \@parts; + _stale_content($self); + } + return @$old if wantarray; + return $old->[0]; +} + +sub add_part { + my $self = shift; + if (($self->content_type || "") !~ m,^multipart/,) { + my $p = $self->_part_class->new( + $self->remove_content_headers, + $self->content(""), + ); + $self->content_type("multipart/mixed"); + $self->{_parts} = []; + if ($p->headers->header_field_names || $p->content ne "") { + push(@{$self->{_parts}}, $p); + } + } + elsif (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR") { + $self->_parts; + } + + push(@{$self->{_parts}}, @_); + _stale_content($self); + return; +} + +sub _stale_content { + my $self = shift; + if (ref($self->{_content}) eq "SCALAR") { + # must recalculate now + $self->_content; + } + else { + # just invalidate cache + delete $self->{_content}; + delete $self->{_content_ref}; + } +} + + +# delegate all other method calls to the headers object. +our $AUTOLOAD; +sub AUTOLOAD +{ + my $method = substr($AUTOLOAD, rindex($AUTOLOAD, '::')+2); + + # We create the function here so that it will not need to be + # autoloaded the next time. + no strict 'refs'; + *$method = sub { local $Carp::Internal{+__PACKAGE__} = 1; shift->headers->$method(@_) }; + goto &$method; +} + + +sub DESTROY {} # avoid AUTOLOADing it + + +# Private method to access members in %$self +sub _elem +{ + my $self = shift; + my $elem = shift; + my $old = $self->{$elem}; + $self->{$elem} = $_[0] if @_; + return $old; +} + + +# Create private _parts attribute from current _content +sub _parts { + my $self = shift; + my $ct = $self->content_type; + if ($ct =~ m,^multipart/,) { + require HTTP::Headers::Util; + my @h = HTTP::Headers::Util::split_header_words($self->header("Content-Type")); + die "Assert" unless @h; + my %h = @{$h[0]}; + if (defined(my $b = $h{boundary})) { + my $str = $self->content; + $str =~ s/\r?\n--\Q$b\E--.*//s; + if ($str =~ s/(^|.*?\r?\n)--\Q$b\E\r?\n//s) { + $self->{_parts} = [map $self->_part_class->parse($_), + split(/\r?\n--\Q$b\E\r?\n/, $str)] + } + } + } + elsif ($ct eq "message/http") { + require HTTP::Request; + require HTTP::Response; + my $content = $self->content; + my $class = ($content =~ m,^(HTTP/.*)\n,) ? + "HTTP::Response" : "HTTP::Request"; + $self->{_parts} = [$class->parse($content)]; + } + elsif ($ct =~ m,^message/,) { + $self->{_parts} = [ $self->_part_class->parse($self->content) ]; + } + + $self->{_parts} ||= []; +} + + +# Create private _content attribute from current _parts +sub _content { + my $self = shift; + my $ct = $self->{_headers}->header("Content-Type") || "multipart/mixed"; + if ($ct =~ m,^\s*message/,i) { + _set_content($self, $self->{_parts}[0]->as_string($CRLF), 1); + return; + } + + require HTTP::Headers::Util; + my @v = HTTP::Headers::Util::split_header_words($ct); + Carp::carp("Multiple Content-Type headers") if @v > 1; + @v = @{$v[0]}; + + my $boundary; + my $boundary_index; + for (my @tmp = @v; @tmp;) { + my($k, $v) = splice(@tmp, 0, 2); + if ($k eq "boundary") { + $boundary = $v; + $boundary_index = @v - @tmp - 1; + last; + } + } + + my @parts = map $_->as_string($CRLF), @{$self->{_parts}}; + + my $bno = 0; + $boundary = _boundary() unless defined $boundary; + CHECK_BOUNDARY: + { + for (@parts) { + if (index($_, $boundary) >= 0) { + # must have a better boundary + $boundary = _boundary(++$bno); + redo CHECK_BOUNDARY; + } + } + } + + if ($boundary_index) { + $v[$boundary_index] = $boundary; + } + else { + push(@v, boundary => $boundary); + } + + $ct = HTTP::Headers::Util::join_header_words(@v); + $self->{_headers}->header("Content-Type", $ct); + + _set_content($self, "--$boundary$CRLF" . + join("$CRLF--$boundary$CRLF", @parts) . + "$CRLF--$boundary--$CRLF", + 1); +} + + +sub _boundary +{ + my $size = shift || return "xYzZY"; + require MIME::Base64; + my $b = MIME::Base64::encode(join("", map chr(rand(256)), 1..$size*3), ""); + $b =~ s/[\W]/X/g; # ensure alnum only + $b; +} + + +1; + + +__END__ + +=head1 NAME + +HTTP::Message - HTTP style message (base class) + +=head1 SYNOPSIS + + use base 'HTTP::Message'; + +=head1 DESCRIPTION + +An C<HTTP::Message> object contains some headers and a content body. +The following methods are available: + +=over 4 + +=item $mess = HTTP::Message->new + +=item $mess = HTTP::Message->new( $headers ) + +=item $mess = HTTP::Message->new( $headers, $content ) + +This constructs a new message object. Normally you would want +construct C<HTTP::Request> or C<HTTP::Response> objects instead. + +The optional $header argument should be a reference to an +C<HTTP::Headers> object or a plain array reference of key/value pairs. +If an C<HTTP::Headers> object is provided then a copy of it will be +embedded into the constructed message, i.e. it will not be owned and +can be modified afterwards without affecting the message. + +The optional $content argument should be a string of bytes. + +=item $mess = HTTP::Message->parse( $str ) + +This constructs a new message object by parsing the given string. + +=item $mess->headers + +Returns the embedded C<HTTP::Headers> object. + +=item $mess->headers_as_string + +=item $mess->headers_as_string( $eol ) + +Call the as_string() method for the headers in the +message. This will be the same as + + $mess->headers->as_string + +but it will make your program a whole character shorter :-) + +=item $mess->content + +=item $mess->content( $bytes ) + +The content() method sets the raw content if an argument is given. If no +argument is given the content is not touched. In either case the +original raw content is returned. + +Note that the content should be a string of bytes. Strings in perl +can contain characters outside the range of a byte. The C<Encode> +module can be used to turn such strings into a string of bytes. + +=item $mess->add_content( $bytes ) + +The add_content() methods appends more data bytes to the end of the +current content buffer. + +=item $mess->add_content_utf8( $string ) + +The add_content_utf8() method appends the UTF-8 bytes representing the +string to the end of the current content buffer. + +=item $mess->content_ref + +=item $mess->content_ref( \$bytes ) + +The content_ref() method will return a reference to content buffer string. +It can be more efficient to access the content this way if the content +is huge, and it can even be used for direct manipulation of the content, +for instance: + + ${$res->content_ref} =~ s/\bfoo\b/bar/g; + +This example would modify the content buffer in-place. + +If an argument is passed it will setup the content to reference some +external source. The content() and add_content() methods +will automatically dereference scalar references passed this way. For +other references content() will return the reference itself and +add_content() will refuse to do anything. + +=item $mess->content_charset + +This returns the charset used by the content in the message. The +charset is either found as the charset attribute of the +C<Content-Type> header or by guessing. + +See L<http://www.w3.org/TR/REC-html40/charset.html#spec-char-encoding> +for details about how charset is determined. + +=item $mess->decoded_content( %options ) + +Returns the content with any C<Content-Encoding> undone and for textual content +the raw content encoded to Perl's Unicode strings. If the C<Content-Encoding> +or C<charset> of the message is unknown this method will fail by returning +C<undef>. + +The following options can be specified. + +=over + +=item C<charset> + +This override the charset parameter for text content. The value +C<none> can used to suppress decoding of the charset. + +=item C<default_charset> + +This override the default charset guessed by content_charset() or +if that fails "ISO-8859-1". + +=item C<alt_charset> + +If decoding fails because the charset specified in the Content-Type header +isn't recognized by Perl's Encode module, then try decoding using this charset +instead of failing. The C<alt_charset> might be specified as C<none> to simply +return the string without any decoding of charset as alternative. + +=item C<charset_strict> + +Abort decoding if malformed characters is found in the content. By +default you get the substitution character ("\x{FFFD}") in place of +malformed characters. + +=item C<raise_error> + +If TRUE then raise an exception if not able to decode content. Reason +might be that the specified C<Content-Encoding> or C<charset> is not +supported. If this option is FALSE, then decoded_content() will return +C<undef> on errors, but will still set $@. + +=item C<ref> + +If TRUE then a reference to decoded content is returned. This might +be more efficient in cases where the decoded content is identical to +the raw content as no data copying is required in this case. + +=back + +=item $mess->decodable + +=item HTTP::Message::decodable() + +This returns the encoding identifiers that decoded_content() can +process. In scalar context returns a comma separated string of +identifiers. + +This value is suitable for initializing the C<Accept-Encoding> request +header field. + +=item $mess->decode + +This method tries to replace the content of the message with the +decoded version and removes the C<Content-Encoding> header. Returns +TRUE if successful and FALSE if not. + +If the message does not have a C<Content-Encoding> header this method +does nothing and returns TRUE. + +Note that the content of the message is still bytes after this method +has been called and you still need to call decoded_content() if you +want to process its content as a string. + +=item $mess->encode( $encoding, ... ) + +Apply the given encodings to the content of the message. Returns TRUE +if successful. The "identity" (non-)encoding is always supported; other +currently supported encodings, subject to availability of required +additional modules, are "gzip", "deflate", "x-bzip2" and "base64". + +A successful call to this function will set the C<Content-Encoding> +header. + +Note that C<multipart/*> or C<message/*> messages can't be encoded and +this method will croak if you try. + +=item $mess->parts + +=item $mess->parts( @parts ) + +=item $mess->parts( \@parts ) + +Messages can be composite, i.e. contain other messages. The composite +messages have a content type of C<multipart/*> or C<message/*>. This +method give access to the contained messages. + +The argumentless form will return a list of C<HTTP::Message> objects. +If the content type of $msg is not C<multipart/*> or C<message/*> then +this will return the empty list. In scalar context only the first +object is returned. The returned message parts should be regarded as +read-only (future versions of this library might make it possible +to modify the parent by modifying the parts). + +If the content type of $msg is C<message/*> then there will only be +one part returned. + +If the content type is C<message/http>, then the return value will be +either an C<HTTP::Request> or an C<HTTP::Response> object. + +If a @parts argument is given, then the content of the message will be +modified. The array reference form is provided so that an empty list +can be provided. The @parts array should contain C<HTTP::Message> +objects. The @parts objects are owned by $mess after this call and +should not be modified or made part of other messages. + +When updating the message with this method and the old content type of +$mess is not C<multipart/*> or C<message/*>, then the content type is +set to C<multipart/mixed> and all other content headers are cleared. + +This method will croak if the content type is C<message/*> and more +than one part is provided. + +=item $mess->add_part( $part ) + +This will add a part to a message. The $part argument should be +another C<HTTP::Message> object. If the previous content type of +$mess is not C<multipart/*> then the old content (together with all +content headers) will be made part #1 and the content type made +C<multipart/mixed> before the new part is added. The $part object is +owned by $mess after this call and should not be modified or made part +of other messages. + +There is no return value. + +=item $mess->clear + +Will clear the headers and set the content to the empty string. There +is no return value + +=item $mess->protocol + +=item $mess->protocol( $proto ) + +Sets the HTTP protocol used for the message. The protocol() is a string +like C<HTTP/1.0> or C<HTTP/1.1>. + +=item $mess->clone + +Returns a copy of the message object. + +=item $mess->as_string + +=item $mess->as_string( $eol ) + +Returns the message formatted as a single string. + +The optional $eol parameter specifies the line ending sequence to use. +The default is "\n". If no $eol is given then as_string will ensure +that the returned string is newline terminated (even when the message +content is not). No extra newline is appended if an explicit $eol is +passed. + +=item $mess->dump( %opt ) + +Returns the message formatted as a string. In void context print the string. + +This differs from C<< $mess->as_string >> in that it escapes the bytes +of the content so that it's safe to print them and it limits how much +content to print. The escapes syntax used is the same as for Perl's +double quoted strings. If there is no content the string "(no +content)" is shown in its place. + +Options to influence the output can be passed as key/value pairs. The +following options are recognized: + +=over + +=item maxlength => $num + +How much of the content to show. The default is 512. Set this to 0 +for unlimited. + +If the content is longer then the string is chopped at the limit and +the string "...\n(### more bytes not shown)" appended. + +=item no_content => $str + +Replaces the "(no content)" marker. + +=item prefix => $str + +A string that will be prefixed to each line of the dump. + +=back + +=back + +All methods unknown to C<HTTP::Message> itself are delegated to the +C<HTTP::Headers> object that is part of every message. This allows +convenient access to these methods. Refer to L<HTTP::Headers> for +details of these methods: + + $mess->header( $field => $val ) + $mess->push_header( $field => $val ) + $mess->init_header( $field => $val ) + $mess->remove_header( $field ) + $mess->remove_content_headers + $mess->header_field_names + $mess->scan( \&doit ) + + $mess->date + $mess->expires + $mess->if_modified_since + $mess->if_unmodified_since + $mess->last_modified + $mess->content_type + $mess->content_encoding + $mess->content_length + $mess->content_language + $mess->title + $mess->user_agent + $mess->server + $mess->from + $mess->referer + $mess->www_authenticate + $mess->authorization + $mess->proxy_authorization + $mess->authorization_basic + $mess->proxy_authorization_basic + +=head1 COPYRIGHT + +Copyright 1995-2004 Gisle Aas. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + diff --git a/lib/HTTP/Request.pm b/lib/HTTP/Request.pm new file mode 100644 index 0000000..6a0eda1 --- /dev/null +++ b/lib/HTTP/Request.pm @@ -0,0 +1,241 @@ +package HTTP::Request; + +use strict; +use warnings; + +use base 'HTTP::Message'; + +our $VERSION = "6.10"; + +sub new +{ + my($class, $method, $uri, $header, $content) = @_; + my $self = $class->SUPER::new($header, $content); + $self->method($method); + $self->uri($uri); + $self; +} + + +sub parse +{ + my($class, $str) = @_; + my $request_line; + if ($str =~ s/^(.*)\n//) { + $request_line = $1; + } + else { + $request_line = $str; + $str = ""; + } + + my $self = $class->SUPER::parse($str); + my($method, $uri, $protocol) = split(' ', $request_line); + $self->method($method) if defined($method); + $self->uri($uri) if defined($uri); + $self->protocol($protocol) if $protocol; + $self; +} + + +sub clone +{ + my $self = shift; + my $clone = bless $self->SUPER::clone, ref($self); + $clone->method($self->method); + $clone->uri($self->uri); + $clone; +} + + +sub method +{ + shift->_elem('_method', @_); +} + + +sub uri +{ + my $self = shift; + my $old = $self->{'_uri'}; + if (@_) { + my $uri = shift; + if (!defined $uri) { + # that's ok + } + elsif (ref $uri) { + Carp::croak("A URI can't be a " . ref($uri) . " reference") + if ref($uri) eq 'HASH' or ref($uri) eq 'ARRAY'; + Carp::croak("Can't use a " . ref($uri) . " object as a URI") + unless $uri->can('scheme'); + $uri = $uri->clone; + unless ($HTTP::URI_CLASS eq "URI") { + # Argh!! Hate this... old LWP legacy! + eval { local $SIG{__DIE__}; $uri = $uri->abs; }; + die $@ if $@ && $@ !~ /Missing base argument/; + } + } + else { + $uri = $HTTP::URI_CLASS->new($uri); + } + $self->{'_uri'} = $uri; + delete $self->{'_uri_canonical'}; + } + $old; +} + +*url = \&uri; # legacy + +sub uri_canonical +{ + my $self = shift; + return $self->{'_uri_canonical'} ||= $self->{'_uri'}->canonical; +} + + +sub accept_decodable +{ + my $self = shift; + $self->header("Accept-Encoding", scalar($self->decodable)); +} + +sub as_string +{ + my $self = shift; + my($eol) = @_; + $eol = "\n" unless defined $eol; + + my $req_line = $self->method || "-"; + my $uri = $self->uri; + $uri = (defined $uri) ? $uri->as_string : "-"; + $req_line .= " $uri"; + my $proto = $self->protocol; + $req_line .= " $proto" if $proto; + + return join($eol, $req_line, $self->SUPER::as_string(@_)); +} + +sub dump +{ + my $self = shift; + my @pre = ($self->method || "-", $self->uri || "-"); + if (my $prot = $self->protocol) { + push(@pre, $prot); + } + + return $self->SUPER::dump( + preheader => join(" ", @pre), + @_, + ); +} + + +1; + +__END__ + +=head1 NAME + +HTTP::Request - HTTP style request message + +=head1 SYNOPSIS + + require HTTP::Request; + $request = HTTP::Request->new(GET => 'http://www.example.com/'); + +and usually used like this: + + $ua = LWP::UserAgent->new; + $response = $ua->request($request); + +=head1 DESCRIPTION + +C<HTTP::Request> is a class encapsulating HTTP style requests, +consisting of a request line, some headers, and a content body. Note +that the LWP library uses HTTP style requests even for non-HTTP +protocols. Instances of this class are usually passed to the +request() method of an C<LWP::UserAgent> object. + +C<HTTP::Request> is a subclass of C<HTTP::Message> and therefore +inherits its methods. The following additional methods are available: + +=over 4 + +=item $r = HTTP::Request->new( $method, $uri ) + +=item $r = HTTP::Request->new( $method, $uri, $header ) + +=item $r = HTTP::Request->new( $method, $uri, $header, $content ) + +Constructs a new C<HTTP::Request> object describing a request on the +object $uri using method $method. The $method argument must be a +string. The $uri argument can be either a string, or a reference to a +C<URI> object. The optional $header argument should be a reference to +an C<HTTP::Headers> object or a plain array reference of key/value +pairs. The optional $content argument should be a string of bytes. + +=item $r = HTTP::Request->parse( $str ) + +This constructs a new request object by parsing the given string. + +=item $r->method + +=item $r->method( $val ) + +This is used to get/set the method attribute. The method should be a +short string like "GET", "HEAD", "PUT" or "POST". + +=item $r->uri + +=item $r->uri( $val ) + +This is used to get/set the uri attribute. The $val can be a +reference to a URI object or a plain string. If a string is given, +then it should be parsable as an absolute URI. + +=item $r->header( $field ) + +=item $r->header( $field => $value ) + +This is used to get/set header values and it is inherited from +C<HTTP::Headers> via C<HTTP::Message>. See L<HTTP::Headers> for +details and other similar methods that can be used to access the +headers. + +=item $r->accept_decodable + +This will set the C<Accept-Encoding> header to the list of encodings +that decoded_content() can decode. + +=item $r->content + +=item $r->content( $bytes ) + +This is used to get/set the content and it is inherited from the +C<HTTP::Message> base class. See L<HTTP::Message> for details and +other methods that can be used to access the content. + +Note that the content should be a string of bytes. Strings in perl +can contain characters outside the range of a byte. The C<Encode> +module can be used to turn such strings into a string of bytes. + +=item $r->as_string + +=item $r->as_string( $eol ) + +Method returning a textual representation of the request. + +=back + +=head1 SEE ALSO + +L<HTTP::Headers>, L<HTTP::Message>, L<HTTP::Request::Common>, +L<HTTP::Response> + +=head1 COPYRIGHT + +Copyright 1995-2004 Gisle Aas. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + diff --git a/lib/HTTP/Request/Common.pm b/lib/HTTP/Request/Common.pm new file mode 100644 index 0000000..7431211 --- /dev/null +++ b/lib/HTTP/Request/Common.pm @@ -0,0 +1,521 @@ +package HTTP::Request::Common; + +use strict; +use warnings; + +our $DYNAMIC_FILE_UPLOAD ||= 0; # make it defined (don't know why) + +use Exporter 5.57 'import'; + +our @EXPORT =qw(GET HEAD PUT POST); +our @EXPORT_OK = qw($DYNAMIC_FILE_UPLOAD DELETE); + +require HTTP::Request; +use Carp(); + +our $VERSION = "6.10"; + +my $CRLF = "\015\012"; # "\r\n" is not portable + +sub GET { _simple_req('GET', @_); } +sub HEAD { _simple_req('HEAD', @_); } +sub DELETE { _simple_req('DELETE', @_); } + +for my $type (qw(PUT POST)) { + no strict 'refs'; + *{ __PACKAGE__ . "::" . $type } = sub { + return request_type_with_data($type, @_); + }; +} + +sub request_type_with_data +{ + my $type = shift; + my $url = shift; + my $req = HTTP::Request->new($type => $url); + my $content; + $content = shift if @_ and ref $_[0]; + my($k, $v); + while (($k,$v) = splice(@_, 0, 2)) { + if (lc($k) eq 'content') { + $content = $v; + } + else { + $req->push_header($k, $v); + } + } + my $ct = $req->header('Content-Type'); + unless ($ct) { + $ct = 'application/x-www-form-urlencoded'; + } + elsif ($ct eq 'form-data') { + $ct = 'multipart/form-data'; + } + + if (ref $content) { + if ($ct =~ m,^multipart/form-data\s*(;|$),i) { + require HTTP::Headers::Util; + my @v = HTTP::Headers::Util::split_header_words($ct); + Carp::carp("Multiple Content-Type headers") if @v > 1; + @v = @{$v[0]}; + + my $boundary; + my $boundary_index; + for (my @tmp = @v; @tmp;) { + my($k, $v) = splice(@tmp, 0, 2); + if ($k eq "boundary") { + $boundary = $v; + $boundary_index = @v - @tmp - 1; + last; + } + } + + ($content, $boundary) = form_data($content, $boundary, $req); + + if ($boundary_index) { + $v[$boundary_index] = $boundary; + } + else { + push(@v, boundary => $boundary); + } + + $ct = HTTP::Headers::Util::join_header_words(@v); + } + else { + # We use a temporary URI object to format + # the application/x-www-form-urlencoded content. + require URI; + my $url = URI->new('http:'); + $url->query_form(ref($content) eq "HASH" ? %$content : @$content); + $content = $url->query; + + # HTML/4.01 says that line breaks are represented as "CR LF" pairs (i.e., `%0D%0A') + $content =~ s/(?<!%0D)%0A/%0D%0A/g if defined($content); + } + } + + $req->header('Content-Type' => $ct); # might be redundant + if (defined($content)) { + $req->header('Content-Length' => + length($content)) unless ref($content); + $req->content($content); + } + else { + $req->header('Content-Length' => 0); + } + $req; +} + + +sub _simple_req +{ + my($method, $url) = splice(@_, 0, 2); + my $req = HTTP::Request->new($method => $url); + my($k, $v); + my $content; + while (($k,$v) = splice(@_, 0, 2)) { + if (lc($k) eq 'content') { + $req->add_content($v); + $content++; + } + else { + $req->push_header($k, $v); + } + } + if ($content && !defined($req->header("Content-Length"))) { + $req->header("Content-Length", length(${$req->content_ref})); + } + $req; +} + + +sub form_data # RFC1867 +{ + my($data, $boundary, $req) = @_; + my @data = ref($data) eq "HASH" ? %$data : @$data; # copy + my $fhparts; + my @parts; + while (my ($k,$v) = splice(@data, 0, 2)) { + if (!ref($v)) { + $k =~ s/([\\\"])/\\$1/g; # escape quotes and backslashes + no warnings 'uninitialized'; + push(@parts, + qq(Content-Disposition: form-data; name="$k"$CRLF$CRLF$v)); + } + else { + my($file, $usename, @headers) = @$v; + unless (defined $usename) { + $usename = $file; + $usename =~ s,.*/,, if defined($usename); + } + $k =~ s/([\\\"])/\\$1/g; + my $disp = qq(form-data; name="$k"); + if (defined($usename) and length($usename)) { + $usename =~ s/([\\\"])/\\$1/g; + $disp .= qq(; filename="$usename"); + } + my $content = ""; + my $h = HTTP::Headers->new(@headers); + if ($file) { + open(my $fh, "<", $file) or Carp::croak("Can't open file $file: $!"); + binmode($fh); + if ($DYNAMIC_FILE_UPLOAD) { + # will read file later, close it now in order to + # not accumulate to many open file handles + close($fh); + $content = \$file; + } + else { + local($/) = undef; # slurp files + $content = <$fh>; + close($fh); + } + unless ($h->header("Content-Type")) { + require LWP::MediaTypes; + LWP::MediaTypes::guess_media_type($file, $h); + } + } + if ($h->header("Content-Disposition")) { + # just to get it sorted first + $disp = $h->header("Content-Disposition"); + $h->remove_header("Content-Disposition"); + } + if ($h->header("Content")) { + $content = $h->header("Content"); + $h->remove_header("Content"); + } + my $head = join($CRLF, "Content-Disposition: $disp", + $h->as_string($CRLF), + ""); + if (ref $content) { + push(@parts, [$head, $$content]); + $fhparts++; + } + else { + push(@parts, $head . $content); + } + } + } + return ("", "none") unless @parts; + + my $content; + if ($fhparts) { + $boundary = boundary(10) # hopefully enough randomness + unless $boundary; + + # add the boundaries to the @parts array + for (1..@parts-1) { + splice(@parts, $_*2-1, 0, "$CRLF--$boundary$CRLF"); + } + unshift(@parts, "--$boundary$CRLF"); + push(@parts, "$CRLF--$boundary--$CRLF"); + + # See if we can generate Content-Length header + my $length = 0; + for (@parts) { + if (ref $_) { + my ($head, $f) = @$_; + my $file_size; + unless ( -f $f && ($file_size = -s _) ) { + # The file is either a dynamic file like /dev/audio + # or perhaps a file in the /proc file system where + # stat may return a 0 size even though reading it + # will produce data. So we cannot make + # a Content-Length header. + undef $length; + last; + } + $length += $file_size + length $head; + } + else { + $length += length; + } + } + $length && $req->header('Content-Length' => $length); + + # set up a closure that will return content piecemeal + $content = sub { + for (;;) { + unless (@parts) { + defined $length && $length != 0 && + Carp::croak "length of data sent did not match calculated Content-Length header. Probably because uploaded file changed in size during transfer."; + return; + } + my $p = shift @parts; + unless (ref $p) { + $p .= shift @parts while @parts && !ref($parts[0]); + defined $length && ($length -= length $p); + return $p; + } + my($buf, $fh) = @$p; + unless (ref($fh)) { + my $file = $fh; + undef($fh); + open($fh, "<", $file) || Carp::croak("Can't open file $file: $!"); + binmode($fh); + } + my $buflength = length $buf; + my $n = read($fh, $buf, 2048, $buflength); + if ($n) { + $buflength += $n; + unshift(@parts, ["", $fh]); + } + else { + close($fh); + } + if ($buflength) { + defined $length && ($length -= $buflength); + return $buf + } + } + }; + + } + else { + $boundary = boundary() unless $boundary; + + my $bno = 0; + CHECK_BOUNDARY: + { + for (@parts) { + if (index($_, $boundary) >= 0) { + # must have a better boundary + $boundary = boundary(++$bno); + redo CHECK_BOUNDARY; + } + } + last; + } + $content = "--$boundary$CRLF" . + join("$CRLF--$boundary$CRLF", @parts) . + "$CRLF--$boundary--$CRLF"; + } + + wantarray ? ($content, $boundary) : $content; +} + + +sub boundary +{ + my $size = shift || return "xYzZY"; + require MIME::Base64; + my $b = MIME::Base64::encode(join("", map chr(rand(256)), 1..$size*3), ""); + $b =~ s/[\W]/X/g; # ensure alnum only + $b; +} + +1; + +__END__ + +=head1 NAME + +HTTP::Request::Common - Construct common HTTP::Request objects + +=head1 SYNOPSIS + + use HTTP::Request::Common; + $ua = LWP::UserAgent->new; + $ua->request(GET 'http://www.sn.no/'); + $ua->request(POST 'http://somewhere/foo', [foo => bar, bar => foo]); + +=head1 DESCRIPTION + +This module provide functions that return newly created C<HTTP::Request> +objects. These functions are usually more convenient to use than the +standard C<HTTP::Request> constructor for the most common requests. The +following functions are provided: + +=over 4 + +=item GET $url + +=item GET $url, Header => Value,... + +The GET() function returns an C<HTTP::Request> object initialized with +the "GET" method and the specified URL. It is roughly equivalent to the +following call + + HTTP::Request->new( + GET => $url, + HTTP::Headers->new(Header => Value,...), + ) + +but is less cluttered. What is different is that a header named +C<Content> will initialize the content part of the request instead of +setting a header field. Note that GET requests should normally not +have a content, so this hack makes more sense for the PUT() and POST() +functions described below. + +The get(...) method of C<LWP::UserAgent> exists as a shortcut for +$ua->request(GET ...). + +=item HEAD $url + +=item HEAD $url, Header => Value,... + +Like GET() but the method in the request is "HEAD". + +The head(...) method of "LWP::UserAgent" exists as a shortcut for +$ua->request(HEAD ...). + +=item PUT $url + +=item PUT $url, Header => Value,... + +=item PUT $url, Header => Value,..., Content => $content + +Like GET() but the method in the request is "PUT". + +The content of the request can be specified using the "Content" +pseudo-header. This steals a bit of the header field namespace as +there is no way to directly specify a header that is actually called +"Content". If you really need this you must update the request +returned in a separate statement. + +=item DELETE $url + +=item DELETE $url, Header => Value,... + +Like GET() but the method in the request is "DELETE". This function +is not exported by default. + +=item POST $url + +=item POST $url, Header => Value,... + +=item POST $url, $form_ref, Header => Value,... + +=item POST $url, Header => Value,..., Content => $form_ref + +=item POST $url, Header => Value,..., Content => $content + +This works mostly like PUT() with "POST" as the method, but this +function also takes a second optional array or hash reference +parameter $form_ref. As for PUT() the content can also be specified +directly using the "Content" pseudo-header, and you may also provide +the $form_ref this way. + +The $form_ref argument can be used to pass key/value pairs for the +form content. By default we will initialize a request using the +C<application/x-www-form-urlencoded> content type. This means that +you can emulate an HTML E<lt>form> POSTing like this: + + POST 'http://www.perl.org/survey.cgi', + [ name => 'Gisle Aas', + email => 'gisle@aas.no', + gender => 'M', + born => '1964', + perc => '3%', + ]; + +This will create an HTTP::Request object that looks like this: + + POST http://www.perl.org/survey.cgi + Content-Length: 66 + Content-Type: application/x-www-form-urlencoded + + name=Gisle%20Aas&email=gisle%40aas.no&gender=M&born=1964&perc=3%25 + +Multivalued form fields can be specified by either repeating the field +name or by passing the value as an array reference. + +The POST method also supports the C<multipart/form-data> content used +for I<Form-based File Upload> as specified in RFC 1867. You trigger +this content format by specifying a content type of C<'form-data'> as +one of the request headers. If one of the values in the $form_ref is +an array reference, then it is treated as a file part specification +with the following interpretation: + + [ $file, $filename, Header => Value... ] + [ undef, $filename, Header => Value,..., Content => $content ] + +The first value in the array ($file) is the name of a file to open. +This file will be read and its content placed in the request. The +routine will croak if the file can't be opened. Use an C<undef> as +$file value if you want to specify the content directly with a +C<Content> header. The $filename is the filename to report in the +request. If this value is undefined, then the basename of the $file +will be used. You can specify an empty string as $filename if you +want to suppress sending the filename when you provide a $file value. + +If a $file is provided by no C<Content-Type> header, then C<Content-Type> +and C<Content-Encoding> will be filled in automatically with the values +returned by LWP::MediaTypes::guess_media_type() + +Sending my F<~/.profile> to the survey used as example above can be +achieved by this: + + POST 'http://www.perl.org/survey.cgi', + Content_Type => 'form-data', + Content => [ name => 'Gisle Aas', + email => 'gisle@aas.no', + gender => 'M', + born => '1964', + init => ["$ENV{HOME}/.profile"], + ] + +This will create an HTTP::Request object that almost looks this (the +boundary and the content of your F<~/.profile> is likely to be +different): + + POST http://www.perl.org/survey.cgi + Content-Length: 388 + Content-Type: multipart/form-data; boundary="6G+f" + + --6G+f + Content-Disposition: form-data; name="name" + + Gisle Aas + --6G+f + Content-Disposition: form-data; name="email" + + gisle@aas.no + --6G+f + Content-Disposition: form-data; name="gender" + + M + --6G+f + Content-Disposition: form-data; name="born" + + 1964 + --6G+f + Content-Disposition: form-data; name="init"; filename=".profile" + Content-Type: text/plain + + PATH=/local/perl/bin:$PATH + export PATH + + --6G+f-- + +If you set the $DYNAMIC_FILE_UPLOAD variable (exportable) to some TRUE +value, then you get back a request object with a subroutine closure as +the content attribute. This subroutine will read the content of any +files on demand and return it in suitable chunks. This allow you to +upload arbitrary big files without using lots of memory. You can even +upload infinite files like F</dev/audio> if you wish; however, if +the file is not a plain file, there will be no Content-Length header +defined for the request. Not all servers (or server +applications) like this. Also, if the file(s) change in size between +the time the Content-Length is calculated and the time that the last +chunk is delivered, the subroutine will C<Croak>. + +The post(...) method of "LWP::UserAgent" exists as a shortcut for +$ua->request(POST ...). + +=back + +=head1 SEE ALSO + +L<HTTP::Request>, L<LWP::UserAgent> + + +=head1 COPYRIGHT + +Copyright 1997-2004, Gisle Aas + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=cut + diff --git a/lib/HTTP/Response.pm b/lib/HTTP/Response.pm new file mode 100644 index 0000000..f239ab6 --- /dev/null +++ b/lib/HTTP/Response.pm @@ -0,0 +1,644 @@ +package HTTP::Response; + +use strict; +use warnings; + +use base 'HTTP::Message'; + +our $VERSION = "6.10"; + +use HTTP::Status (); + + +sub new +{ + my($class, $rc, $msg, $header, $content) = @_; + my $self = $class->SUPER::new($header, $content); + $self->code($rc); + $self->message($msg); + $self; +} + + +sub parse +{ + my($class, $str) = @_; + my $status_line; + if ($str =~ s/^(.*)\n//) { + $status_line = $1; + } + else { + $status_line = $str; + $str = ""; + } + + my $self = $class->SUPER::parse($str); + my($protocol, $code, $message); + if ($status_line =~ /^\d{3} /) { + # Looks like a response created by HTTP::Response->new + ($code, $message) = split(' ', $status_line, 2); + } else { + ($protocol, $code, $message) = split(' ', $status_line, 3); + } + $self->protocol($protocol) if $protocol; + $self->code($code) if defined($code); + $self->message($message) if defined($message); + $self; +} + + +sub clone +{ + my $self = shift; + my $clone = bless $self->SUPER::clone, ref($self); + $clone->code($self->code); + $clone->message($self->message); + $clone->request($self->request->clone) if $self->request; + # we don't clone previous + $clone; +} + + +sub code { shift->_elem('_rc', @_); } +sub message { shift->_elem('_msg', @_); } +sub previous { shift->_elem('_previous',@_); } +sub request { shift->_elem('_request', @_); } + + +sub status_line +{ + my $self = shift; + my $code = $self->{'_rc'} || "000"; + my $mess = $self->{'_msg'} || HTTP::Status::status_message($code) || "Unknown code"; + return "$code $mess"; +} + + +sub base +{ + my $self = shift; + my $base = ( + $self->header('Content-Base'), # used to be HTTP/1.1 + $self->header('Content-Location'), # HTTP/1.1 + $self->header('Base'), # HTTP/1.0 + )[0]; + if ($base && $base =~ /^$URI::scheme_re:/o) { + # already absolute + return $HTTP::URI_CLASS->new($base); + } + + my $req = $self->request; + if ($req) { + # if $base is undef here, the return value is effectively + # just a copy of $self->request->uri. + return $HTTP::URI_CLASS->new_abs($base, $req->uri); + } + + # can't find an absolute base + return undef; +} + + +sub redirects { + my $self = shift; + my @r; + my $r = $self; + while (my $p = $r->previous) { + push(@r, $p); + $r = $p; + } + return @r unless wantarray; + return reverse @r; +} + + +sub filename +{ + my $self = shift; + my $file; + + my $cd = $self->header('Content-Disposition'); + if ($cd) { + require HTTP::Headers::Util; + if (my @cd = HTTP::Headers::Util::split_header_words($cd)) { + my ($disposition, undef, %cd_param) = @{$cd[-1]}; + $file = $cd_param{filename}; + + # RFC 2047 encoded? + if ($file && $file =~ /^=\?(.+?)\?(.+?)\?(.+)\?=$/) { + my $charset = $1; + my $encoding = uc($2); + my $encfile = $3; + + if ($encoding eq 'Q' || $encoding eq 'B') { + local($SIG{__DIE__}); + eval { + if ($encoding eq 'Q') { + $encfile =~ s/_/ /g; + require MIME::QuotedPrint; + $encfile = MIME::QuotedPrint::decode($encfile); + } + else { # $encoding eq 'B' + require MIME::Base64; + $encfile = MIME::Base64::decode($encfile); + } + + require Encode; + require Encode::Locale; + Encode::from_to($encfile, $charset, "locale_fs"); + }; + + $file = $encfile unless $@; + } + } + } + } + + unless (defined($file) && length($file)) { + my $uri; + if (my $cl = $self->header('Content-Location')) { + $uri = URI->new($cl); + } + elsif (my $request = $self->request) { + $uri = $request->uri; + } + + if ($uri) { + $file = ($uri->path_segments)[-1]; + } + } + + if ($file) { + $file =~ s,.*[\\/],,; # basename + } + + if ($file && !length($file)) { + $file = undef; + } + + $file; +} + + +sub as_string +{ + my $self = shift; + my($eol) = @_; + $eol = "\n" unless defined $eol; + + my $status_line = $self->status_line; + my $proto = $self->protocol; + $status_line = "$proto $status_line" if $proto; + + return join($eol, $status_line, $self->SUPER::as_string(@_)); +} + + +sub dump +{ + my $self = shift; + + my $status_line = $self->status_line; + my $proto = $self->protocol; + $status_line = "$proto $status_line" if $proto; + + return $self->SUPER::dump( + preheader => $status_line, + @_, + ); +} + + +sub is_info { HTTP::Status::is_info (shift->{'_rc'}); } +sub is_success { HTTP::Status::is_success (shift->{'_rc'}); } +sub is_redirect { HTTP::Status::is_redirect (shift->{'_rc'}); } +sub is_error { HTTP::Status::is_error (shift->{'_rc'}); } +sub is_client_error { HTTP::Status::is_client_error (shift->{'_rc'}); } +sub is_server_error { HTTP::Status::is_server_error (shift->{'_rc'}); } + + +sub error_as_HTML +{ + my $self = shift; + my $title = 'An Error Occurred'; + my $body = $self->status_line; + $body =~ s/&/&/g; + $body =~ s/</</g; + return <<EOM; +<html> +<head><title>$title</title></head> +<body> +<h1>$title</h1> +<p>$body</p> +</body> +</html> +EOM +} + + +sub current_age +{ + my $self = shift; + my $time = shift; + + # Implementation of RFC 2616 section 13.2.3 + # (age calculations) + my $response_time = $self->client_date; + my $date = $self->date; + + my $age = 0; + if ($response_time && $date) { + $age = $response_time - $date; # apparent_age + $age = 0 if $age < 0; + } + + my $age_v = $self->header('Age'); + if ($age_v && $age_v > $age) { + $age = $age_v; # corrected_received_age + } + + if ($response_time) { + my $request = $self->request; + if ($request) { + my $request_time = $request->date; + if ($request_time && $request_time < $response_time) { + # Add response_delay to age to get 'corrected_initial_age' + $age += $response_time - $request_time; + } + } + $age += ($time || time) - $response_time; + } + return $age; +} + + +sub freshness_lifetime +{ + my($self, %opt) = @_; + + # First look for the Cache-Control: max-age=n header + for my $cc ($self->header('Cache-Control')) { + for my $cc_dir (split(/\s*,\s*/, $cc)) { + return $1 if $cc_dir =~ /^max-age\s*=\s*(\d+)/i; + } + } + + # Next possibility is to look at the "Expires" header + my $date = $self->date || $self->client_date || $opt{time} || time; + if (my $expires = $self->expires) { + return $expires - $date; + } + + # Must apply heuristic expiration + return undef if exists $opt{heuristic_expiry} && !$opt{heuristic_expiry}; + + # Default heuristic expiration parameters + $opt{h_min} ||= 60; + $opt{h_max} ||= 24 * 3600; + $opt{h_lastmod_fraction} ||= 0.10; # 10% since last-mod suggested by RFC2616 + $opt{h_default} ||= 3600; + + # Should give a warning if more than 24 hours according to + # RFC 2616 section 13.2.4. Here we just make this the default + # maximum value. + + if (my $last_modified = $self->last_modified) { + my $h_exp = ($date - $last_modified) * $opt{h_lastmod_fraction}; + return $opt{h_min} if $h_exp < $opt{h_min}; + return $opt{h_max} if $h_exp > $opt{h_max}; + return $h_exp; + } + + # default when all else fails + return $opt{h_min} if $opt{h_min} > $opt{h_default}; + return $opt{h_default}; +} + + +sub is_fresh +{ + my($self, %opt) = @_; + $opt{time} ||= time; + my $f = $self->freshness_lifetime(%opt); + return undef unless defined($f); + return $f > $self->current_age($opt{time}); +} + + +sub fresh_until +{ + my($self, %opt) = @_; + $opt{time} ||= time; + my $f = $self->freshness_lifetime(%opt); + return undef unless defined($f); + return $f - $self->current_age($opt{time}) + $opt{time}; +} + +1; + + +__END__ + +=head1 NAME + +HTTP::Response - HTTP style response message + +=head1 SYNOPSIS + +Response objects are returned by the request() method of the C<LWP::UserAgent>: + + # ... + $response = $ua->request($request) + if ($response->is_success) { + print $response->decoded_content; + } + else { + print STDERR $response->status_line, "\n"; + } + +=head1 DESCRIPTION + +The C<HTTP::Response> class encapsulates HTTP style responses. A +response consists of a response line, some headers, and a content +body. Note that the LWP library uses HTTP style responses even for +non-HTTP protocol schemes. Instances of this class are usually +created and returned by the request() method of an C<LWP::UserAgent> +object. + +C<HTTP::Response> is a subclass of C<HTTP::Message> and therefore +inherits its methods. The following additional methods are available: + +=over 4 + +=item $r = HTTP::Response->new( $code ) + +=item $r = HTTP::Response->new( $code, $msg ) + +=item $r = HTTP::Response->new( $code, $msg, $header ) + +=item $r = HTTP::Response->new( $code, $msg, $header, $content ) + +Constructs a new C<HTTP::Response> object describing a response with +response code $code and optional message $msg. The optional $header +argument should be a reference to an C<HTTP::Headers> object or a +plain array reference of key/value pairs. The optional $content +argument should be a string of bytes. The meanings of these arguments are +described below. + +=item $r = HTTP::Response->parse( $str ) + +This constructs a new response object by parsing the given string. + +=item $r->code + +=item $r->code( $code ) + +This is used to get/set the code attribute. The code is a 3 digit +number that encode the overall outcome of an HTTP response. The +C<HTTP::Status> module provide constants that provide mnemonic names +for the code attribute. + +=item $r->message + +=item $r->message( $message ) + +This is used to get/set the message attribute. The message is a short +human readable single line string that explains the response code. + +=item $r->header( $field ) + +=item $r->header( $field => $value ) + +This is used to get/set header values and it is inherited from +C<HTTP::Headers> via C<HTTP::Message>. See L<HTTP::Headers> for +details and other similar methods that can be used to access the +headers. + +=item $r->content + +=item $r->content( $bytes ) + +This is used to get/set the raw content and it is inherited from the +C<HTTP::Message> base class. See L<HTTP::Message> for details and +other methods that can be used to access the content. + +=item $r->decoded_content( %options ) + +This will return the content after any C<Content-Encoding> and +charsets have been decoded. See L<HTTP::Message> for details. + +=item $r->request + +=item $r->request( $request ) + +This is used to get/set the request attribute. The request attribute +is a reference to the request that caused this response. It does +not have to be the same request passed to the $ua->request() method, +because there might have been redirects and authorization retries in +between. + +=item $r->previous + +=item $r->previous( $response ) + +This is used to get/set the previous attribute. The previous +attribute is used to link together chains of responses. You get +chains of responses if the first response is redirect or unauthorized. +The value is C<undef> if this is the first response in a chain. + +Note that the method $r->redirects is provided as a more convenient +way to access the response chain. + +=item $r->status_line + +Returns the string "E<lt>code> E<lt>message>". If the message attribute +is not set then the official name of E<lt>code> (see L<HTTP::Status>) +is substituted. + +=item $r->base + +Returns the base URI for this response. The return value will be a +reference to a URI object. + +The base URI is obtained from one the following sources (in priority +order): + +=over 4 + +=item 1. + +Embedded in the document content, for instance <BASE HREF="..."> +in HTML documents. + +=item 2. + +A "Content-Base:" or a "Content-Location:" header in the response. + +For backwards compatibility with older HTTP implementations we will +also look for the "Base:" header. + +=item 3. + +The URI used to request this response. This might not be the original +URI that was passed to $ua->request() method, because we might have +received some redirect responses first. + +=back + +If none of these sources provide an absolute URI, undef is returned. + +When the LWP protocol modules produce the HTTP::Response object, then +any base URI embedded in the document (step 1) will already have +initialized the "Content-Base:" header. This means that this method +only performs the last 2 steps (the content is not always available +either). + +=item $r->filename + +Returns a filename for this response. Note that doing sanity checks +on the returned filename (eg. removing characters that cannot be used +on the target filesystem where the filename would be used, and +laundering it for security purposes) are the caller's responsibility; +the only related thing done by this method is that it makes a simple +attempt to return a plain filename with no preceding path segments. + +The filename is obtained from one the following sources (in priority +order): + +=over 4 + +=item 1. + +A "Content-Disposition:" header in the response. Proper decoding of +RFC 2047 encoded filenames requires the C<MIME::QuotedPrint> (for "Q" +encoding), C<MIME::Base64> (for "B" encoding), and C<Encode> modules. + +=item 2. + +A "Content-Location:" header in the response. + +=item 3. + +The URI used to request this response. This might not be the original +URI that was passed to $ua->request() method, because we might have +received some redirect responses first. + +=back + +If a filename cannot be derived from any of these sources, undef is +returned. + +=item $r->as_string + +=item $r->as_string( $eol ) + +Returns a textual representation of the response. + +=item $r->is_info + +=item $r->is_success + +=item $r->is_redirect + +=item $r->is_error + +=item $r->is_client_error + +=item $r->is_server_error + +These methods indicate if the response was informational, successful, a +redirection, or an error. See L<HTTP::Status> for the meaning of these. + +=item $r->error_as_HTML + +Returns a string containing a complete HTML document indicating what +error occurred. This method should only be called when $r->is_error +is TRUE. + +=item $r->redirects + +Returns the list of redirect responses that lead up to this response +by following the $r->previous chain. The list order is oldest first. + +In scalar context return the number of redirect responses leading up +to this one. + +=item $r->current_age + +Calculates the "current age" of the response as specified by RFC 2616 +section 13.2.3. The age of a response is the time since it was sent +by the origin server. The returned value is a number representing the +age in seconds. + +=item $r->freshness_lifetime( %opt ) + +Calculates the "freshness lifetime" of the response as specified by +RFC 2616 section 13.2.4. The "freshness lifetime" is the length of +time between the generation of a response and its expiration time. +The returned value is the number of seconds until expiry. + +If the response does not contain an "Expires" or a "Cache-Control" +header, then this function will apply some simple heuristic based on +the "Last-Modified" header to determine a suitable lifetime. The +following options might be passed to control the heuristics: + +=over + +=item heuristic_expiry => $bool + +If passed as a FALSE value, don't apply heuristics and just return +C<undef> when "Expires" or "Cache-Control" is lacking. + +=item h_lastmod_fraction => $num + +This number represent the fraction of the difference since the +"Last-Modified" timestamp to make the expiry time. The default is +C<0.10>, the suggested typical setting of 10% in RFC 2616. + +=item h_min => $sec + +This is the lower limit of the heuristic expiry age to use. The +default is C<60> (1 minute). + +=item h_max => $sec + +This is the upper limit of the heuristic expiry age to use. The +default is C<86400> (24 hours). + +=item h_default => $sec + +This is the expiry age to use when nothing else applies. The default +is C<3600> (1 hour) or "h_min" if greater. + +=back + +=item $r->is_fresh( %opt ) + +Returns TRUE if the response is fresh, based on the values of +freshness_lifetime() and current_age(). If the response is no longer +fresh, then it has to be re-fetched or re-validated by the origin +server. + +Options might be passed to control expiry heuristics, see the +description of freshness_lifetime(). + +=item $r->fresh_until( %opt ) + +Returns the time (seconds since epoch) when this entity is no longer fresh. + +Options might be passed to control expiry heuristics, see the +description of freshness_lifetime(). + +=back + +=head1 SEE ALSO + +L<HTTP::Headers>, L<HTTP::Message>, L<HTTP::Status>, L<HTTP::Request> + +=head1 COPYRIGHT + +Copyright 1995-2004 Gisle Aas. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + diff --git a/lib/HTTP/Status.pm b/lib/HTTP/Status.pm new file mode 100644 index 0000000..aa531d1 --- /dev/null +++ b/lib/HTTP/Status.pm @@ -0,0 +1,269 @@ +package HTTP::Status; + +use strict; +use warnings; + +require 5.002; # because we use prototypes + +use base 'Exporter'; +our @EXPORT = qw(is_info is_success is_redirect is_error status_message); +our @EXPORT_OK = qw(is_client_error is_server_error); + +our $VERSION = "6.10"; + +# Note also addition of mnemonics to @EXPORT below + +# Unmarked codes are from RFC 2616 +# See also: http://en.wikipedia.org/wiki/List_of_HTTP_status_codes + +my %StatusCode = ( + 100 => 'Continue', + 101 => 'Switching Protocols', + 102 => 'Processing', # RFC 2518 (WebDAV) + 200 => 'OK', + 201 => 'Created', + 202 => 'Accepted', + 203 => 'Non-Authoritative Information', + 204 => 'No Content', + 205 => 'Reset Content', + 206 => 'Partial Content', + 207 => 'Multi-Status', # RFC 2518 (WebDAV) + 208 => 'Already Reported', # RFC 5842 + 300 => 'Multiple Choices', + 301 => 'Moved Permanently', + 302 => 'Found', + 303 => 'See Other', + 304 => 'Not Modified', + 305 => 'Use Proxy', + 307 => 'Temporary Redirect', + 308 => 'Permanent Redirect', # RFC 7238 + 400 => 'Bad Request', + 401 => 'Unauthorized', + 402 => 'Payment Required', + 403 => 'Forbidden', + 404 => 'Not Found', + 405 => 'Method Not Allowed', + 406 => 'Not Acceptable', + 407 => 'Proxy Authentication Required', + 408 => 'Request Timeout', + 409 => 'Conflict', + 410 => 'Gone', + 411 => 'Length Required', + 412 => 'Precondition Failed', + 413 => 'Request Entity Too Large', + 414 => 'Request-URI Too Large', + 415 => 'Unsupported Media Type', + 416 => 'Request Range Not Satisfiable', + 417 => 'Expectation Failed', + 418 => 'I\'m a teapot', # RFC 2324 + 422 => 'Unprocessable Entity', # RFC 2518 (WebDAV) + 423 => 'Locked', # RFC 2518 (WebDAV) + 424 => 'Failed Dependency', # RFC 2518 (WebDAV) + 425 => 'No code', # WebDAV Advanced Collections + 426 => 'Upgrade Required', # RFC 2817 + 428 => 'Precondition Required', + 429 => 'Too Many Requests', + 431 => 'Request Header Fields Too Large', + 449 => 'Retry with', # unofficial Microsoft + 500 => 'Internal Server Error', + 501 => 'Not Implemented', + 502 => 'Bad Gateway', + 503 => 'Service Unavailable', + 504 => 'Gateway Timeout', + 505 => 'HTTP Version Not Supported', + 506 => 'Variant Also Negotiates', # RFC 2295 + 507 => 'Insufficient Storage', # RFC 2518 (WebDAV) + 509 => 'Bandwidth Limit Exceeded', # unofficial + 510 => 'Not Extended', # RFC 2774 + 511 => 'Network Authentication Required', +); + +my $mnemonicCode = ''; +my ($code, $message); +while (($code, $message) = each %StatusCode) { + # create mnemonic subroutines + $message =~ s/I'm/I am/; + $message =~ tr/a-z \-/A-Z__/; + $mnemonicCode .= "sub HTTP_$message () { $code }\n"; + $mnemonicCode .= "*RC_$message = \\&HTTP_$message;\n"; # legacy + $mnemonicCode .= "push(\@EXPORT_OK, 'HTTP_$message');\n"; + $mnemonicCode .= "push(\@EXPORT, 'RC_$message');\n"; +} +eval $mnemonicCode; # only one eval for speed +die if $@; + +# backwards compatibility +*RC_MOVED_TEMPORARILY = \&RC_FOUND; # 302 was renamed in the standard +push(@EXPORT, "RC_MOVED_TEMPORARILY"); + +our %EXPORT_TAGS = ( + constants => [grep /^HTTP_/, @EXPORT_OK], + is => [grep /^is_/, @EXPORT, @EXPORT_OK], +); + + +sub status_message ($) { $StatusCode{$_[0]}; } + +sub is_info ($) { $_[0] >= 100 && $_[0] < 200; } +sub is_success ($) { $_[0] >= 200 && $_[0] < 300; } +sub is_redirect ($) { $_[0] >= 300 && $_[0] < 400; } +sub is_error ($) { $_[0] >= 400 && $_[0] < 600; } +sub is_client_error ($) { $_[0] >= 400 && $_[0] < 500; } +sub is_server_error ($) { $_[0] >= 500 && $_[0] < 600; } + +1; + + +__END__ + +=head1 NAME + +HTTP::Status - HTTP Status code processing + +=head1 SYNOPSIS + + use HTTP::Status qw(:constants :is status_message); + + if ($rc != HTTP_OK) { + print status_message($rc), "\n"; + } + + if (is_success($rc)) { ... } + if (is_error($rc)) { ... } + if (is_redirect($rc)) { ... } + +=head1 DESCRIPTION + +I<HTTP::Status> is a library of routines for defining and +classifying HTTP status codes for libwww-perl. Status codes are +used to encode the overall outcome of an HTTP response message. Codes +correspond to those defined in RFC 2616 and RFC 2518. + +=head1 CONSTANTS + +The following constant functions can be used as mnemonic status code +names. None of these are exported by default. Use the C<:constants> +tag to import them all. + + HTTP_CONTINUE (100) + HTTP_SWITCHING_PROTOCOLS (101) + HTTP_PROCESSING (102) + + HTTP_OK (200) + HTTP_CREATED (201) + HTTP_ACCEPTED (202) + HTTP_NON_AUTHORITATIVE_INFORMATION (203) + HTTP_NO_CONTENT (204) + HTTP_RESET_CONTENT (205) + HTTP_PARTIAL_CONTENT (206) + HTTP_MULTI_STATUS (207) + HTTP_ALREADY_REPORTED (208) + + HTTP_MULTIPLE_CHOICES (300) + HTTP_MOVED_PERMANENTLY (301) + HTTP_FOUND (302) + HTTP_SEE_OTHER (303) + HTTP_NOT_MODIFIED (304) + HTTP_USE_PROXY (305) + HTTP_TEMPORARY_REDIRECT (307) + HTTP_PERMANENT_REDIRECT (308) + + HTTP_BAD_REQUEST (400) + HTTP_UNAUTHORIZED (401) + HTTP_PAYMENT_REQUIRED (402) + HTTP_FORBIDDEN (403) + HTTP_NOT_FOUND (404) + HTTP_METHOD_NOT_ALLOWED (405) + HTTP_NOT_ACCEPTABLE (406) + HTTP_PROXY_AUTHENTICATION_REQUIRED (407) + HTTP_REQUEST_TIMEOUT (408) + HTTP_CONFLICT (409) + HTTP_GONE (410) + HTTP_LENGTH_REQUIRED (411) + HTTP_PRECONDITION_FAILED (412) + HTTP_REQUEST_ENTITY_TOO_LARGE (413) + HTTP_REQUEST_URI_TOO_LARGE (414) + HTTP_UNSUPPORTED_MEDIA_TYPE (415) + HTTP_REQUEST_RANGE_NOT_SATISFIABLE (416) + HTTP_EXPECTATION_FAILED (417) + HTTP_I_AM_A_TEAPOT (418) + HTTP_UNPROCESSABLE_ENTITY (422) + HTTP_LOCKED (423) + HTTP_FAILED_DEPENDENCY (424) + HTTP_NO_CODE (425) + HTTP_UPGRADE_REQUIRED (426) + HTTP_PRECONDITION_REQUIRED (428) + HTTP_TOO_MANY_REQUESTS (429) + HTTP_REQUEST_HEADER_FIELDS_TOO_LARGE (431) + HTTP_RETRY_WITH (449) + + HTTP_INTERNAL_SERVER_ERROR (500) + HTTP_NOT_IMPLEMENTED (501) + HTTP_BAD_GATEWAY (502) + HTTP_SERVICE_UNAVAILABLE (503) + HTTP_GATEWAY_TIMEOUT (504) + HTTP_HTTP_VERSION_NOT_SUPPORTED (505) + HTTP_VARIANT_ALSO_NEGOTIATES (506) + HTTP_INSUFFICIENT_STORAGE (507) + HTTP_BANDWIDTH_LIMIT_EXCEEDED (509) + HTTP_NOT_EXTENDED (510) + HTTP_NETWORK_AUTHENTICATION_REQUIRED (511) + +=head1 FUNCTIONS + +The following additional functions are provided. Most of them are +exported by default. The C<:is> import tag can be used to import all +the classification functions. + +=over 4 + +=item status_message( $code ) + +The status_message() function will translate status codes to human +readable strings. The string is the same as found in the constant +names above. If the $code is unknown, then C<undef> is returned. + +=item is_info( $code ) + +Return TRUE if C<$code> is an I<Informational> status code (1xx). This +class of status code indicates a provisional response which can't have +any content. + +=item is_success( $code ) + +Return TRUE if C<$code> is a I<Successful> status code (2xx). + +=item is_redirect( $code ) + +Return TRUE if C<$code> is a I<Redirection> status code (3xx). This class of +status code indicates that further action needs to be taken by the +user agent in order to fulfill the request. + +=item is_error( $code ) + +Return TRUE if C<$code> is an I<Error> status code (4xx or 5xx). The function +returns TRUE for both client and server error status codes. + +=item is_client_error( $code ) + +Return TRUE if C<$code> is a I<Client Error> status code (4xx). This class +of status code is intended for cases in which the client seems to have +erred. + +This function is B<not> exported by default. + +=item is_server_error( $code ) + +Return TRUE if C<$code> is a I<Server Error> status code (5xx). This class +of status codes is intended for cases in which the server is aware +that it has erred or is incapable of performing the request. + +This function is B<not> exported by default. + +=back + +=head1 BUGS + +For legacy reasons all the C<HTTP_> constants are exported by default +with the prefix C<RC_>. It's recommended to use explicit imports and +the C<:constants> tag instead of relying on this. |