summaryrefslogtreecommitdiff
path: root/bin/lwp-request
diff options
context:
space:
mode:
Diffstat (limited to 'bin/lwp-request')
-rwxr-xr-xbin/lwp-request552
1 files changed, 552 insertions, 0 deletions
diff --git a/bin/lwp-request b/bin/lwp-request
new file mode 100755
index 0000000..d934404
--- /dev/null
+++ b/bin/lwp-request
@@ -0,0 +1,552 @@
+#!/usr/bin/perl -w
+
+# Simple user agent using LWP library.
+
+=head1 NAME
+
+lwp-request, GET, POST, HEAD - Simple command line user agent
+
+=head1 SYNOPSIS
+
+B<lwp-request> [B<-afPuUsSedvhx>] [B<-m> I<method>] [B<-b> I<base URL>] [B<-t> I<timeout>]
+ [B<-i> I<if-modified-since>] [B<-c> I<content-type>]
+ [B<-C> I<credentials>] [B<-p> I<proxy-url>] [B<-o> I<format>] I<url>...
+
+=head1 DESCRIPTION
+
+This program can be used to send requests to WWW servers and your
+local file system. The request content for POST and PUT
+methods is read from stdin. The content of the response is printed on
+stdout. Error messages are printed on stderr. The program returns a
+status value indicating the number of URLs that failed.
+
+The options are:
+
+=over 4
+
+=item -m <method>
+
+Set which method to use for the request. If this option is not used,
+then the method is derived from the name of the program.
+
+=item -f
+
+Force request through, even if the program believes that the method is
+illegal. The server might reject the request eventually.
+
+=item -b <uri>
+
+This URI will be used as the base URI for resolving all relative URIs
+given as argument.
+
+=item -t <timeout>
+
+Set the timeout value for the requests. The timeout is the amount of
+time that the program will wait for a response from the remote server
+before it fails. The default unit for the timeout value is seconds.
+You might append "m" or "h" to the timeout value to make it minutes or
+hours, respectively. The default timeout is '3m', i.e. 3 minutes.
+
+=item -i <time>
+
+Set the If-Modified-Since header in the request. If I<time> is the
+name of a file, use the modification timestamp for this file. If
+I<time> is not a file, it is parsed as a literal date. Take a look at
+L<HTTP::Date> for recognized formats.
+
+=item -c <content-type>
+
+Set the Content-Type for the request. This option is only allowed for
+requests that take a content, i.e. POST and PUT. You can
+force methods to take content by using the C<-f> option together with
+C<-c>. The default Content-Type for POST is
+C<application/x-www-form-urlencoded>. The default Content-type for
+the others is C<text/plain>.
+
+=item -p <proxy-url>
+
+Set the proxy to be used for the requests. The program also loads
+proxy settings from the environment. You can disable this with the
+C<-P> option.
+
+=item -P
+
+Don't load proxy settings from environment.
+
+=item -H <header>
+
+Send this HTTP header with each request. You can specify several, e.g.:
+
+ lwp-request \
+ -H 'Referer: http://other.url/' \
+ -H 'Host: somehost' \
+ http://this.url/
+
+=item -C <username>:<password>
+
+Provide credentials for documents that are protected by Basic
+Authentication. If the document is protected and you did not specify
+the username and password with this option, then you will be prompted
+to provide these values.
+
+=back
+
+The following options controls what is displayed by the program:
+
+=over 4
+
+=item -u
+
+Print request method and absolute URL as requests are made.
+
+=item -U
+
+Print request headers in addition to request method and absolute URL.
+
+=item -s
+
+Print response status code. This option is always on for HEAD requests.
+
+=item -S
+
+Print response status chain. This shows redirect and authorization
+requests that are handled by the library.
+
+=item -e
+
+Print response headers. This option is always on for HEAD requests.
+
+=item -E
+
+Print response status chain with full response headers.
+
+=item -d
+
+Do B<not> print the content of the response.
+
+=item -o <format>
+
+Process HTML content in various ways before printing it. If the
+content type of the response is not HTML, then this option has no
+effect. The legal format values are; I<text>, I<ps>, I<links>,
+I<html> and I<dump>.
+
+If you specify the I<text> format then the HTML will be formatted as
+plain latin1 text. If you specify the I<ps> format then it will be
+formatted as Postscript.
+
+The I<links> format will output all links found in the HTML document.
+Relative links will be expanded to absolute ones.
+
+The I<html> format will reformat the HTML code and the I<dump> format
+will just dump the HTML syntax tree.
+
+Note that the C<HTML-Tree> distribution needs to be installed for this
+option to work. In addition the C<HTML-Format> distribution needs to
+be installed for I<-o text> or I<-o ps> to work.
+
+=item -v
+
+Print the version number of the program and quit.
+
+=item -h
+
+Print usage message and quit.
+
+=item -a
+
+Set text(ascii) mode for content input and output. If this option is not
+used, content input and output is done in binary mode.
+
+=back
+
+Because this program is implemented using the LWP library, it will
+only support the protocols that LWP supports.
+
+=head1 SEE ALSO
+
+L<lwp-mirror>, L<LWP>
+
+=head1 COPYRIGHT
+
+Copyright 1995-1999 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=head1 AUTHOR
+
+Gisle Aas <gisle@aas.no>
+
+=cut
+
+$progname = $0;
+$progname =~ s,.*[\\/],,; # use basename only
+$progname =~ s/\.\w*$//; # strip extension, if any
+
+$VERSION = "6.09";
+
+
+require LWP;
+
+use URI;
+use URI::Heuristic qw(uf_uri);
+use Encode;
+use Encode::Locale;
+
+use HTTP::Status qw(status_message);
+use HTTP::Date qw(time2str str2time);
+
+
+# This table lists the methods that are allowed. It should really be
+# a superset for all methods supported for every scheme that may be
+# supported by the library. Currently it might be a bit too HTTP
+# specific. You might use the -f option to force a method through.
+#
+# "" = No content in request, "C" = Needs content in request
+#
+%allowed_methods = (
+ GET => "",
+ HEAD => "",
+ POST => "C",
+ PUT => "C",
+ DELETE => "",
+ TRACE => "",
+ OPTIONS => "",
+);
+
+
+# We make our own specialization of LWP::UserAgent that asks for
+# user/password if document is protected.
+{
+ package RequestAgent;
+ @ISA = qw(LWP::UserAgent);
+
+ sub new
+ {
+ my $self = LWP::UserAgent::new(@_);
+ $self->agent("lwp-request/$main::VERSION ");
+ $self;
+ }
+
+ sub get_basic_credentials
+ {
+ my($self, $realm, $uri) = @_;
+ if ($main::options{'C'}) {
+ return split(':', $main::options{'C'}, 2);
+ }
+ elsif (-t) {
+ my $netloc = $uri->host_port;
+ print STDERR "Enter username for $realm at $netloc: ";
+ my $user = <STDIN>;
+ chomp($user);
+ return (undef, undef) unless length $user;
+ print STDERR "Password: ";
+ system("stty -echo");
+ my $password = <STDIN>;
+ system("stty echo");
+ print STDERR "\n"; # because we disabled echo
+ chomp($password);
+ return ($user, $password);
+ }
+ else {
+ return (undef, undef)
+ }
+ }
+}
+
+$method = uc(lc($progname) eq "lwp-request" ? "GET" : $progname);
+
+# Parse command line
+use Getopt::Long;
+
+my @getopt_args = (
+ 'a', # content i/o in text(ascii) mode
+ 'm=s', # set method
+ 'f', # make request even if method is not in %allowed_methods
+ 'b=s', # base url
+ 't=s', # timeout
+ 'i=s', # if-modified-since
+ 'c=s', # content type for POST
+ 'C=s', # credentials for basic authorization
+ 'H=s@', # extra headers, form "Header: value string"
+ #
+ 'u', # display method and URL of request
+ 'U', # display request headers also
+ 's', # display status code
+ 'S', # display whole chain of status codes
+ 'e', # display response headers (default for HEAD)
+ 'E', # display whole chain of headers
+ 'd', # don't display content
+ #
+ 'h', # print usage
+ 'v', # print version
+ #
+ 'p=s', # proxy URL
+ 'P', # don't load proxy setting from environment
+ #
+ 'o=s', # output format
+);
+
+Getopt::Long::config("noignorecase", "bundling");
+unless (GetOptions(\%options, @getopt_args)) {
+ usage();
+}
+if ($options{'v'}) {
+ require LWP;
+ my $DISTNAME = 'libwww-perl-' . LWP::Version();
+ die <<"EOT";
+This is lwp-request version $VERSION ($DISTNAME)
+
+Copyright 1995-1999, Gisle Aas.
+
+This program is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+EOT
+}
+
+usage() if $options{'h'} || !@ARGV;
+
+# Create the user agent object
+$ua = RequestAgent->new;
+
+# Load proxy settings from *_proxy environment variables.
+$ua->env_proxy unless $options{'P'};
+
+$method = uc($options{'m'}) if defined $options{'m'};
+
+if ($options{'f'}) {
+ if ($options{'c'}) {
+ $allowed_methods{$method} = "C"; # force content
+ }
+ else {
+ $allowed_methods{$method} = "";
+ }
+}
+elsif (!defined $allowed_methods{$method}) {
+ die "$progname: $method is not an allowed method\n";
+}
+
+if ($options{'S'} || $options{'E'}) {
+ $options{'U'} = 1 if $options{'E'};
+ $options{'E'} = 1 if $options{'e'};
+ $options{'S'} = 1;
+ $options{'s'} = 1;
+ $options{'u'} = 1;
+}
+
+if ($method eq "HEAD") {
+ $options{'s'} = 1;
+ $options{'e'} = 1 unless $options{'d'};
+ $options{'d'} = 1;
+}
+
+$options{'u'} = 1 if $options{'U'};
+$options{'s'} = 1 if $options{'e'};
+
+if (defined $options{'t'}) {
+ $options{'t'} =~ /^(\d+)([smh])?/;
+ die "$progname: Illegal timeout value!\n" unless defined $1;
+ $timeout = $1;
+ if (defined $2) {
+ $timeout *= 60 if $2 eq "m";
+ $timeout *= 3600 if $2 eq "h";
+ }
+ $ua->timeout($timeout);
+}
+
+if (defined $options{'i'}) {
+ if (-e $options{'i'}) {
+ $time = (stat _)[9];
+ }
+ else {
+ $time = str2time($options{'i'});
+ die "$progname: Illegal time syntax for -i option\n"
+ unless defined $time;
+ }
+ $options{'i'} = time2str($time);
+}
+
+$content = undef;
+$user_ct = undef;
+if ($allowed_methods{$method} eq "C") {
+ # This request needs some content
+ unless (defined $options{'c'}) {
+ # set default content type
+ $options{'c'} = ($method eq "POST") ?
+ "application/x-www-form-urlencoded"
+ : "text/plain";
+ }
+ else {
+ die "$progname: Illegal Content-type format\n"
+ unless $options{'c'} =~ m,^[\w\-]+/[\w\-.+]+(?:\s*;.*)?$,;
+ $user_ct++;
+ }
+ print STDERR "Please enter content ($options{'c'}) to be ${method}ed:\n"
+ if -t;
+ binmode STDIN unless -t or $options{'a'};
+ $content = join("", <STDIN>);
+}
+else {
+ die "$progname: Can't set Content-type for $method requests\n"
+ if defined $options{'c'};
+}
+
+# Set up a request. We will use the same request object for all URLs.
+$request = HTTP::Request->new($method);
+$request->header('If-Modified-Since', $options{'i'}) if defined $options{'i'};
+for my $user_header (@{ $options{'H'} || [] }) {
+ my ($header_name, $header_value) = split /\s*:\s*/, $user_header, 2;
+ $header_name =~ s/^\s+//;
+ if (lc($header_name) eq "user-agent") {
+ $header_value .= $ua->agent if $header_value =~ /\s\z/;
+ $ua->agent($header_value);
+ }
+ else {
+ $request->push_header($header_name, $header_value);
+ }
+}
+#$request->header('Accept', '*/*');
+if ($options{'c'}) { # will always be set for request that wants content
+ my $header = ($user_ct ? 'header' : 'init_header');
+ $request->$header('Content-Type', $options{'c'});
+ $request->header('Content-Length', length $content); # Not really needed
+ $request->content($content);
+}
+
+$errors = 0;
+
+sub show {
+ my $r = shift;
+ my $last = shift;
+ print $method, " ", $r->request->uri->as_string, "\n" if $options{'u'};
+ print $r->request->headers_as_string, "\n" if $options{'U'};
+ print $r->status_line, "\n" if $options{'s'};
+ print $r->headers_as_string, "\n" if $options{'E'} or $last;
+}
+
+# Ok, now we perform the requests, one URL at a time
+while ($url = shift) {
+ # Create the URL object, but protect us against bad URLs
+ eval {
+ if ($url =~ /^\w+:/ || $options{'b'}) { # is there any scheme specification
+ $url = URI->new(decode(locale => $url), decode(locale => $options{'b'}));
+ $url = $url->abs(decode(locale => $options{'b'})) if $options{'b'};
+ }
+ else {
+ $url = uf_uri($url);
+ }
+ };
+ if ($@) {
+ $@ =~ s/ at .* line \d+.*//;
+ print STDERR $@;
+ $errors++;
+ next;
+ }
+
+ $ua->proxy($url->scheme, decode(locale => $options{'p'})) if $options{'p'};
+
+ # Send the request and get a response back from the server
+ $request->uri($url);
+ $response = $ua->request($request);
+
+ if ($options{'S'}) {
+ for my $r ($response->redirects) {
+ show($r);
+ }
+ }
+ show($response, $options{'e'});
+
+ unless ($options{'d'}) {
+ if ($options{'o'} &&
+ $response->content_type eq 'text/html') {
+ eval {
+ require HTML::Parse;
+ };
+ if ($@) {
+ if ($@ =~ m,^Can't locate HTML/Parse.pm in \@INC,) {
+ die "The HTML-Tree distribution need to be installed for the -o option to be used.\n";
+ }
+ else {
+ die $@;
+ }
+ }
+ my $html = HTML::Parse::parse_html($response->content);
+ {
+ $options{'o'} eq 'ps' && do {
+ require HTML::FormatPS;
+ my $f = HTML::FormatPS->new;
+ print $f->format($html);
+ last;
+ };
+ $options{'o'} eq 'text' && do {
+ require HTML::FormatText;
+ my $f = HTML::FormatText->new;
+ print $f->format($html);
+ last;
+ };
+ $options{'o'} eq 'html' && do {
+ print $html->as_HTML;
+ last;
+ };
+ $options{'o'} eq 'links' && do {
+ my $base = $response->base;
+ $base = $options{'b'} if $options{'b'};
+ for ( @{ $html->extract_links } ) {
+ my($link, $elem) = @$_;
+ my $tag = uc $elem->tag;
+ $link = URI->new($link)->abs($base)->as_string;
+ print "$tag\t$link\n";
+ }
+ last;
+ };
+ $options{'o'} eq 'dump' && do {
+ $html->dump;
+ last;
+ };
+ # It is bad to not notice this before now :-(
+ die "Illegal -o option value ($options{'o'})\n";
+ }
+ }
+ else {
+ binmode STDOUT unless $options{'a'};
+ print $response->content;
+ }
+ }
+
+ $errors++ unless $response->is_success;
+}
+
+exit $errors;
+
+
+sub usage
+{
+ die <<"EOT";
+Usage: $progname [-options] <url>...
+ -m <method> use method for the request (default is '$method')
+ -f make request even if $progname believes method is illegal
+ -b <base> Use the specified URL as base
+ -t <timeout> Set timeout value
+ -i <time> Set the If-Modified-Since header on the request
+ -c <conttype> use this content-type for POST, PUT, CHECKIN
+ -a Use text mode for content I/O
+ -p <proxyurl> use this as a proxy
+ -P don't load proxy settings from environment
+ -H <header> send this HTTP header (you can specify several)
+ -C <username>:<password>
+ provide credentials for basic authentication
+
+ -u Display method and URL before any response
+ -U Display request headers (implies -u)
+ -s Display response status code
+ -S Display response status chain (implies -u)
+ -e Display response headers (implies -s)
+ -E Display whole chain of headers (implies -S and -U)
+ -d Do not display content
+ -o <format> Process HTML content in various ways
+
+ -v Show program version
+ -h Print this message
+EOT
+}